From 12fdcd5d0700daea299b8d429d3cf00b08ff9adf Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Sat, 6 Dec 2025 22:35:50 -0500 Subject: [PATCH 1/4] jitlayers: Enable FastISel on AArch64 at -O0/-O1 FastISel was disabled on AArch64 in 2015 (PR #13393) to fix issue #13321, but that issue was specifically about 32-bit ARM (ARMv7) segfaults during bootstrap. The AArch64 exclusion was added conservatively alongside the ARM fix. AArch64 FastISel has been actively maintained upstream with recent bug fixes: - https://github.com/llvm/llvm-project/pull/75993 (Jan 2024) - https://github.com/llvm/llvm-project/pull/133987 (May 2025) This enables faster instruction selection for JIT compilation on AArch64 at lower optimization levels, reducing compilation latency. --- src/jitlayers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index 90091cc1f38db..dc41de7c3a853 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -1897,7 +1897,7 @@ void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER { void fixupTM(TargetMachine &TM) { auto TheTriple = TM.getTargetTriple(); if (jl_options.opt_level < 2) { - if (!TheTriple.isARM() && !TheTriple.isPPC64() && !TheTriple.isAArch64()) + if (!TheTriple.isARM() && !TheTriple.isPPC64()) TM.setFastISel(true); else // FastISel seems to be buggy Ref #13321 TM.setFastISel(false); From dc0539badaa1770209605dd353062b194f386040 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Sat, 6 Dec 2025 23:06:15 -0500 Subject: [PATCH 2/4] jitlayers: Use GlobalISel on AArch64 at -O0/-O1 GlobalISel is LLVM's modern instruction selector that is designed to replace both FastISel and SelectionDAG. On AArch64, it is mature and enabled by default at -O0 in upstream LLVM. This enables GlobalISel with fallback mode on AArch64, which provides faster instruction selection than SelectionDAG while maintaining correctness by falling back to SelectionDAG for unsupported patterns. Note: This requires RemoveJuliaAddrspacesPass to run before codegen, which is already the case in the current pipeline (see pipeline.cpp comment about GlobalISel not liking Julia's address spaces). Co-Authored-By: Claude --- src/jitlayers.cpp | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/jitlayers.cpp b/src/jitlayers.cpp index dc41de7c3a853..a60396cfca5df 100644 --- a/src/jitlayers.cpp +++ b/src/jitlayers.cpp @@ -1897,7 +1897,17 @@ void optimizeDLSyms(Module &M) JL_NOTSAFEPOINT_LEAVE JL_NOTSAFEPOINT_ENTER { void fixupTM(TargetMachine &TM) { auto TheTriple = TM.getTargetTriple(); if (jl_options.opt_level < 2) { - if (!TheTriple.isARM() && !TheTriple.isPPC64()) + // Try GlobalISel on AArch64 - it's the default in LLVM at -O0 and + // is apparently generally faster than SelectionDAG while producing good code. + // Use fallback mode so unsupported patterns fall back to SelectionDAG. + // Note: Requires RemoveJuliaAddrspacesPass to run before codegen + // because GlobalISel doesn't handle Julia's custom address spaces. + if (TheTriple.isAArch64()) { + TM.setGlobalISel(true); + TM.setGlobalISelAbort(GlobalISelAbortMode::Disable); + TM.setFastISel(false); + } + else if (!TheTriple.isARM() && !TheTriple.isPPC64()) TM.setFastISel(true); else // FastISel seems to be buggy Ref #13321 TM.setFastISel(false); From cc950eaa00b574c009fa53a4efeb4a50bdfc0f20 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 9 Dec 2025 09:40:15 -0500 Subject: [PATCH 3/4] add test --- test/atomics.jl | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/test/atomics.jl b/test/atomics.jl index 369a63f7d5fbf..58e518dbdc7e2 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -1115,3 +1115,15 @@ function add_one57190!() end @test add_one57190!() == 1 + +# Test atomic Float16 operations at all optimization levels (GlobalISel miscompile on AArch64) +# See https://github.com/JuliaLang/julia/pull/54140#issuecomment-2855794363 +for opt in 0:3 + @test success(run(```$(Base.julia_cmd()) --startup-file=no -O$opt -e ' + a = Threads.Atomic{Float16}(Float16(0)) + a[] = Float16(1.5) + @assert a[] === Float16(1.5) "atomic Float16 store failed: got \$(a[]) expected 1.5" + a[] = Float16(3.25) + @assert a[] === Float16(3.25) "atomic Float16 store failed: got \$(a[]) expected 3.25" + '```)) +end From e49df8d4124b9f3a1a5e166134d677ce4c059788 Mon Sep 17 00:00:00 2001 From: Ian Butterworth Date: Tue, 9 Dec 2025 10:49:51 -0500 Subject: [PATCH 4/4] make test clearer --- test/atomics.jl | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/test/atomics.jl b/test/atomics.jl index 58e518dbdc7e2..b212bd118becb 100644 --- a/test/atomics.jl +++ b/test/atomics.jl @@ -1122,8 +1122,8 @@ for opt in 0:3 @test success(run(```$(Base.julia_cmd()) --startup-file=no -O$opt -e ' a = Threads.Atomic{Float16}(Float16(0)) a[] = Float16(1.5) - @assert a[] === Float16(1.5) "atomic Float16 store failed: got \$(a[]) expected 1.5" + @assert a[] === Float16(1.5) "atomic Float16 store failed: got \$(a[]) expected 1.5 (opt level = -O$(Base.JLOptions().opt_level))" a[] = Float16(3.25) - @assert a[] === Float16(3.25) "atomic Float16 store failed: got \$(a[]) expected 3.25" + @assert a[] === Float16(3.25) "atomic Float16 store failed: got \$(a[]) expected 3.25 (opt level = -O$(Base.JLOptions().opt_level))" '```)) end