From 792bd9c3b91ba2ac5a22ddeb6952cfbbc870665d Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Wed, 26 Feb 2025 16:58:53 +0100
Subject: [PATCH 01/12] support `#[target_feature]` on `#[naked]` functions

---
 .../rustc_codegen_ssa/src/mir/naked_asm.rs    | 224 ++++++++++++++++++
 tests/assembly/naked-fn-target-feature.rs     | 173 ++++++++++++++
 2 files changed, 397 insertions(+)
 create mode 100644 tests/assembly/naked-fn-target-feature.rs

diff --git a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
index 0593fb420c306..0d7c5652b5ac3 100644
--- a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
@@ -1,6 +1,8 @@
+use object::{Architecture, SubArchitecture};
 use rustc_abi::{BackendRepr, Float, Integer, Primitive, RegKind};
 use rustc_attr_parsing::InstructionSetAttr;
 use rustc_hir::def_id::DefId;
+use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrs, TargetFeature};
 use rustc_middle::mir::mono::{Linkage, MonoItem, MonoItemData, Visibility};
 use rustc_middle::mir::{Body, InlineAsmOperand};
 use rustc_middle::ty::layout::{FnAbiOf, HasTyCtxt, HasTypingEnv, LayoutOf};
@@ -104,6 +106,215 @@ fn inline_to_global_operand<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
     }
 }
 
+// FIXME share code with `create_object_file`
+fn parse_architecture(
+    sess: &rustc_session::Session,
+) -> Option<(Architecture, Option<SubArchitecture>)> {
+    let (architecture, subarchitecture) = match &sess.target.arch[..] {
+        "arm" => (Architecture::Arm, None),
+        "aarch64" => (
+            if sess.target.pointer_width == 32 {
+                Architecture::Aarch64_Ilp32
+            } else {
+                Architecture::Aarch64
+            },
+            None,
+        ),
+        "x86" => (Architecture::I386, None),
+        "s390x" => (Architecture::S390x, None),
+        "mips" | "mips32r6" => (Architecture::Mips, None),
+        "mips64" | "mips64r6" => (Architecture::Mips64, None),
+        "x86_64" => (
+            if sess.target.pointer_width == 32 {
+                Architecture::X86_64_X32
+            } else {
+                Architecture::X86_64
+            },
+            None,
+        ),
+        "powerpc" => (Architecture::PowerPc, None),
+        "powerpc64" => (Architecture::PowerPc64, None),
+        "riscv32" => (Architecture::Riscv32, None),
+        "riscv64" => (Architecture::Riscv64, None),
+        "sparc" => {
+            if sess.unstable_target_features.contains(&sym::v8plus) {
+                // Target uses V8+, aka EM_SPARC32PLUS, aka 64-bit V9 but in 32-bit mode
+                (Architecture::Sparc32Plus, None)
+            } else {
+                // Target uses V7 or V8, aka EM_SPARC
+                (Architecture::Sparc, None)
+            }
+        }
+        "sparc64" => (Architecture::Sparc64, None),
+        "avr" => (Architecture::Avr, None),
+        "msp430" => (Architecture::Msp430, None),
+        "hexagon" => (Architecture::Hexagon, None),
+        "bpf" => (Architecture::Bpf, None),
+        "loongarch64" => (Architecture::LoongArch64, None),
+        "csky" => (Architecture::Csky, None),
+        "arm64ec" => (Architecture::Aarch64, Some(SubArchitecture::Arm64EC)),
+
+        // added here
+        "wasm32" => (Architecture::Wasm32, None),
+        "wasm64" => (Architecture::Wasm64, None),
+        "m68k" => (Architecture::M68k, None),
+
+        // Unsupported architecture.
+        _ => return None,
+    };
+
+    Some((architecture, subarchitecture))
+}
+
+/// Enable the function's target features in the body of the function, then disable them again
+fn enable_disable_target_features<'tcx>(
+    tcx: TyCtxt<'tcx>,
+    attrs: &CodegenFnAttrs,
+) -> Option<(String, String)> {
+    use std::fmt::Write;
+
+    let mut begin = String::new();
+    let mut end = String::new();
+
+    let (architecture, _subarchitecture) = parse_architecture(tcx.sess)?;
+    let features = attrs.target_features.iter().filter(|attr| !attr.implied);
+
+    match architecture {
+        Architecture::X86_64 | Architecture::X86_64_X32 => { /* do nothing */ }
+
+        Architecture::Aarch64 | Architecture::Aarch64_Ilp32 | Architecture::Arm => {
+            // https://developer.arm.com/documentation/100067/0611/armclang-Integrated-Assembler/AArch32-Target-selection-directives?lang=en
+
+            for feature in features {
+                writeln!(begin, ".arch_extension {}", feature.name).unwrap();
+
+                writeln!(end, ".arch_extension no{}", feature.name).unwrap();
+            }
+        }
+        Architecture::Riscv32 | Architecture::Riscv64 => {
+            // https://github.com/riscv-non-isa/riscv-asm-manual/blob/ad0de8c004e29c9a7ac33cfd054f4d4f9392f2fb/src/asm-manual.adoc#arch
+
+            for feature in features {
+                writeln!(begin, ".option arch, +{}", feature.name).unwrap();
+
+                writeln!(end, ".option arch, -{}", feature.name).unwrap();
+            }
+        }
+        Architecture::Mips | Architecture::Mips64 | Architecture::Mips64_N32 => {
+            // https://sourceware.org/binutils/docs/as/MIPS-ISA.html
+            // https://sourceware.org/binutils/docs/as/MIPS-ASE-Instruction-Generation-Overrides.html
+
+            for feature in features {
+                writeln!(begin, ".set {}", feature.name).unwrap();
+
+                writeln!(end, ".set no{}", feature.name).unwrap();
+            }
+        }
+
+        Architecture::S390x => {
+            // https://sourceware.org/binutils/docs/as/s390-Directives.html
+
+            // based on src/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td
+            let isa_revision_for_feature = |feature: &TargetFeature| match feature.name.as_str() {
+                "backchain" => None, // does not define any instructions
+                "deflate-conversion" => Some(13),
+                "enhanced-sort" => Some(13),
+                "guarded-storage" => Some(12),
+                "high-word" => None, // technically 9, but LLVM supports only >= 10
+                "nnp-assist" => Some(14),
+                "transactional-execution" => Some(10),
+                "vector" => Some(11),
+                "vector-enhancements-1" => Some(12),
+                "vector-enhancements-2" => Some(13),
+                "vector-packed-decimal" => Some(12),
+                "vector-packed-decimal-enhancement" => Some(13),
+                "vector-packed-decimal-enhancement-2" => Some(14),
+                _ => None,
+            };
+
+            if let Some(minimum_isa) = features.filter_map(isa_revision_for_feature).max() {
+                writeln!(begin, ".machine arch{minimum_isa}").unwrap();
+
+                // NOTE: LLVM does not support `.machine push` and `.machine pop`, so we rely on these
+                // target features only being applied to this ASM block (LLVM clears them for the next)
+                //
+                // https://github.com/llvm/llvm-project/blob/74306afe87b85cb9b5734044eb6c74b8290098b3/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp#L1362
+            }
+        }
+        Architecture::PowerPc | Architecture::PowerPc64 => {
+            // https://www.ibm.com/docs/en/ssw_aix_71/assembler/assembler_pdf.pdf
+
+            // based on src/llvm-project/llvm/lib/Target/PowerPC/PPC.td
+            let isa_revision_for_feature = |feature: &TargetFeature| match feature.name.as_str() {
+                "altivec" => Some(7),
+                "partword-atomics" => Some(8),
+                "power10-vector" => Some(10),
+                "power8-altivec" => Some(8),
+                "power8-crypto" => Some(8),
+                "power8-vector" => Some(9),
+                "power9-altivec" => Some(9),
+                "power9-vector" => Some(9),
+                "quadword-atomics" => Some(8),
+                "vsx" => Some(7),
+                _ => None,
+            };
+
+            if let Some(minimum_isa) = features.filter_map(isa_revision_for_feature).max() {
+                writeln!(begin, ".machine push").unwrap();
+
+                // LLVM currently ignores the .machine directive, and allows all instructions regardless
+                // of the machine. This may be fixed in the future.
+                //
+                // https://github.com/llvm/llvm-project/blob/74306afe87b85cb9b5734044eb6c74b8290098b3/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp#L1799
+                writeln!(begin, ".machine pwr{minimum_isa}").unwrap();
+
+                writeln!(end, ".machine pop").unwrap();
+            }
+        }
+
+        Architecture::M68k => {
+            // https://sourceware.org/binutils/docs/as/M68K_002dDirectives.html#index-directives_002c-M680x0
+
+            // FIXME support m64k
+            // return None;
+        }
+
+        Architecture::Wasm32 | Architecture::Wasm64 => {
+            // LLVM does not appear to accept any directive to enable target features
+            //
+            // https://github.com/llvm/llvm-project/blob/74306afe87b85cb9b5734044eb6c74b8290098b3/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp#L909
+            return None;
+        }
+
+        Architecture::LoongArch64 => {
+            // LLVM does not appear to accept any directive to enable target features
+            //
+            // https://github.com/llvm/llvm-project/blob/74306afe87b85cb9b5734044eb6c74b8290098b3/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp#L1918
+        }
+
+        // FIXME: support naked_asm! on more architectures
+        Architecture::Avr => return None,
+        Architecture::Bpf => return None,
+        Architecture::Csky => return None,
+        Architecture::E2K32 => return None,
+        Architecture::E2K64 => return None,
+        Architecture::I386 => return None,
+        Architecture::Hexagon => return None,
+        Architecture::Msp430 => return None,
+        Architecture::Sbf => return None,
+        Architecture::Sharc => return None,
+        Architecture::Sparc => return None,
+        Architecture::Sparc32Plus => return None,
+        Architecture::Sparc64 => return None,
+        Architecture::Xtensa => return None,
+
+        // the Architecture enum is non-exhaustive
+        Architecture::Unknown | _ => return None,
+    }
+
+    Some((begin, end))
+}
+
 fn prefix_and_suffix<'tcx>(
     tcx: TyCtxt<'tcx>,
     instance: Instance<'tcx>,
@@ -186,6 +397,12 @@ fn prefix_and_suffix<'tcx>(
         Ok(())
     };
 
+    let Some((target_feature_begin, target_feature_end)) =
+        enable_disable_target_features(tcx, attrs)
+    else {
+        panic!("target features on naked functions are not supported for this architecture");
+    };
+
     let mut begin = String::new();
     let mut end = String::new();
     match asm_binary_format {
@@ -205,6 +422,8 @@ fn prefix_and_suffix<'tcx>(
             writeln!(begin, ".pushsection {section},\"ax\", {progbits}").unwrap();
             writeln!(begin, ".balign {align}").unwrap();
             write_linkage(&mut begin).unwrap();
+            begin.push_str(&target_feature_begin);
+
             if let Visibility::Hidden = item_data.visibility {
                 writeln!(begin, ".hidden {asm_name}").unwrap();
             }
@@ -215,6 +434,7 @@ fn prefix_and_suffix<'tcx>(
             writeln!(begin, "{asm_name}:").unwrap();
 
             writeln!(end).unwrap();
+            end.push_str(&target_feature_end);
             writeln!(end, ".size {asm_name}, . - {asm_name}").unwrap();
             writeln!(end, ".popsection").unwrap();
             if !arch_suffix.is_empty() {
@@ -226,12 +446,14 @@ fn prefix_and_suffix<'tcx>(
             writeln!(begin, ".pushsection {},regular,pure_instructions", section).unwrap();
             writeln!(begin, ".balign {align}").unwrap();
             write_linkage(&mut begin).unwrap();
+            begin.push_str(&target_feature_begin);
             if let Visibility::Hidden = item_data.visibility {
                 writeln!(begin, ".private_extern {asm_name}").unwrap();
             }
             writeln!(begin, "{asm_name}:").unwrap();
 
             writeln!(end).unwrap();
+            end.push_str(&target_feature_end);
             writeln!(end, ".popsection").unwrap();
             if !arch_suffix.is_empty() {
                 writeln!(end, "{}", arch_suffix).unwrap();
@@ -242,6 +464,7 @@ fn prefix_and_suffix<'tcx>(
             writeln!(begin, ".pushsection {},\"xr\"", section).unwrap();
             writeln!(begin, ".balign {align}").unwrap();
             write_linkage(&mut begin).unwrap();
+            begin.push_str(&target_feature_begin);
             writeln!(begin, ".def {asm_name}").unwrap();
             writeln!(begin, ".scl 2").unwrap();
             writeln!(begin, ".type 32").unwrap();
@@ -249,6 +472,7 @@ fn prefix_and_suffix<'tcx>(
             writeln!(begin, "{asm_name}:").unwrap();
 
             writeln!(end).unwrap();
+            end.push_str(&target_feature_end);
             writeln!(end, ".popsection").unwrap();
             if !arch_suffix.is_empty() {
                 writeln!(end, "{}", arch_suffix).unwrap();
diff --git a/tests/assembly/naked-fn-target-feature.rs b/tests/assembly/naked-fn-target-feature.rs
new file mode 100644
index 0000000000000..fc42bbea5c7b2
--- /dev/null
+++ b/tests/assembly/naked-fn-target-feature.rs
@@ -0,0 +1,173 @@
+//@ revisions: aarch64-elf aarch64-macho aarch64-coff x86_64 s390x riscv64 powerpc64 loongarch64
+//@ add-core-stubs
+//@ assembly-output: emit-asm
+//
+//@ [x86_64] compile-flags: --target x86_64-unknown-linux-gnu
+//@ [x86_64] needs-llvm-components: x86
+//
+//@ [aarch64-elf] compile-flags: --target aarch64-unknown-linux-gnu
+//@ [aarch64-elf] needs-llvm-components: aarch64
+//@ [aarch64-macho] compile-flags: --target aarch64-apple-darwin
+//@ [aarch64-macho] needs-llvm-components: aarch64
+//@ [aarch64-coff] compile-flags: --target aarch64-pc-windows-gnullvm
+//@ [aarch64-coff] needs-llvm-components: aarch64
+//
+//@ [s390x] compile-flags: --target s390x-unknown-linux-gnu
+//@ [s390x] needs-llvm-components: systemz
+//
+//@ [powerpc64] compile-flags: --target powerpc64-unknown-linux-gnu
+//@ [powerpc64] needs-llvm-components: powerpc
+//
+//@ [riscv64] compile-flags: --target riscv64gc-unknown-linux-gnu
+//@ [riscv64] needs-llvm-components: riscv
+//
+// NOTE: there is currently no logic for handling target features for loongarch,
+// because it does not seem to support a feature-setting directive.
+// we effectively assume all instructions are accepted regardless of target feature.
+//@ [loongarch64] compile-flags: --target loongarch64-unknown-linux-gnu
+//@ [loongarch64] needs-llvm-components: loongarch
+//
+// NOTE: wasm32 is skipped because it does not work
+// [wasm32] compile-flags: --target wasm32-wasip1
+// [wasm32] needs-llvm-components: webassembly
+
+// Test that `#[target_feature(enable = ...)]` works on naked functions.
+//
+// For most targets, a directive needs to be applied to enable, and then disable the target feature.
+
+#![crate_type = "lib"]
+#![feature(no_core, naked_functions, asm_experimental_arch)]
+#![feature(
+    avx512_target_feature,
+    s390x_target_feature,
+    powerpc_target_feature,
+    loongarch_target_feature,
+    m68k_target_feature
+)]
+#![no_core]
+
+extern crate minicore;
+use minicore::*;
+
+// x86_64-LABEL: vpclmulqdq:
+// x86_64: vpclmulqdq
+#[no_mangle]
+#[naked]
+#[cfg(target_arch = "x86_64")]
+#[target_feature(enable = "vpclmulqdq")]
+unsafe extern "C" fn vpclmulqdq() {
+    naked_asm!("vpclmulqdq zmm1, zmm2, zmm3, 4")
+}
+
+// aarch64-elf-LABEL: aes:
+// aarch64-elf: aese
+// aarch64-macho-LABEL: aes:
+// aarch64-macho: aese
+// aarch64-coff-LABEL: aes:
+// aarch64-coff: aese
+#[no_mangle]
+#[naked]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "aes")]
+unsafe extern "C" fn aes() {
+    naked_asm!("aese.16b v0, v1")
+}
+
+// riscv64: sh1add:
+// riscv64: sh1add
+#[no_mangle]
+#[naked]
+#[cfg(target_arch = "riscv64")]
+#[target_feature(enable = "zba")]
+unsafe extern "C" fn sh1add() {
+    naked_asm!("sh1add a0, a1, a2", "ret");
+}
+
+#[cfg(target_arch = "s390x")]
+mod s390x {
+    use super::*;
+
+    // s390x: vector:
+    // s390x: vavglg
+    #[no_mangle]
+    #[naked]
+    #[target_feature(enable = "vector")]
+    unsafe extern "C" fn vector() {
+        naked_asm!("vavglg  %v0, %v0, %v0")
+    }
+
+    // s390x: vector_enhancements_1:
+    // s390x: vfcesbs
+    #[no_mangle]
+    #[naked]
+    #[target_feature(enable = "vector-enhancements-1")]
+    unsafe extern "C" fn vector_enhancements_1() {
+        naked_asm!("vfcesbs %v0, %v0, %v0")
+    }
+
+    // s390x: vector_enhancements_2:
+    // s390x: vclfp
+    #[no_mangle]
+    #[naked]
+    #[target_feature(enable = "vector-enhancements-2")]
+    unsafe extern "C" fn vector_enhancements_2() {
+        naked_asm!("vclfp   %v0, %v0, 0, 0, 0")
+    }
+
+    // s390x: vector_packed_decimal:
+    // s390x: vlrlr
+    #[no_mangle]
+    #[naked]
+    #[target_feature(enable = "vector-packed-decimal")]
+    unsafe extern "C" fn vector_packed_decimal() {
+        naked_asm!("vlrlr   %v24, %r3, 0(%r2)", "br      %r14")
+    }
+
+    // s390x: vector_packed_decimal_enhancement:
+    // s390x: vcvbg
+    #[no_mangle]
+    #[naked]
+    #[target_feature(enable = "vector-packed-decimal-enhancement")]
+    unsafe extern "C" fn vector_packed_decimal_enhancement() {
+        naked_asm!("vcvbg   %r0, %v0, 0, 1")
+    }
+
+    // s390x: vector_packed_decimal_enhancement_2:
+    // s390x: vupkzl
+    #[no_mangle]
+    #[naked]
+    #[target_feature(enable = "vector-packed-decimal-enhancement-2")]
+    unsafe extern "C" fn vector_packed_decimal_enhancement_2() {
+        naked_asm!("vupkzl  %v0, %v0, 0")
+    }
+}
+
+// powerpc64: altivec:
+// powerpc64: vsumsws
+#[no_mangle]
+#[naked]
+#[cfg(target_arch = "powerpc64")]
+#[target_feature(enable = "altivec")]
+unsafe extern "C" fn altivec() {
+    naked_asm!("vsumsws   %v0, %v1, %v2", "blr")
+}
+
+// loongarch64: lasx:
+// loongarch64: xvadd.b
+#[no_mangle]
+#[naked]
+#[cfg(target_arch = "loongarch64")]
+#[target_feature(enable = "lasx")]
+unsafe extern "C" fn lasx() {
+    naked_asm!("xvadd.b  $xr0, $xr0, $xr1", "ret")
+}
+
+// wasm32: simd128:
+// wasm32: i8x16.shuffle
+#[no_mangle]
+#[naked]
+#[cfg(target_arch = "wasm32")]
+#[target_feature(enable = "simd128")]
+unsafe extern "C" fn simd128() {
+    naked_asm!("i8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15", "return");
+}

From 9f3d4c696ace3223f3f519ef8aabec9dc36407e3 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 27 Feb 2025 12:50:59 +0100
Subject: [PATCH 02/12] properly `push` and `pop` on riscv and mips

---
 compiler/rustc_codegen_ssa/src/mir/naked_asm.rs | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
index 0d7c5652b5ac3..d2d9b22a6a1d0 100644
--- a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
@@ -194,21 +194,23 @@ fn enable_disable_target_features<'tcx>(
         Architecture::Riscv32 | Architecture::Riscv64 => {
             // https://github.com/riscv-non-isa/riscv-asm-manual/blob/ad0de8c004e29c9a7ac33cfd054f4d4f9392f2fb/src/asm-manual.adoc#arch
 
+            writeln!(begin, ".option push").unwrap();
             for feature in features {
                 writeln!(begin, ".option arch, +{}", feature.name).unwrap();
-
-                writeln!(end, ".option arch, -{}", feature.name).unwrap();
             }
+
+            writeln!(end, ".option pop").unwrap();
         }
         Architecture::Mips | Architecture::Mips64 | Architecture::Mips64_N32 => {
             // https://sourceware.org/binutils/docs/as/MIPS-ISA.html
             // https://sourceware.org/binutils/docs/as/MIPS-ASE-Instruction-Generation-Overrides.html
 
+            writeln!(begin, ".set push").unwrap();
             for feature in features {
                 writeln!(begin, ".set {}", feature.name).unwrap();
-
-                writeln!(end, ".set no{}", feature.name).unwrap();
             }
+
+            writeln!(end, ".set pop").unwrap();
         }
 
         Architecture::S390x => {

From 14acaf1d3eb38b6dd7e8ebcf7859b380c580a41a Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 27 Feb 2025 12:51:50 +0100
Subject: [PATCH 03/12] ignore target features for wasm32 and loongarch

---
 compiler/rustc_codegen_ssa/src/mir/naked_asm.rs | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
index d2d9b22a6a1d0..1608f224c20cc 100644
--- a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
@@ -285,13 +285,16 @@ fn enable_disable_target_features<'tcx>(
             // LLVM does not appear to accept any directive to enable target features
             //
             // https://github.com/llvm/llvm-project/blob/74306afe87b85cb9b5734044eb6c74b8290098b3/llvm/lib/Target/WebAssembly/AsmParser/WebAssemblyAsmParser.cpp#L909
-            return None;
+
+            /* fallthrough */
         }
 
         Architecture::LoongArch64 => {
             // LLVM does not appear to accept any directive to enable target features
             //
             // https://github.com/llvm/llvm-project/blob/74306afe87b85cb9b5734044eb6c74b8290098b3/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp#L1918
+
+            /* fallthrough */
         }
 
         // FIXME: support naked_asm! on more architectures

From 4626bcc648a60961f0d84b8b8f1c37f2ce374403 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 27 Feb 2025 12:52:02 +0100
Subject: [PATCH 04/12] group I386 with x86

---
 compiler/rustc_codegen_ssa/src/mir/naked_asm.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
index 1608f224c20cc..66b5e64eea155 100644
--- a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
@@ -180,7 +180,9 @@ fn enable_disable_target_features<'tcx>(
     let features = attrs.target_features.iter().filter(|attr| !attr.implied);
 
     match architecture {
-        Architecture::X86_64 | Architecture::X86_64_X32 => { /* do nothing */ }
+        Architecture::X86_64 | Architecture::X86_64_X32 | Architecture::I386 => {
+            // no action is needed, all instructions are accepted regardless of target feature
+        }
 
         Architecture::Aarch64 | Architecture::Aarch64_Ilp32 | Architecture::Arm => {
             // https://developer.arm.com/documentation/100067/0611/armclang-Integrated-Assembler/AArch32-Target-selection-directives?lang=en
@@ -303,7 +305,6 @@ fn enable_disable_target_features<'tcx>(
         Architecture::Csky => return None,
         Architecture::E2K32 => return None,
         Architecture::E2K64 => return None,
-        Architecture::I386 => return None,
         Architecture::Hexagon => return None,
         Architecture::Msp430 => return None,
         Architecture::Sbf => return None,

From 6eaad40070cc1668c8a602dd7c4b37b837f486f3 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 27 Feb 2025 13:24:24 +0100
Subject: [PATCH 05/12] use non-default powerpc feature

---
 tests/assembly/naked-fn-target-feature.rs | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/tests/assembly/naked-fn-target-feature.rs b/tests/assembly/naked-fn-target-feature.rs
index fc42bbea5c7b2..b98dbb23d4482 100644
--- a/tests/assembly/naked-fn-target-feature.rs
+++ b/tests/assembly/naked-fn-target-feature.rs
@@ -142,14 +142,14 @@ mod s390x {
     }
 }
 
-// powerpc64: altivec:
-// powerpc64: vsumsws
+// powerpc64: power10_vector:
+// powerpc64: xxpermx
 #[no_mangle]
 #[naked]
 #[cfg(target_arch = "powerpc64")]
-#[target_feature(enable = "altivec")]
-unsafe extern "C" fn altivec() {
-    naked_asm!("vsumsws   %v0, %v1, %v2", "blr")
+#[target_feature(enable = "power10-vector")]
+unsafe extern "C" fn power10_vector() {
+    naked_asm!("xxpermx 34, 0, 1, 2, 0", "blr")
 }
 
 // loongarch64: lasx:

From d02b18ad04b3cb6b849e9e3153e3b8a109d4b68b Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 27 Feb 2025 14:21:39 +0100
Subject: [PATCH 06/12] don't unset target features for aarch64

---
 compiler/rustc_codegen_ssa/src/mir/naked_asm.rs | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
index 66b5e64eea155..3135d15b05ccf 100644
--- a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
@@ -190,7 +190,11 @@ fn enable_disable_target_features<'tcx>(
             for feature in features {
                 writeln!(begin, ".arch_extension {}", feature.name).unwrap();
 
-                writeln!(end, ".arch_extension no{}", feature.name).unwrap();
+                // aarch does not have the push/pop mechanism like riscv below.
+                //
+                // > The .arch_extension directive is effective until the end of the assembly block and is not propagated to subsequent code
+                //
+                // https://github.com/taiki-e/portable-atomic/blob/75a36c33b38c4c68f4095e95f106cfbedce9a914/src/imp/atomic128/aarch64.rs#L330
             }
         }
         Architecture::Riscv32 | Architecture::Riscv64 => {

From 9e7dae791dc37c393a02725bcaea56a9beab4b03 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 27 Feb 2025 14:21:49 +0100
Subject: [PATCH 07/12] more comments

---
 .../rustc_codegen_ssa/src/mir/naked_asm.rs    | 20 ++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
index 3135d15b05ccf..c9e14713b41e3 100644
--- a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
@@ -154,7 +154,7 @@ fn parse_architecture(
         "csky" => (Architecture::Csky, None),
         "arm64ec" => (Architecture::Aarch64, Some(SubArchitecture::Arm64EC)),
 
-        // added here
+        // These architectures are added here, and not present in `create_object_file`
         "wasm32" => (Architecture::Wasm32, None),
         "wasm64" => (Architecture::Wasm64, None),
         "m68k" => (Architecture::M68k, None),
@@ -243,10 +243,10 @@ fn enable_disable_target_features<'tcx>(
             if let Some(minimum_isa) = features.filter_map(isa_revision_for_feature).max() {
                 writeln!(begin, ".machine arch{minimum_isa}").unwrap();
 
-                // NOTE: LLVM does not support `.machine push` and `.machine pop`, so we rely on these
+                // NOTE: LLVM does not currently support `.machine push` and `.machine pop`, so we rely on these
                 // target features only being applied to this ASM block (LLVM clears them for the next)
                 //
-                // https://github.com/llvm/llvm-project/blob/74306afe87b85cb9b5734044eb6c74b8290098b3/llvm/lib/Target/SystemZ/AsmParser/SystemZAsmParser.cpp#L1362
+                // https://github.com/llvm/llvm-project/issues/129053
             }
         }
         Architecture::PowerPc | Architecture::PowerPc64 => {
@@ -283,8 +283,18 @@ fn enable_disable_target_features<'tcx>(
         Architecture::M68k => {
             // https://sourceware.org/binutils/docs/as/M68K_002dDirectives.html#index-directives_002c-M680x0
 
-            // FIXME support m64k
-            // return None;
+            // M68k supports the .cpu and .arch directives, but they both can only be applied once
+            //
+            // > If it is given multiple times, or in conjunction with the -march option,
+            // > all uses must be for the same architecture and extension set.
+            //
+            // That is not flexible enough for us, because different functions might want different
+            // features.
+            //
+            // So far, we've not found any cases where ignoring the target features causes issues,
+            // so that's what we do for now.
+
+            /* fallthrough */
         }
 
         Architecture::Wasm32 | Architecture::Wasm64 => {

From 9fc124212797da20245d83a23f3956cd6fd7478d Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 27 Feb 2025 15:05:42 +0100
Subject: [PATCH 08/12] only enable/disable aarch64 flags that are not already
 globally enabled

---
 .../rustc_codegen_ssa/src/mir/naked_asm.rs    | 12 ++++----
 .../naked-fn-aarch64-global-target-feature.rs | 29 +++++++++++++++++++
 2 files changed, 35 insertions(+), 6 deletions(-)
 create mode 100644 tests/assembly/naked-fn-aarch64-global-target-feature.rs

diff --git a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
index c9e14713b41e3..768cbc47e66c6 100644
--- a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
@@ -188,13 +188,13 @@ fn enable_disable_target_features<'tcx>(
             // https://developer.arm.com/documentation/100067/0611/armclang-Integrated-Assembler/AArch32-Target-selection-directives?lang=en
 
             for feature in features {
-                writeln!(begin, ".arch_extension {}", feature.name).unwrap();
+                // only enable/disable a feature if it is not already globally enabled.
+                // so that we don't influence subsequent asm blocks
+                if !tcx.sess.unstable_target_features.contains(&feature.name) {
+                    writeln!(begin, ".arch_extension {}", feature.name).unwrap();
 
-                // aarch does not have the push/pop mechanism like riscv below.
-                //
-                // > The .arch_extension directive is effective until the end of the assembly block and is not propagated to subsequent code
-                //
-                // https://github.com/taiki-e/portable-atomic/blob/75a36c33b38c4c68f4095e95f106cfbedce9a914/src/imp/atomic128/aarch64.rs#L330
+                    writeln!(end, ".arch_extension no{}", feature.name).unwrap();
+                }
             }
         }
         Architecture::Riscv32 | Architecture::Riscv64 => {
diff --git a/tests/assembly/naked-fn-aarch64-global-target-feature.rs b/tests/assembly/naked-fn-aarch64-global-target-feature.rs
new file mode 100644
index 0000000000000..da18278ce5a2e
--- /dev/null
+++ b/tests/assembly/naked-fn-aarch64-global-target-feature.rs
@@ -0,0 +1,29 @@
+//@ add-core-stubs
+//@ assembly-output: emit-asm
+//@ compile-flags: --target aarch64-unknown-linux-gnu -Ctarget-feature=+lse
+//@ needs-llvm-components: aarch64
+
+#![crate_type = "lib"]
+#![feature(no_core, naked_functions)]
+#![no_core]
+
+extern crate minicore;
+use minicore::*;
+
+// check that a naked function using target features does not disable these features for subsequent
+// asm blocks.
+
+// CHECK-LABEL: a:
+#[no_mangle]
+#[naked]
+#[target_feature(enable = "lse")]
+unsafe extern "C" fn a() {
+    naked_asm!("casp x2, x3, x2, x3, [x1]")
+}
+
+// CHECK-LABEL: b:
+#[no_mangle]
+#[naked]
+unsafe extern "C" fn b() {
+    naked_asm!("casp x2, x3, x2, x3, [x1]")
+}

From 5e370a261b73daabc3a29799a7395e4ecaf34031 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 27 Feb 2025 15:20:03 +0100
Subject: [PATCH 09/12] use an aarch64 target feature that apple does not
 enable by default

---
 .../naked-fn-aarch64-global-target-feature.rs |  4 ++--
 tests/assembly/naked-fn-target-feature.rs     | 22 +++++++++++--------
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/tests/assembly/naked-fn-aarch64-global-target-feature.rs b/tests/assembly/naked-fn-aarch64-global-target-feature.rs
index da18278ce5a2e..a3a467ab48a80 100644
--- a/tests/assembly/naked-fn-aarch64-global-target-feature.rs
+++ b/tests/assembly/naked-fn-aarch64-global-target-feature.rs
@@ -1,7 +1,7 @@
 //@ add-core-stubs
 //@ assembly-output: emit-asm
-//@ compile-flags: --target aarch64-unknown-linux-gnu -Ctarget-feature=+lse
-//@ needs-llvm-components: aarch64
+//@ only-aarch64
+//@ compile-flags: -Ctarget-feature=+lse
 
 #![crate_type = "lib"]
 #![feature(no_core, naked_functions)]
diff --git a/tests/assembly/naked-fn-target-feature.rs b/tests/assembly/naked-fn-target-feature.rs
index b98dbb23d4482..fece411cf37d9 100644
--- a/tests/assembly/naked-fn-target-feature.rs
+++ b/tests/assembly/naked-fn-target-feature.rs
@@ -59,18 +59,22 @@ unsafe extern "C" fn vpclmulqdq() {
     naked_asm!("vpclmulqdq zmm1, zmm2, zmm3, 4")
 }
 
-// aarch64-elf-LABEL: aes:
-// aarch64-elf: aese
-// aarch64-macho-LABEL: aes:
-// aarch64-macho: aese
-// aarch64-coff-LABEL: aes:
-// aarch64-coff: aese
+// i8mm is not enabled by default
+//
+// note that aarch64-apple-darwin enables more features than aarch64-unknown-linux-gnu
+//
+// aarch64-elf-LABEL: i8mm:
+// aarch64-elf: usdot
+// aarch64-macho-LABEL: i8mm:
+// aarch64-macho: usdot
+// aarch64-coff-LABEL: i8mm:
+// aarch64-coff: usdot
 #[no_mangle]
 #[naked]
 #[cfg(target_arch = "aarch64")]
-#[target_feature(enable = "aes")]
-unsafe extern "C" fn aes() {
-    naked_asm!("aese.16b v0, v1")
+#[target_feature(enable = "i8mm")]
+unsafe extern "C" fn i8mm() {
+    naked_asm!("usdot   v0.4s, v1.16b, v2.4b[3]")
 }
 
 // riscv64: sh1add:

From 7e763e9846b46dd7fceeabdd770402b9cc9627c5 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 27 Feb 2025 15:54:25 +0100
Subject: [PATCH 10/12] check that target features don't leak out of a naked
 function

---
 ...unctions-no-leak-target-feature-aarch64.rs | 27 +++++++++++++++++++
 ...ions-no-leak-target-feature-aarch64.stderr | 18 +++++++++++++
 2 files changed, 45 insertions(+)
 create mode 100644 tests/ui/asm/naked-functions-no-leak-target-feature-aarch64.rs
 create mode 100644 tests/ui/asm/naked-functions-no-leak-target-feature-aarch64.stderr

diff --git a/tests/ui/asm/naked-functions-no-leak-target-feature-aarch64.rs b/tests/ui/asm/naked-functions-no-leak-target-feature-aarch64.rs
new file mode 100644
index 0000000000000..d401c30c15246
--- /dev/null
+++ b/tests/ui/asm/naked-functions-no-leak-target-feature-aarch64.rs
@@ -0,0 +1,27 @@
+//@ add-core-stubs
+//@ compile-flags: --target aarch64-unknown-linux-gnu
+//@ build-fail
+//@ needs-llvm-components: aarch64
+
+#![crate_type = "lib"]
+#![feature(no_core, naked_functions)]
+#![no_core]
+
+extern crate minicore;
+use minicore::*;
+
+// check that a naked function using target features does not keep these features enabled
+// for subsequent asm blocks.
+
+#[no_mangle]
+#[naked]
+#[target_feature(enable = "i8mm")]
+unsafe extern "C" fn a() {
+    naked_asm!("usdot   v0.4s, v1.16b, v2.4b[3]")
+}
+
+#[no_mangle]
+#[naked]
+unsafe extern "C" fn b() {
+    naked_asm!("usdot   v0.4s, v1.16b, v2.4b[3]")
+}
diff --git a/tests/ui/asm/naked-functions-no-leak-target-feature-aarch64.stderr b/tests/ui/asm/naked-functions-no-leak-target-feature-aarch64.stderr
new file mode 100644
index 0000000000000..3778dff4a70f5
--- /dev/null
+++ b/tests/ui/asm/naked-functions-no-leak-target-feature-aarch64.stderr
@@ -0,0 +1,18 @@
+error: instruction requires: i8mm
+   |
+note: instantiated into assembly here
+  --> <inline asm>:16:1
+   |
+LL | usdot   v0.4s, v1.16b, v2.4b[3]
+   | ^
+
+error: instruction requires: i8mm
+   |
+note: instantiated into assembly here
+  --> <inline asm>:16:1
+   |
+LL | usdot   v0.4s, v1.16b, v2.4b[3]
+   | ^
+
+error: aborting due to 2 previous errors
+

From 1ce4d18b80f48fc32ecdb07cab15d5d9dc225ce5 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Thu, 27 Feb 2025 16:18:20 +0100
Subject: [PATCH 11/12] attempt to not let features escape for s390x

---
 .../rustc_codegen_ssa/src/mir/naked_asm.rs    | 26 ++++++++++++++----
 ...-functions-no-leak-target-feature-s390x.rs | 27 +++++++++++++++++++
 ...ctions-no-leak-target-feature-s390x.stderr | 18 +++++++++++++
 3 files changed, 66 insertions(+), 5 deletions(-)
 create mode 100644 tests/ui/asm/naked-functions-no-leak-target-feature-s390x.rs
 create mode 100644 tests/ui/asm/naked-functions-no-leak-target-feature-s390x.stderr

diff --git a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
index 768cbc47e66c6..f436cd4b19a92 100644
--- a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
@@ -223,7 +223,7 @@ fn enable_disable_target_features<'tcx>(
             // https://sourceware.org/binutils/docs/as/s390-Directives.html
 
             // based on src/llvm-project/llvm/lib/Target/SystemZ/SystemZFeatures.td
-            let isa_revision_for_feature = |feature: &TargetFeature| match feature.name.as_str() {
+            let isa_revision_for_feature_name = |feature_name| match feature_name {
                 "backchain" => None, // does not define any instructions
                 "deflate-conversion" => Some(13),
                 "enhanced-sort" => Some(13),
@@ -240,13 +240,29 @@ fn enable_disable_target_features<'tcx>(
                 _ => None,
             };
 
-            if let Some(minimum_isa) = features.filter_map(isa_revision_for_feature).max() {
+            let target_feature_isa = features
+                .filter_map(|feature| isa_revision_for_feature_name(feature.name.as_str()))
+                .max();
+
+            if let Some(minimum_isa) = target_feature_isa {
                 writeln!(begin, ".machine arch{minimum_isa}").unwrap();
 
-                // NOTE: LLVM does not currently support `.machine push` and `.machine pop`, so we rely on these
-                // target features only being applied to this ASM block (LLVM clears them for the next)
+                // NOTE: LLVM does not currently support `.machine push` and `.machine pop`;
+                // this is tracked in https://github.com/llvm/llvm-project/issues/129053.
+                //
+                // So instead we have to try to revert to the previous state manually.
                 //
-                // https://github.com/llvm/llvm-project/issues/129053
+                // However, this may still be observable if the user explicitly set the machine to
+                // a higher value using global assembly.
+                let global_isa = tcx
+                    .sess
+                    .unstable_target_features
+                    .iter()
+                    .filter_map(|feature| isa_revision_for_feature_name(feature.as_str()))
+                    .max()
+                    .unwrap_or(10);
+
+                writeln!(end, ".machine arch{global_isa}").unwrap();
             }
         }
         Architecture::PowerPc | Architecture::PowerPc64 => {
diff --git a/tests/ui/asm/naked-functions-no-leak-target-feature-s390x.rs b/tests/ui/asm/naked-functions-no-leak-target-feature-s390x.rs
new file mode 100644
index 0000000000000..0a7debab93d71
--- /dev/null
+++ b/tests/ui/asm/naked-functions-no-leak-target-feature-s390x.rs
@@ -0,0 +1,27 @@
+//@ add-core-stubs
+//@ compile-flags: --target s390x-unknown-linux-gnu
+//@ build-fail
+//@ needs-llvm-components: systemz
+
+#![crate_type = "lib"]
+#![feature(no_core, naked_functions, s390x_target_feature)]
+#![no_core]
+
+extern crate minicore;
+use minicore::*;
+
+// check that a naked function using target features does not keep these features enabled
+// for subsequent asm blocks.
+
+#[no_mangle]
+#[naked]
+#[target_feature(enable = "vector-packed-decimal")]
+unsafe extern "C" fn a() {
+    naked_asm!("vlrlr   %v24, %r3, 0(%r2)")
+}
+
+#[no_mangle]
+#[naked]
+unsafe extern "C" fn b() {
+    naked_asm!("vlrlr   %v24, %r3, 0(%r2)")
+}
diff --git a/tests/ui/asm/naked-functions-no-leak-target-feature-s390x.stderr b/tests/ui/asm/naked-functions-no-leak-target-feature-s390x.stderr
new file mode 100644
index 0000000000000..17462632e4f90
--- /dev/null
+++ b/tests/ui/asm/naked-functions-no-leak-target-feature-s390x.stderr
@@ -0,0 +1,18 @@
+error: instruction requires: vector-packed-decimal
+   |
+note: instantiated into assembly here
+  --> <inline asm>:16:1
+   |
+LL | vlrlr   %v24, %r3, 0(%r2)
+   | ^
+
+error: instruction requires: vector-packed-decimal
+   |
+note: instantiated into assembly here
+  --> <inline asm>:16:1
+   |
+LL | vlrlr   %v24, %r3, 0(%r2)
+   | ^
+
+error: aborting due to 2 previous errors
+

From aea36f431ee578a08ca55f109a30ccd212de7bf9 Mon Sep 17 00:00:00 2001
From: Folkert de Vries <folkert@folkertdev.nl>
Date: Fri, 28 Feb 2025 18:18:24 +0100
Subject: [PATCH 12/12] skip arm for now (it is a mess)

some incomplete code is here https://gist.github.com/folkertdev/fe99874c466e598d0fb2dadf13b91b6f
---
 compiler/rustc_codegen_ssa/src/mir/naked_asm.rs | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
index f436cd4b19a92..17b6e5ce3d73d 100644
--- a/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
+++ b/compiler/rustc_codegen_ssa/src/mir/naked_asm.rs
@@ -184,7 +184,7 @@ fn enable_disable_target_features<'tcx>(
             // no action is needed, all instructions are accepted regardless of target feature
         }
 
-        Architecture::Aarch64 | Architecture::Aarch64_Ilp32 | Architecture::Arm => {
+        Architecture::Aarch64 | Architecture::Aarch64_Ilp32 => {
             // https://developer.arm.com/documentation/100067/0611/armclang-Integrated-Assembler/AArch32-Target-selection-directives?lang=en
 
             for feature in features {
@@ -197,6 +197,17 @@ fn enable_disable_target_features<'tcx>(
                 }
             }
         }
+        Architecture::Arm => {
+            // https://developer.arm.com/documentation/100067/0611/armclang-Integrated-Assembler/AArch32-Target-selection-directives?lang=en
+
+            // FIXME: implement the target feature handling. Contrary to most other targets, there
+            // is no convenient push/pop mechanism. That means we have to manually restore the state
+            // of the target features to the state on entry. This is complicated, given how arm
+            // features interact. There is some incomplete code here https://gist.github.com/folkertdev/fe99874c466e598d0fb2dadf13b91b6f
+
+            // intentionally emit no directives for now (see FIXME above)
+        }
+
         Architecture::Riscv32 | Architecture::Riscv64 => {
             // https://github.com/riscv-non-isa/riscv-asm-manual/blob/ad0de8c004e29c9a7ac33cfd054f4d4f9392f2fb/src/asm-manual.adoc#arch