diff --git a/compiler/rustc_codegen_ssa/src/mir/block.rs b/compiler/rustc_codegen_ssa/src/mir/block.rs index 4859380daa662..0d42ccc1a73c9 100644 --- a/compiler/rustc_codegen_ssa/src/mir/block.rs +++ b/compiler/rustc_codegen_ssa/src/mir/block.rs @@ -1146,19 +1146,51 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { (args, None) }; + // Special logic for tail calls with `PassMode::Indirect { on_stack: false, .. }` arguments. + // + // Normally an indirect argument with `on_stack: false` would be passed as a pointer into + // the caller's stack frame. For tail calls, that would be unsound, because the caller's + // stack frame is overwritten by the callee's stack frame. + // + // Therefore we store the argument for the callee in the corresponding caller's slot. + // Because guaranteed tail calls demand that the caller's signature matches the callee's, + // the corresponding slot has the correct type. + // + // To handle cases like the one below, the tail call arguments must first be copied to a + // temporary, and only then copied to the caller's argument slots. + // + // ``` + // // A struct big enough that it is not passed via registers. + // pub struct Big([u64; 4]); + // + // fn swapper(a: Big, b: Big) -> (Big, Big) { + // become swapper_helper(b, a); + // } + // ``` + let mut tail_call_temporaries = vec![]; + if kind == CallKind::Tail { + tail_call_temporaries = vec![None; first_args.len()]; + // Copy the arguments that use `PassMode::Indirect { on_stack: false , ..}` + // to temporary stack allocations. See the comment above. + for (i, arg) in first_args.iter().enumerate() { + if !matches!(fn_abi.args[i].mode, PassMode::Indirect { on_stack: false, .. }) { + continue; + } + + let op = self.codegen_operand(bx, &arg.node); + let tmp = PlaceRef::alloca(bx, op.layout); + bx.lifetime_start(tmp.val.llval, tmp.layout.size); + op.store_with_annotation(bx, tmp); + + tail_call_temporaries[i] = Some(tmp); + } + } + // When generating arguments we sometimes introduce temporary allocations with lifetime // that extend for the duration of a call. Keep track of those allocations and their sizes // to generate `lifetime_end` when the call returns. let mut lifetime_ends_after_call: Vec<(Bx::Value, Size)> = Vec::new(); 'make_args: for (i, arg) in first_args.iter().enumerate() { - if kind == CallKind::Tail && matches!(fn_abi.args[i].mode, PassMode::Indirect { .. }) { - // FIXME: https://github.com/rust-lang/rust/pull/144232#discussion_r2218543841 - span_bug!( - fn_span, - "arguments using PassMode::Indirect are currently not supported for tail calls" - ); - } - let mut op = self.codegen_operand(bx, &arg.node); if let (0, Some(ty::InstanceKind::Virtual(_, idx))) = (i, instance.map(|i| i.def)) { @@ -1209,18 +1241,72 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> { } } - // The callee needs to own the argument memory if we pass it - // by-ref, so make a local copy of non-immediate constants. - match (&arg.node, op.val) { - (&mir::Operand::Copy(_), Ref(PlaceValue { llextra: None, .. })) - | (&mir::Operand::Constant(_), Ref(PlaceValue { llextra: None, .. })) => { - let tmp = PlaceRef::alloca(bx, op.layout); - bx.lifetime_start(tmp.val.llval, tmp.layout.size); - op.store_with_annotation(bx, tmp); - op.val = Ref(tmp.val); - lifetime_ends_after_call.push((tmp.val.llval, tmp.layout.size)); + match kind { + CallKind::Normal => { + // The callee needs to own the argument memory if we pass it + // by-ref, so make a local copy of non-immediate constants. + if let &mir::Operand::Copy(_) | &mir::Operand::Constant(_) = &arg.node + && let Ref(PlaceValue { llextra: None, .. }) = op.val + { + let tmp = PlaceRef::alloca(bx, op.layout); + bx.lifetime_start(tmp.val.llval, tmp.layout.size); + op.store_with_annotation(bx, tmp); + op.val = Ref(tmp.val); + lifetime_ends_after_call.push((tmp.val.llval, tmp.layout.size)); + } + } + CallKind::Tail => { + match fn_abi.args[i].mode { + PassMode::Indirect { on_stack: false, .. } => { + let Some(tmp) = tail_call_temporaries[i].take() else { + span_bug!( + fn_span, + "missing temporary for indirect tail call argument #{i}" + ) + }; + + let local = self.mir.args_iter().nth(i).unwrap(); + + match &self.locals[local] { + LocalRef::Place(arg) => { + bx.typed_place_copy(arg.val, tmp.val, fn_abi.args[i].layout); + op.val = Ref(arg.val); + } + LocalRef::Operand(arg) => { + let Ref(place_value) = arg.val else { + bug!("only `Ref` should use `PassMode::Indirect`"); + }; + bx.typed_place_copy( + place_value, + tmp.val, + fn_abi.args[i].layout, + ); + op.val = arg.val; + } + LocalRef::UnsizedPlace(_) => { + span_bug!(fn_span, "unsized types are not supported") + } + LocalRef::PendingOperand => { + span_bug!(fn_span, "argument local should not be pending") + } + }; + + bx.lifetime_end(tmp.val.llval, tmp.layout.size); + } + PassMode::Indirect { on_stack: true, .. } => { + // FIXME: some LLVM backends (notably x86) do not correctly pass byval + // arguments to tail calls (as of LLVM 21). See also: + // + // - https://github.com/rust-lang/rust/pull/144232#discussion_r2218543841 + // - https://github.com/rust-lang/rust/issues/144855 + span_bug!( + fn_span, + "arguments using PassMode::Indirect {{ on_stack: true, .. }} are currently not supported for tail calls" + ) + } + _ => (), + } } - _ => {} } self.codegen_argument( diff --git a/compiler/rustc_mir_transform/src/deduce_param_attrs.rs b/compiler/rustc_mir_transform/src/deduce_param_attrs.rs index 1b3e01f3a3809..ad94c23f229c4 100644 --- a/compiler/rustc_mir_transform/src/deduce_param_attrs.rs +++ b/compiler/rustc_mir_transform/src/deduce_param_attrs.rs @@ -121,17 +121,29 @@ impl<'tcx> Visitor<'tcx> for DeduceParamAttrs { // `f` passes. Note that function arguments are the only situation in which this problem can // arise: every other use of `move` in MIR doesn't actually write to the value it moves // from. - if let TerminatorKind::Call { ref args, .. } = terminator.kind { - for arg in args { - if let Operand::Move(place) = arg.node - && !place.is_indirect_first_projection() - && let Some(i) = self.as_param(place.local) - { - self.usage[i] |= UsageSummary::MUTATE; - self.usage[i] |= UsageSummary::CAPTURE; + match terminator.kind { + TerminatorKind::Call { ref args, .. } => { + for arg in args { + if let Operand::Move(place) = arg.node + && !place.is_indirect_first_projection() + && let Some(i) = self.as_param(place.local) + { + self.usage[i] |= UsageSummary::MUTATE; + self.usage[i] |= UsageSummary::CAPTURE; + } } } - }; + + // Like a call, but more conservative because the backend may introduce writes to an + // argument if the argument is passed as `PassMode::Indirect { on_stack: false, ... }`. + TerminatorKind::TailCall { .. } => { + for usage in self.usage.iter_mut() { + *usage |= UsageSummary::MUTATE; + *usage |= UsageSummary::CAPTURE; + } + } + _ => {} + } self.super_terminator(terminator, location); } diff --git a/tests/assembly-llvm/tail-call-indirect.rs b/tests/assembly-llvm/tail-call-indirect.rs new file mode 100644 index 0000000000000..2bc1743a9bafd --- /dev/null +++ b/tests/assembly-llvm/tail-call-indirect.rs @@ -0,0 +1,76 @@ +//@ add-minicore +//@ min-llvm-version: 22 +//@ assembly-output: emit-asm +//@ needs-llvm-components: x86 +//@ compile-flags: --target=x86_64-unknown-linux-gnu +//@ compile-flags: -Copt-level=3 -C llvm-args=-x86-asm-syntax=intel + +#![feature(no_core, explicit_tail_calls)] +#![expect(incomplete_features)] +#![no_core] +#![crate_type = "lib"] + +// Test tail calls with `PassMode::Indirect { on_stack: false, .. }` arguments. +// +// Normally an indirect argument with `on_stack: false` would be passed as a pointer to the +// caller's stack frame. For tail calls, that would be unsound, because the caller's stack +// frame is overwritten by the callee's stack frame. +// +// The solution is to write the argument into the caller's argument place (stored somewhere further +// up the stack), and forward that place. + +extern crate minicore; +use minicore::*; + +#[repr(C)] +struct S { + x: u64, + y: u64, + z: u64, +} + +unsafe extern "C" { + safe fn force_usage(_: u64, _: u64, _: u64) -> u64; +} + +// CHECK-LABEL: callee: +// CHECK-NEXT: .cfi_startproc +// +// CHECK-NEXT: mov rax, qword ptr [rdi] +// CHECK-NEXT: mov rsi, qword ptr [rdi + 8] +// CHECK-NEXT: mov rdx, qword ptr [rdi + 16] +// CHECK-NEXT: mov rdi, rax +// +// CHECK-NEXT: jmp qword ptr [rip + force_usage@GOTPCREL] +#[inline(never)] +#[unsafe(no_mangle)] +fn callee(s: S) -> u64 { + force_usage(s.x, s.y, s.z) +} + +// CHECK-LABEL: caller1: +// CHECK-NEXT: .cfi_startproc +// +// Just forward the argument: +// +// CHECK-NEXT: jmp qword ptr [rip + callee@GOTPCREL] +#[unsafe(no_mangle)] +fn caller1(s: S) -> u64 { + become callee(s); +} + +// CHECK-LABEL: caller2: +// CHECK-NEXT: .cfi_startproc +// +// Construct the S value directly into the argument slot: +// +// CHECK-NEXT: mov qword ptr [rdi], 1 +// CHECK-NEXT: mov qword ptr [rdi + 8], 2 +// CHECK-NEXT: mov qword ptr [rdi + 16], 3 +// +// CHECK-NEXT: jmp qword ptr [rip + callee@GOTPCREL] +#[unsafe(no_mangle)] +fn caller2(_: S) -> u64 { + let s = S { x: 1, y: 2, z: 3 }; + become callee(s); +} diff --git a/tests/codegen-llvm/tail-call-u128.rs b/tests/codegen-llvm/tail-call-u128.rs new file mode 100644 index 0000000000000..25dfc47a2bfec --- /dev/null +++ b/tests/codegen-llvm/tail-call-u128.rs @@ -0,0 +1,41 @@ +//@ add-minicore +//@ revisions: win linux +//@ min-llvm-version: 22 +// +//@ compile-flags: -Copt-level=3 +//@[linux] compile-flags: --target x86_64-unknown-linux-gnu +//@[linux] needs-llvm-components: x86 +//@[win] compile-flags: --target x86_64-pc-windows-msvc +//@[win] needs-llvm-components: x86 + +#![crate_type = "lib"] +#![feature(no_core, lang_items, explicit_tail_calls)] +#![allow(incomplete_features)] +#![no_core] + +// Test passing of i128/u128, which is passed directly on x86, but indirectly on win64. + +extern crate minicore; +use minicore::*; + +// linux: define noundef i128 @foo(i128 noundef %a, i128 noundef %b) +// win: define <16 x i8> @foo(ptr {{.*}} %a, ptr {{.*}} %b) +#[unsafe(no_mangle)] +#[inline(never)] +extern "C" fn foo(a: u128, b: u128) -> u128 { + // linux: start: + // linux-NEXT: musttail call noundef i128 @bar(i128 noundef %b, i128 noundef %a) + // + // + // win: start: + // win-NEXT: %0 = load i128, ptr %b + // win-NEXT: %1 = load i128, ptr %a + // win-NEXT: store i128 %0, ptr %a + // win-NEXT: store i128 %1, ptr %b + // win-NEXT: musttail call <16 x i8> @bar(ptr {{.*}} %a, ptr {{.*}} %b) + become bar(b, a) +} + +unsafe extern "C" { + safe fn bar(a: u128, b: u128) -> u128; +} diff --git a/tests/crashes/144293-indirect-ops-llvm.rs b/tests/crashes/144293-indirect-ops-llvm.rs deleted file mode 100644 index 490a0116d7d21..0000000000000 --- a/tests/crashes/144293-indirect-ops-llvm.rs +++ /dev/null @@ -1,42 +0,0 @@ -//@ known-bug: #144293 -// Same as recursion-etc but eggs LLVM emission into giving indirect arguments. -#![expect(incomplete_features)] -#![feature(explicit_tail_calls)] - -use std::hint::black_box; - -struct U64Wrapper { - pub x: u64, - pub arbitrary: String, -} - -fn count(curr: U64Wrapper, top: U64Wrapper) -> U64Wrapper { - if black_box(curr.x) >= top.x { - curr - } else { - become count( - U64Wrapper { - x: curr.x + 1, - arbitrary: curr.arbitrary, - }, - top, - ) - } -} - -fn main() { - println!( - "{}", - count( - U64Wrapper { - x: 0, - arbitrary: "hello!".into() - }, - black_box(U64Wrapper { - x: 1000000, - arbitrary: "goodbye!".into() - }) - ) - .x - ); -} diff --git a/tests/ui/explicit-tail-calls/indirect.rs b/tests/ui/explicit-tail-calls/indirect.rs new file mode 100644 index 0000000000000..537976b7f6f43 --- /dev/null +++ b/tests/ui/explicit-tail-calls/indirect.rs @@ -0,0 +1,68 @@ +//@ run-pass +//@ ignore-backends: gcc +// +//@ ignore-wasm +//@ ignore-riscv64 +#![feature(explicit_tail_calls)] +#![expect(incomplete_features)] + +// Test tail calls with `PassMode::Indirect { on_stack: false, .. }` arguments. +// +// Normally an indirect argument with `on_stack: false` would be passed as a pointer to the +// caller's stack frame. For tail calls, that would be unsound, because the caller's stack +// frame is overwritten by the callee's stack frame. +// +// The solution is to write the argument into the caller's argument place (stored somewhere further +// up the stack), and forward that place. + +// A struct big enough that it is not passed via registers, so that the rust calling convention uses +// `Indirect { on_stack: false, .. }`. +#[repr(C)] +#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)] +pub struct Big([u64; 4]); + +#[inline(never)] +fn update_in_caller(y: Big) -> u64 { + #[inline(never)] + fn helper(x: Big) -> u64 { + x.0.iter().sum() + } + + let x = Big([y.0[0], 2, 3, 4]); + + // `x` is actually stored in `y`'s space. + become helper(x) +} + +#[inline(never)] +fn swapper(a: T, b: T) -> (T, T) { + #[inline(never)] + fn helper(a: T, b: T) -> (T, T) { + (a, b) + } + + become helper(b, a) +} + +#[inline(never)] +fn swapper_derived(a: Big, _: (u64, u64), b: Big, _: (u64, u64)) -> ((u64, u64), (u64, u64)) { + #[inline(never)] + fn helper(_: Big, x: (u64, u64), _: Big, y: (u64, u64)) -> ((u64, u64), (u64, u64)) { + (x, y) + } + + // Read the values at various points in the swapping process, testing that they have the correct + // value at every point. + become helper(b, (a.0[0], b.0[0]), a, (a.0[0], b.0[0])); +} + +fn main() { + assert_eq!(update_in_caller(Big::default()), 0 + 2 + 3 + 4); + + assert_eq!(swapper(u8::MIN, u8::MAX), (u8::MAX, u8::MIN)); + // i128 uses `PassMode::Indirect { on_stack: false, .. }` on x86_64 MSVC. + assert_eq!(swapper(i128::MIN, i128::MAX), (i128::MAX, i128::MIN)); + assert_eq!(swapper(Big([1; 4]), Big([2; 4])), (Big([2; 4]), Big([1; 4]))); + + assert_eq!(swapper_derived(Big([1; 4]), (0, 0), Big([2; 4]), (0, 0)), ((1, 2), (1, 2))); +}