Skip to content

Missed optimization with NonNull::new then pointer::wrapping_byte_add(1) then NonNull::new then Option::unwrap then NonNull::len in some situtations #120440

Closed
@zachs18

Description

@zachs18
Contributor

(Title is a mess, but I didn't really find a simpler reproducer)

I tried this code:

use std::ptr::NonNull;

#[inline(never)]
pub fn slice_ptr_len_1(ptr: *const [u8]) -> usize {
  let ptr = ptr.cast_mut();
  if let Some(ptr) = NonNull::new(ptr) {
    ptr.len()
  } else {
    // We know ptr is null, so we know ptr.wrapping_byte_add(1) is not null.
    NonNull::new(ptr.wrapping_byte_add(1)).unwrap().len()
  }
}

#[inline(never)]
pub fn slice_ptr_len_2(ptr: *const [u8]) -> usize {
  let ptr = ptr.cast_mut();
  if let Some(ptr) = NonNull::new(ptr) {
    ptr.len()
  } else {
    // We know ptr is null, so we know ptr.wrapping_byte_add(1) is not null.
    NonNull::new(ptr.wrapping_byte_add(1)).unwrap_or(NonNull::from(&[])).len()
  }
}

#[inline(never)]
pub fn slice_ptr_len_3(ptr: *const [u8]) -> usize {
    fn some_nonnull_slice_ptr(ptr: *const [u8]) -> NonNull<[u8]> {
        let ptr = ptr.cast_mut();
        if let Some(ptr) = NonNull::new(ptr) {
            ptr
        } else {
            // We know ptr is null, so we know ptr.wrapping_byte_add(1) is not null.
            NonNull::new(ptr.wrapping_byte_add(1)).unwrap()
        }
    }
    some_nonnull_slice_ptr(ptr).len()
}

#[used]
static _SHOW_DEDUP: [fn(*const [u8]) -> usize; 3] = [
    slice_ptr_len_1,
    slice_ptr_len_2,
    slice_ptr_len_3,
];

(Slice element type u8 is not important. Context: stable polyfill for <*const [T]>::len. slice_ptr_len_3 is essentially just slice_ptr_len_1 but with the .len() factored out into a separate function.)

I expected to see this happen: With optimizations, all three fns compile to roughly the same asm, with no panic or branches.

Instead, this happened: The first has a(n unreachable) panic branch, the second and third do not.

Godbolt link

example::slice_ptr_len_1:
  lea rax, [rdi + 1]
  or rax, rdi
  je .LBB0_2
  mov rax, rsi
  ret
.LBB0_2:
  push rax
  lea rdi, [rip + .L__unnamed_1]
  call qword ptr [rip + core::option::unwrap_failed@GOTPCREL]

example::slice_ptr_len_2:
  mov rax, rsi
  ret

// snip: panic payload etc

example::_SHOW_DEDUP:
  .quad example::slice_ptr_len_1
  .quad example::slice_ptr_len_2
  .quad example::slice_ptr_len_2 // slice_ptr_len_3 is dedup'd to slice_ptr_len_2

(tested on nightly x86_64-unknown-linux-gnu, similar on other versions/targets. // comments mine)

LLVM IR

rustc -OO --emit=llvm-ir

@alloc_9be5c135c0f7c91e35e471f025924b11 = private unnamed_addr constant <{ [15 x i8] }> <{ [15 x i8] c"/app/example.rs" }>, align 1
@alloc_a22f3c8c001f7e83bbc8418ba69a40e0 = private unnamed_addr constant <{ ptr, [16 x i8] }> <{ ptr @alloc_9be5c135c0f7c91e35e471f025924b11, [16 x i8] c"\0F\00\00\00\00\00\00\00\09\00\00\00,\00\00\00" }>, align 8
@example::_SHOW_DEDUP = constant <{ ptr, ptr, ptr }> <{ ptr @example::slice_ptr_len_1, ptr @example::slice_ptr_len_2, ptr @example::slice_ptr_len_2 }>, align 8
@llvm.compiler.used = appending global [1 x ptr] [ptr @example::_SHOW_DEDUP], section "llvm.metadata"

@example::slice_ptr_len_3 = unnamed_addr alias i64 (ptr, i64), ptr @example::slice_ptr_len_2

define noundef i64 @example::slice_ptr_len_1(ptr noundef readnone %ptr.0, i64 noundef returned %ptr.1) unnamed_addr #0 !dbg !7 {
start:
  %0 = icmp eq ptr %ptr.0, null, !dbg !12
  %1 = getelementptr i8, ptr %ptr.0, i64 1
  %2 = icmp eq ptr %1, null
  %or.cond = and i1 %0, %2, !dbg !23
  br i1 %or.cond, label %bb11, label %bb3, !dbg !23

bb3:
  ret i64 %ptr.1, !dbg !24

bb11:
  tail call void @core::option::unwrap_failed(ptr noalias noundef nonnull readonly align 8 dereferenceable(24) @alloc_a22f3c8c001f7e83bbc8418ba69a40e0) #4, !dbg !25
  unreachable, !dbg !25
}

define noundef i64 @example::slice_ptr_len_2(ptr nocapture readnone %ptr.0, i64 noundef returned %ptr.1) unnamed_addr #1 !dbg !32 {
start:
  ret i64 %ptr.1, !dbg !33
}

define noundef i64 @example::asdf() unnamed_addr #2 !dbg !34 {
start:
  ret i64 4294967295, !dbg !35
}

declare void @core::option::unwrap_failed(ptr noalias noundef readonly align 8 dereferenceable(24)) unnamed_addr #3

attributes #0 = { nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #1 = { mustprogress nofree norecurse nosync nounwind nonlazybind willreturn memory(none) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #2 = { mustprogress nofree noinline norecurse nosync nounwind nonlazybind willreturn memory(none) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #3 = { cold noinline noreturn nonlazybind uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #4 = { noreturn }

Meta

rustc --version --verbose:

$ rustc +nightly --version --verbose
rustc 1.77.0-nightly (6b4f1c5e7 2024-01-27)
binary: rustc
commit-hash: 6b4f1c5e782c72a047a23e922decd33e7d462345
commit-date: 2024-01-27
host: x86_64-unknown-linux-gnu
release: 1.77.0-nightly
LLVM version: 17.0.6
$ rustc +stable --version --verbose
rustc 1.75.0 (82e1608df 2023-12-21)
binary: rustc
commit-hash: 82e1608dfa6e0b5569232559e3d385fea5a93112
commit-date: 2023-12-21
host: x86_64-unknown-linux-gnu
release: 1.75.0
LLVM version: 17.0.6

(result is the roughly same on stable and nightly. wrapping_byte_add was stabilized in 1.75.0, so this doesn't compile with stable versions before that)

Activity

added
needs-triageThis issue may need triage. Remove it if it has been sufficiently triaged.
on Jan 28, 2024
zachs18

zachs18 commented on Jan 28, 2024

@zachs18
Author
added
C-optimizationCategory: An issue highlighting optimization opportunities or PRs implementing such
and removed
C-bugCategory: This is a bug.
on Jan 28, 2024
changed the title [-]Missed optimization with `NonNull::new` then `pointer::wrapping_byte_offset(1)` then `NonNull::new` then `NonNull::len` in some situtations[/-] [+]Missed optimization with `NonNull::new` then `pointer::wrapping_byte_add(1)` then `NonNull::new` then `NonNull::len` in some situtations[/+] on Jan 28, 2024
changed the title [-]Missed optimization with `NonNull::new` then `pointer::wrapping_byte_add(1)` then `NonNull::new` then `NonNull::len` in some situtations[/-] [+]Missed optimization with `NonNull::new` then `pointer::wrapping_byte_add(1)` then `NonNull::new` then `Option::unwrap` then `NonNull::len` in some situtations[/+] on Jan 28, 2024
added
llvm-fixed-upstreamIssue expected to be fixed by the next major LLVM upgrade, or backported fixes
A-LLVMArea: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.
I-slowIssue: Problems and improvements with respect to performance of generated code.
and removed
needs-triageThis issue may need triage. Remove it if it has been sufficiently triaged.
on Jan 28, 2024
nikic

nikic commented on Jan 28, 2024

@nikic
Contributor

I believe this is fixed in LLVM 18.

added
E-needs-testCall for participation: An issue has been fixed and does not reproduce, but no test has been added.
on Feb 14, 2024
nikic

nikic commented on Feb 14, 2024

@nikic
Contributor

Confirmed fixed by #120055, needs codegen test.

removed
llvm-fixed-upstreamIssue expected to be fixed by the next major LLVM upgrade, or backported fixes
on Feb 14, 2024

11 remaining items

Loading
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Metadata

Metadata

Assignees

No one assigned

    Labels

    A-LLVMArea: Code generation parts specific to LLVM. Both correctness bugs and optimization-related issues.C-optimizationCategory: An issue highlighting optimization opportunities or PRs implementing suchE-needs-testCall for participation: An issue has been fixed and does not reproduce, but no test has been added.I-slowIssue: Problems and improvements with respect to performance of generated code.

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

      Development

      Participants

      @nikic@zachs18@rustbot

      Issue actions

        Missed optimization with `NonNull::new` then `pointer::wrapping_byte_add(1)` then `NonNull::new` then `Option::unwrap` then `NonNull::len` in some situtations · Issue #120440 · rust-lang/rust