Skip to content

Dead jmp at end of function after 4109bac3301eb7b7033eec3c8e8107be8cad9bc9 #167774

@nathanchance

Description

@nathanchance

After commit 4109bac3301e ("[IR] Do not store Function inside BlockAddress (#137958)"), I am seeing a (leftover?) dead jump in radeon_bo_list_validate() when building the Linux kernel, which causes the kernel's objtool to complain about an unreachable instruction.

Full radeon_object.i

$ install/llvm-good/bin/clang --version | head -1
ClangBuiltLinux clang version 21.0.0git (https://github.com/llvm/llvm-project.git ff28e1a5a92da380c2869aba09971687c26d2f0f)

$ install/llvm-good/bin/clang --target=x86_64-linux-gnu -O2 -w -c -o good-radeon_object.o radeon_object.i

$ build/linux/good/tools/objtool/objtool --uaccess good-radeon_object.o
$ install/llvm-bad/bin/clang --version | head -1
ClangBuiltLinux clang version 21.0.0git (https://github.com/llvm/llvm-project.git 4109bac3301eb7b7033eec3c8e8107be8cad9bc9)

$ install/llvm-bad/bin/clang --target=x86_64-linux-gnu -O2 -w -c -o bad-radeon_object.o radeon_object.i

$ build/linux/good/tools/objtool/objtool --uaccess bad-radeon_object.o
bad-radeon_object.o: warning: objtool: radeon_bo_list_validate+0x228: unreachable instruction

The diff of llvm-objdump -dr --disassemble-symbols=radeon_bo_list_validate between the good and bad object files:

diff --git a/tmp/.psub.1YIAH3 b/tmp/.psub.4rVRuG
index 2773db7..b49bcc1 100644
--- a/tmp/.psub.1YIAH3
+++ b/tmp/.psub.4rVRuG
@@ -1,5 +1,5 @@
 
-good-radeon_object.o:	file format elf64-x86-64
+bad-radeon_object.o:	file format elf64-x86-64
 
 Disassembly of section .text:
 
@@ -157,4 +157,5 @@ Disassembly of section .text:
      ce4: 41 5f                        	popq	%r15
      ce6: 5d                           	popq	%rbp
      ce7: c3                           	retq
-     ce8: 0f 1f 84 00 00 00 00 00      	nopl	(%rax,%rax)
+     ce8: e9 41 fe ff ff               	jmp	0xb2e <radeon_bo_list_validate+0x6e>
+     ced: 0f 1f 00                     	nopl	(%rax)

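However, the IR does not appear to differ in any meaningful way; the only changes are a renamed basic block (`__drm_exec_476.loopexit` became `__drm_exec_476`) and the compiler version string: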

diff --git a/build/linux/good/drivers/gpu/drm/radeon/radeon_object.ll b/build/linux/bad/drivers/gpu/drm/radeon/radeon_object.ll
index cee5120..5ed5015 100644
--- a/build/linux/good/drivers/gpu/drm/radeon/radeon_object.ll
+++ b/build/linux/bad/drivers/gpu/drm/radeon/radeon_object.ll
@@ -1111,7 +1111,7 @@ for.cond3.preheader.lr.ph.lr.ph:                  ; preds = %entry
   %contended.i = getelementptr i8, ptr %exec, i64 48
   br label %for.cond3.preheader
 
-__drm_exec_476.loopexit:                          ; preds = %for.body5
+__drm_exec_476:                                   ; preds = %for.body5
   %call1147 = tail call zeroext i1 @drm_exec_cleanup(ptr noundef %exec) #11
   br i1 %call1147, label %for.cond3.preheader.backedge, label %for.cond32.preheader
 
@@ -1119,7 +1119,7 @@ for.cond.loopexit:                                ; preds = %for.inc, %for.cond3
   %call1 = tail call zeroext i1 @drm_exec_cleanup(ptr noundef %exec) #11
   br i1 %call1, label %for.cond3.preheader.backedge, label %for.cond32.preheader
 
-for.cond3.preheader.backedge:                     ; preds = %for.cond.loopexit, %__drm_exec_476.loopexit
+for.cond3.preheader.backedge:                     ; preds = %for.cond.loopexit, %__drm_exec_476
   br label %for.cond3.preheader, !llvm.loop !17
 
 for.cond3.preheader:                              ; preds = %for.cond3.preheader.backedge, %for.cond3.preheader.lr.ph.lr.ph
@@ -1127,7 +1127,7 @@ for.cond3.preheader:                              ; preds = %for.cond3.preheader
   %cmp.i.not145 = icmp eq ptr %.pn129144, %head
   br i1 %cmp.i.not145, label %for.cond.loopexit, label %for.body5
 
-for.cond32.preheader:                             ; preds = %__drm_exec_476.loopexit, %for.cond.loopexit, %entry
+for.cond32.preheader:                             ; preds = %__drm_exec_476, %for.cond.loopexit, %entry
   %.pn149 = load ptr, ptr %head, align 8
   %cmp.i136.not150 = icmp eq ptr %.pn149, %head
   br i1 %cmp.i136.not150, label %return, label %for.body38.lr.ph
@@ -1145,7 +1145,7 @@ for.body5:                                        ; preds = %for.cond3.preheader
   %call6 = tail call i32 @drm_exec_prepare_obj(ptr noundef %exec, ptr noundef %tbo, i32 noundef 1) #11
   %3 = load ptr, ptr %contended.i, align 8
   %tobool.i.not = icmp eq ptr %3, null
-  br i1 %tobool.i.not, label %do.end, label %__drm_exec_476.loopexit, !prof !5
+  br i1 %tobool.i.not, label %do.end, label %__drm_exec_476, !prof !5
 
 do.end:                                           ; preds = %for.body5
   switch i32 %call6, label %return [
@@ -1941,7 +1941,7 @@ attributes #15 = { cold noredzone nounwind "no-builtin-wcslen" }
 !1 = !{i32 1, !"Code Model", i32 2}
 !2 = !{i32 1, !"override-stack-alignment", i32 8}
 !3 = !{i32 4, !"SkipRaxSetup", i32 1}
-!4 = !{!"ClangBuiltLinux clang version 21.0.0git (https://github.com/llvm/llvm-project.git ff28e1a5a92da380c2869aba09971687c26d2f0f)"}
+!4 = !{!"ClangBuiltLinux clang version 21.0.0git (https://github.com/llvm/llvm-project.git 4109bac3301eb7b7033eec3c8e8107be8cad9bc9)"}
 !5 = !{!"branch_weights", !"expected", i32 2000, i32 1}
 !6 = !{i64 2158211981, i64 2158212007, i64 2158212327, i64 2158212382, i64 2158212434, i64 2158212486, i64 2158212087, i64 2158212119, i64 2158212193, i64 2158212248, i64 2158212273, i64 2158212298, i64 2158212322}
 !7 = distinct !{!7, !8}

I was able to reduce this to the following test case with cvise:

/*
 * File-scope declarations used by the reduced radeon_bo_list_validate()
 * reproducer. Everything here is tool-reduced output; it is kept verbatim
 * so the test case continues to reproduce.
 */

/* Note: despite the name, this typedef is a signed `long`. */
typedef long u64;
/* Memberless structs; only their addresses are passed to ttm_bo_validate(). */
struct ttm_placement {};
struct ttm_buffer_object {};
/* Evaluated as the outer loop condition and passed to drm_exec_is_contended(). */
int *radeon_bo_list_validate_exec;
/* Object whose .tbo and .placement addresses are passed to ttm_bo_validate(). */
struct {
  struct ttm_placement placement;
  struct ttm_buffer_object tbo;
} radeon_bo_list_validate_bo;
/* Receives the result of drm_exec_is_contended(). */
_Bool radeon_bo_list_validate___trans_tmp_1;
/* Counters read and updated in the second loop of the function. */
u64 radeon_bo_list_validate_bytes_moved = 0,
    radeon_bo_list_validate_initial_bytes_moved,
    radeon_bo_list_validate_bytes_moved_threshold = 0;
/* Pointer to an anonymous struct; the leading `;` is a stray empty declaration. */
struct {
  ;
  unsigned preferred_domains;
  unsigned allowed_domains;
} *radeon_bo_list_validate_lobj;
/* ctx is passed by address to ttm_bo_validate(); r receives its return value. */
int radeon_bo_list_validate_ctx, radeon_bo_list_validate_r;
/* External functions: declared only, no definition in this test case. */
int ttm_bo_validate(struct ttm_buffer_object *, struct ttm_placement *, int *);
_Bool drm_exec_is_contended(int *);
/*
 * Reduced reproducer body. It relies on GNU C extensions: statement
 * expressions `({ ... })`, address-of-label `&&label`, and computed
 * `goto *ptr`.
 *
 * Control flow as written:
 *   1. An outer loop stores the address of the label __drm_exec_476 in a
 *      local pointer and keeps running while radeon_bo_list_validate_exec
 *      is non-null. Its body is an endless loop that can only be left via
 *      the computed goto, which jumps back to that label.
 *   2. A second endless loop calls ttm_bo_validate() through the `retry`
 *      label.
 * There is no return statement and neither endless loop contains a break,
 * so no path through this code reaches the closing brace.
 */
void radeon_bo_list_validate() {
/* Target of the computed goto below; its address is taken at the `&&` use. */
__drm_exec_476:
  /* The for-condition is a statement expression: it (re)assigns the retry
     pointer on every evaluation and yields radeon_bo_list_validate_exec as
     the loop test. There is no increment clause. */
  for (void *__drm_exec_retry_ptr; ({
         __drm_exec_retry_ptr = &&__drm_exec_476;
         radeon_bo_list_validate_exec;
       });)
    for (;;)
      do {
        radeon_bo_list_validate___trans_tmp_1 =
            drm_exec_is_contended(radeon_bo_list_validate_exec);
        /* __builtin_expect(..., 0) marks the contended case as unlikely;
           when taken, the indirect goto jumps back to __drm_exec_476. */
        if (__builtin_expect(radeon_bo_list_validate___trans_tmp_1, 0))
          goto *__drm_exec_retry_ptr;
      } while (0);
  /* The init clause sets the list pointer to null and the body then reads
     through it; this null dereference is an artifact of the reduction. */
  for (radeon_bo_list_validate_lobj = 0;;) {
    int domain = radeon_bo_list_validate_lobj->preferred_domains,
        allowed = radeon_bo_list_validate_lobj->allowed_domains,
        current_domain = 0;
    /* current_domain is 0, so `allowed & current_domain` is always 0 and
       the `if` never takes its body; the labelled assignment is therefore
       only reached through `goto retry`. */
    if (allowed & current_domain &&
        radeon_bo_list_validate_bytes_moved >
            radeon_bo_list_validate_bytes_moved_threshold)
    retry:
      radeon_bo_list_validate_initial_bytes_moved = radeon_bo_list_validate_r =
          ttm_bo_validate(&radeon_bo_list_validate_bo.tbo,
                          &radeon_bo_list_validate_bo.placement,
                          &radeon_bo_list_validate_ctx);
    radeon_bo_list_validate_bytes_moved +=
        radeon_bo_list_validate_initial_bytes_moved;
    /* Jumps back into the body of the `if` above, bypassing its condition. */
    if (domain)
      goto retry;
  }
}

which exhibits the same behavior as above:

$ install/llvm-good/bin/clang --target=x86_64-linux-gnu -O2 -w -c -o good-radeon_object.o radeon_object.i

$ install/llvm-bad/bin/clang --target=x86_64-linux-gnu -O2 -w -c -o bad-radeon_object.o radeon_object.i

$ build/linux/good/tools/objtool/objtool --uaccess good-radeon_object.o

$ build/linux/good/tools/objtool/objtool --uaccess bad-radeon_object.o
bad-radeon_object.o: warning: objtool: radeon_bo_list_validate+0x62: unreachable instruction
diff --git a/tmp/.psub.8s3ccz b/tmp/.psub.fPWVlL
index 9684da6..10aab15 100644
--- a/tmp/.psub.8s3ccz
+++ b/tmp/.psub.fPWVlL
@@ -1,5 +1,5 @@
 
-good-radeon_object.o:	file format elf64-x86-64
+bad-radeon_object.o:	file format elf64-x86-64
 
 Disassembly of section .text:
 
@@ -27,3 +27,4 @@ Disassembly of section .text:
 		000000000000004d:  R_X86_64_PC32	radeon_bo_list_validate_bytes_moved-0x4
       51: 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 	nopw	%cs:(%rax,%rax)
       60: eb fe                        	jmp	0x60 <radeon_bo_list_validate+0x60>
+      62: eb ac                        	jmp	0x10 <radeon_bo_list_validate+0x10>

It may be possible to work around this in objtool, but it seems suboptimal to have this dead code emitted in the first place.

cc @nikic

Metadata

Metadata

Assignees

No one assigned

    Labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions