Commit 6e948da
new code

1 parent 6ba0ce4 commit 6e948da

File tree: 3 files changed, +148 -57 lines changed

compiler/rustc_codegen_llvm/src/builder/gpu_offload.rs

Lines changed: 97 additions & 35 deletions
@@ -18,21 +18,41 @@ pub(crate) fn handle_gpu_code<'ll>(
     // The offload memory transfer type for each kernel
     let mut o_types = vec![];
     let mut kernels = vec![];
+    let mut region_ids = vec![];
     let offload_entry_ty = add_tgt_offload_entry(&cx);
     for num in 0..9 {
         let kernel = cx.get_function(&format!("kernel_{num}"));
         if let Some(kernel) = kernel {
-            o_types.push(gen_define_handling(&cx, kernel, offload_entry_ty, num));
+            let (o, k) = gen_define_handling(&cx, kernel, offload_entry_ty, num);
+            o_types.push(o);
+            region_ids.push(k);
             kernels.push(kernel);
         }
     }

-    gen_call_handling(&cx, &kernels, &o_types);
+    gen_call_handling(&cx, &kernels, &o_types, &region_ids);
+}
+
+// ; Function Attrs: nounwind
+// declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr) #2
+fn generate_launcher<'ll>(cx: &'ll SimpleCx<'_>) -> (&'ll llvm::Value, &'ll llvm::Type) {
+    let tptr = cx.type_ptr();
+    let ti64 = cx.type_i64();
+    let ti32 = cx.type_i32();
+    let args = vec![tptr, ti64, ti32, ti32, tptr, tptr];
+    let tgt_fn_ty = cx.type_func(&args, ti32);
+    let name = "__tgt_target_kernel";
+    let tgt_decl = declare_offload_fn(&cx, name, tgt_fn_ty);
+    let nounwind = llvm::AttributeKind::NoUnwind.create_attr(cx.llcx);
+    attributes::apply_to_llfn(tgt_decl, Function, &[nounwind]);
+    (tgt_decl, tgt_fn_ty)
 }

 // What is our @1 here? A magic global, used in our data_{begin/update/end}_mapper:
 // @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
 // @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8
+// FIXME(offload): @0 should include the file name (e.g. lib.rs) in which the function to be
+// offloaded was defined.
 fn generate_at_one<'ll>(cx: &'ll SimpleCx<'_>) -> &'ll llvm::Value {
     // @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
     let unknown_txt = ";unknown;unknown;0;0;;";
@@ -83,7 +103,7 @@ pub(crate) fn add_tgt_offload_entry<'ll>(cx: &'ll SimpleCx<'_>) -> &'ll llvm::Ty
     offload_entry_ty
 }

-fn gen_tgt_kernel_global<'ll>(cx: &'ll SimpleCx<'_>) {
+fn gen_tgt_kernel_global<'ll>(cx: &'ll SimpleCx<'_>) -> &'ll llvm::Type {
     let kernel_arguments_ty = cx.type_named_struct("struct.__tgt_kernel_arguments");
     let tptr = cx.type_ptr();
     let ti64 = cx.type_i64();
@@ -107,7 +127,7 @@ fn gen_tgt_kernel_global<'ll>(cx: &'ll SimpleCx<'_>) {
     // uint64_t NoWait : 1; // Was this kernel spawned with a `nowait` clause.
     // uint64_t IsCUDA : 1; // Was this kernel spawned via CUDA.
     // uint64_t Unused : 62;
-    // } Flags = {0, 0, 0};
+    // } Flags = {0, 0, 0}; // totals to 64 Bit, 8 Byte
     // // The number of teams (for x,y,z dimension).
     // uint32_t NumTeams[3] = {0, 0, 0};
     // // The number of threads (for x,y,z dimension).
@@ -118,9 +138,7 @@ fn gen_tgt_kernel_global<'ll>(cx: &'ll SimpleCx<'_>) {
         vec![ti32, ti32, tptr, tptr, tptr, tptr, tptr, tptr, ti64, ti64, tarr, tarr, ti32];

     cx.set_struct_body(kernel_arguments_ty, &kernel_elements, false);
-    // For now we don't handle kernels, so for now we just add a global dummy
-    // to make sure that the __tgt_offload_entry is defined and handled correctly.
-    cx.declare_global("my_struct_global2", kernel_arguments_ty);
+    kernel_arguments_ty
 }

 fn gen_tgt_data_mappers<'ll>(
@@ -187,7 +205,7 @@ fn gen_define_handling<'ll>(
     kernel: &'ll llvm::Value,
     offload_entry_ty: &'ll llvm::Type,
     num: i64,
-) -> &'ll llvm::Value {
+) -> (&'ll llvm::Value, &'ll llvm::Value) {
     let types = cx.func_params_types(cx.get_type_of_global(kernel));
     // It seems like non-pointer values are automatically mapped. So here, we focus on pointer (or
     // reference) types.
@@ -205,10 +223,14 @@ fn gen_define_handling<'ll>(
     // or both to and from the gpu (=3). Other values shouldn't affect us for now.
     // A non-mutable reference or pointer will be 1, an array that's not read, but fully overwritten
     // will be 2. For now, everything is 3, until we have our frontend set up.
-    let o_types =
-        add_priv_unnamed_arr(&cx, &format!(".offload_maptypes.{num}"), &vec![3; num_ptr_types]);
+    // 1+2+32: 1 (MapTo), 2 (MapFrom), 32 (Add one extra input ptr per function, to be used later).
+    let o_types = add_priv_unnamed_arr(
+        &cx,
+        &format!(".offload_maptypes.{num}"),
+        &vec![1 + 2 + 32; num_ptr_types],
+    );
     // Next: For each function, generate these three entries. A weak constant,
-    // the llvm.rodata entry name, and the omp_offloading_entries value
+    // the llvm.rodata entry name, and the llvm_offload_entries value

     let name = format!(".kernel_{num}.region_id");
     let initializer = cx.get_const_i8(0);
@@ -242,13 +264,13 @@ fn gen_define_handling<'ll>(
     llvm::set_global_constant(llglobal, true);
     llvm::set_linkage(llglobal, WeakAnyLinkage);
     llvm::set_initializer(llglobal, initializer);
-    llvm::set_alignment(llglobal, Align::ONE);
-    let c_section_name = CString::new(".omp_offloading_entries").unwrap();
+    llvm::set_alignment(llglobal, Align::EIGHT);
+    let c_section_name = CString::new("llvm_offload_entries").unwrap();
     llvm::set_section(llglobal, &c_section_name);
-    o_types
+    (o_types, region_id)
 }

-fn declare_offload_fn<'ll>(
+pub(crate) fn declare_offload_fn<'ll>(
     cx: &'ll SimpleCx<'_>,
     name: &str,
     ty: &'ll llvm::Type,
@@ -287,15 +309,17 @@ fn gen_call_handling<'ll>(
     cx: &'ll SimpleCx<'_>,
     _kernels: &[&'ll llvm::Value],
     o_types: &[&'ll llvm::Value],
+    region_ids: &[&'ll llvm::Value],
 ) {
+    let (tgt_decl, tgt_target_kernel_ty) = generate_launcher(&cx);
     // %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
     let tptr = cx.type_ptr();
     let ti32 = cx.type_i32();
     let tgt_bin_desc_ty = vec![ti32, tptr, tptr, tptr];
     let tgt_bin_desc = cx.type_named_struct("struct.__tgt_bin_desc");
     cx.set_struct_body(tgt_bin_desc, &tgt_bin_desc_ty, false);

-    gen_tgt_kernel_global(&cx);
+    let tgt_kernel_decl = gen_tgt_kernel_global(&cx);
     let (begin_mapper_decl, _, end_mapper_decl, fn_ty) = gen_tgt_data_mappers(&cx);

     let main_fn = cx.get_function("main");
@@ -329,35 +353,32 @@ fn gen_call_handling<'ll>(
     // These represent the sizes in bytes, e.g. the entry for `&[f64; 16]` will be 8*16.
     let ty2 = cx.type_array(cx.type_i64(), num_args);
     let a4 = builder.direct_alloca(ty2, Align::EIGHT, ".offload_sizes");
+
+    //%kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
+    let a5 = builder.direct_alloca(tgt_kernel_decl, Align::EIGHT, "kernel_args");
+
+    // Step 1)
+    unsafe { llvm::LLVMRustPositionBefore(builder.llbuilder, kernel_call) };
+    builder.memset(tgt_bin_desc_alloca, cx.get_const_i8(0), cx.get_const_i64(32), Align::EIGHT);
+
     // Now we allocate once per function param, a copy to be passed to one of our maps.
     let mut vals = vec![];
     let mut geps = vec![];
     let i32_0 = cx.get_const_i32(0);
-    for (index, in_ty) in types.iter().enumerate() {
-        // get function arg, store it into the alloca, and read it.
-        let p = llvm::get_param(called, index as u32);
-        let name = llvm::get_value_name(p);
-        let name = str::from_utf8(&name).unwrap();
-        let arg_name = format!("{name}.addr");
-        let alloca = builder.direct_alloca(in_ty, Align::EIGHT, &arg_name);
-
-        builder.store(p, alloca, Align::EIGHT);
-        let val = builder.load(in_ty, alloca, Align::EIGHT);
-        let gep = builder.inbounds_gep(cx.type_f32(), val, &[i32_0]);
-        vals.push(val);
+    for index in 0..types.len() {
+        let v = unsafe { llvm::LLVMGetOperand(kernel_call, index as u32).unwrap() };
+        let gep = builder.inbounds_gep(cx.type_f32(), v, &[i32_0]);
+        vals.push(v);
         geps.push(gep);
     }

-    // Step 1)
-    unsafe { llvm::LLVMRustPositionBefore(builder.llbuilder, kernel_call) };
-    builder.memset(tgt_bin_desc_alloca, cx.get_const_i8(0), cx.get_const_i64(32), Align::EIGHT);
-
     let mapper_fn_ty = cx.type_func(&[cx.type_ptr()], cx.type_void());
     let register_lib_decl = declare_offload_fn(&cx, "__tgt_register_lib", mapper_fn_ty);
     let unregister_lib_decl = declare_offload_fn(&cx, "__tgt_unregister_lib", mapper_fn_ty);
     let init_ty = cx.type_func(&[], cx.type_void());
     let init_rtls_decl = declare_offload_fn(cx, "__tgt_init_all_rtls", init_ty);

+    // FIXME(offload): Later we want to add them to the wrapper code, rather than our main function.
     // call void @__tgt_register_lib(ptr noundef %6)
     builder.call(mapper_fn_ty, register_lib_decl, &[tgt_bin_desc_alloca], None);
     // call void @__tgt_init_all_rtls()
@@ -420,17 +441,58 @@ fn gen_call_handling<'ll>(
     generate_mapper_call(&mut builder, &cx, geps, o, begin_mapper_decl, fn_ty, num_args, s_ident_t);

     // Step 3)
-    // Here we will add code for the actual kernel launches in a follow-up PR.
-    // FIXME(offload): launch kernels
+    let mut values = vec![];
+    let offload_version = cx.get_const_i32(3);
+    values.push((4, offload_version));
+    values.push((4, cx.get_const_i32(num_args)));
+    values.push((8, geps.0));
+    values.push((8, geps.1));
+    values.push((8, geps.2));
+    values.push((8, o_types[0]));
+    // The next two are debug infos. FIXME(offload) set them
+    values.push((8, cx.const_null(cx.type_ptr())));
+    values.push((8, cx.const_null(cx.type_ptr())));
+    values.push((8, cx.get_const_i64(0)));
+    values.push((8, cx.get_const_i64(0)));
+    let ti32 = cx.type_i32();
+    let ci32_0 = cx.get_const_i32(0);
+    values.push((4, cx.const_array(ti32, &vec![cx.get_const_i32(2097152), ci32_0, ci32_0])));
+    values.push((4, cx.const_array(ti32, &vec![cx.get_const_i32(256), ci32_0, ci32_0])));
+    values.push((4, cx.get_const_i32(0)));
+
+    for (i, value) in values.iter().enumerate() {
+        let ptr = builder.inbounds_gep(tgt_kernel_decl, a5, &[i32_0, cx.get_const_i32(i as u64)]);
+        builder.store(value.1, ptr, Align::from_bytes(value.0).unwrap());
+    }
+
+    let args = vec![
+        s_ident_t,
+        // MAX == -1
+        cx.get_const_i64(u64::MAX),
+        cx.get_const_i32(2097152),
+        cx.get_const_i32(256),
+        region_ids[0],
+        a5,
+    ];
+    let offload_success = builder.call(tgt_target_kernel_ty, tgt_decl, &args, None);
+    // %41 = call i32 @__tgt_target_kernel(ptr @1, i64 -1, i32 2097152, i32 256, ptr @.kernel_1.region_id, ptr %kernel_args)
+    unsafe {
+        let next = llvm::LLVMGetNextInstruction(offload_success).unwrap();
+        llvm::LLVMRustPositionAfter(builder.llbuilder, next);
+        llvm::LLVMInstructionEraseFromParent(next);
+    }

     // Step 4)
-    unsafe { llvm::LLVMRustPositionAfter(builder.llbuilder, kernel_call) };
+    //unsafe { llvm::LLVMRustPositionAfter(builder.llbuilder, kernel_call) };

     let geps = get_geps(&mut builder, &cx, ty, ty2, a1, a2, a4);
     generate_mapper_call(&mut builder, &cx, geps, o, end_mapper_decl, fn_ty, num_args, s_ident_t);

     builder.call(mapper_fn_ty, unregister_lib_decl, &[tgt_bin_desc_alloca], None);

+    drop(builder);
+    unsafe { llvm::LLVMDeleteFunction(called) };
+
     // With this we generated the following begin and end mappers. We could easily generate the
     // update mapper in an update.
     // call void @__tgt_target_data_begin_mapper(ptr @1, i64 -1, i32 3, ptr %27, ptr %28, ptr %29, ptr @.offload_maptypes, ptr null, ptr null)
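Note on the new map type: gen_define_handling now emits 1 + 2 + 32 = 35 per pointer argument instead of 3. A minimal sketch of that composition is below; the constant names are illustrative only (the codegen writes the literal sum) and mirror the libomptarget map-type bits referenced in the comment.

// Illustrative constant names; not part of the compiler sources.
const MAP_TO: u64 = 1; // copy the buffer host -> device before the kernel runs
const MAP_FROM: u64 = 2; // copy it device -> host once the region ends
const MAP_TARGET_PARAM: u64 = 32; // additionally pass the pointer as a kernel argument

fn main() {
    let maptype = MAP_TO | MAP_FROM | MAP_TARGET_PARAM;
    assert_eq!(maptype, 35); // matches the updated @.offload_maptypes.1 CHECK line
}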

compiler/rustc_codegen_llvm/src/llvm/ffi.rs

Lines changed: 3 additions & 0 deletions
@@ -1201,6 +1201,7 @@ unsafe extern "C" {

     // Operations on functions
     pub(crate) fn LLVMSetFunctionCallConv(Fn: &Value, CC: c_uint);
+    pub(crate) fn LLVMDeleteFunction(Fn: &Value);

     // Operations about llvm intrinsics
     pub(crate) fn LLVMLookupIntrinsicID(Name: *const c_char, NameLen: size_t) -> c_uint;
@@ -1230,6 +1231,8 @@ unsafe extern "C" {
     pub(crate) fn LLVMIsAInstruction(Val: &Value) -> Option<&Value>;
     pub(crate) fn LLVMGetFirstBasicBlock(Fn: &Value) -> &BasicBlock;
     pub(crate) fn LLVMGetOperand(Val: &Value, Index: c_uint) -> Option<&Value>;
+    pub(crate) fn LLVMGetNextInstruction(Val: &Value) -> Option<&Value>;
+    pub(crate) fn LLVMInstructionEraseFromParent(Val: &Value);

     // Operations on call sites
     pub(crate) fn LLVMSetInstructionCallConv(Instr: &Value, CC: c_uint);
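The three new bindings are existing LLVM-C entry points rather than Rust-side helpers. For reference, their C prototypes from llvm-c/Core.h are sketched below; LLVMGetNextInstruction can return NULL when the given instruction is the last one in its block, which is why it is bound as Option<&Value>.

// void         LLVMDeleteFunction(LLVMValueRef Fn);
// LLVMValueRef LLVMGetNextInstruction(LLVMValueRef Inst); // NULL if Inst is the last instruction
// void         LLVMInstructionEraseFromParent(LLVMValueRef Inst);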

tests/codegen-llvm/gpu_offload/gpu_host.rs

Lines changed: 48 additions & 22 deletions
@@ -21,16 +21,15 @@ fn main() {
 }

 // CHECK: %struct.__tgt_offload_entry = type { i64, i16, i16, i32, ptr, ptr, i64, i64, ptr }
-// CHECK: %struct.__tgt_kernel_arguments = type { i32, i32, ptr, ptr, ptr, ptr, ptr, ptr, i64, i64, [3 x i32], [3 x i32], i32 }
 // CHECK: %struct.ident_t = type { i32, i32, i32, i32, ptr }
 // CHECK: %struct.__tgt_bin_desc = type { i32, ptr, ptr, ptr }
+// CHECK: %struct.__tgt_kernel_arguments = type { i32, i32, ptr, ptr, ptr, ptr, ptr, ptr, i64, i64, [3 x i32], [3 x i32], i32 }

 // CHECK: @.offload_sizes.1 = private unnamed_addr constant [1 x i64] [i64 1024]
-// CHECK: @.offload_maptypes.1 = private unnamed_addr constant [1 x i64] [i64 3]
+// CHECK: @.offload_maptypes.1 = private unnamed_addr constant [1 x i64] [i64 35]
 // CHECK: @.kernel_1.region_id = weak unnamed_addr constant i8 0
 // CHECK: @.offloading.entry_name.1 = internal unnamed_addr constant [9 x i8] c"kernel_1\00", section ".llvm.rodata.offloading", align 1
-// CHECK: @.offloading.entry.kernel_1 = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @.kernel_1.region_id, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section ".omp_offloading_entries", align 1
-// CHECK: @my_struct_global2 = external global %struct.__tgt_kernel_arguments
+// CHECK: @.offloading.entry.kernel_1 = weak constant %struct.__tgt_offload_entry { i64 0, i16 1, i16 1, i32 0, ptr @.kernel_1.region_id, ptr @.offloading.entry_name.1, i64 0, i64 0, ptr null }, section "llvm_offload_entries", align 8
 // CHECK: @0 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
 // CHECK: @1 = private unnamed_addr constant %struct.ident_t { i32 0, i32 2, i32 0, i32 22, ptr @0 }, align 8
@@ -43,34 +42,61 @@ fn main() {
 // CHECK-NEXT: %.offload_baseptrs = alloca [1 x ptr], align 8
 // CHECK-NEXT: %.offload_ptrs = alloca [1 x ptr], align 8
 // CHECK-NEXT: %.offload_sizes = alloca [1 x i64], align 8
-// CHECK-NEXT: %x.addr = alloca ptr, align 8
-// CHECK-NEXT: store ptr %x, ptr %x.addr, align 8
-// CHECK-NEXT: %1 = load ptr, ptr %x.addr, align 8
-// CHECK-NEXT: %2 = getelementptr inbounds float, ptr %1, i32 0
+// CHECK-NEXT: %kernel_args = alloca %struct.__tgt_kernel_arguments, align 8
 // CHECK: call void @llvm.memset.p0.i64(ptr align 8 %EmptyDesc, i8 0, i64 32, i1 false)
+// CHECK-NEXT: %1 = getelementptr inbounds float, ptr %x, i32 0
 // CHECK-NEXT: call void @__tgt_register_lib(ptr %EmptyDesc)
 // CHECK-NEXT: call void @__tgt_init_all_rtls()
-// CHECK-NEXT: %3 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK-NEXT: %2 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK-NEXT: store ptr %x, ptr %2, align 8
+// CHECK-NEXT: %3 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0
 // CHECK-NEXT: store ptr %1, ptr %3, align 8
-// CHECK-NEXT: %4 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0
-// CHECK-NEXT: store ptr %2, ptr %4, align 8
-// CHECK-NEXT: %5 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0
-// CHECK-NEXT: store i64 1024, ptr %5, align 8
-// CHECK-NEXT: %6 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
-// CHECK-NEXT: %7 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0
-// CHECK-NEXT: %8 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0
-// CHECK-NEXT: call void @__tgt_target_data_begin_mapper(ptr @1, i64 -1, i32 1, ptr %6, ptr %7, ptr %8, ptr @.offload_maptypes.1, ptr null, ptr null)
-// CHECK-NEXT: call void @kernel_1(ptr noalias noundef nonnull align 4 dereferenceable(1024) %x)
-// CHECK-NEXT: %9 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
-// CHECK-NEXT: %10 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0
-// CHECK-NEXT: %11 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0
-// CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr @1, i64 -1, i32 1, ptr %9, ptr %10, ptr %11, ptr @.offload_maptypes.1, ptr null, ptr null)
+// CHECK-NEXT: %4 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK-NEXT: store i64 1024, ptr %4, align 8
+// CHECK-NEXT: %5 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK-NEXT: %6 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK-NEXT: %7 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK-NEXT: call void @__tgt_target_data_begin_mapper(ptr @1, i64 -1, i32 1, ptr %5, ptr %6, ptr %7, ptr @.offload_maptypes.1, ptr null, ptr null)
+// CHECK-NEXT: %8 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 0
+// CHECK-NEXT: store i32 3, ptr %8, align 4
+// CHECK-NEXT: %9 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 1
+// CHECK-NEXT: store i32 1, ptr %9, align 4
+// CHECK-NEXT: %10 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 2
+// CHECK-NEXT: store ptr %5, ptr %10, align 8
+// CHECK-NEXT: %11 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 3
+// CHECK-NEXT: store ptr %6, ptr %11, align 8
+// CHECK-NEXT: %12 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 4
+// CHECK-NEXT: store ptr %7, ptr %12, align 8
+// CHECK-NEXT: %13 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 5
+// CHECK-NEXT: store ptr @.offload_maptypes.1, ptr %13, align 8
+// CHECK-NEXT: %14 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 6
+// CHECK-NEXT: store ptr null, ptr %14, align 8
+// CHECK-NEXT: %15 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 7
+// CHECK-NEXT: store ptr null, ptr %15, align 8
+// CHECK-NEXT: %16 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 8
+// CHECK-NEXT: store i64 0, ptr %16, align 8
+// CHECK-NEXT: %17 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 9
+// CHECK-NEXT: store i64 0, ptr %17, align 8
+// CHECK-NEXT: %18 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 10
+// CHECK-NEXT: store [3 x i32] [i32 2097152, i32 0, i32 0], ptr %18, align 4
+// CHECK-NEXT: %19 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 11
+// CHECK-NEXT: store [3 x i32] [i32 256, i32 0, i32 0], ptr %19, align 4
+// CHECK-NEXT: %20 = getelementptr inbounds %struct.__tgt_kernel_arguments, ptr %kernel_args, i32 0, i32 12
+// CHECK-NEXT: store i32 0, ptr %20, align 4
+// CHECK-NEXT: %21 = call i32 @__tgt_target_kernel(ptr @1, i64 -1, i32 2097152, i32 256, ptr @.kernel_1.region_id, ptr %kernel_args)
+// CHECK-NEXT: %22 = getelementptr inbounds [1 x ptr], ptr %.offload_baseptrs, i32 0, i32 0
+// CHECK-NEXT: %23 = getelementptr inbounds [1 x ptr], ptr %.offload_ptrs, i32 0, i32 0
+// CHECK-NEXT: %24 = getelementptr inbounds [1 x i64], ptr %.offload_sizes, i32 0, i32 0
+// CHECK-NEXT: call void @__tgt_target_data_end_mapper(ptr @1, i64 -1, i32 1, ptr %22, ptr %23, ptr %24, ptr @.offload_maptypes.1, ptr null, ptr null)
 // CHECK-NEXT: call void @__tgt_unregister_lib(ptr %EmptyDesc)
 // CHECK: store ptr %x, ptr %0, align 8
 // CHECK-NEXT: call void asm sideeffect "", "r,~{memory}"(ptr nonnull %0)
 // CHECK: ret void
 // CHECK-NEXT: }

+// CHECK: Function Attrs: nounwind
+// CHECK: declare i32 @__tgt_target_kernel(ptr, i64, i32, i32, ptr, ptr)
+
 #[unsafe(no_mangle)]
 #[inline(never)]
 pub fn kernel_1(x: &mut [f32; 256]) {
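The 13 stores into %kernel_args checked above fill the launcher's argument block field by field. As a reading aid, a Rust-side sketch of that layout follows; the field names track the commented C++ layout in gen_tgt_kernel_global and libomptarget's kernel-arguments struct, and the struct itself is purely illustrative (rustc only builds the corresponding LLVM type).

use core::ffi::c_void;

// Values in the comments are the ones stored for kernel_1 in the CHECK lines above.
#[allow(dead_code)]
#[repr(C)]
struct TgtKernelArguments {
    version: i32,               // 3, the offloading ABI version
    num_args: i32,              // 1, a single pointer argument
    arg_base_ptrs: *mut c_void, // %5, the .offload_baseptrs alloca
    arg_ptrs: *mut c_void,      // %6, the .offload_ptrs alloca
    arg_sizes: *mut c_void,     // %7, the .offload_sizes alloca
    arg_types: *mut c_void,     // @.offload_maptypes.1
    arg_names: *mut c_void,     // null, debug info is not emitted yet
    arg_mappers: *mut c_void,   // null
    tripcount: i64,             // 0
    flags: i64,                 // 0, the NoWait/IsCUDA bitfield (8 bytes in total)
    num_teams: [i32; 3],        // [2097152, 0, 0]
    thread_limit: [i32; 3],     // [256, 0, 0]
    dyn_cgroup_mem: i32,        // 0
}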
