Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clang/include/clang/Basic/BuiltinsAMDGPU.td
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,7 @@ def __builtin_amdgcn_s_barrier_leave : AMDGPUBuiltin<"void(_Constant short)", []
def __builtin_amdgcn_s_get_barrier_state : AMDGPUBuiltin<"unsigned int(int)", [], "gfx12-insts">;
def __builtin_amdgcn_s_get_named_barrier_state : AMDGPUBuiltin<"unsigned int(void *)", [], "gfx12-insts">;
def __builtin_amdgcn_s_prefetch_data : AMDGPUBuiltin<"void(void const *, unsigned int)", [Const], "gfx12-insts">;
def __builtin_amdgcn_s_prefetch_inst : AMDGPUBuiltin<"void(void const *, unsigned int)", [Const], "gfx12-insts">;
def __builtin_amdgcn_s_buffer_prefetch_data : AMDGPUBuiltin<"void(__amdgpu_buffer_rsrc_t, _Constant int, unsigned int)", [Const], "gfx12-insts">;

def __builtin_amdgcn_global_load_tr_b64_v2i32 : AMDGPUBuiltin<"_Vector<2, int>(_Vector<2, int address_space<1> *>)", [Const], "gfx12-insts,wavefrontsize32">;
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenBuiltinAMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1029,6 +1029,12 @@ CIRGenFunction::emitAMDGPUBuiltinExpr(unsigned builtinId,
getContext().BuiltinInfo.getName(builtinId));
return mlir::Value{};
}
case AMDGPU::BI__builtin_amdgcn_s_prefetch_inst: {
cgm.errorNYI(expr->getSourceRange(),
std::string("unimplemented AMDGPU builtin call: ") +
getContext().BuiltinInfo.getName(builtinId));
return mlir::Value{};
}
case Builtin::BIlogbf:
case Builtin::BI__builtin_logbf:
return emitLogbBuiltin(*this, expr, llvm::APFloat::IEEEsingle());
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/CodeGen/TargetBuiltins/AMDGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2185,6 +2185,9 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
case AMDGPU::BI__builtin_amdgcn_s_prefetch_data:
return emitBuiltinWithOneOverloadedType<2>(
*this, E, Intrinsic::amdgcn_s_prefetch_data);
case AMDGPU::BI__builtin_amdgcn_s_prefetch_inst:
return emitBuiltinWithOneOverloadedType<2>(
*this, E, Intrinsic::amdgcn_s_prefetch_inst);
case Builtin::BIlogbf:
case Builtin::BI__builtin_logbf: {
Value *Src0 = EmitScalarExpr(E->getArg(0));
Expand Down
71 changes: 71 additions & 0 deletions clang/test/CodeGenHIP/builtins-amdgcn-prefetch.hip
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
// RUN: %clang_cc1 -triple amdgcn-amd-amdhsa -target-cpu gfx1200 -emit-llvm -fcuda-is-device -o - %s | FileCheck %s --check-prefix=CHECK-GFX1200

#define __device__ __attribute__((device))
#define __shared__ __attribute__((shared))
#define __constant__ __attribute__((constant))

extern "C" __device__ int bar(const char *s);

// CHECK-GFX1200-LABEL: define dso_local noundef i32 @_Z4foo1v(
// CHECK-GFX1200-SAME: ) #[[ATTR0:[0-9]+]] {
// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
// CHECK-GFX1200-NEXT: [[S:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-GFX1200-NEXT: [[S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S]] to ptr
// CHECK-GFX1200-NEXT: call void @llvm.amdgcn.s.prefetch.inst.p0(ptr @bar, i32 0)
// CHECK-GFX1200-NEXT: store ptr addrspacecast (ptr addrspace(4) @.str to ptr), ptr [[S_ASCAST]], align 8
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = load ptr, ptr [[S_ASCAST]], align 8
// CHECK-GFX1200-NEXT: [[CALL:%.*]] = call i32 @bar(ptr noundef [[TMP0]]) #[[ATTR3:[0-9]+]]
// CHECK-GFX1200-NEXT: ret i32 [[CALL]]
//
__device__ int foo1() {
__builtin_amdgcn_s_prefetch_inst((const void *)bar, 0);
const char *s = "hello world";
return bar(s);
}

// CHECK-GFX1200-LABEL: define dso_local noundef i32 @_Z4foo2i(
// CHECK-GFX1200-SAME: i32 noundef [[ID:%.*]]) #[[ATTR0]] {
// CHECK-GFX1200-NEXT: [[ENTRY:.*:]]
// CHECK-GFX1200-NEXT: [[RETVAL:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-GFX1200-NEXT: [[ID_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-GFX1200-NEXT: [[S:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-GFX1200-NEXT: [[S2:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-GFX1200-NEXT: [[ID_ADDR_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[ID_ADDR]] to ptr
// CHECK-GFX1200-NEXT: [[S_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S]] to ptr
// CHECK-GFX1200-NEXT: [[S2_ASCAST:%.*]] = addrspacecast ptr addrspace(5) [[S2]] to ptr
// CHECK-GFX1200-NEXT: store i32 [[ID]], ptr [[ID_ADDR_ASCAST]], align 4
// CHECK-GFX1200-NEXT: call void @llvm.amdgcn.s.prefetch.inst.p0(ptr blockaddress(@_Z4foo2i, %[[NOBAR:.*]]), i32 0)
// CHECK-GFX1200-NEXT: [[TMP0:%.*]] = load i32, ptr [[ID_ADDR_ASCAST]], align 4
// CHECK-GFX1200-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
// CHECK-GFX1200-NEXT: br i1 [[CMP]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
// CHECK-GFX1200: [[IF_THEN]]:
// CHECK-GFX1200-NEXT: store ptr addrspacecast (ptr addrspace(4) @.str to ptr), ptr [[S_ASCAST]], align 8
// CHECK-GFX1200-NEXT: [[TMP1:%.*]] = load ptr, ptr [[S_ASCAST]], align 8
// CHECK-GFX1200-NEXT: [[CALL:%.*]] = call i32 @bar(ptr noundef [[TMP1]]) #[[ATTR3]]
// CHECK-GFX1200-NEXT: store i32 [[CALL]], ptr addrspace(5) [[RETVAL]], align 4
// CHECK-GFX1200-NEXT: br label %[[RETURN:.*]]
// CHECK-GFX1200: [[IF_END]]:
// CHECK-GFX1200-NEXT: br label %[[NOBAR]]
// CHECK-GFX1200: [[NOBAR]]:
// CHECK-GFX1200-NEXT: store ptr addrspacecast (ptr addrspace(4) @.str.1 to ptr), ptr [[S2_ASCAST]], align 8
// CHECK-GFX1200-NEXT: [[TMP2:%.*]] = load ptr, ptr [[S2_ASCAST]], align 8
// CHECK-GFX1200-NEXT: [[CALL1:%.*]] = call i32 @bar(ptr noundef [[TMP2]]) #[[ATTR3]]
// CHECK-GFX1200-NEXT: store i32 [[CALL1]], ptr addrspace(5) [[RETVAL]], align 4
// CHECK-GFX1200-NEXT: br label %[[RETURN]]
// CHECK-GFX1200: [[RETURN]]:
// CHECK-GFX1200-NEXT: [[TMP3:%.*]] = load i32, ptr addrspace(5) [[RETVAL]], align 4
// CHECK-GFX1200-NEXT: ret i32 [[TMP3]]
// CHECK-GFX1200: [[INDIRECTGOTO:.*:]]
// CHECK-GFX1200-NEXT: indirectbr ptr poison, [label %[[NOBAR]]]
//
__device__ int foo2(int id) {
__builtin_amdgcn_s_prefetch_inst(&&NOBAR, 0);
if (id == 0) {
const char *s = "hello world";
return bar(s);
}
NOBAR:
const char *s2 = "skip hello";
return bar(s2);
}
26 changes: 26 additions & 0 deletions clang/test/CodeGenOpenCL/builtins-amdgcn-gfx12.cl
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,32 @@ void test_s_prefetch_data(int *fp, global float *gp, constant char *cp, unsigned
__builtin_amdgcn_s_prefetch_data(cp, 31);
}

// CHECK-LABEL: @test_s_prefetch_inst(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[FP_ADDR:%.*]] = alloca ptr, align 8, addrspace(5)
// CHECK-NEXT: [[GP_ADDR:%.*]] = alloca ptr addrspace(1), align 8, addrspace(5)
// CHECK-NEXT: [[CP_ADDR:%.*]] = alloca ptr addrspace(4), align 8, addrspace(5)
// CHECK-NEXT: [[LEN_ADDR:%.*]] = alloca i32, align 4, addrspace(5)
// CHECK-NEXT: store ptr [[FP:%.*]], ptr addrspace(5) [[FP_ADDR]], align 8
// CHECK-NEXT: store ptr addrspace(1) [[GP:%.*]], ptr addrspace(5) [[GP_ADDR]], align 8
// CHECK-NEXT: store ptr addrspace(4) [[CP:%.*]], ptr addrspace(5) [[CP_ADDR]], align 8
// CHECK-NEXT: store i32 [[LEN:%.*]], ptr addrspace(5) [[LEN_ADDR]], align 4
// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr addrspace(5) [[FP_ADDR]], align 8
// CHECK-NEXT: call void @llvm.amdgcn.s.prefetch.inst.p0(ptr [[TMP0]], i32 0)
// CHECK-NEXT: [[TMP1:%.*]] = load ptr addrspace(1), ptr addrspace(5) [[GP_ADDR]], align 8
// CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr addrspace(5) [[LEN_ADDR]], align 4
// CHECK-NEXT: call void @llvm.amdgcn.s.prefetch.inst.p1(ptr addrspace(1) [[TMP1]], i32 [[TMP2]])
// CHECK-NEXT: [[TMP3:%.*]] = load ptr addrspace(4), ptr addrspace(5) [[CP_ADDR]], align 8
// CHECK-NEXT: call void @llvm.amdgcn.s.prefetch.inst.p4(ptr addrspace(4) [[TMP3]], i32 31)
// CHECK-NEXT: ret void
//
void test_s_prefetch_inst(int *fp, global float *gp, constant char *cp, unsigned int len)
{
__builtin_amdgcn_s_prefetch_inst(fp, 0);
__builtin_amdgcn_s_prefetch_inst(gp, len);
__builtin_amdgcn_s_prefetch_inst(cp, 31);
}

// CHECK-LABEL: @test_s_buffer_prefetch_data(
// CHECK-NEXT: entry:
// CHECK-NEXT: [[RSRC_ADDR:%.*]] = alloca ptr addrspace(8), align 16, addrspace(5)
Expand Down
Loading