Skip to content

Commit

Permalink
MSL: Use templated array type when emitting BDA to arrays.
Browse files Browse the repository at this point in the history
Handling native array types is not really feasible since we need to fuse
the variable declaration with the type declaration.

This is feasible in something like variable_decl, but for plain SSA
pointers, this breaks down.
  • Loading branch information
HansKristian-Work committed May 10, 2023
1 parent 193ca86 commit cfd1618
Show file tree
Hide file tree
Showing 5 changed files with 263 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
#pragma clang diagnostic ignored "-Wmissing-prototypes"
#pragma clang diagnostic ignored "-Wmissing-braces"

#include <metal_stdlib>
#include <simd/simd.h>

using namespace metal;

template<typename T, size_t Num>
struct spvUnsafeArray
{
T elements[Num ? Num : 1];

thread T& operator [] (size_t pos) thread
{
return elements[pos];
}
constexpr const thread T& operator [] (size_t pos) const thread
{
return elements[pos];
}

device T& operator [] (size_t pos) device
{
return elements[pos];
}
constexpr const device T& operator [] (size_t pos) const device
{
return elements[pos];
}

constexpr const constant T& operator [] (size_t pos) const constant
{
return elements[pos];
}

threadgroup T& operator [] (size_t pos) threadgroup
{
return elements[pos];
}
constexpr const threadgroup T& operator [] (size_t pos) const threadgroup
{
return elements[pos];
}
};

struct _7
{
device uchar* _m0;
device spvUnsafeArray<uchar, 12>* _m1;
};

static inline __attribute__((always_inline))
device uint* _23(device _7& _2)
{
device spvUnsafeArray<uchar, 12>* _26 = _2._m1;
device uint* _29 = reinterpret_cast<device uint*>(reinterpret_cast<ulong>(_26) + 16ul);
*_29 = 1u;
return _29;
}

kernel void main0(device _7& _2 [[buffer(0)]])
{
device uint* _31 = _23(_2);
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#version 450
#extension GL_EXT_shader_explicit_arithmetic_types_int8 : require
#extension GL_EXT_shader_8bit_storage : require
#if defined(GL_ARB_gpu_shader_int64)
#extension GL_ARB_gpu_shader_int64 : require
#else
#error No extension available for 64-bit integers.
#endif
#extension GL_EXT_buffer_reference : require
layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in;

layout(buffer_reference) buffer uint8_tPointer
{
uint8_t value;
};

layout(buffer_reference, buffer_reference_align = 4) buffer uintPointer
{
uint value;
};

layout(buffer_reference, buffer_reference_align = 1) buffer uint8_t12_Pointer
{
uint8_t value[12];
};

layout(set = 0, binding = 0, std430) buffer _7_2
{
uint8_tPointer _m0;
uint8_t12_Pointer _m1;
} _2;

uintPointer _23()
{
uint8_t12_Pointer _26 = _2._m1;
uintPointer _29 = uintPointer(uint64_t(_26) + 16ul);
_29.value = 1u;
return _29;
}

void main()
{
uintPointer _31 = _23();
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
; SPIR-V
; Version: 1.5
; Generator: Khronos; 35
; Bound: 5550
; Schema: 0
OpCapability Int8
OpCapability Int64
OpCapability Int64
OpCapability Shader
OpCapability PhysicalStorageBufferAddresses
OpExtension "SPV_KHR_physical_storage_buffer"
OpMemoryModel PhysicalStorageBuffer64 Simple
OpEntryPoint GLCompute %main "main" %globals
OpExecutionMode %main LocalSize 16 16 1

OpDecorate %ptr_uchar ArrayStride 8
OpDecorate %ptr_uint ArrayStride 8
OpDecorate %ptr_array_t ArrayStride 8
OpDecorate %array_t ArrayStride 1
OpDecorate %struct_t Block
OpMemberDecorate %struct_t 0 Offset 0
OpMemberDecorate %struct_t 1 Offset 8
OpDecorate %ptr_struct ArrayStride 32
OpDecorate %globals DescriptorSet 0
OpDecorate %globals Binding 0

%void = OpTypeVoid
%uchar = OpTypeInt 8 0
%uint = OpTypeInt 32 0
%ulong = OpTypeInt 64 0
%bool = OpTypeBool

%ulong_12 = OpConstant %ulong 12
%uint_0 = OpConstant %uint 0
%uint_1 = OpConstant %uint 1
%uint_2 = OpConstant %uint 2
%uchar_69 = OpConstant %uchar 69
%ulong_16 = OpConstant %ulong 16

%ptr_uint = OpTypePointer PhysicalStorageBuffer %uint
%ptr_uchar = OpTypePointer PhysicalStorageBuffer %uchar

%array_t = OpTypeArray %uchar %ulong_12
%ptr_array_t = OpTypePointer PhysicalStorageBuffer %array_t

%struct_t = OpTypeStruct %ptr_uchar %ptr_array_t
%ptr_struct = OpTypePointer StorageBuffer %struct_t

%void_fn = OpTypeFunction %void
%foo_t = OpTypeFunction %ptr_uint

%ptr_uchararr_sb = OpTypePointer StorageBuffer %ptr_array_t

%globals = OpVariable %ptr_struct StorageBuffer

%foo = OpFunction %ptr_uint None %foo_t
%foo_entry = OpLabel
%lea2 = OpAccessChain %ptr_uchararr_sb %globals %uint_1
%loaded2 = OpLoad %ptr_array_t %lea2
%cast = OpConvertPtrToU %ulong %loaded2
%adjusted = OpIAdd %ulong %cast %ulong_16
%cast2 = OpConvertUToPtr %ptr_uint %adjusted
OpStore %cast2 %uint_1 Aligned 4 ; eliminating this store generates different code and the problem disappears
OpReturnValue %cast2
OpFunctionEnd

%main = OpFunction %void None %void_fn
%main_entry = OpLabel
%nothing = OpFunctionCall %ptr_uint %foo
OpReturn
OpFunctionEnd
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
; SPIR-V
; Version: 1.5
; Generator: Khronos; 35
; Bound: 5550
; Schema: 0
OpCapability Int8
OpCapability Int64
OpCapability Int64
OpCapability Shader
OpCapability PhysicalStorageBufferAddresses
OpExtension "SPV_KHR_physical_storage_buffer"
OpMemoryModel PhysicalStorageBuffer64 Simple
OpEntryPoint GLCompute %main "main" %globals
OpExecutionMode %main LocalSize 16 16 1

OpDecorate %ptr_uchar ArrayStride 8
OpDecorate %ptr_uint ArrayStride 8
OpDecorate %ptr_array_t ArrayStride 8
OpDecorate %array_t ArrayStride 1
OpDecorate %struct_t Block
OpMemberDecorate %struct_t 0 Offset 0
OpMemberDecorate %struct_t 1 Offset 8
OpDecorate %ptr_struct ArrayStride 32
OpDecorate %globals DescriptorSet 0
OpDecorate %globals Binding 0

%void = OpTypeVoid
%uchar = OpTypeInt 8 0
%uint = OpTypeInt 32 0
%ulong = OpTypeInt 64 0
%bool = OpTypeBool

%ulong_12 = OpConstant %ulong 12
%uint_0 = OpConstant %uint 0
%uint_1 = OpConstant %uint 1
%uint_2 = OpConstant %uint 2
%uchar_69 = OpConstant %uchar 69
%ulong_16 = OpConstant %ulong 16

%ptr_uint = OpTypePointer PhysicalStorageBuffer %uint
%ptr_uchar = OpTypePointer PhysicalStorageBuffer %uchar

%array_t = OpTypeArray %uchar %ulong_12
%ptr_array_t = OpTypePointer PhysicalStorageBuffer %array_t

%struct_t = OpTypeStruct %ptr_uchar %ptr_array_t
%ptr_struct = OpTypePointer StorageBuffer %struct_t

%void_fn = OpTypeFunction %void
%foo_t = OpTypeFunction %ptr_uint

%ptr_uchararr_sb = OpTypePointer StorageBuffer %ptr_array_t

%globals = OpVariable %ptr_struct StorageBuffer

%foo = OpFunction %ptr_uint None %foo_t
%foo_entry = OpLabel
%lea2 = OpAccessChain %ptr_uchararr_sb %globals %uint_1
%loaded2 = OpLoad %ptr_array_t %lea2
%cast = OpConvertPtrToU %ulong %loaded2
%adjusted = OpIAdd %ulong %cast %ulong_16
%cast2 = OpConvertUToPtr %ptr_uint %adjusted
OpStore %cast2 %uint_1 Aligned 4 ; eliminating this store generates different code and the problem disappears
OpReturnValue %cast2
OpFunctionEnd

%main = OpFunction %void None %void_fn
%main_entry = OpLabel
%nothing = OpFunctionCall %ptr_uint %foo
OpReturn
OpFunctionEnd
10 changes: 10 additions & 0 deletions spirv_msl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14573,7 +14573,17 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id, bool member)
while (type_is_pointer(*p_parent_type))
p_parent_type = &get<SPIRType>(p_parent_type->parent_type);

// If we're emitting BDA, just use the templated type.
// Emitting builtin arrays need a lot of cooperation with other code to ensure
// the C-style nesting works right.
// FIXME: This is somewhat of a hack.
bool old_is_using_builtin_array = is_using_builtin_array;
if (type.storage == StorageClassPhysicalStorageBuffer)
is_using_builtin_array = false;

type_name = join(type_address_space, " ", type_to_glsl(*p_parent_type, id));

is_using_builtin_array = old_is_using_builtin_array;
}

switch (type.basetype)
Expand Down

0 comments on commit cfd1618

Please sign in to comment.