forked from KhronosGroup/SPIRV-Cross
-
Notifications
You must be signed in to change notification settings - Fork 35
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
MSL: Support
SPV_KHR_shader_ballot
and SPV_KHR_subgroup_vote
.
Normally, I wouldn't have bothered with this, given that we already support the Vulkan 1.1 subgroup functionality, but a client asked for the legacy extensions.
- Loading branch information
Showing
7 changed files
with
362 additions
and
39 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
#pragma clang diagnostic ignored "-Wmissing-prototypes" | ||
|
||
#include <metal_stdlib> | ||
#include <simd/simd.h> | ||
|
||
using namespace metal; | ||
|
||
template<typename T> | ||
inline T spvSubgroupBroadcast(T value, ushort lane) | ||
{ | ||
return simd_broadcast(value, lane); | ||
} | ||
|
||
template<> | ||
inline bool spvSubgroupBroadcast(bool value, ushort lane) | ||
{ | ||
return !!simd_broadcast((ushort)value, lane); | ||
} | ||
|
||
template<uint N> | ||
inline vec<bool, N> spvSubgroupBroadcast(vec<bool, N> value, ushort lane) | ||
{ | ||
return (vec<bool, N>)simd_broadcast((vec<ushort, N>)value, lane); | ||
} | ||
|
||
template<typename T> | ||
inline T spvSubgroupBroadcastFirst(T value) | ||
{ | ||
return simd_broadcast_first(value); | ||
} | ||
|
||
template<> | ||
inline bool spvSubgroupBroadcastFirst(bool value) | ||
{ | ||
return !!simd_broadcast_first((ushort)value); | ||
} | ||
|
||
template<uint N> | ||
inline vec<bool, N> spvSubgroupBroadcastFirst(vec<bool, N> value) | ||
{ | ||
return (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value); | ||
} | ||
|
||
inline uint4 spvSubgroupBallot(bool value) | ||
{ | ||
simd_vote vote = simd_ballot(value); | ||
// simd_ballot() returns a 64-bit integer-like object, but | ||
// SPIR-V callers expect a uint4. We must convert. | ||
// FIXME: This won't include higher bits if Apple ever supports | ||
// 128 lanes in an SIMD-group. | ||
return uint4(as_type<uint2>((simd_vote::vote_t)vote), 0, 0); | ||
} | ||
|
||
struct inputData | ||
{ | ||
float inputDataArray[1]; | ||
}; | ||
|
||
struct outputData | ||
{ | ||
float outputDataArray[1]; | ||
}; | ||
|
||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); | ||
|
||
kernel void main0(device inputData& _12 [[buffer(0)]], device outputData& _87 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]) | ||
{ | ||
uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); | ||
bool _31 = _12.inputDataArray[gl_LocalInvocationID.x] > 0.0; | ||
uint4 _52 = spvSubgroupBallot(_31); | ||
uint4 _66 = uint4(int4(popcount(uint4(as_type<uint2>(as_type<ulong>(uint2(gl_SubgroupLtMask.xy))), 0u, 0u) & uint4(as_type<uint2>(as_type<ulong>(uint2(_52.xy))), 0u, 0u)))); | ||
if (_31) | ||
{ | ||
_87.outputDataArray[_66.x + _66.y] = _12.inputDataArray[gl_LocalInvocationID.x]; | ||
} | ||
} | ||
|
37 changes: 37 additions & 0 deletions
37
reference/opt/shaders-msl/comp/shader_group_vote.msl21.comp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
#pragma clang diagnostic ignored "-Wmissing-prototypes" | ||
|
||
#include <metal_stdlib> | ||
#include <simd/simd.h> | ||
|
||
using namespace metal; | ||
|
||
template<typename T> | ||
inline bool spvSubgroupAllEqual(T value) | ||
{ | ||
return simd_all(all(value == simd_broadcast_first(value))); | ||
} | ||
|
||
template<> | ||
inline bool spvSubgroupAllEqual(bool value) | ||
{ | ||
return simd_all(value) || !simd_any(value); | ||
} | ||
|
||
template<uint N> | ||
inline bool spvSubgroupAllEqual(vec<bool, N> value) | ||
{ | ||
return simd_all(all(value == (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value))); | ||
} | ||
|
||
struct inputData | ||
{ | ||
float inputDataArray[1]; | ||
}; | ||
|
||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); | ||
|
||
kernel void main0(device inputData& _12 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) | ||
{ | ||
bool _31 = _12.inputDataArray[gl_LocalInvocationID.x] > 0.0; | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#pragma clang diagnostic ignored "-Wmissing-prototypes" | ||
|
||
#include <metal_stdlib> | ||
#include <simd/simd.h> | ||
|
||
using namespace metal; | ||
|
||
template<typename T> | ||
inline T spvSubgroupBroadcast(T value, ushort lane) | ||
{ | ||
return simd_broadcast(value, lane); | ||
} | ||
|
||
template<> | ||
inline bool spvSubgroupBroadcast(bool value, ushort lane) | ||
{ | ||
return !!simd_broadcast((ushort)value, lane); | ||
} | ||
|
||
template<uint N> | ||
inline vec<bool, N> spvSubgroupBroadcast(vec<bool, N> value, ushort lane) | ||
{ | ||
return (vec<bool, N>)simd_broadcast((vec<ushort, N>)value, lane); | ||
} | ||
|
||
template<typename T> | ||
inline T spvSubgroupBroadcastFirst(T value) | ||
{ | ||
return simd_broadcast_first(value); | ||
} | ||
|
||
template<> | ||
inline bool spvSubgroupBroadcastFirst(bool value) | ||
{ | ||
return !!simd_broadcast_first((ushort)value); | ||
} | ||
|
||
template<uint N> | ||
inline vec<bool, N> spvSubgroupBroadcastFirst(vec<bool, N> value) | ||
{ | ||
return (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value); | ||
} | ||
|
||
inline uint4 spvSubgroupBallot(bool value) | ||
{ | ||
simd_vote vote = simd_ballot(value); | ||
// simd_ballot() returns a 64-bit integer-like object, but | ||
// SPIR-V callers expect a uint4. We must convert. | ||
// FIXME: This won't include higher bits if Apple ever supports | ||
// 128 lanes in an SIMD-group. | ||
return uint4(as_type<uint2>((simd_vote::vote_t)vote), 0, 0); | ||
} | ||
|
||
struct inputData | ||
{ | ||
float inputDataArray[1]; | ||
}; | ||
|
||
struct outputData | ||
{ | ||
float outputDataArray[1]; | ||
}; | ||
|
||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); | ||
|
||
kernel void main0(device inputData& _12 [[buffer(0)]], device outputData& _87 [[buffer(1)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]]) | ||
{ | ||
uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0)); | ||
float thisLaneData = _12.inputDataArray[gl_LocalInvocationID.x]; | ||
bool laneActive = thisLaneData > 0.0; | ||
uint4 activeSlots = uint4(int4(popcount(uint4(as_type<uint2>(as_type<ulong>(uint2(gl_SubgroupLtMask.xy))), 0u, 0u) & uint4(as_type<uint2>(as_type<ulong>(uint2(spvSubgroupBallot(laneActive).xy))), 0u, 0u)))); | ||
uint thisLaneOutputSlot = activeSlots.x + activeSlots.y; | ||
int firstInvocation = spvSubgroupBroadcastFirst(1); | ||
int invocation = spvSubgroupBroadcast(1, 0u); | ||
if (laneActive) | ||
{ | ||
_87.outputDataArray[thisLaneOutputSlot] = thisLaneData; | ||
} | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
#pragma clang diagnostic ignored "-Wmissing-prototypes" | ||
|
||
#include <metal_stdlib> | ||
#include <simd/simd.h> | ||
|
||
using namespace metal; | ||
|
||
template<typename T> | ||
inline bool spvSubgroupAllEqual(T value) | ||
{ | ||
return simd_all(all(value == simd_broadcast_first(value))); | ||
} | ||
|
||
template<> | ||
inline bool spvSubgroupAllEqual(bool value) | ||
{ | ||
return simd_all(value) || !simd_any(value); | ||
} | ||
|
||
template<uint N> | ||
inline bool spvSubgroupAllEqual(vec<bool, N> value) | ||
{ | ||
return simd_all(all(value == (vec<bool, N>)simd_broadcast_first((vec<ushort, N>)value))); | ||
} | ||
|
||
struct inputData | ||
{ | ||
float inputDataArray[1]; | ||
}; | ||
|
||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(64u, 1u, 1u); | ||
|
||
kernel void main0(device inputData& _12 [[buffer(0)]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) | ||
{ | ||
float thisLaneData = _12.inputDataArray[gl_LocalInvocationID.x]; | ||
bool laneActive = thisLaneData > 0.0; | ||
bool allInvocations = simd_all(laneActive); | ||
bool anyInvocations = simd_any(laneActive); | ||
bool allInvocationsEqual = spvSubgroupAllEqual(laneActive); | ||
} | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
#version 450 | ||
#extension GL_ARB_shader_ballot : require | ||
|
||
layout (local_size_x = 64) in; | ||
layout (std430, binding = 0) buffer inputData | ||
{ | ||
float inputDataArray[]; | ||
}; | ||
|
||
layout (std430, binding = 1) buffer outputData | ||
{ | ||
float outputDataArray[]; | ||
}; | ||
|
||
void main () | ||
{ | ||
float thisLaneData = inputDataArray [gl_LocalInvocationID.x]; | ||
bool laneActive = (thisLaneData > 0); | ||
|
||
uvec4 activeSlots = bitCount(uvec4(unpackUint2x32(gl_SubGroupLtMaskARB), uvec2(0)) & uvec4(unpackUint2x32(ballotARB (laneActive)), uvec2(0))); | ||
uint thisLaneOutputSlot = activeSlots.x + activeSlots.y; | ||
|
||
int firstInvocation = readFirstInvocationARB(1); | ||
int invocation = readInvocationARB(1, 0); | ||
|
||
if (laneActive) { | ||
outputDataArray[thisLaneOutputSlot] = thisLaneData; | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#version 450 | ||
#extension GL_ARB_shader_group_vote : require | ||
|
||
layout (local_size_x = 64) in; | ||
layout (std430, binding = 0) buffer inputData | ||
{ | ||
float inputDataArray[]; | ||
}; | ||
|
||
void main () | ||
{ | ||
float thisLaneData = inputDataArray [gl_LocalInvocationID.x]; | ||
bool laneActive = (thisLaneData > 0); | ||
|
||
bool allInvocations = allInvocationsARB(laneActive); | ||
bool anyInvocations = anyInvocationARB(laneActive); | ||
bool allInvocationsEqual = allInvocationsEqualARB(laneActive); | ||
} |
Oops, something went wrong.