Skip to content
31 changes: 29 additions & 2 deletions tools/clang/unittests/HLSLExec/LongVectorOps.def
Original file line number Diff line number Diff line change
Expand Up @@ -212,12 +212,39 @@ OP_DEFAULT_DEFINES(Wave, WaveActiveBitXor, 1, "TestWaveActiveBitXor", "", " -DFU
OP_DEFAULT_DEFINES(Wave, WaveActiveAllEqual, 1, "TestWaveActiveAllEqual", "", " -DFUNC_WAVE_ACTIVE_ALL_EQUAL=1")
OP_DEFAULT_DEFINES(Wave, WaveReadLaneAt, 1, "TestWaveReadLaneAt", "", " -DFUNC_WAVE_READ_LANE_AT=1")
OP_DEFAULT_DEFINES(Wave, WaveReadLaneFirst, 1, "TestWaveReadLaneFirst", "", " -DFUNC_WAVE_READ_LANE_FIRST=1")
OP_DEFAULT_DEFINES(Wave, WavePrefixSum, 1, "TestWavePrefixSum", "", " -DFUNC_WAVE_PREFIX_SUM=1 -DIS_WAVE_PREFIX_OP=1")
OP_DEFAULT_DEFINES(Wave, WavePrefixProduct, 1, "TestWavePrefixProduct", "", " -DFUNC_WAVE_PREFIX_PRODUCT=1 -DIS_WAVE_PREFIX_OP=1")
OP_DEFAULT_DEFINES(Wave, WavePrefixSum, 1, "TestWavePrefixSum", "", " -DFUNC_WAVE_PREFIX_SUM=1 -DOP_STORES_RESULT_ON_SPECIFIC_LANE=1")
OP_DEFAULT_DEFINES(Wave, WavePrefixProduct, 1, "TestWavePrefixProduct", "", " -DFUNC_WAVE_PREFIX_PRODUCT=1 -DOP_STORES_RESULT_ON_SPECIFIC_LANE=1")
OP(Wave, WaveMultiPrefixSum, 1, "TestWaveMultiPrefixSum", "", " -DFUNC_WAVE_MULTI_PREFIX_SUM=1 -DIS_WAVE_PREFIX_OP=1", "LongVectorOp", Default1, Default2, Default3)
OP(Wave, WaveMultiPrefixProduct, 1, "TestWaveMultiPrefixProduct", "", " -DFUNC_WAVE_MULTI_PREFIX_PRODUCT=1 -DIS_WAVE_PREFIX_OP=1", "LongVectorOp", Default1, Default2, Default3)
OP(Wave, WaveMultiPrefixBitAnd, 1, "TestWaveMultiPrefixBitAnd", "", " -DFUNC_WAVE_MULTI_PREFIX_BIT_AND=1 -DIS_WAVE_PREFIX_OP=1", "LongVectorOp", WaveMultiPrefixBitwise, Default2, Default3)
OP(Wave, WaveMultiPrefixBitOr, 1, "TestWaveMultiPrefixBitOr", "", " -DFUNC_WAVE_MULTI_PREFIX_BIT_OR=1 -DIS_WAVE_PREFIX_OP=1", "LongVectorOp", WaveMultiPrefixBitwise, Default2, Default3)
OP(Wave, WaveMultiPrefixBitXor, 1, "TestWaveMultiPrefixBitXor", "", " -DFUNC_WAVE_MULTI_PREFIX_BIT_XOR=1 -DIS_WAVE_PREFIX_OP=1", "LongVectorOp", WaveMultiPrefixBitwise, Default2, Default3)

// Declares a unary derivative op that runs the shared "TestDerivative" shader
// entry point in a 2x2x1 thread group.
//   GROUP                - op group the symbol belongs to.
//   SYMBOL               - op symbol being declared.
//   DERIVATIVE_INTRINSIC - string literal naming the HLSL intrinsic
//                          (e.g. "ddx"); appended to -DDERIVATIVE_FUNC=.
// The defines are assembled from adjacent string literals rather than a
// backslash-newline inside a single literal, so indenting the continuation
// lines can never leak extra whitespace into the option string.
// NOTE(review): the input sets are listed as Default2, Default1, Default3,
// unlike the other ops in this file (Default1, Default2, Default3) - confirm
// this ordering is intentional.
#define OP_DERIVATIVE(GROUP, SYMBOL, DERIVATIVE_INTRINSIC) \
  OP(GROUP, SYMBOL, 1, "TestDerivative", "", \
     "-DFUNC_TEST_DERIVATIVE=1" \
     " -DNUMTHREADS_XYZ=2,2,1 -DOP_STORES_RESULT_ON_SPECIFIC_LANE=1" \
     " -DDERIVATIVE_FUNC=" DERIVATIVE_INTRINSIC, \
     "LongVectorOp", Default2, Default1, Default3)

// Derivative ops. All four share the TestDerivative entry point; the string
// argument is the HLSL intrinsic substituted for DERIVATIVE_FUNC.
OP_DERIVATIVE(Derivative, DerivativeDdx, "ddx")
OP_DERIVATIVE(Derivative, DerivativeDdy, "ddy")
OP_DERIVATIVE(Derivative, DerivativeDdxFine, "ddx_fine")
OP_DERIVATIVE(Derivative, DerivativeDdyFine, "ddy_fine")

#undef OP_DERIVATIVE

// Declares a quad read op that runs the shared "TestQuadRead" shader entry
// point in a 2x2x1 thread group.
//   GROUP          - op group the symbol belongs to.
//   ARITY          - number of inputs the op takes.
//   SYMBOL         - op symbol being declared.
//   INTRINSIC_NAME - string literal naming the HLSL quad intrinsic; appended
//                    to -DQUAD_READ_FUNC=.
//   SRC_LANE       - string literal with the lane index appended to
//                    -DSOURCE_LANE_ID=.
#define OP_QUAD_READ(GROUP, ARITY, SYMBOL, INTRINSIC_NAME, SRC_LANE) \
  OP(GROUP, SYMBOL, ARITY, "TestQuadRead", "", \
     "-DFUNC_TEST_QUAD_READ=1 -DNUMTHREADS_XYZ=2,2,1" \
     " -DOP_STORES_RESULT_ON_SPECIFIC_LANE=1" \
     " -DQUAD_READ_FUNC=" INTRINSIC_NAME " -DSOURCE_LANE_ID=" SRC_LANE, \
     "LongVectorOp", Default1, Default2, Default3)

// The TestQuadRead shader stores the result from lane 3 (lower-right lane of
// the 2x2 quad), so SOURCE_LANE_ID names the lane whose value lane 3 ends up
// reading: lane 2 across X, lane 1 across Y and lane 0 across the diagonal.
// QuadReadLaneAt passes SOURCE_LANE_ID directly as the lane to read from.
OP_QUAD_READ(Quad, 2, QuadReadLaneAt, "QuadReadLaneAt", "2")
OP_QUAD_READ(Quad, 1, QuadReadAcrossX, "QuadReadAcrossX", "2")
OP_QUAD_READ(Quad, 1, QuadReadAcrossY, "QuadReadAcrossY", "1")
OP_QUAD_READ(Quad, 1, QuadReadAcrossDiagonal, "QuadReadAcrossDiagonal", "0")

#undef OP_QUAD_READ

#undef OP
105 changes: 104 additions & 1 deletion tools/clang/unittests/HLSLExec/LongVectors.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1339,6 +1339,55 @@ template <typename T> struct ExpectedBuilder<OpType::ModF, T> {
}
};

//
// Derivative Ops
//

// Expected values for the derivative ops, computed on the CPU.
//
// Coarse derivatives (ddx/ddy): all lanes in the quad get the same result.
// Fine derivatives (ddx_fine/ddy_fine): each lane gets its own result.
// To keep validation generic, results are always validated on lane 3.
//
// The shader offsets the input on each lane before taking the derivative:
//   A_lane = A + LaneID * 2
// so lanes 0..3 hold A + 0, A + 2, A + 4 and A + 6 respectively.
// NOTE(review): the expressions below are kept as explicit differences of the
// per-lane values rather than folded constants, presumably so rounding matches
// the shader's arithmetic for half/float inputs - confirm before simplifying.
//
// Coarse ddx: top right (lane 1) - top left (lane 0)
DEFAULT_OP_1(OpType::DerivativeDdx, ((A + 2) - (A + 0)));
// Coarse ddy: lower left (lane 2) - top left (lane 0)
DEFAULT_OP_1(OpType::DerivativeDdy, ((A + 4) - (A + 0)));

// Fine ddx on lane 3: bottom right (lane 3) - bottom left (lane 2)
DEFAULT_OP_1(OpType::DerivativeDdxFine, ((A + 6) - (A + 4)));
// Fine ddy on lane 3: bottom right (lane 3) - top right (lane 1)
DEFAULT_OP_1(OpType::DerivativeDdyFine, ((A + 6) - (A + 2)));

//
// Quad Read Ops
//

// We keep things generic so we can re-use this macro for all quad ops.
// The source lane is determined via defines in the shader code; see
// TestQuadRead in ShaderOpArith.xml. For all cases the shader fills the vector
// on that lane with the value of its third element, so the expected output is
// the third element of the first input repeated VectorSize times.
//
//   OP    - OpType enumerator the specializations are defined for.
//   ARITY - number of input sets the op takes; asserted against Inputs.
//
// Reading Inputs[0][2] is only valid for vectors with at least three elements,
// so that precondition is asserted instead of silently reading out of bounds.
// The macro body intentionally contains no // comments: a line continuation
// would extend such a comment over the following line.
#define QUAD_READ_OP(OP, ARITY) \
  template <typename T> struct Op<OP, T, ARITY> : DefaultValidation<T> {}; \
  template <typename T> struct ExpectedBuilder<OP, T> { \
    static std::vector<T> buildExpected(Op<OP, T, ARITY> &, \
                                        const InputSets<T> &Inputs) { \
      DXASSERT_NOMSG(Inputs.size() == ARITY); \
      const size_t VectorSize = Inputs[0].size(); \
      DXASSERT_NOMSG(VectorSize > 2); \
      std::vector<T> Expected; \
      Expected.assign(VectorSize, Inputs[0][2]); \
      return Expected; \
    } \
  };

// The arity given here mirrors the ARITY each op is declared with in
// LongVectorOps.def: 2 for QuadReadLaneAt, 1 for the QuadReadAcross* ops.
QUAD_READ_OP(OpType::QuadReadLaneAt, 2);
QUAD_READ_OP(OpType::QuadReadAcrossX, 1);
QUAD_READ_OP(OpType::QuadReadAcrossY, 1);
QUAD_READ_OP(OpType::QuadReadAcrossDiagonal, 1);

#undef QUAD_READ_OP

//
// Wave Ops
//
Expand Down Expand Up @@ -1658,7 +1707,7 @@ void dispatchWaveOpTest(ID3D12Device *D3DDevice, bool VerboseLogging,

const std::string AdditionalCompilerOptions =
"-DWAVE_SIZE=" + std::to_string(WaveSize) +
" -DNUMTHREADS_X=" + std::to_string(WaveSize);
" -DNUMTHREADS_XYZ=" + std::to_string(WaveSize) + ",1,1 ";

for (size_t VectorSize : InputVectorSizes) {
std::vector<std::vector<T>> Inputs =
Expand Down Expand Up @@ -2458,6 +2507,60 @@ class DxilConf_SM69_Vectorized {
HLK_TEST(LoadAndStore_RD_SB_SRV, double);
HLK_TEST(LoadAndStore_RD_SB_UAV, double);

// Derivative
// Each derivative op (coarse and fine, X and Y) is registered for the
// HLSLHalf_t and float element types.
HLK_TEST(DerivativeDdx, HLSLHalf_t);
HLK_TEST(DerivativeDdy, HLSLHalf_t);
HLK_TEST(DerivativeDdxFine, HLSLHalf_t);
HLK_TEST(DerivativeDdyFine, HLSLHalf_t);
HLK_TEST(DerivativeDdx, float);
HLK_TEST(DerivativeDdy, float);
HLK_TEST(DerivativeDdxFine, float);
HLK_TEST(DerivativeDdyFine, float);

// Quad
// Each of the four quad read ops is registered once per element type: bool,
// 16/32/64-bit signed and unsigned integers, half, float and double.
HLK_TEST(QuadReadLaneAt, HLSLBool_t);
HLK_TEST(QuadReadAcrossX, HLSLBool_t);
HLK_TEST(QuadReadAcrossY, HLSLBool_t);
HLK_TEST(QuadReadAcrossDiagonal, HLSLBool_t);
HLK_TEST(QuadReadLaneAt, int16_t);
HLK_TEST(QuadReadAcrossX, int16_t);
HLK_TEST(QuadReadAcrossY, int16_t);
HLK_TEST(QuadReadAcrossDiagonal, int16_t);
HLK_TEST(QuadReadLaneAt, int32_t);
HLK_TEST(QuadReadAcrossX, int32_t);
HLK_TEST(QuadReadAcrossY, int32_t);
HLK_TEST(QuadReadAcrossDiagonal, int32_t);
HLK_TEST(QuadReadLaneAt, int64_t);
HLK_TEST(QuadReadAcrossX, int64_t);
HLK_TEST(QuadReadAcrossY, int64_t);
HLK_TEST(QuadReadAcrossDiagonal, int64_t);
HLK_TEST(QuadReadLaneAt, uint16_t);
HLK_TEST(QuadReadAcrossX, uint16_t);
HLK_TEST(QuadReadAcrossY, uint16_t);
HLK_TEST(QuadReadAcrossDiagonal, uint16_t);
HLK_TEST(QuadReadLaneAt, uint32_t);
HLK_TEST(QuadReadAcrossX, uint32_t);
HLK_TEST(QuadReadAcrossY, uint32_t);
HLK_TEST(QuadReadAcrossDiagonal, uint32_t);
HLK_TEST(QuadReadLaneAt, uint64_t);
HLK_TEST(QuadReadAcrossX, uint64_t);
HLK_TEST(QuadReadAcrossY, uint64_t);
HLK_TEST(QuadReadAcrossDiagonal, uint64_t);
HLK_TEST(QuadReadLaneAt, HLSLHalf_t);
HLK_TEST(QuadReadAcrossX, HLSLHalf_t);
HLK_TEST(QuadReadAcrossY, HLSLHalf_t);
HLK_TEST(QuadReadAcrossDiagonal, HLSLHalf_t);
HLK_TEST(QuadReadLaneAt, float);
HLK_TEST(QuadReadAcrossX, float);
HLK_TEST(QuadReadAcrossY, float);
HLK_TEST(QuadReadAcrossDiagonal, float);
HLK_TEST(QuadReadLaneAt, double);
HLK_TEST(QuadReadAcrossX, double);
HLK_TEST(QuadReadAcrossY, double);
HLK_TEST(QuadReadAcrossDiagonal, double);

// Wave

HLK_WAVEOP_TEST(WaveActiveAllEqual, HLSLBool_t);
HLK_WAVEOP_TEST(WaveReadLaneAt, HLSLBool_t);
HLK_WAVEOP_TEST(WaveReadLaneFirst, HLSLBool_t);
Expand Down
66 changes: 62 additions & 4 deletions tools/clang/unittests/HLSLExec/ShaderOpArith.xml
Original file line number Diff line number Diff line change
Expand Up @@ -4437,8 +4437,67 @@ void MSMain(uint GID : SV_GroupIndex,
}
#endif

#ifdef NUMTHREADS_X
#define NUMTHREADS_ATTR [numthreads(NUMTHREADS_X, 1, 1)]
#ifdef FUNC_TEST_DERIVATIVE
// Applies DERIVATIVE_FUNC (supplied as a compiler define) to a per-lane
// offset copy of Vector and stores the result computed on lane 3 at offset 0
// of g_OutputVector. The test is compiled with NUMTHREADS_XYZ=2,2,1.
void TestDerivative(vector<TYPE, NUM> Vector)
{
// Lane layout within the quad:
// 0 == upper-left lane in quad
// 1 == upper-right lane in quad
// 2 == lower-left lane in quad
// 3 == lower-right lane in quad

const uint LaneIndex = WaveGetLaneIndex();

// We need to make sure the values are unique across lanes used in the
// partial derivative calculation so we can get a non-zero partial
// derivative. Multiplying the lane index by 2 is a simple way to
// ensure that. And we do this on all lanes so this function can be
// used generically for coarse and fine partial derivatives.
Vector += ((TYPE)(LaneIndex * 2));

vector<OUT_TYPE, NUM> Result = DERIVATIVE_FUNC(Vector);

// For coarse derivatives, all lanes in the quad get the same result.
// But for fine derivatives, each lane gets a different result. To
// keep things generic we only store from lane 3 (the lower-right lane),
// as that is the lane we arbitrarily chose for validation with fine
// derivatives.
if(LaneIndex == 3)
{
g_OutputVector.Store< vector<OUT_TYPE, NUM> >(0, Result);
}
}
#endif

#ifdef FUNC_TEST_QUAD_READ
// Exercises QUAD_READ_FUNC (supplied as a compiler define). The lane
// SOURCE_LANE_ID overwrites every element of its vector with its third
// element, the quad read is performed, and lane 3 stores what it read at
// offset 0 of g_OutputVector. The test is compiled with NUMTHREADS_XYZ=2,2,1.
// Vector[2] is indexed unconditionally, so NUM must be at least 3.
void TestQuadRead(vector<TYPE, NUM> Vector)
{
const uint LaneIndex = WaveGetLaneIndex();

// Fill the long vector with something different on SOURCE_LANE_ID.
// We choose the 3rd element arbitrarily because it makes it easy
// to compute expected values CPU side.
[unroll]
for(uint i = 0; i < NUM; ++i)
{
Vector[i] = (LaneIndex == SOURCE_LANE_ID) ? Vector[2] : Vector[i];
}

// NOTE(review): IS_BINARY_OP is defined outside this excerpt; presumably it
// is set for arity-2 ops such as QuadReadLaneAt - confirm.
#if IS_BINARY_OP
// QuadReadLaneAt
vector<OUT_TYPE, NUM> Result = QUAD_READ_FUNC(Vector, SOURCE_LANE_ID);
#else
// QuadReadAcross*
vector<OUT_TYPE, NUM> Result = QUAD_READ_FUNC(Vector);
#endif

// Only lane 3 (the lower-right lane of the quad) writes the result; the
// SOURCE_LANE_ID for each op is chosen so that lane 3 reads the filled lane.
if(LaneIndex == 3)
{
g_OutputVector.Store< vector<OUT_TYPE, NUM> >(0, Result);
}
}
#endif

// NUMTHREADS_XYZ, when supplied by the test, is the full comma-separated
// X,Y,Z triple (e.g. 2,2,1), so it expands directly into the three numthreads
// arguments. Without it the shader runs with a single thread per group.
#ifdef NUMTHREADS_XYZ
#define NUMTHREADS_ATTR [numthreads(NUMTHREADS_XYZ)]
#else
#define NUMTHREADS_ATTR [numthreads(1, 1, 1)]
#endif
Expand Down Expand Up @@ -4479,8 +4538,7 @@ void MSMain(uint GID : SV_GroupIndex,
#endif

vector<OUT_TYPE, OutNum> OutputVector;
#ifdef IS_WAVE_PREFIX_OP
// Wave prefix ops store the output on a specific lane only.
#ifdef OP_STORES_RESULT_ON_SPECIFIC_LANE
FUNC(Input1);
return;
#elif TEST_ARRAY_OPERATOR
Expand Down