Skip to content

Commit 8860fcc

Browse files
Handle patch token for implicit args buffer
Related-To: NEO-5081, IGC-4710
Signed-off-by: Mateusz Jablonski <[email protected]>
1 parent ea6f089 commit 8860fcc

File tree

8 files changed

+39
-1
lines changed

8 files changed

+39
-1
lines changed

shared/source/device_binary_format/patchtokens_decoder.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,9 @@ inline void decodeKernelDataParameterToken(const SPatchDataParameterBuffer *toke
290290
case DATA_PARAMETER_PREFERRED_WORKGROUP_MULTIPLE:
291291
crossthread.preferredWorkgroupMultiple = token;
292292
break;
293+
case DATA_PARAMETER_IMPL_ARG_BUFFER:
294+
out.tokens.crossThreadPayloadArgs.implicitArgsBufferOffset = token;
295+
break;
293296

294297
case DATA_PARAMETER_NUM_HARDWARE_THREADS:
295298
case DATA_PARAMETER_PRINTF_SURFACE_SIZE:
@@ -300,6 +303,7 @@ inline void decodeKernelDataParameterToken(const SPatchDataParameterBuffer *toke
300303
case DATA_PARAMETER_EXECUTION_MASK:
301304
case DATA_PARAMETER_VME_IMAGE_TYPE:
302305
case DATA_PARAMETER_VME_MB_SKIP_BLOCK_TYPE:
306+
case DATA_PARAMETER_LOCAL_ID_BUFFER:
303307
// ignored intentionally
304308
break;
305309
}

shared/source/device_binary_format/patchtokens_decoder.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -157,6 +157,7 @@ struct KernelFromPatchtokens {
157157
const SPatchDataParameterBuffer *localMemoryStatelessWindowStartAddress = nullptr;
158158
const SPatchDataParameterBuffer *preferredWorkgroupMultiple = nullptr;
159159
StackVec<const SPatchDataParameterBuffer *, 4> childBlockSimdSize;
160+
const SPatchDataParameterBuffer *implicitArgsBufferOffset = nullptr;
160161
} crossThreadPayloadArgs;
161162
} tokens;
162163

shared/source/device_binary_format/patchtokens_dumper.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,7 @@ std::string asString(DATA_PARAMETER_TOKEN dataParameter) {
135135
CASE_TOK_STR(DATA_PARAMETER_STAGE_IN_GRID_SIZE);
136136
CASE_TOK_STR(DATA_PARAMETER_BUFFER_OFFSET);
137137
CASE_TOK_STR(DATA_PARAMETER_BUFFER_STATEFUL);
138+
CASE_TOK_STR(DATA_PARAMETER_IMPL_ARG_BUFFER);
138139
}
139140
}
140141
#undef CASE_TOK_STR
@@ -764,6 +765,7 @@ std::string asString(const KernelFromPatchtokens &kern) {
764765
dumpOrNull(kern.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress, "", stream, indentLevel1);
765766
dumpOrNull(kern.tokens.crossThreadPayloadArgs.preferredWorkgroupMultiple, "", stream, indentLevel1);
766767
dumpVecIfNotEmpty(kern.tokens.crossThreadPayloadArgs.childBlockSimdSize, "Child block simd size(s)", stream, indentLevel1);
768+
dumpOrNull(kern.tokens.crossThreadPayloadArgs.implicitArgsBufferOffset, "", stream, indentLevel1);
767769

768770
if (kern.tokens.kernelArgs.size() != 0) {
769771
stream << "Kernel arguments [" << kern.tokens.kernelArgs.size() << "] :\n";

shared/source/kernel/kernel_descriptor.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -266,6 +266,7 @@ struct KernelDescriptor {
266266
CrossThreadDataOffset preferredWkgMultiple = undefined<CrossThreadDataOffset>;
267267
CrossThreadDataOffset localMemoryStatelessWindowSize = undefined<CrossThreadDataOffset>;
268268
CrossThreadDataOffset localMemoryStatelessWindowStartAddres = undefined<CrossThreadDataOffset>;
269+
CrossThreadDataOffset implcitArgsBuffer = undefined<CrossThreadDataOffset>;
269270
} implicitArgs;
270271

271272
std::vector<std::unique_ptr<ArgDescriptorExtended>> explicitArgsExtendedDescriptors;

shared/source/kernel/kernel_descriptor_from_patchtokens.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -510,13 +510,15 @@ void populateKernelDescriptor(KernelDescriptor &dst, const PatchTokenBinary::Ker
510510
dst.payloadMappings.implicitArgs.privateMemorySize = getOffset(src.tokens.crossThreadPayloadArgs.privateMemoryStatelessSize);
511511
dst.payloadMappings.implicitArgs.localMemoryStatelessWindowSize = getOffset(src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowSize);
512512
dst.payloadMappings.implicitArgs.localMemoryStatelessWindowStartAddres = getOffset(src.tokens.crossThreadPayloadArgs.localMemoryStatelessWindowStartAddress);
513+
dst.payloadMappings.implicitArgs.implcitArgsBuffer = getOffset(src.tokens.crossThreadPayloadArgs.implicitArgsBufferOffset);
513514

514515
if (src.tokens.gtpinInfo) {
515516
dst.external.igcInfoForGtpin = (src.tokens.gtpinInfo + 1);
516517
}
517518

518519
dst.kernelAttributes.binaryFormat = DeviceBinaryFormat::Patchtokens;
519520
dst.kernelAttributes.gpuPointerSize = gpuPointerSizeInBytes;
521+
dst.kernelAttributes.flags.requiresImplicitArgs = src.tokens.crossThreadPayloadArgs.implicitArgsBufferOffset != nullptr;
520522

521523
if (DebugManager.flags.UpdateCrossThreadDataSize.get()) {
522524
dst.updateCrossThreadDataSize();

shared/test/unit_test/device_binary_format/patchtokens_decoder_tests.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,7 @@ TEST(KernelDecoder, GivenKernelWithValidNonArgCrossThreadDataPatchtokensThenDeco
578578
auto childBlockSimdSize0Off = pushBackDataParameterToken(DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE, storage);
579579
auto childBlockSimdSize1Off = pushBackDataParameterToken(DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE, storage);
580580
auto childBlockSimdSize2Off = pushBackDataParameterToken(DATA_PARAMETER_CHILD_BLOCK_SIMD_SIZE, storage);
581+
auto implictArgBufferOffset = pushBackDataParameterToken(DATA_PARAMETER_IMPL_ARG_BUFFER, storage);
581582

582583
ASSERT_EQ(storage.data(), kernelToEncode.blobs.kernelInfo.begin());
583584
auto kernelHeader = reinterpret_cast<iOpenCL::SKernelBinaryHeaderCommon *>(storage.data());
@@ -620,6 +621,7 @@ TEST(KernelDecoder, GivenKernelWithValidNonArgCrossThreadDataPatchtokensThenDeco
620621
EXPECT_TRUE(tokenOffsetMatched(base, childBlockSimdSize0Off, decodedKernel.tokens.crossThreadPayloadArgs.childBlockSimdSize[0]));
621622
EXPECT_TRUE(tokenOffsetMatched(base, childBlockSimdSize1Off, decodedKernel.tokens.crossThreadPayloadArgs.childBlockSimdSize[1]));
622623
EXPECT_TRUE(tokenOffsetMatched(base, childBlockSimdSize2Off, decodedKernel.tokens.crossThreadPayloadArgs.childBlockSimdSize[2]));
624+
EXPECT_TRUE(tokenOffsetMatched(base, implictArgBufferOffset, decodedKernel.tokens.crossThreadPayloadArgs.implicitArgsBufferOffset));
623625
}
624626

625627
TEST(KernelDecoder, GivenKernelWithArgCrossThreadDataPatchtokensWhenSourceIndexIsGreaterThan2ThenThenDecodingSucceedsButTokenIsMarkedAsUnhandled) {

shared/test/unit_test/device_binary_format/patchtokens_dumper_tests.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2094,7 +2094,7 @@ TEST(PatchTokenDumper, GivenAnyTokenThenDumpingIsHandled) {
20942094
auto kernelDataParamToken = static_cast<iOpenCL::SPatchDataParameterBuffer *>(kernelToken);
20952095
*kernelDataParamToken = PatchTokensTestData::initDataParameterBufferToken(iOpenCL::DATA_PARAMETER_BUFFER_OFFSET);
20962096
kernelDataParamToken->Size = maxTokenSize;
2097-
std::unordered_set<int> dataParamTokensPasslist{6, 7, 17, 19, 36, 37, 39, 40, 41};
2097+
std::unordered_set<int> dataParamTokensPasslist{6, 7, 17, 19, 36, 37, 39, 40, 41, iOpenCL::DATA_PARAMETER_LOCAL_ID_BUFFER};
20982098
for (int i = 0; i < iOpenCL::NUM_DATA_PARAMETER_TOKENS; ++i) {
20992099
if (dataParamTokensPasslist.count(i) != 0) {
21002100
continue;

shared/test/unit_test/kernel/kernel_descriptor_from_patchtokens_tests.cpp

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1571,4 +1571,30 @@ TEST(KernelDescriptorFromPatchtokens, GivenUpdateCrossThreadDataSizeFalseWhenPop
15711571
NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 8);
15721572

15731573
EXPECT_EQ(32u, kernelDescriptor.kernelAttributes.crossThreadDataSize);
1574+
}
1575+
1576+
TEST(KernelDescriptorFromPatchtokens, givenDataParameterImplArgBufferTokenWhenPopulateKernelDescriptorThenProperOffsetIsSetAndImplicitArgsAreRequired) {
1577+
// Verifies that populateKernelDescriptor, given a decoded kernel whose
// crossThreadPayloadArgs.implicitArgsBufferOffset token is set, copies the
// token's Offset into payloadMappings.implicitArgs.implcitArgsBuffer and
// raises kernelAttributes.flags.requiresImplicitArgs.
NEO::PatchTokenBinary::KernelFromPatchtokens kernelTokens;
1578+
// NOTE(review): kernelHeader is declared without an initializer; presumably
// populateKernelDescriptor does not depend on its field values here - confirm.
iOpenCL::SKernelBinaryHeaderCommon kernelHeader;
1579+
kernelTokens.header = &kernelHeader;
1580+
1581+
NEO::KernelDescriptor kernelDescriptor;
1582+
1583+
// Arbitrary cross-thread-data offset that the assertion below expects to see
// propagated unchanged into the kernel descriptor.
uint16_t offset = 0x30;
1584+
1585+
// Hand-built DATA_PARAMETER_IMPL_ARG_BUFFER token; value-initialized first so
// any field not assigned below starts out zeroed.
iOpenCL::SPatchDataParameterBuffer dataParameterToken{};
1586+
dataParameterToken.Token = iOpenCL::PATCH_TOKEN_DATA_PARAMETER_BUFFER;
1587+
dataParameterToken.Size = sizeof(iOpenCL::SPatchDataParameterBuffer);
1588+
dataParameterToken.Type = iOpenCL::DATA_PARAMETER_IMPL_ARG_BUFFER;
1589+
dataParameterToken.ArgumentNumber = 0;
1590+
dataParameterToken.Offset = offset;
1591+
dataParameterToken.DataSize = sizeof(uint32_t);
1592+
dataParameterToken.SourceOffset = 0;
1593+
1594+
// The token is attached directly to the already-decoded structure, so the
// patch-token decoder itself is not exercised by this test.
kernelTokens.tokens.crossThreadPayloadArgs.implicitArgsBufferOffset = &dataParameterToken;
1595+
1596+
// The third argument (8) is presumably the GPU pointer size in bytes - confirm
// against the populateKernelDescriptor declaration.
NEO::populateKernelDescriptor(kernelDescriptor, kernelTokens, 8);
1597+
1598+
// `implcitArgsBuffer` is the field's actual spelling in this test and in the
// populateKernelDescriptor assignment.
EXPECT_EQ(offset, kernelDescriptor.payloadMappings.implicitArgs.implcitArgsBuffer);
1599+
// requiresImplicitArgs is set from whether the implicitArgsBufferOffset token
// pointer is non-null.
EXPECT_TRUE(kernelDescriptor.kernelAttributes.flags.requiresImplicitArgs);
15741600
}

0 commit comments

Comments
 (0)