Skip to content

Commit 82a6f9e

Browse files
Use compute walker system fence only when both system memory and events are in use
Related-To: NEO-6959
Signed-off-by: Zbigniew Zdanowicz <[email protected]>
1 parent 60c819b commit 82a6f9e

File tree

5 files changed

+337
-56
lines changed

5 files changed

+337
-56
lines changed

level_zero/core/test/unit_tests/xe_hpc_core/test_cmdlist_xe_hpc_core.cpp

Lines changed: 81 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -321,7 +321,8 @@ HWTEST2_F(CommandListAppendRangesBarrierXeHpcCore, givenCallToAppendRangesBarrie
321321
EXPECT_TRUE(pipeControlCmd->getUnTypedDataPortCacheFlush());
322322
}
323323

324-
HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, givenHwSupportsSystemFenceWhenKernelNotUsingSystemMemoryAllocationsAndEventNotHostSignalScopeThenExpectsNoSystemFenceUsed, IsXeHpcCore) {
324+
HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore,
325+
givenHwSupportsSystemFenceWhenKernelNotUsingSystemMemoryAllocationsAndEventNotHostSignalScopeThenExpectsNoSystemFenceUsed, IsXeHpcCore) {
325326
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
326327

327328
ze_result_t result = ZE_RESULT_SUCCESS;
@@ -390,7 +391,8 @@ HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, givenHwSupportsSystemFenceWhen
390391
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
391392
}
392393

393-
HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, givenHwSupportsSystemFenceWhenKernelUsingUsmHostMemoryAllocationsAndEventNotHostSignalScopeThenExpectsSystemFenceUsed, IsXeHpcCore) {
394+
HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore,
395+
givenHwSupportsSystemFenceWhenKernelUsingUsmHostMemoryAllocationsAndEventNotHostSignalScopeThenExpectsNoSystemFenceUsed, IsXeHpcCore) {
394396
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
395397

396398
ze_result_t result = ZE_RESULT_SUCCESS;
@@ -451,13 +453,14 @@ HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, givenHwSupportsSystemFenceWhen
451453

452454
auto walkerCmd = genCmdCast<WALKER_TYPE *>(*itor);
453455
auto &postSyncData = walkerCmd->getPostSync();
454-
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
456+
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
455457

456458
result = context->freeMem(ptr);
457459
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
458460
}
459461

460-
HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, givenHwSupportsSystemFenceWhenMigrationOnComputeKernelUsingUsmSharedCpuMemoryAllocationsAndEventNotHostSignalScopeThenExpectsSystemFenceUsed, IsXeHpcCore) {
462+
HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore,
463+
givenHwSupportsSystemFenceWhenMigrationOnComputeKernelUsingUsmSharedCpuMemoryAllocationsAndEventNotHostSignalScopeThenExpectsNoSystemFenceUsed, IsXeHpcCore) {
461464
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
462465

463466
ze_result_t result = ZE_RESULT_SUCCESS;
@@ -506,13 +509,14 @@ HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, givenHwSupportsSystemFenceWhen
506509

507510
auto walkerCmd = genCmdCast<WALKER_TYPE *>(*itor);
508511
auto &postSyncData = walkerCmd->getPostSync();
509-
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
512+
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
510513

511514
result = context->freeMem(ptr);
512515
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
513516
}
514517

515-
HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, givenHwSupportsSystemFenceWhenKernelUsingIndirectSystemMemoryAllocationsAndEventNotHostSignalScopeThenExpectsSystemFenceUsed, IsXeHpcCore) {
518+
HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore,
519+
givenHwSupportsSystemFenceWhenKernelUsingIndirectSystemMemoryAllocationsAndEventNotHostSignalScopeThenExpectsNoSystemFenceUsed, IsXeHpcCore) {
516520
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
517521

518522
ze_result_t result = ZE_RESULT_SUCCESS;
@@ -577,13 +581,14 @@ HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, givenHwSupportsSystemFenceWhen
577581

578582
auto walkerCmd = genCmdCast<WALKER_TYPE *>(*itor);
579583
auto &postSyncData = walkerCmd->getPostSync();
580-
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());
584+
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
581585

582586
result = context->freeMem(ptr);
583587
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
584588
}
585589

586-
HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, givenHwSupportsSystemFenceWhenKernelUsingDeviceMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceUsed, IsXeHpcCore) {
590+
HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore,
591+
givenHwSupportsSystemFenceWhenKernelUsingDeviceMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceUsed, IsXeHpcCore) {
587592
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
588593

589594
ze_result_t result = ZE_RESULT_SUCCESS;
@@ -644,6 +649,74 @@ HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore, givenHwSupportsSystemFenceWhen
644649
auto itor = find<WALKER_TYPE *>(commands.begin(), commands.end());
645650
ASSERT_NE(itor, commands.end());
646651

652+
auto walkerCmd = genCmdCast<WALKER_TYPE *>(*itor);
653+
auto &postSyncData = walkerCmd->getPostSync();
654+
EXPECT_FALSE(postSyncData.getSystemMemoryFenceRequest());
655+
656+
result = context->freeMem(ptr);
657+
ASSERT_EQ(result, ZE_RESULT_SUCCESS);
658+
}
659+
660+
HWTEST2_F(CommandListAppendLaunchKernelXeHpcCore,
661+
givenHwSupportsSystemFenceWhenKernelUsingUsmHostMemoryAllocationsAndEventHostSignalScopeThenExpectsSystemFenceUsed, IsXeHpcCore) {
662+
using WALKER_TYPE = typename FamilyType::WALKER_TYPE;
663+
664+
ze_result_t result = ZE_RESULT_SUCCESS;
665+
666+
auto &hwInfo = *device->getNEODevice()->getRootDeviceEnvironment().getMutableHardwareInfo();
667+
auto &hwConfig = *NEO::HwInfoConfig::get(hwInfo.platform.eProductFamily);
668+
669+
VariableBackup<unsigned short> hwRevId{&hwInfo.platform.usRevId};
670+
hwRevId = hwConfig.getHwRevIdFromStepping(REVISION_B, hwInfo);
671+
672+
constexpr size_t size = 4096u;
673+
constexpr size_t alignment = 4096u;
674+
void *ptr = nullptr;
675+
676+
ze_host_mem_alloc_desc_t hostDesc = {};
677+
result = context->allocHostMem(&hostDesc, size, alignment, &ptr);
678+
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
679+
EXPECT_NE(nullptr, ptr);
680+
681+
Mock<::L0::Kernel> kernel;
682+
auto mockModule = std::unique_ptr<Module>(new Mock<Module>(device, nullptr));
683+
kernel.module = mockModule.get();
684+
685+
auto allocData = driverHandle->getSvmAllocsManager()->getSVMAlloc(ptr);
686+
ASSERT_NE(nullptr, allocData);
687+
auto kernelAllocation = allocData->gpuAllocations.getGraphicsAllocation(device->getRootDeviceIndex());
688+
ASSERT_NE(nullptr, kernelAllocation);
689+
kernel.residencyContainer.push_back(kernelAllocation);
690+
691+
ze_event_pool_desc_t eventPoolDesc = {};
692+
eventPoolDesc.count = 1;
693+
auto eventPool = std::unique_ptr<L0::EventPool>(L0::EventPool::create(driverHandle.get(), context, 0, nullptr, &eventPoolDesc, result));
694+
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
695+
ze_event_desc_t eventDesc = {};
696+
eventDesc.index = 0;
697+
eventDesc.signal = ZE_EVENT_SCOPE_FLAG_HOST;
698+
eventDesc.wait = 0;
699+
auto event = std::unique_ptr<L0::Event>(L0::Event::create<uint32_t>(eventPool.get(), &eventDesc, device));
700+
701+
kernel.setGroupSize(1, 1, 1);
702+
ze_group_count_t groupCount{8, 1, 1};
703+
auto commandList = std::make_unique<WhiteBox<::L0::CommandListCoreFamily<gfxCoreFamily>>>();
704+
result = commandList->initialize(device, NEO::EngineGroupType::Compute, 0u);
705+
ASSERT_EQ(ZE_RESULT_SUCCESS, result);
706+
707+
CmdListKernelLaunchParams launchParams = {};
708+
result = commandList->appendLaunchKernelWithParams(&kernel, &groupCount, event.get(), launchParams);
709+
EXPECT_EQ(ZE_RESULT_SUCCESS, result);
710+
711+
GenCmdList commands;
712+
ASSERT_TRUE(CmdParse<FamilyType>::parseCommandBuffer(
713+
commands,
714+
commandList->commandContainer.getCommandStream()->getCpuBase(),
715+
commandList->commandContainer.getCommandStream()->getUsed()));
716+
717+
auto itor = find<WALKER_TYPE *>(commands.begin(), commands.end());
718+
ASSERT_NE(itor, commands.end());
719+
647720
auto walkerCmd = genCmdCast<WALKER_TYPE *>(*itor);
648721
auto &postSyncData = walkerCmd->getPostSync();
649722
EXPECT_TRUE(postSyncData.getSystemMemoryFenceRequest());

opencl/source/command_queue/hardware_interface_xehp_and_later.inl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -110,7 +110,8 @@ inline void HardwareInterface<GfxFamily>::programWalker(
110110
numWorkGroups, walkerArgs.localWorkSizes, simd, dim,
111111
localIdsGenerationByRuntime, inlineDataProgrammingRequired, requiredWalkOrder);
112112

113-
EncodeWalkerArgs encodeWalkerArgs{kernel.getExecutionType(), true};
113+
bool requiredSystemFence = kernel.isAnyKernelArgumentUsingSystemMemory() && walkerArgs.event != nullptr;
114+
EncodeWalkerArgs encodeWalkerArgs{kernel.getExecutionType(), requiredSystemFence};
114115
EncodeDispatchKernel<GfxFamily>::encodeAdditionalWalkerFields(hwInfo, walkerCmd, encodeWalkerArgs);
115116

116117
auto devices = queueCsr.getOsContext().getDeviceBitfield();

0 commit comments

Comments
 (0)