-
Notifications
You must be signed in to change notification settings - Fork 14
bitonic sort sample #209
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
bitonic sort sample #209
Changes from 1 commit
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
# Load the shared example helpers; configuration cannot continue without them.
include(common RESULT_VARIABLE RES)
if(NOT RES)
	message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
endif()

# Project-provided macro (defined outside this file); the later code relies on
# EXECUTABLE_NAME being set once it has run.
nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}")

# Optional: embed everything under app_resources/ as builtin resources.
if(NBL_EMBED_BUILTIN_RESOURCES)
	set(RESOURCE_DIR "app_resources")
	set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData)

	# Where the resources are searched for, and where the generated
	# header/source files are placed.
	get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE)
	get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE)
	get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE)

	# Collect every file below the resource directory (paths relative to it)
	# and register each one in RESOURCES_TO_EMBED.
	file(GLOB_RECURSE BUILTIN_RESOURCE_FILES
		RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}"
		"${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*"
	)
	foreach(RES_FILE ${BUILTIN_RESOURCE_FILES})
		LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}")
	endforeach()

	# Generate the builtin-resource target and attach it to the executable.
	ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}")
	LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_})
endif()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
#include "nbl/builtin/hlsl/bda/bda_accessor.hlsl"
// BitonicPushData comes from the shared header so that the shader and the
// host code use one single definition instead of two copies that can drift.
#include "common.hlsl"

using namespace nbl::hlsl;

// Buffer addresses and element count for this dispatch.
[[vk::push_constant]] BitonicPushData pushData;

// Keys and values are both accessed as 32-bit unsigned integers through
// buffer-device-address accessors.
using DataPtr = bda::__ptr<uint32_t>;
using DataAccessor = BdaAccessor<uint32_t>;

// Workgroup-local copies of the keys and values being sorted.
// ElementCount is not defined in this file; it is expected to be supplied
// from outside (e.g. as a compile-time define).
groupshared uint32_t sharedKeys[ElementCount];
groupshared uint32_t sharedValues[ElementCount];
|
||
|
||
// Bitonic sort of key/value pairs performed by one workgroup in groupshared memory.
//
// Steps:
//   1. copy keys and values from the input addresses into sharedKeys/sharedValues,
//   2. run the compare-exchange network (Log2ElementCount stages, stage+1 passes each),
//   3. copy the result to the output addresses.
//
// Each thread handles the elements threadId, threadId + WorkgroupSize, ... .
//
// NOTE(review): pairs whose partner index is >= dataSize are skipped, so an element
// count that is not a power of two is presumably not guaranteed to end up fully
// sorted - confirm which inputs the host side is allowed to pass.
[numthreads(WorkgroupSize, 1, 1)]
[shader("compute")]
void main(uint32_t3 dispatchId : SV_DispatchThreadID, uint32_t3 localId : SV_GroupThreadID)
{
    const uint32_t threadId = localId.x;

    // The groupshared arrays hold ElementCount entries. Clamp the requested count so
    // that a larger push-constant value can never index past them.
    const uint32_t requestedCount = pushData.dataElementCount;
    const uint32_t capacity = uint32_t(ElementCount);
    const uint32_t dataSize = requestedCount < capacity ? requestedCount : capacity;

    DataAccessor inputKeys = DataAccessor::create(DataPtr::create(pushData.inputKeyAddress));
    DataAccessor inputValues = DataAccessor::create(DataPtr::create(pushData.inputValueAddress));

    for (uint32_t i = threadId; i < dataSize; i += WorkgroupSize)
    {
        inputKeys.get(i, sharedKeys[i]);
        inputValues.get(i, sharedValues[i]);
    }

    // All loads must be visible before the first compare-exchange pass.
    GroupMemoryBarrierWithGroupSync();

    for (uint32_t stage = 0; stage < Log2ElementCount; stage++)
    {
        // Length of the sequences produced by this stage; consecutive sequences
        // alternate between ascending and descending order.
        const uint32_t sequenceSize = 1u << (stage + 1);

        for (uint32_t pass = 0; pass <= stage; pass++)
        {
            const uint32_t compareDistance = 1u << (stage - pass);

            for (uint32_t i = threadId; i < dataSize; i += WorkgroupSize)
            {
                const uint32_t partnerId = i ^ compareDistance;

                // Every pair is processed exactly once, by its lower index. The upper
                // element therefore never reads shared memory while the lower one may
                // be writing it. Pairs reaching outside the data are skipped.
                if (partnerId < i || partnerId >= dataSize)
                    continue;

                // Operands always come from groupshared memory. Exchanging them with
                // WaveReadLaneAt would require lane index == element index modulo the
                // wave size, which does not hold in general when a thread strides over
                // several elements or WorkgroupSize is not a multiple of the wave size.
                const uint32_t myKey = sharedKeys[i];
                const uint32_t myValue = sharedValues[i];
                const uint32_t partnerKey = sharedKeys[partnerId];
                const uint32_t partnerValue = sharedValues[partnerId];

                const bool sequenceAscending = ((i / sequenceSize) % 2) == 0;
                const bool ascending = true; // overall sort direction
                const bool finalDirection = sequenceAscending == ascending;

                const bool swap = (myKey > partnerKey) == finalDirection;
                if (swap)
                {
                    sharedKeys[i] = partnerKey;
                    sharedKeys[partnerId] = myKey;
                    sharedValues[i] = partnerValue;
                    sharedValues[partnerId] = myValue;
                }
            }

            // The next pass reads what this pass wrote.
            GroupMemoryBarrierWithGroupSync();
        }
    }

    DataAccessor outputKeys = DataAccessor::create(DataPtr::create(pushData.outputKeyAddress));
    DataAccessor outputValues = DataAccessor::create(DataPtr::create(pushData.outputValueAddress));

    for (uint32_t i = threadId; i < dataSize; i += WorkgroupSize)
    {
        outputKeys.set(i, sharedKeys[i]);
        outputValues.set(i, sharedValues[i]);
    }
}
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O.
// This file is part of the "Nabla Engine".
// For conditions of distribution and use, see copyright notice in nabla.h
#ifndef _BITONIC_SORT_COMMON_INCLUDED_
#define _BITONIC_SORT_COMMON_INCLUDED_

// Parameters of one bitonic sort dispatch: four 64-bit addresses followed by a
// 32-bit element count. Field order and types define the data layout and must
// not be changed independently of the code that fills or reads this struct.
struct BitonicPushData
{
	// Address the keys are read from.
	uint64_t inputKeyAddress;
	// Address the values are read from.
	uint64_t inputValueAddress;
	// Address the sorted keys are written to.
	uint64_t outputKeyAddress;
	// Address the values, reordered together with their keys, are written to.
	uint64_t outputValueAddress;
	// Number of key/value pairs to process.
	uint32_t dataElementCount;
};

#endif
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
{ | ||
"enableParallelBuild": true, | ||
"threadsPerBuildProcess" : 2, | ||
"isExecuted": false, | ||
"scriptPath": "", | ||
"cmake": { | ||
"configurations": [ "Release", "Debug", "RelWithDebInfo" ], | ||
"buildModes": [], | ||
"requiredOptions": [] | ||
}, | ||
"profiles": [ | ||
{ | ||
"backend": "vulkan", // should be none | ||
"platform": "windows", | ||
"buildModes": [], | ||
"runConfiguration": "Release", // we also need to run in Debug and RWDI because foundational example | ||
"gpuArchitectures": [] | ||
} | ||
], | ||
"dependencies": [], | ||
"data": [ | ||
{ | ||
"dependencies": [], | ||
"command": [""], | ||
"outputs": [] | ||
} | ||
] | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Replace this with an `#include "common.hlsl"`; otherwise the same struct is defined twice.