diff --git a/01_HelloCoreSystemAsset/main.cpp b/01_HelloCoreSystemAsset/main.cpp index 6a9188344..7ca4badb4 100644 --- a/01_HelloCoreSystemAsset/main.cpp +++ b/01_HelloCoreSystemAsset/main.cpp @@ -2,8 +2,8 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -// always include nabla first before std:: headers -#include "nabla.h" +// public interface and common examples API, always include first before std:: headers +#include "nbl/examples/examples.hpp" #include "nbl/system/IApplicationFramework.h" diff --git a/02_HelloCompute/main.cpp b/02_HelloCompute/main.cpp index 124cd7dc5..32812fb1a 100644 --- a/02_HelloCompute/main.cpp +++ b/02_HelloCompute/main.cpp @@ -94,9 +94,9 @@ class HelloComputeApp final : public nbl::application_templates::MonoSystemMonoL // The convention is that an `ICPU` object represents a potentially Mutable (and in the past, Serializable) recipe for creating an `IGPU` object, and later examples will show automated systems for doing that. // The Assets always form a Directed Acyclic Graph and our type system enforces that property at compile time (i.e. an `IBuffer` cannot reference an `IImageView` even indirectly). // Another reason for the 1:1 pairing of types is that one can use a CPU-to-GPU associative cache (asset manager has a default one) and use the pointers to the CPU objects as UUIDs. - // The ICPUShader is just a mutable container for source code (can be high level like HLSL needing compilation to SPIR-V or SPIR-V itself) held in an `nbl::asset::ICPUBuffer`. + // The IShader is just a mutable container for source code (can be high level like HLSL needing compilation to SPIR-V or SPIR-V itself) held in an `nbl::asset::ICPUBuffer`. // They can be created: from buffers of code, by compilation from some other source code, or loaded from files (next example will do that). - smart_refctd_ptr<ICPUShader> cpuShader; + smart_refctd_ptr<IShader> cpuShader; { // Normally we'd use the ISystem and the IAssetManager to load shaders flexibly from (virtual) files for ease of development (syntax highlighting and Intellisense), // but I want to show the full process of assembling a shader from raw source code at least once. 
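// The hunk above mentions that the 1:1 ICPU/IGPU type pairing lets a CPU-to-GPU associative cache use the pointer to the
// CPU object as a UUID. The snippet below is only a minimal sketch of that caching idea; `CpuAsset`, `GpuObject` and
// `convertToGPU` are hypothetical placeholders, not Nabla's actual asset-converter API.
#include <memory>
#include <unordered_map>

struct CpuAsset {};  // stand-in for a mutable ICPU "recipe"
struct GpuObject {}; // stand-in for the immutable IGPU object created from it

// hypothetical conversion step, standing in for whatever an asset converter does internally
std::shared_ptr<GpuObject> convertToGPU(const CpuAsset*) { return std::make_shared<GpuObject>(); }

class SimpleCpuToGpuCache
{
	// the raw CPU pointer is the key (the "UUID"), so converting the same recipe twice returns the cached GPU object
	std::unordered_map<const CpuAsset*,std::shared_ptr<GpuObject>> m_cache;

	public:
		std::shared_ptr<GpuObject> getOrCreate(const CpuAsset* cpu)
		{
			if (auto found=m_cache.find(cpu); found!=m_cache.end())
				return found->second;
			auto gpu = convertToGPU(cpu);
			m_cache.emplace(cpu,gpu);
			return gpu;
		}
};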
@@ -138,7 +138,7 @@ class HelloComputeApp final : public nbl::application_templates::MonoSystemMonoL } // Note how each ILogicalDevice method takes a smart-pointer r-value, so that the GPU objects refcount their dependencies - smart_refctd_ptr<IGPUShader> shader = device->createShader(cpuShader.get()); + smart_refctd_ptr shader = device->compileShader({.source = cpuShader.get()}); if (!shader) return logFail("Failed to create a GPU Shader, seems the Driver doesn't like the SPIR-V we're feeding it!\n"); diff --git a/03_DeviceSelectionAndSharedSources/Testers.h b/03_DeviceSelectionAndSharedSources/Testers.h index a76d4b668..fcd5c5ee4 100644 --- a/03_DeviceSelectionAndSharedSources/Testers.h +++ b/03_DeviceSelectionAndSharedSources/Testers.h @@ -4,8 +4,7 @@ #ifndef _NBL_TESTERS_H_INCLUDED_ #define _NBL_TESTERS_H_INCLUDED_ -#include "nbl/application_templates/MonoDeviceApplication.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" +#include "nbl/examples/examples.hpp" using namespace nbl; @@ -24,7 +23,7 @@ class IntrospectionTesterBase const std::string m_functionToTestName = ""; protected: - static std::pair, smart_refctd_ptr> compileHLSLShaderAndTestIntrospection( + static std::pair, smart_refctd_ptr> compileHLSLShaderAndTestIntrospection( video::IPhysicalDevice* physicalDevice, video::ILogicalDevice* device, system::ILogger* logger, asset::IAssetManager* assetMgr, const std::string& shaderPath, CSPIRVIntrospector& introspector) { IAssetLoader::SAssetLoadParams lp = {}; @@ -33,15 +32,18 @@ class IntrospectionTesterBase // this time we load a shader directly from a file auto assetBundle = assetMgr->getAsset(shaderPath, lp); const auto assets = assetBundle.getContents(); - if (assets.empty()) + const auto* metadata = assetBundle.getMetadata(); + if (assets.empty() || assetBundle.getAssetType() != IAsset::ET_SHADER) { logFail(logger, "Could not load shader!"); assert(0); } + const auto hlslMetadata = static_cast<const CHLSLMetadata*>(metadata); + const auto shaderStage = hlslMetadata->shaderStages->front(); // It would be super weird if loading a shader from a file produced more than 1 asset assert(assets.size() == 1); - smart_refctd_ptr<ICPUShader> source = IAsset::castDown<ICPUShader>(assets[0]); + smart_refctd_ptr<IShader> source = IAsset::castDown<IShader>(assets[0]); smart_refctd_ptr introspection; { @@ -53,8 +55,8 @@ class IntrospectionTesterBase // The Shader Asset Loaders deduce the stage from the file extension, // if the extension is generic (.glsl or .hlsl) the stage is unknown. // But it can still be overridden from within the source with a `#pragma shader_stage` - options.stage = source->getStage() == IShader::E_SHADER_STAGE::ESS_COMPUTE ? source->getStage() : IShader::E_SHADER_STAGE::ESS_VERTEX; // TODO: do smth with it - options.targetSpirvVersion = device->getPhysicalDevice()->getLimits().spirvVersion; + options.stage = shaderStage == IShader::E_SHADER_STAGE::ESS_COMPUTE ? 
shaderStage : IShader::E_SHADER_STAGE::ESS_VERTEX; // TODO: do smth with it + options.preprocessorOptions.targetSpirvVersion = device->getPhysicalDevice()->getLimits().spirvVersion; // we need to perform an unoptimized compilation with source debug info or we'll lose names of variables in the introspection options.spirvOptimizer = nullptr; options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; @@ -186,7 +188,7 @@ class PredefinedLayoutTester final : public IntrospectionTesterBase constexpr uint32_t MERGE_TEST_SHADERS_CNT = mergeTestShadersPaths.size(); CSPIRVIntrospector introspector[MERGE_TEST_SHADERS_CNT]; - smart_refctd_ptr<ICPUShader> sources[MERGE_TEST_SHADERS_CNT]; + smart_refctd_ptr<IShader> sources[MERGE_TEST_SHADERS_CNT]; for (uint32_t i = 0u; i < MERGE_TEST_SHADERS_CNT; ++i) { @@ -201,7 +203,7 @@ class PredefinedLayoutTester final : public IntrospectionTesterBase .binding = 0, .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = ICPUShader::E_SHADER_STAGE::ESS_COMPUTE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .count = 1, .immutableSamplers = nullptr } @@ -213,7 +215,7 @@ class PredefinedLayoutTester final : public IntrospectionTesterBase .binding = 0, .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = ICPUShader::E_SHADER_STAGE::ESS_COMPUTE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .count = 1, .immutableSamplers = nullptr }, @@ -221,7 +223,7 @@ class PredefinedLayoutTester final : public IntrospectionTesterBase .binding = 1, .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = ICPUShader::E_SHADER_STAGE::ESS_COMPUTE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .count = 2, .immutableSamplers = nullptr } @@ -251,9 +253,9 @@ class PredefinedLayoutTester final : public IntrospectionTesterBase bool pplnCreationSuccess[MERGE_TEST_SHADERS_CNT]; for (uint32_t i = 0u; i < MERGE_TEST_SHADERS_CNT; ++i) { - ICPUShader::SSpecInfo specInfo; + ICPUPipelineBase::SShaderSpecInfo specInfo; specInfo.entryPoint = "main"; - specInfo.shader = sources[i].get(); + specInfo.shader = sources[i]; pplnCreationSuccess[i] = static_cast<bool>(introspector[i].createApproximateComputePipelineFromIntrospection(specInfo, core::smart_refctd_ptr(predefinedPplnLayout))); } diff --git a/03_DeviceSelectionAndSharedSources/main.cpp b/03_DeviceSelectionAndSharedSources/main.cpp index be56791a1..b8fd3d18b 100644 --- a/03_DeviceSelectionAndSharedSources/main.cpp +++ b/03_DeviceSelectionAndSharedSources/main.cpp @@ -2,15 +2,20 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include "nbl/application_templates/MonoDeviceApplication.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" -#include "CommonPCH/PCH.hpp" + +#include "nbl/examples/examples.hpp" +// TODO: why isn't this in `nabla.h` ? 
+#include "nbl/asset/metadata/CHLSLMetadata.h" + using namespace nbl; -using namespace core; -using namespace system; -using namespace asset; -using namespace video; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; // TODO[Przemek]: update comments @@ -21,10 +26,10 @@ using namespace video; constexpr bool ENABLE_TESTS = false; // This time we create the device in the base class and also use a base class to give us an Asset Manager and an already mounted built-in resource archive -class DeviceSelectionAndSharedSourcesApp final : public application_templates::MonoDeviceApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication +class DeviceSelectionAndSharedSourcesApp final : public application_templates::MonoDeviceApplication, public BuiltinResourcesApplication { using device_base_t = application_templates::MonoDeviceApplication; - using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + using asset_base_t = BuiltinResourcesApplication; public: // Yay thanks to multiple inheritance we cannot forward ctors anymore DeviceSelectionAndSharedSourcesApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : @@ -60,9 +65,9 @@ class DeviceSelectionAndSharedSourcesApp final : public application_templates::M //shaderIntrospection->debugPrint(m_logger.get()); // We've now skipped the manual creation of a descriptor set layout, pipeline layout - ICPUShader::SSpecInfo specInfo; + ICPUPipelineBase::SShaderSpecInfo specInfo; specInfo.entryPoint = "main"; - specInfo.shader = source.get(); + specInfo.shader = source; smart_refctd_ptr cpuPipeline = introspector.createApproximateComputePipelineFromIntrospection(specInfo); @@ -236,7 +241,7 @@ class DeviceSelectionAndSharedSourcesApp final : public application_templates::M // Whether to keep invoking the above. In this example because its headless GPU compute, we do all the work in the app initialization. bool keepRunning() override { return false; } - std::pair, smart_refctd_ptr> compileShaderAndTestIntrospection( + std::pair, smart_refctd_ptr> compileShaderAndTestIntrospection( const std::string& shaderPath, CSPIRVIntrospector& introspector) { IAssetLoader::SAssetLoadParams lp = {}; @@ -245,15 +250,19 @@ class DeviceSelectionAndSharedSourcesApp final : public application_templates::M // this time we load a shader directly from a file auto assetBundle = m_assetMgr->getAsset(shaderPath, lp); const auto assets = assetBundle.getContents(); - if (assets.empty()) + if (assets.empty() || assetBundle.getAssetType() != IAsset::ET_SHADER) { logFail("Could not load shader!"); assert(0); } + const auto* metadata = assetBundle.getMetadata(); + const auto hlslMetadata = static_cast(metadata); + const auto shaderStage = hlslMetadata->shaderStages->front(); + // It would be super weird if loading a shader from a file produced more than 1 asset assert(assets.size() == 1); - smart_refctd_ptr source = IAsset::castDown(assets[0]); + smart_refctd_ptr source = IAsset::castDown(assets[0]); smart_refctd_ptr introspection; { @@ -265,8 +274,8 @@ class DeviceSelectionAndSharedSourcesApp final : public application_templates::M // The Shader Asset Loaders deduce the stage from the file extension, // if the extension is generic (.glsl or .hlsl) the stage is unknown. 
// But it can still be overridden from within the source with a `#pragma shader_stage` - options.stage = source->getStage() == IShader::E_SHADER_STAGE::ESS_COMPUTE ? source->getStage() : IShader::E_SHADER_STAGE::ESS_VERTEX; // TODO: do smth with it - options.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; + options.stage = shaderStage == IShader::E_SHADER_STAGE::ESS_COMPUTE ? shaderStage : IShader::E_SHADER_STAGE::ESS_VERTEX; // TODO: do smth with it + options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; // we need to perform an unoptimized compilation with source debug info or we'll lose names of variables in the introspection options.spirvOptimizer = nullptr; options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; @@ -277,7 +286,7 @@ class DeviceSelectionAndSharedSourcesApp final : public application_templates::M options.preprocessorOptions.includeFinder = compilerSet->getShaderCompiler(source->getContentType())->getDefaultIncludeFinder(); auto spirvUnspecialized = compilerSet->compileToSPIRV(source.get(), options); - const CSPIRVIntrospector::CStageIntrospectionData::SParams inspctParams = { .entryPoint = "main", .shader = spirvUnspecialized }; + const CSPIRVIntrospector::CStageIntrospectionData::SParams inspctParams = { .entryPoint = "main", .shader = spirvUnspecialized, .stage = shaderStage }; introspection = introspector.introspect(inspctParams); introspection->debugPrint(m_logger.get()); diff --git a/05_StreamingAndBufferDeviceAddressApp/app_resources/shader.comp.hlsl b/05_StreamingAndBufferDeviceAddressApp/app_resources/shader.comp.hlsl index 4aeef0e0f..af38ffada 100644 --- a/05_StreamingAndBufferDeviceAddressApp/app_resources/shader.comp.hlsl +++ b/05_StreamingAndBufferDeviceAddressApp/app_resources/shader.comp.hlsl @@ -10,6 +10,7 @@ template void dummyTraitTest() {} [numthreads(WorkgroupSize,1,1)] +[shader("compute")] void main(uint32_t3 ID : SV_DispatchThreadID) { dummyTraitTest(); diff --git a/05_StreamingAndBufferDeviceAddressApp/main.cpp b/05_StreamingAndBufferDeviceAddressApp/main.cpp index e8f7dbd33..b82dc18ca 100644 --- a/05_StreamingAndBufferDeviceAddressApp/main.cpp +++ b/05_StreamingAndBufferDeviceAddressApp/main.cpp @@ -5,7 +5,7 @@ // I've moved out a tiny part of this example into a shared header for reuse, please open and read it. #include "nbl/application_templates/MonoDeviceApplication.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" +#include "nbl/examples/common/BuiltinResourcesApplication.hpp" using namespace nbl; @@ -20,10 +20,10 @@ using namespace video; // In this application we'll cover buffer streaming, Buffer Device Address (BDA) and push constants -class StreamingAndBufferDeviceAddressApp final : public application_templates::MonoDeviceApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication +class StreamingAndBufferDeviceAddressApp final : public application_templates::MonoDeviceApplication, public examples::BuiltinResourcesApplication { using device_base_t = application_templates::MonoDeviceApplication; - using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + using asset_base_t = examples::BuiltinResourcesApplication; // This is the first example that submits multiple workloads in-flight. // What the shader does is it computes the minimum distance of each point against K other random input points. 
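// The comment above describes the 05 example's workload: for every input point, find the minimum distance to K other
// random input points. That is a brute-force O(N*K) reduction; the CPU-side sketch below illustrates the same arithmetic
// only (the real work happens in shader.comp.hlsl, and the names/types here are illustrative, not the example's API).
#include <cmath>
#include <cstddef>
#include <limits>
#include <vector>

struct vec3 { float x, y, z; };

// returns, for each element of `points`, the smallest Euclidean distance to any element of `others`
std::vector<float> minDistances(const std::vector<vec3>& points, const std::vector<vec3>& others)
{
	std::vector<float> retval(points.size(),std::numeric_limits<float>::max());
	for (size_t i=0; i<points.size(); ++i)
	for (const vec3& o : others)
	{
		const float dx = points[i].x-o.x, dy = points[i].y-o.y, dz = points[i].z-o.z;
		retval[i] = std::fmin(retval[i],std::sqrt(dx*dx+dy*dy+dz*dz));
	}
	return retval;
}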
@@ -91,7 +91,7 @@ class StreamingAndBufferDeviceAddressApp final : public application_templates::M return false; // this time we load a shader directly from a file - smart_refctd_ptr shader; + smart_refctd_ptr shader; { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); @@ -102,14 +102,10 @@ class StreamingAndBufferDeviceAddressApp final : public application_templates::M return logFail("Could not load shader!"); // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto source = IAsset::castDown(assets[0]); + const auto shaderSource = IAsset::castDown(assets[0]); + shader = m_device->compileShader({shaderSource.get()}); // The down-cast should not fail! - assert(source); - - // this time we skip the use of the asset converter since the ICPUShader->IGPUShader path is quick and simple - shader = m_device->createShader(source.get()); - if (!shader) - return logFail("Creation of a GPU Shader to from CPU Shader source failed!"); + assert(shader); } // The StreamingTransientDataBuffers are actually composed on top of another useful utility called `CAsyncSingleBufferSubAllocator` @@ -117,8 +113,8 @@ class StreamingAndBufferDeviceAddressApp final : public application_templates::M // `CAsyncSingleBufferSubAllocator` just allows you suballocate subranges of any `IGPUBuffer` range with deferred/latched frees. constexpr uint32_t DownstreamBufferSize = sizeof(output_t)<<23; constexpr uint32_t UpstreamBufferSize = sizeof(input_t)<<23; - - m_utils = make_smart_refctd_ptr(smart_refctd_ptr(m_device),smart_refctd_ptr(m_logger),DownstreamBufferSize,UpstreamBufferSize); + + m_utils = IUtilities::create(smart_refctd_ptr(m_device),smart_refctd_ptr(m_logger),DownstreamBufferSize,UpstreamBufferSize); if (!m_utils) return logFail("Failed to create Utilities!"); m_upStreamingBuffer = m_utils->getDefaultUpStreamingBuffer(); @@ -139,6 +135,7 @@ class StreamingAndBufferDeviceAddressApp final : public application_templates::M IGPUComputePipeline::SCreationParams params = {}; params.layout = layout.get(); params.shader.shader = shader.get(); + params.shader.entryPoint = "main"; if (!m_device->createComputePipelines(nullptr,{¶ms,1},&m_pipeline)) return logFail("Failed to create compute pipeline!\n"); } diff --git a/06_HelloGraphicsQueue/main.cpp b/06_HelloGraphicsQueue/main.cpp index dc2f3ebb4..07d6affd3 100644 --- a/06_HelloGraphicsQueue/main.cpp +++ b/06_HelloGraphicsQueue/main.cpp @@ -3,18 +3,20 @@ // For conditions of distribution and use, see copyright notice in nabla.h -// I've moved out a tiny part of this example into a shared header for reuse, please open and read it. -#include "nbl/application_templates/MonoDeviceApplication.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" +#include "nbl/examples/examples.hpp" #include "nbl/ext/ScreenShot/ScreenShot.h" using namespace nbl; -using namespace core; -using namespace system; -using namespace asset; -using namespace video; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; + // Here we showcase the use of Graphics Queue only // Steps we take in this example: @@ -26,10 +28,10 @@ using namespace video; // - save the smallImg to disk // // all without using IUtilities. 
-class HelloGraphicsQueueApp final : public application_templates::MonoDeviceApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication +class HelloGraphicsQueueApp final : public application_templates::MonoDeviceApplication, public BuiltinResourcesApplication { using device_base_t = application_templates::MonoDeviceApplication; - using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + using asset_base_t = BuiltinResourcesApplication; public: // Yay thanks to multiple inheritance we cannot forward ctors anymore. diff --git a/06_MeshLoaders/CMakeLists.txt b/06_MeshLoaders/CMakeLists.txt deleted file mode 100644 index 2f9218f93..000000000 --- a/06_MeshLoaders/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include(common RESULT_VARIABLE RES) -if(NOT RES) - message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") -endif() - -nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") \ No newline at end of file diff --git a/06_MeshLoaders/main.cpp b/06_MeshLoaders/main.cpp deleted file mode 100644 index 75135c033..000000000 --- a/06_MeshLoaders/main.cpp +++ /dev/null @@ -1,563 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#define _NBL_STATIC_LIB_ -#include -#include -#include - -#include "CCamera.hpp" -#include "../common/CommonAPI.h" -#include "nbl/ext/ScreenShot/ScreenShot.h" - -using namespace nbl; -using namespace core; -using namespace ui; -/* - Uncomment for more detailed logging -*/ - -// #define NBL_MORE_LOGS - -class MeshLoadersApp : public ApplicationBase -{ - constexpr static uint32_t WIN_W = 1280; - constexpr static uint32_t WIN_H = 720; - constexpr static uint32_t SC_IMG_COUNT = 3u; - constexpr static uint32_t FRAMES_IN_FLIGHT = 5u; - constexpr static uint64_t MAX_TIMEOUT = 99999999999999ull; - constexpr static size_t NBL_FRAMES_TO_AVERAGE = 100ull; - - static_assert(FRAMES_IN_FLIGHT > SC_IMG_COUNT); -public: - nbl::core::smart_refctd_ptr windowManager; - nbl::core::smart_refctd_ptr window; - nbl::core::smart_refctd_ptr windowCb; - nbl::core::smart_refctd_ptr apiConnection; - nbl::core::smart_refctd_ptr surface; - nbl::core::smart_refctd_ptr utilities; - nbl::core::smart_refctd_ptr logicalDevice; - nbl::video::IPhysicalDevice* physicalDevice; - std::array queues; - nbl::core::smart_refctd_ptr swapchain; - nbl::core::smart_refctd_ptr renderpass; - nbl::core::smart_refctd_dynamic_array> fbo; - std::array, CommonAPI::InitOutput::MaxFramesInFlight>, CommonAPI::InitOutput::MaxQueuesCount> commandPools; - nbl::core::smart_refctd_ptr system; - nbl::core::smart_refctd_ptr assetManager; - nbl::video::IGPUObjectFromAssetConverter::SParams cpu2gpuParams; - nbl::core::smart_refctd_ptr logger; - nbl::core::smart_refctd_ptr inputSystem; - - nbl::video::IGPUObjectFromAssetConverter cpu2gpu; - - video::IDeviceMemoryBacked::SDeviceMemoryRequirements ubomemreq; - core::smart_refctd_ptr gpuubo; - core::smart_refctd_ptr gpuds1; - - core::smart_refctd_ptr occlusionQueryPool; - core::smart_refctd_ptr timestampQueryPool; - - asset::ICPUMesh* meshRaw = nullptr; - const asset::COBJMetadata* metaOBJ = nullptr; - - core::smart_refctd_ptr frameComplete[FRAMES_IN_FLIGHT] = { nullptr }; - core::smart_refctd_ptr imageAcquire[FRAMES_IN_FLIGHT] = { nullptr }; - core::smart_refctd_ptr renderFinished[FRAMES_IN_FLIGHT] = { nullptr }; - 
core::smart_refctd_ptr commandBuffers[FRAMES_IN_FLIGHT]; - - CommonAPI::InputSystem::ChannelReader mouse; - CommonAPI::InputSystem::ChannelReader keyboard; - Camera camera = Camera(vectorSIMDf(0, 0, 0), vectorSIMDf(0, 0, 0), matrix4SIMD()); - - using RENDERPASS_INDEPENDENT_PIPELINE_ADRESS = size_t; - std::map> gpuPipelines; - core::smart_refctd_ptr gpumesh; - const asset::ICPUMeshBuffer* firstMeshBuffer; - const nbl::asset::COBJMetadata::CRenderpassIndependentPipeline* pipelineMetadata; - nbl::video::ISwapchain::SCreationParams m_swapchainCreationParams; - - uint32_t ds1UboBinding = 0; - int resourceIx; - uint32_t acquiredNextFBO = {}; - std::chrono::steady_clock::time_point lastTime; - bool frameDataFilled = false; - size_t frame_count = 0ull; - double time_sum = 0; - double dtList[NBL_FRAMES_TO_AVERAGE] = {}; - - video::CDumbPresentationOracle oracle; - - core::smart_refctd_ptr queryResultsBuffer; - - void setWindow(core::smart_refctd_ptr&& wnd) override - { - window = std::move(wnd); - } - void setSystem(core::smart_refctd_ptr&& s) override - { - system = std::move(s); - } - nbl::ui::IWindow* getWindow() override - { - return window.get(); - } - video::IAPIConnection* getAPIConnection() override - { - return apiConnection.get(); - } - video::ILogicalDevice* getLogicalDevice() override - { - return logicalDevice.get(); - } - video::IGPURenderpass* getRenderpass() override - { - return renderpass.get(); - } - void setSurface(core::smart_refctd_ptr&& s) override - { - surface = std::move(s); - } - void setFBOs(std::vector>& f) override - { - for (int i = 0; i < f.size(); i++) - { - fbo->begin()[i] = core::smart_refctd_ptr(f[i]); - } - } - void setSwapchain(core::smart_refctd_ptr&& s) override - { - swapchain = std::move(s); - } - uint32_t getSwapchainImageCount() override - { - return swapchain->getImageCount(); - } - virtual nbl::asset::E_FORMAT getDepthFormat() override - { - return nbl::asset::EF_D32_SFLOAT; - } - - void getAndLogQueryPoolResults() - { -#ifdef QUERY_POOL_LOGS - { - uint64_t samples_passed[4] = {}; - auto queryResultFlags = core::bitflag(video::IQueryPool::EQRF_WITH_AVAILABILITY_BIT) | video::IQueryPool::EQRF_64_BIT; - logicalDevice->getQueryPoolResults(occlusionQueryPool.get(), 0u, 2u, sizeof(samples_passed), samples_passed, sizeof(uint64_t) * 2, queryResultFlags); - logger->log("[AVAIL+64] SamplesPassed[0] = %d, SamplesPassed[1] = %d, Result Available = %d, %d", system::ILogger::ELL_INFO, samples_passed[0], samples_passed[2], samples_passed[1], samples_passed[3]); - } - { - uint64_t samples_passed[4] = {}; - auto queryResultFlags = core::bitflag(video::IQueryPool::EQRF_WITH_AVAILABILITY_BIT) | video::IQueryPool::EQRF_64_BIT | video::IQueryPool::EQRF_WAIT_BIT; - logicalDevice->getQueryPoolResults(occlusionQueryPool.get(), 0u, 2u, sizeof(samples_passed), samples_passed, sizeof(uint64_t) * 2, queryResultFlags); - logger->log("[WAIT+AVAIL+64] SamplesPassed[0] = %d, SamplesPassed[1] = %d, Result Available = %d, %d", system::ILogger::ELL_INFO, samples_passed[0], samples_passed[2], samples_passed[1], samples_passed[3]); - } - { - uint32_t samples_passed[2] = {}; - auto queryResultFlags = core::bitflag(video::IQueryPool::EQRF_WAIT_BIT); - logicalDevice->getQueryPoolResults(occlusionQueryPool.get(), 0u, 2u, sizeof(samples_passed), samples_passed, sizeof(uint32_t), queryResultFlags); - logger->log("[WAIT] SamplesPassed[0] = %d, SamplesPassed[1] = %d", system::ILogger::ELL_INFO, samples_passed[0], samples_passed[1]); - } - { - uint64_t timestamps[4] = {}; - auto queryResultFlags 
= core::bitflag(video::IQueryPool::EQRF_WAIT_BIT) | video::IQueryPool::EQRF_WITH_AVAILABILITY_BIT | video::IQueryPool::EQRF_64_BIT; - logicalDevice->getQueryPoolResults(timestampQueryPool.get(), 0u, 2u, sizeof(timestamps), timestamps, sizeof(uint64_t) * 2ull, queryResultFlags); - float timePassed = (timestamps[2] - timestamps[0]) * physicalDevice->getLimits().timestampPeriodInNanoSeconds; - logger->log("Time Passed (Seconds) = %f", system::ILogger::ELL_INFO, (timePassed * 1e-9)); - logger->log("Timestamps availablity: %d, %d", system::ILogger::ELL_INFO, timestamps[1], timestamps[3]); - } -#endif - } - - APP_CONSTRUCTOR(MeshLoadersApp) - void onAppInitialized_impl() override - { - const auto swapchainImageUsage = static_cast(asset::IImage::EUF_COLOR_ATTACHMENT_BIT | asset::IImage::EUF_TRANSFER_SRC_BIT); - CommonAPI::InitParams initParams; - initParams.window = core::smart_refctd_ptr(window); - initParams.apiType = video::EAT_VULKAN; - initParams.appName = { _NBL_APP_NAME_ }; - initParams.framesInFlight = FRAMES_IN_FLIGHT; - initParams.windowWidth = WIN_W; - initParams.windowHeight = WIN_H; - initParams.swapchainImageCount = SC_IMG_COUNT; - initParams.swapchainImageUsage = swapchainImageUsage; - initParams.depthFormat = nbl::asset::EF_D32_SFLOAT; - auto initOutput = CommonAPI::InitWithDefaultExt(std::move(initParams)); - - window = std::move(initParams.window); - windowCb = std::move(initParams.windowCb); - apiConnection = std::move(initOutput.apiConnection); - surface = std::move(initOutput.surface); - utilities = std::move(initOutput.utilities); - logicalDevice = std::move(initOutput.logicalDevice); - physicalDevice = initOutput.physicalDevice; - queues = std::move(initOutput.queues); - renderpass = std::move(initOutput.renderToSwapchainRenderpass); - commandPools = std::move(initOutput.commandPools); - system = std::move(initOutput.system); - assetManager = std::move(initOutput.assetManager); - cpu2gpuParams = std::move(initOutput.cpu2gpuParams); - logger = std::move(initOutput.logger); - inputSystem = std::move(initOutput.inputSystem); - m_swapchainCreationParams = std::move(initOutput.swapchainCreationParams); - - CommonAPI::createSwapchain(std::move(logicalDevice), m_swapchainCreationParams, WIN_W, WIN_H, swapchain); - assert(swapchain); - fbo = CommonAPI::createFBOWithSwapchainImages( - swapchain->getImageCount(), WIN_W, WIN_H, - logicalDevice, swapchain, renderpass, - nbl::asset::EF_D32_SFLOAT - ); - - // Occlusion Query - { - video::IQueryPool::SCreationParams queryPoolCreationParams = {}; - queryPoolCreationParams.queryType = video::IQueryPool::EQT_OCCLUSION; - queryPoolCreationParams.queryCount = 2u; - occlusionQueryPool = logicalDevice->createQueryPool(std::move(queryPoolCreationParams)); - } - - // Timestamp Query - video::IQueryPool::SCreationParams queryPoolCreationParams = {}; - { - video::IQueryPool::SCreationParams queryPoolCreationParams = {}; - queryPoolCreationParams.queryType = video::IQueryPool::EQT_TIMESTAMP; - queryPoolCreationParams.queryCount = 2u; - timestampQueryPool = logicalDevice->createQueryPool(std::move(queryPoolCreationParams)); - } - - { - // SAMPLES_PASSED_0 + AVAILABILIY_0 + SAMPLES_PASSED_1 + AVAILABILIY_1 (uint32_t) - const size_t queriesSize = sizeof(uint32_t) * 4; - video::IGPUBuffer::SCreationParams gpuuboCreationParams; - gpuuboCreationParams.size = queriesSize; - gpuuboCreationParams.usage = core::bitflag(asset::IBuffer::EUF_UNIFORM_BUFFER_BIT)|asset::IBuffer::EUF_TRANSFER_DST_BIT|asset::IBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF; - 
gpuuboCreationParams.queueFamilyIndexCount = 0u; - gpuuboCreationParams.queueFamilyIndices = nullptr; - - queryResultsBuffer = logicalDevice->createBuffer(std::move(gpuuboCreationParams)); - auto memReqs = queryResultsBuffer->getMemoryReqs(); - memReqs.memoryTypeBits &= physicalDevice->getDeviceLocalMemoryTypeBits(); - auto queriesMem = logicalDevice->allocate(memReqs, queryResultsBuffer.get()); - - queryResultsBuffer->setObjectDebugName("QueryResults"); - } - - nbl::video::IGPUObjectFromAssetConverter cpu2gpu; - { - auto* quantNormalCache = assetManager->getMeshManipulator()->getQuantNormalCache(); - quantNormalCache->loadCacheFromFile(system.get(), sharedOutputCWD / "normalCache101010.sse"); - - system::path archPath = sharedInputCWD / "sponza.zip"; - auto arch = system->openFileArchive(archPath); - // test no alias loading (TODO: fix loading from absolute paths) - system->mount(std::move(arch)); - asset::IAssetLoader::SAssetLoadParams loadParams; - loadParams.workingDirectory = sharedInputCWD; - loadParams.logger = logger.get(); - auto meshes_bundle = assetManager->getAsset((sharedInputCWD / "sponza.zip/sponza.obj").string(), loadParams); - assert(!meshes_bundle.getContents().empty()); - - metaOBJ = meshes_bundle.getMetadata()->selfCast(); - - auto cpuMesh = meshes_bundle.getContents().begin()[0]; - meshRaw = static_cast(cpuMesh.get()); - - quantNormalCache->saveCacheToFile(system.get(), sharedOutputCWD / "normalCache101010.sse"); - } - - // Fix FrontFace and BlendParams for meshBuffers - for (size_t i = 0ull; i < meshRaw->getMeshBuffers().size(); ++i) - { - auto& meshBuffer = meshRaw->getMeshBuffers().begin()[i]; - meshBuffer->getPipeline()->getRasterizationParams().frontFaceIsCCW = false; - } - - // we can safely assume that all meshbuffers within mesh loaded from OBJ has same DS1 layout (used for camera-specific data) - firstMeshBuffer = *meshRaw->getMeshBuffers().begin(); - pipelineMetadata = metaOBJ->getAssetSpecificMetadata(firstMeshBuffer->getPipeline()); - - // so we can create just one DS - const asset::ICPUDescriptorSetLayout* ds1layout = firstMeshBuffer->getPipeline()->getLayout()->getDescriptorSetLayout(1u); - ds1UboBinding = ds1layout->getDescriptorRedirect(asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER).getBinding(asset::ICPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ 0 }).data; - - size_t neededDS1UBOsz = 0ull; - { - for (const auto& shdrIn : pipelineMetadata->m_inputSemantics) - if (shdrIn.descriptorSection.type == asset::IRenderpassIndependentPipelineMetadata::ShaderInput::E_TYPE::ET_UNIFORM_BUFFER && shdrIn.descriptorSection.uniformBufferObject.set == 1u && shdrIn.descriptorSection.uniformBufferObject.binding == ds1UboBinding) - neededDS1UBOsz = std::max(neededDS1UBOsz, shdrIn.descriptorSection.uniformBufferObject.relByteoffset + shdrIn.descriptorSection.uniformBufferObject.bytesize); - } - - core::smart_refctd_ptr gpuds1layout; - { - auto gpu_array = cpu2gpu.getGPUObjectsFromAssets(&ds1layout, &ds1layout + 1, cpu2gpuParams); - if (!gpu_array || gpu_array->size() < 1u || !(*gpu_array)[0]) - assert(false); - - gpuds1layout = (*gpu_array)[0]; - } - - core::smart_refctd_ptr descriptorPool = nullptr; - { - video::IDescriptorPool::SCreateInfo createInfo = {}; - createInfo.maxSets = 1u; - createInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER)] = 1u; - descriptorPool = logicalDevice->createDescriptorPool(std::move(createInfo)); - } - - video::IGPUBuffer::SCreationParams gpuuboCreationParams; - gpuuboCreationParams.size = 
neededDS1UBOsz; - gpuuboCreationParams.usage = core::bitflag(asset::IBuffer::EUF_UNIFORM_BUFFER_BIT) | asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF; - gpuuboCreationParams.queueFamilyIndexCount = 0u; - gpuuboCreationParams.queueFamilyIndices = nullptr; - - gpuubo = logicalDevice->createBuffer(std::move(gpuuboCreationParams)); - auto gpuuboMemReqs = gpuubo->getMemoryReqs(); - gpuuboMemReqs.memoryTypeBits &= physicalDevice->getDeviceLocalMemoryTypeBits(); - auto uboMemoryOffset = logicalDevice->allocate(gpuuboMemReqs, gpuubo.get(), video::IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_NONE); - - gpuds1 = descriptorPool->createDescriptorSet(std::move(gpuds1layout)); - - { - video::IGPUDescriptorSet::SWriteDescriptorSet write; - write.dstSet = gpuds1.get(); - write.binding = ds1UboBinding; - write.count = 1u; - write.arrayElement = 0u; - write.descriptorType = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER; - video::IGPUDescriptorSet::SDescriptorInfo info; - { - info.desc = gpuubo; - info.info.buffer.offset = 0ull; - info.info.buffer.size = neededDS1UBOsz; - } - write.info = &info; - logicalDevice->updateDescriptorSets(1u, &write, 0u, nullptr); - } - { - cpu2gpuParams.beginCommandBuffers(); - - auto gpu_array = cpu2gpu.getGPUObjectsFromAssets(&meshRaw, &meshRaw + 1, cpu2gpuParams); - if (!gpu_array || gpu_array->size() < 1u || !(*gpu_array)[0]) - assert(false); - - cpu2gpuParams.waitForCreationToComplete(false); - - gpumesh = (*gpu_array)[0]; - } - - { - for (size_t i = 0; i < gpumesh->getMeshBuffers().size(); ++i) - { - auto gpuIndependentPipeline = gpumesh->getMeshBuffers().begin()[i]->getPipeline(); - - nbl::video::IGPUGraphicsPipeline::SCreationParams graphicsPipelineParams; - graphicsPipelineParams.renderpassIndependent = core::smart_refctd_ptr(const_cast(gpuIndependentPipeline)); - graphicsPipelineParams.renderpass = core::smart_refctd_ptr(renderpass); - - const RENDERPASS_INDEPENDENT_PIPELINE_ADRESS adress = reinterpret_cast(graphicsPipelineParams.renderpassIndependent.get()); - gpuPipelines[adress] = logicalDevice->createGraphicsPipeline(nullptr, std::move(graphicsPipelineParams)); - } - } - - core::vectorSIMDf cameraPosition(-250.0f,177.0f,1.69f); - core::vectorSIMDf cameraTarget(50.0f,125.0f,-3.0f); - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), video::ISurface::getTransformedAspectRatio(swapchain->getPreTransform(), WIN_W, WIN_H), 0.1, 10000); - camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 10.f, 1.f); - lastTime = std::chrono::steady_clock::now(); - - for (size_t i = 0ull; i < NBL_FRAMES_TO_AVERAGE; ++i) - dtList[i] = 0.0; - - oracle.reportBeginFrameRecord(); - - - const auto& graphicsCommandPools = commandPools[CommonAPI::InitOutput::EQT_GRAPHICS]; - for (uint32_t i = 0u; i < FRAMES_IN_FLIGHT; i++) - { - logicalDevice->createCommandBuffers(graphicsCommandPools[i].get(), video::IGPUCommandBuffer::EL_PRIMARY, 1, commandBuffers+i); - imageAcquire[i] = logicalDevice->createSemaphore(); - renderFinished[i] = logicalDevice->createSemaphore(); - } - - constexpr uint64_t MAX_TIMEOUT = 99999999999999ull; - uint32_t acquiredNextFBO = {}; - resourceIx = -1; - } - void onAppTerminated_impl() override - { - const auto& fboCreationParams = fbo->begin()[acquiredNextFBO]->getCreationParameters(); - auto gpuSourceImageView = fboCreationParams.attachments[0]; - - bool status = ext::ScreenShot::createScreenShot( - logicalDevice.get(), - 
queues[CommonAPI::InitOutput::EQT_TRANSFER_DOWN], - renderFinished[resourceIx].get(), - gpuSourceImageView.get(), - assetManager.get(), - "ScreenShot.png", - asset::IImage::EL_PRESENT_SRC, - asset::EAF_NONE); - - assert(status); - logicalDevice->waitIdle(); - } - void workLoopBody() override - { - ++resourceIx; - if (resourceIx >= FRAMES_IN_FLIGHT) - resourceIx = 0; - - auto& commandBuffer = commandBuffers[resourceIx]; - auto& fence = frameComplete[resourceIx]; - if (fence) - logicalDevice->blockForFences(1u, &fence.get()); - else - fence = logicalDevice->createFence(static_cast(0)); - - commandBuffer->reset(nbl::video::IGPUCommandBuffer::ERF_RELEASE_RESOURCES_BIT); - commandBuffer->begin(nbl::video::IGPUCommandBuffer::EU_NONE); - - const auto nextPresentationTimestamp = oracle.acquireNextImage(swapchain.get(), imageAcquire[resourceIx].get(), nullptr, &acquiredNextFBO); - { - inputSystem->getDefaultMouse(&mouse); - inputSystem->getDefaultKeyboard(&keyboard); - - camera.beginInputProcessing(nextPresentationTimestamp); - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); }, logger.get()); - keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { camera.keyboardProcess(events); }, logger.get()); - camera.endInputProcessing(nextPresentationTimestamp); - } - - const auto& viewMatrix = camera.getViewMatrix(); - const auto& viewProjectionMatrix = matrix4SIMD::concatenateBFollowedByAPrecisely( - video::ISurface::getSurfaceTransformationMatrix(swapchain->getPreTransform()), - camera.getConcatenatedMatrix() - ); - - asset::SViewport viewport; - viewport.minDepth = 1.f; - viewport.maxDepth = 0.f; - viewport.x = 0u; - viewport.y = 0u; - viewport.width = WIN_W; - viewport.height = WIN_H; - commandBuffer->setViewport(0u, 1u, &viewport); - - VkRect2D scissor = {}; - scissor.offset = { 0, 0 }; - scissor.extent = { WIN_W, WIN_H }; - commandBuffer->setScissor(0u, 1u, &scissor); - - core::matrix3x4SIMD modelMatrix; - modelMatrix.setTranslation(nbl::core::vectorSIMDf(0, 0, 0, 0)); - core::matrix4SIMD mvp = core::concatenateBFollowedByA(viewProjectionMatrix, modelMatrix); - - const size_t uboSize = gpuubo->getSize(); - core::vector uboData(uboSize); - for (const auto& shdrIn : pipelineMetadata->m_inputSemantics) - { - if (shdrIn.descriptorSection.type == asset::IRenderpassIndependentPipelineMetadata::ShaderInput::E_TYPE::ET_UNIFORM_BUFFER && shdrIn.descriptorSection.uniformBufferObject.set == 1u && shdrIn.descriptorSection.uniformBufferObject.binding == ds1UboBinding) - { - switch (shdrIn.type) - { - case asset::IRenderpassIndependentPipelineMetadata::ECSI_WORLD_VIEW_PROJ: - { - memcpy(uboData.data() + shdrIn.descriptorSection.uniformBufferObject.relByteoffset, mvp.pointer(), shdrIn.descriptorSection.uniformBufferObject.bytesize); - } break; - - case asset::IRenderpassIndependentPipelineMetadata::ECSI_WORLD_VIEW: - { - memcpy(uboData.data() + shdrIn.descriptorSection.uniformBufferObject.relByteoffset, viewMatrix.pointer(), shdrIn.descriptorSection.uniformBufferObject.bytesize); - } break; - - case asset::IRenderpassIndependentPipelineMetadata::ECSI_WORLD_VIEW_INVERSE_TRANSPOSE: - { - memcpy(uboData.data() + shdrIn.descriptorSection.uniformBufferObject.relByteoffset, viewMatrix.pointer(), shdrIn.descriptorSection.uniformBufferObject.bytesize); - } break; - } - } - } - commandBuffer->updateBuffer(gpuubo.get(), 0ull, uboSize, uboData.data()); - - nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo beginInfo; - { - VkRect2D area; - 
area.offset = { 0,0 }; - area.extent = { WIN_W, WIN_H }; - asset::SClearValue clear[2] = {}; - clear[0].color.float32[0] = 1.f; - clear[0].color.float32[1] = 1.f; - clear[0].color.float32[2] = 1.f; - clear[0].color.float32[3] = 1.f; - clear[1].depthStencil.depth = 0.f; - - beginInfo.clearValueCount = 2u; - beginInfo.framebuffer = fbo->begin()[acquiredNextFBO]; - beginInfo.renderpass = renderpass; - beginInfo.renderArea = area; - beginInfo.clearValues = clear; - } - - commandBuffer->resetQueryPool(occlusionQueryPool.get(), 0u, 2u); - commandBuffer->resetQueryPool(timestampQueryPool.get(), 0u, 2u); - commandBuffer->beginRenderPass(&beginInfo, nbl::asset::ESC_INLINE); - - commandBuffer->writeTimestamp(asset::E_PIPELINE_STAGE_FLAGS::EPSF_TOP_OF_PIPE_BIT, timestampQueryPool.get(), 0u); - for (size_t i = 0; i < gpumesh->getMeshBuffers().size(); ++i) - { - if(i < 2) - commandBuffer->beginQuery(occlusionQueryPool.get(), i); - auto gpuMeshBuffer = gpumesh->getMeshBuffers().begin()[i]; - auto gpuGraphicsPipeline = gpuPipelines[reinterpret_cast(gpuMeshBuffer->getPipeline())]; - - const video::IGPURenderpassIndependentPipeline* gpuRenderpassIndependentPipeline = gpuMeshBuffer->getPipeline(); - const video::IGPUDescriptorSet* ds3 = gpuMeshBuffer->getAttachedDescriptorSet(); - - commandBuffer->bindGraphicsPipeline(gpuGraphicsPipeline.get()); - - const video::IGPUDescriptorSet* gpuds1_ptr = gpuds1.get(); - commandBuffer->bindDescriptorSets(asset::EPBP_GRAPHICS, gpuRenderpassIndependentPipeline->getLayout(), 1u, 1u, &gpuds1_ptr); - const video::IGPUDescriptorSet* gpuds3_ptr = gpuMeshBuffer->getAttachedDescriptorSet(); - if (gpuds3_ptr) - commandBuffer->bindDescriptorSets(asset::EPBP_GRAPHICS, gpuRenderpassIndependentPipeline->getLayout(), 3u, 1u, &gpuds3_ptr); - commandBuffer->pushConstants(gpuRenderpassIndependentPipeline->getLayout(), asset::IShader::ESS_FRAGMENT, 0u, gpuMeshBuffer->MAX_PUSH_CONSTANT_BYTESIZE, gpuMeshBuffer->getPushConstantsDataPtr()); - - commandBuffer->drawMeshBuffer(gpuMeshBuffer); - - if(i < 2) - commandBuffer->endQuery(occlusionQueryPool.get(), i); - } - commandBuffer->writeTimestamp(asset::E_PIPELINE_STAGE_FLAGS::EPSF_BOTTOM_OF_PIPE_BIT, timestampQueryPool.get(), 1u); - - commandBuffer->endRenderPass(); - - auto queryResultFlags = core::bitflag(video::IQueryPool::EQRF_WAIT_BIT) | video::IQueryPool::EQRF_WITH_AVAILABILITY_BIT; - commandBuffer->copyQueryPoolResults(occlusionQueryPool.get(), 0, 2, queryResultsBuffer.get(), 0u, sizeof(uint32_t) * 2, queryResultFlags); - - commandBuffer->end(); - - logicalDevice->resetFences(1, &fence.get()); - CommonAPI::Submit( - logicalDevice.get(), - commandBuffer.get(), - queues[CommonAPI::InitOutput::EQT_COMPUTE], - imageAcquire[resourceIx].get(), - renderFinished[resourceIx].get(), - fence.get()); - CommonAPI::Present(logicalDevice.get(), - swapchain.get(), - queues[CommonAPI::InitOutput::EQT_GRAPHICS], renderFinished[resourceIx].get(), acquiredNextFBO); - - getAndLogQueryPoolResults(); - } - bool keepRunning() override - { - return windowCb->isWindowOpen(); - } -}; - -NBL_COMMON_API_MAIN(MeshLoadersApp) diff --git a/06_MeshLoaders/pipeline.groovy b/06_MeshLoaders/pipeline.groovy deleted file mode 100644 index 0923d296f..000000000 --- a/06_MeshLoaders/pipeline.groovy +++ /dev/null @@ -1,50 +0,0 @@ -import org.DevshGraphicsProgramming.Agent -import org.DevshGraphicsProgramming.BuilderInfo -import org.DevshGraphicsProgramming.IBuilder - -class CMeshLoadersBuilder extends IBuilder -{ - public CMeshLoadersBuilder(Agent _agent, _info) - { - 
super(_agent, _info) - } - - @Override - public boolean prepare(Map axisMapping) - { - return true - } - - @Override - public boolean build(Map axisMapping) - { - IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") - IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") - - def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) - def nameOfConfig = getNameOfConfig(config) - - agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") - - return true - } - - @Override - public boolean test(Map axisMapping) - { - return true - } - - @Override - public boolean install(Map axisMapping) - { - return true - } -} - -def create(Agent _agent, _info) -{ - return new CMeshLoadersBuilder(_agent, _info) -} - -return this \ No newline at end of file diff --git a/07_StagingAndMultipleQueues/main.cpp b/07_StagingAndMultipleQueues/main.cpp index 658a28a35..fc6bf4551 100644 --- a/07_StagingAndMultipleQueues/main.cpp +++ b/07_StagingAndMultipleQueues/main.cpp @@ -3,26 +3,24 @@ // For conditions of distribution and use, see copyright notice in nabla.h // I've moved out a tiny part of this example into a shared header for reuse, please open and read it. - -#include "nbl/application_templates/BasicMultiQueueApplication.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" - -// get asset converter -#include "CommonPCH/PCH.hpp" +#include "nbl/examples/examples.hpp" using namespace nbl; -using namespace core; -using namespace system; -using namespace asset; -using namespace video; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; #include "app_resources/common.hlsl" // This time we let the new base class score and pick queue families, as well as initialize `nbl::video::IUtilities` for us -class StagingAndMultipleQueuesApp final : public application_templates::BasicMultiQueueApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication +class StagingAndMultipleQueuesApp final : public application_templates::BasicMultiQueueApplication, public BuiltinResourcesApplication { using device_base_t = application_templates::BasicMultiQueueApplication; - using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + using asset_base_t = BuiltinResourcesApplication; // TODO: would be cool if we used `system::ISystem::listItemsInDirectory(sharedInputCWD/"GLI")` as our dataset static constexpr std::array imagesToLoad = { @@ -246,7 +244,7 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul .binding = 0, .type = nbl::asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .count = 1, .immutableSamplers = nullptr }, @@ -254,7 +252,7 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul .binding = 1, .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .count = 1, .immutableSamplers = nullptr } @@ -281,18 +279,17 
@@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul } // LOAD SHADER FROM FILE - smart_refctd_ptr source; + smart_refctd_ptr source; { - source = loadFistAssetInBundle("../app_resources/comp_shader.hlsl"); - source->setShaderStage(IShader::E_SHADER_STAGE::ESS_COMPUTE); // can also be done via a #pragma in the shader + source = loadFistAssetInBundle("../app_resources/comp_shader.hlsl"); } if (!source) logFailAndTerminate("Could not create a CPU shader!"); - core::smart_refctd_ptr shader = m_device->createShader(source.get()); + core::smart_refctd_ptr shader = m_device->compileShader({ source.get() }); if(!shader) - logFailAndTerminate("Could not create a GPU shader!"); + logFailAndTerminate("Could not compile shader to spirv!"); // CREATE COMPUTE PIPELINE SPushConstantRange pc[1]; @@ -432,15 +429,16 @@ class StagingAndMultipleQueuesApp final : public application_templates::BasicMul submitInfo[0].waitSemaphores = waitSemaphoreSubmitInfo; // there's no save to wait on, or need to prevent signal-after-submit because Renderdoc freezes because it // starts capturing immediately upon a submit and can't defer a capture till semaphores signal. - if (imageToProcessIdisRunningInRenderdoc()) + const bool isRunningInRenderdoc = m_api->runningInGraphicsDebugger()==IAPIConnection::EDebuggerType::Renderdoc; + if (imageToProcessIdisRunningInRenderdoc() && imageToProcessId>=SUBMITS_IN_FLIGHT) + if (isRunningInRenderdoc && imageToProcessId>=SUBMITS_IN_FLIGHT) for (auto old = histogramsSaved.load(); old < histogramSaveWaitSemaphoreValue; old = histogramsSaved.load()) histogramsSaved.wait(old); // Some Devices like all of the Intel GPUs do not have enough queues for us to allocate different queues to compute and transfers, // so our `BasicMultiQueueApplication` will "alias" a single queue to both usages. Normally you don't need to care, but here we're // attempting to do "out-of-order" "submit-before-signal" so we need to "hold back" submissions if the queues are aliased! - if (getTransferUpQueue()==computeQueue || m_api->isRunningInRenderdoc()) + if (getTransferUpQueue()==computeQueue || isRunningInRenderdoc) for (auto old = transfersSubmitted.load(); old <= imageToProcessId; old = transfersSubmitted.load()) transfersSubmitted.wait(old); computeQueue->submit(submitInfo); diff --git a/08_HelloSwapchain/main.cpp b/08_HelloSwapchain/main.cpp index 9137fe77a..cd294b0d2 100644 --- a/08_HelloSwapchain/main.cpp +++ b/08_HelloSwapchain/main.cpp @@ -1,7 +1,7 @@ // Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include "SimpleWindowedApplication.hpp" +#include "nbl/examples/examples.hpp" // #include "nbl/video/surface/CSurfaceVulkan.h" diff --git a/09_GeometryCreator/CMakeLists.txt b/09_GeometryCreator/CMakeLists.txt index 928ef5761..2dd253226 100644 --- a/09_GeometryCreator/CMakeLists.txt +++ b/09_GeometryCreator/CMakeLists.txt @@ -2,5 +2,7 @@ set(NBL_INCLUDE_SERACH_DIRECTORIES "${CMAKE_CURRENT_SOURCE_DIR}/include" ) -nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") -LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} geometryCreatorSpirvBRD) \ No newline at end of file + # TODO; Arek I removed `NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET` from the last parameter here, doesn't this macro have 4 arguments anyway !? 
+nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "" "") +# TODO: Arek temporarily disabled cause I haven't figured out how to make this target yet +# LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} nblExamplesGeometrySpirvBRD) \ No newline at end of file diff --git a/09_GeometryCreator/include/common.hpp b/09_GeometryCreator/include/common.hpp index 3661e5697..84cd8118a 100644 --- a/09_GeometryCreator/include/common.hpp +++ b/09_GeometryCreator/include/common.hpp @@ -1,20 +1,8 @@ -#ifndef __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ -#define __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ +#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ -#include -#include "nbl/asset/utils/CGeometryCreator.h" -#include "SimpleWindowedApplication.hpp" -#include "InputSystem.hpp" -#include "CEventCallback.hpp" - -#include "CCamera.hpp" -#include "SBasicViewParameters.hlsl" - -#include "geometry/creator/spirv/builtin/CArchive.h" -#include "geometry/creator/spirv/builtin/builtinResources.h" - -#include "CGeomtryCreatorScene.hpp" +#include "nbl/examples/examples.hpp" using namespace nbl; using namespace core; @@ -24,6 +12,7 @@ using namespace asset; using namespace ui; using namespace video; using namespace scene; -using namespace geometrycreator; +using namespace nbl::examples; + #endif // __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ \ No newline at end of file diff --git a/09_GeometryCreator/main.cpp b/09_GeometryCreator/main.cpp index 4ac527e08..cb3c21f4d 100644 --- a/09_GeometryCreator/main.cpp +++ b/09_GeometryCreator/main.cpp @@ -1,146 +1,24 @@ -// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include "common.hpp" - -class CSwapchainFramebuffersAndDepth final : public nbl::video::CDefaultSwapchainFramebuffers -{ - using base_t = CDefaultSwapchainFramebuffers; - -public: - template - inline CSwapchainFramebuffersAndDepth(ILogicalDevice* device, const asset::E_FORMAT _desiredDepthFormat, Args&&... args) : CDefaultSwapchainFramebuffers(device, std::forward(args)...) 
- { - const IPhysicalDevice::SImageFormatPromotionRequest req = { - .originalFormat = _desiredDepthFormat, - .usages = {IGPUImage::EUF_RENDER_ATTACHMENT_BIT} - }; - m_depthFormat = m_device->getPhysicalDevice()->promoteImageFormat(req, IGPUImage::TILING::OPTIMAL); - - const static IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = { - {{ - { - .format = m_depthFormat, - .samples = IGPUImage::ESCF_1_BIT, - .mayAlias = false - }, - /*.loadOp = */{IGPURenderpass::LOAD_OP::CLEAR}, - /*.storeOp = */{IGPURenderpass::STORE_OP::STORE}, - /*.initialLayout = */{IGPUImage::LAYOUT::UNDEFINED}, // because we clear we don't care about contents - /*.finalLayout = */{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL} // transition to presentation right away so we can skip a barrier - }}, - IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd - }; - m_params.depthStencilAttachments = depthAttachments; - - static IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = { - m_params.subpasses[0], - IGPURenderpass::SCreationParams::SubpassesEnd - }; - subpasses[0].depthStencilAttachment.render = { .attachmentIndex = 0,.layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL }; - m_params.subpasses = subpasses; - } - -protected: - inline bool onCreateSwapchain_impl(const uint8_t qFam) override - { - auto device = const_cast(m_renderpass->getOriginDevice()); - const auto depthFormat = m_renderpass->getCreationParameters().depthStencilAttachments[0].format; - const auto& sharedParams = getSwapchain()->getCreationParameters().sharedParams; - auto image = device->createImage({ IImage::SCreationParams{ - .type = IGPUImage::ET_2D, - .samples = IGPUImage::ESCF_1_BIT, - .format = depthFormat, - .extent = {sharedParams.width,sharedParams.height,1}, - .mipLevels = 1, - .arrayLayers = 1, - .depthUsage = IGPUImage::EUF_RENDER_ATTACHMENT_BIT - } }); - - device->allocate(image->getMemoryReqs(), image.get()); - - m_depthBuffer = device->createImageView({ - .flags = IGPUImageView::ECF_NONE, - .subUsages = IGPUImage::EUF_RENDER_ATTACHMENT_BIT, - .image = std::move(image), - .viewType = IGPUImageView::ET_2D, - .format = depthFormat, - .subresourceRange = {IGPUImage::EAF_DEPTH_BIT,0,1,0,1} - }); - - const auto retval = base_t::onCreateSwapchain_impl(qFam); - m_depthBuffer = nullptr; - return retval; - } - - inline smart_refctd_ptr createFramebuffer(IGPUFramebuffer::SCreationParams&& params) override - { - params.depthStencilAttachments = &m_depthBuffer.get(); - return m_device->createFramebuffer(std::move(params)); - } - - E_FORMAT m_depthFormat; - // only used to pass a parameter from `onCreateSwapchain_impl` to `createFramebuffer` - smart_refctd_ptr m_depthBuffer; -}; +#include "common.hpp" -class GeometryCreatorApp final : public examples::SimpleWindowedApplication +class GeometryCreatorApp final : public MonoWindowApplication, public BuiltinResourcesApplication { - using device_base_t = examples::SimpleWindowedApplication; - using clock_t = std::chrono::steady_clock; - - constexpr static inline uint32_t WIN_W = 1280, WIN_H = 720; - - // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers - constexpr static inline uint32_t MaxFramesInFlight = 3u; - - constexpr static inline clock_t::duration DisplayImageDuration = std::chrono::milliseconds(900); + using device_base_t = MonoWindowApplication; + using asset_base_t = BuiltinResourcesApplication; public: - inline GeometryCreatorApp(const path& _localInputCWD, const path& 
_localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) - : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} - - virtual SPhysicalDeviceFeatures getRequiredDeviceFeatures() const override - { - auto retval = device_base_t::getRequiredDeviceFeatures(); - retval.geometryShader = true; - return retval; - } - - inline core::vector getSurfaces() const override - { - if (!m_surface) - { - { - auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); - IWindow::SCreationParams params = {}; - params.callback = core::make_smart_refctd_ptr(); - params.width = WIN_W; - params.height = WIN_H; - params.x = 32; - params.y = 32; - params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; - params.windowCaption = "GeometryCreatorApp"; - params.callback = windowCallback; - const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); - } - - auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); - const_cast&>(m_surface) = nbl::video::CSimpleResizeSurface::create(std::move(surface)); - } - - if (m_surface) - return { {m_surface->getSurface()/*,EQF_NONE*/} }; - - return {}; - } + GeometryCreatorApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), + device_base_t({1280,720}, EF_D16_UNORM, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} inline bool onAppInitialized(smart_refctd_ptr&& system) override { - m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); - + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; @@ -148,58 +26,7 @@ class GeometryCreatorApp final : public examples::SimpleWindowedApplication if (!m_semaphore) return logFail("Failed to Create a Semaphore!"); - ISwapchain::SCreationParams swapchainParams = { .surface = m_surface->getSurface() }; - if (!swapchainParams.deduceFormat(m_physicalDevice)) - return logFail("Could not choose a Surface Format for the Swapchain!"); - - // Subsequent submits don't wait for each other, hence its important to have External Dependencies which prevent users of the depth attachment overlapping. 
- const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { - // wipe-transition of Color to ATTACHMENT_OPTIMAL - { - .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .dstSubpass = 0, - .memoryBarrier = { - // last place where the depth can get modified in previous frame - .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT, - // only write ops, reads can't be made available - .srcAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, - // destination needs to wait as early as possible - .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, - // because of depth test needing a read and a write - .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT - } - // leave view offsets and flags default - }, - // color from ATTACHMENT_OPTIMAL to PRESENT_SRC - { - .srcSubpass = 0, - .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .memoryBarrier = { - // last place where the depth can get modified - .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - // only write ops, reads can't be made available - .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - // spec says nothing is needed when presentation is the destination - } - // leave view offsets and flags default - }, - IGPURenderpass::SCreationParams::DependenciesEnd - }; - - // TODO: promote the depth format if D16 not supported, or quote the spec if there's guaranteed support for it - auto scResources = std::make_unique(m_device.get(), EF_D16_UNORM, swapchainParams.surfaceFormat.format, dependencies); - - auto* renderpass = scResources->getRenderpass(); - - if (!renderpass) - return logFail("Failed to create Renderpass!"); - - auto gQueue = getGraphicsQueue(); - if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) - return logFail("Could not create Window & Surface or initialize the Surface!"); - - auto pool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - + auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); for (auto i = 0u; i < MaxFramesInFlight; i++) { if (!pool) @@ -208,80 +35,58 @@ class GeometryCreatorApp final : public examples::SimpleWindowedApplication return logFail("Couldn't create Command Buffer!"); } - m_winMgr->setWindowSize(m_window.get(), WIN_W, WIN_H); - m_surface->recreateSwapchain(); - - auto assetManager = make_smart_refctd_ptr(smart_refctd_ptr(system)); - auto* geometry = assetManager->getGeometryCreator(); - - //using Builder = typename CScene::CreateResourcesDirectlyWithDevice::Builder; - using Builder = typename CScene::CreateResourcesWithAssetConverter::Builder; - auto oneRunCmd = CScene::createCommandBuffer(m_utils->getLogicalDevice(), m_utils->getLogger(), gQueue->getFamilyIndex()); - Builder builder(m_utils.get(), oneRunCmd.get(), m_logger.get(), geometry); - - // gpu resources - if (builder.build()) + const uint32_t addtionalBufferOwnershipFamilies[] = {getGraphicsQueue()->getFamilyIndex()}; + m_scene = CGeometryCreatorScene::create( + { + .transferQueue = getTransferUpQueue(), + .utilities = m_utils.get(), + .logger = m_logger.get(), + .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies + }, + CSimpleDebugRenderer::DefaultPolygonGeometryPatch // we want to use the vertex data through UTBs + ); + + auto scRes = 
static_cast(m_surface->getSwapchainResources()); + const auto& geometries = m_scene->getInitParams().geometries; + m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(),scRes->getRenderpass(),0,{&geometries.front().get(),geometries.size()}); + if (!m_renderer || m_renderer->getGeometries().size() != geometries.size()) + return logFail("Could not create Renderer!"); + // special case { - if (!builder.finalize(resources, gQueue)) - m_logger->log("Could not finalize resource objects to gpu objects!", ILogger::ELL_ERROR); + const auto& pipelines = m_renderer->getInitParams().pipelines; + auto ix = 0u; + for (const auto& name : m_scene->getInitParams().geometryNames) + { + if (name=="Cone") + m_renderer->getGeometry(ix).pipeline = pipelines[CSimpleDebugRenderer::SInitParams::PipelineType::Cone]; + ix++; + } } - else - m_logger->log("Could not build resource objects!", ILogger::ELL_ERROR); + m_renderer->m_instances.resize(1); + m_renderer->m_instances[0].world = float32_t3x4( + float32_t4(1,0,0,0), + float32_t4(0,1,0,0), + float32_t4(0,0,1,0) + ); // camera { core::vectorSIMDf cameraPosition(-5.81655884, 2.58630896, -4.23974705); core::vectorSIMDf cameraTarget(-0.349590302, -0.213266611, 0.317821503); - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(WIN_W) / WIN_H, 0.1, 10000); + matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(m_initialResolution.x)/float(m_initialResolution.y), 0.1, 10000); camera = Camera(cameraPosition, cameraTarget, projectionMatrix, 1.069f, 0.4f); } - m_winMgr->show(m_window.get()); - oracle.reportBeginFrameRecord(); - + onAppInitializedFinish(); return true; } - inline void workLoopBody() override + inline IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override { - // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. - const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); - // We block for semaphores for 2 reasons here: - // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight] - // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. 
[MaxAcquiresInFlight] - if (m_realFrameIx >= framesInFlight) - { - const ISemaphore::SWaitInfo cbDonePending[] = - { - { - .semaphore = m_semaphore.get(), - .value = m_realFrameIx + 1 - framesInFlight - } - }; - if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) - return; - } - - const auto resourceIx = m_realFrameIx % MaxFramesInFlight; - m_inputSystem->getDefaultMouse(&mouse); m_inputSystem->getDefaultKeyboard(&keyboard); - auto updatePresentationTimestamp = [&]() - { - m_currentImageAcquire = m_surface->acquireNextImage(); - - oracle.reportEndFrameRecord(); - const auto timestamp = oracle.getNextPresentationTimeStamp(); - oracle.reportBeginFrameRecord(); - - return timestamp; - }; - - const auto nextPresentationTimestamp = updatePresentationTimestamp(); - - if (!m_currentImageAcquire) - return; + const auto resourceIx = m_realFrameIx % device_base_t::MaxFramesInFlight; auto* const cb = m_cmdBufs.data()[resourceIx].get(); cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); @@ -292,40 +97,8 @@ class GeometryCreatorApp final : public examples::SimpleWindowedApplication mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); mouseProcess(events); }, m_logger.get()); keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { camera.keyboardProcess(events); }, m_logger.get()); camera.endInputProcessing(nextPresentationTimestamp); - - const auto type = static_cast(gcIndex); - const auto& [gpu, meta] = resources.objects[type]; - - object.meta.type = type; - object.meta.name = meta.name; - } - - const auto viewMatrix = camera.getViewMatrix(); - const auto viewProjectionMatrix = camera.getConcatenatedMatrix(); - - core::matrix3x4SIMD modelMatrix; - modelMatrix.setTranslation(nbl::core::vectorSIMDf(0, 0, 0, 0)); - modelMatrix.setRotation(quaternion(0, 0, 0)); - - core::matrix3x4SIMD modelViewMatrix = core::concatenateBFollowedByA(viewMatrix, modelMatrix); - core::matrix4SIMD modelViewProjectionMatrix = core::concatenateBFollowedByA(viewProjectionMatrix, modelMatrix); - - core::matrix3x4SIMD normalMatrix; - modelViewMatrix.getSub3x3InverseTranspose(normalMatrix); - - SBasicViewParameters uboData; - memcpy(uboData.MVP, modelViewProjectionMatrix.pointer(), sizeof(uboData.MVP)); - memcpy(uboData.MV, modelViewMatrix.pointer(), sizeof(uboData.MV)); - memcpy(uboData.NormalMat, normalMatrix.pointer(), sizeof(uboData.NormalMat)); - { - SBufferRange range; - range.buffer = core::smart_refctd_ptr(resources.ubo.buffer); - range.size = resources.ubo.buffer->getSize(); - - cb->updateBuffer(range, &uboData); } - auto* queue = getGraphicsQueue(); asset::SViewport viewport; { @@ -352,12 +125,12 @@ class GeometryCreatorApp final : public examples::SimpleWindowedApplication .extent = {m_window->getWidth(),m_window->getHeight()} }; - const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} }; + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {1.f,0.f,1.f,1.f} }; const IGPUCommandBuffer::SClearDepthStencilValue depthValue = { .depth = 0.f }; auto scRes = static_cast(m_surface->getSwapchainResources()); const IGPUCommandBuffer::SRenderpassBeginInfo info = { - .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex), .colorClearValues = &clearValue, .depthStencilClearValues = &depthValue, .renderArea = currentRenderArea @@ -366,112 +139,120 @@ class 
GeometryCreatorApp final : public examples::SimpleWindowedApplication cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); } - const auto& [hook, meta] = resources.objects[object.meta.type]; - auto* rawPipeline = hook.pipeline.get(); - - SBufferBinding vertex = hook.bindings.vertex, index = hook.bindings.index; - - cb->bindGraphicsPipeline(rawPipeline); - cb->bindDescriptorSets(EPBP_GRAPHICS, rawPipeline->getLayout(), 1, 1, &resources.descriptorSet.get()); - cb->bindVertexBuffers(0, 1, &vertex); - - if (index.buffer && hook.indexType != EIT_UNKNOWN) + float32_t3x4 viewMatrix; + float32_t4x4 viewProjMatrix; + // TODO: get rid of legacy matrices { - cb->bindIndexBuffer(index, hook.indexType); - cb->drawIndexed(hook.indexCount, 1, 0, 0, 0); + memcpy(&viewMatrix,camera.getViewMatrix().pointer(),sizeof(viewMatrix)); + memcpy(&viewProjMatrix,camera.getConcatenatedMatrix().pointer(),sizeof(viewProjMatrix)); } - else - cb->draw(hook.indexCount, 1, 0, 0); + const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix,viewProjMatrix); + + // tear down scene every frame + m_renderer->m_instances[0].packedGeo = m_renderer->getGeometries().data()+gcIndex; + m_renderer->render(cb,viewParams); cb->endRenderPass(); + cb->endDebugMarker(); cb->end(); + + IQueue::SSubmitInfo::SSemaphoreInfo retval = { - const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = - { - { - .semaphore = m_semaphore.get(), - .value = ++m_realFrameIx, - .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS - } - }; + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cb } + }; + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { { - { - const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = - { - {.cmdbuf = cb } - }; - - const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = - { - { - .semaphore = m_currentImageAcquire.semaphore, - .value = m_currentImageAcquire.acquireCount, - .stageMask = PIPELINE_STAGE_FLAGS::NONE - } - }; - const IQueue::SSubmitInfo infos[] = - { - { - .waitSemaphores = acquired, - .commandBuffers = commandBuffers, - .signalSemaphores = rendered - } - }; - - if (queue->submit(infos) == IQueue::RESULT::SUCCESS) - { - const nbl::video::ISemaphore::SWaitInfo waitInfos[] = - { { - .semaphore = m_semaphore.get(), - .value = m_realFrameIx - } }; - - m_device->blockForSemaphores(waitInfos); // this is not solution, quick wa to not throw validation errors - } - else - --m_realFrameIx; - } + .semaphore = device_base_t::getCurrentAcquire().semaphore, + .value = device_base_t::getCurrentAcquire().acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE } - - std::string caption = "[Nabla Engine] Geometry Creator"; + }; + const IQueue::SSubmitInfo infos[] = + { { - caption += ", displaying [" + std::string(object.meta.name.data()) + "]"; - m_window->setCaption(caption); + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = {&retval,1} } - m_surface->present(m_currentImageAcquire.imageIndex, rendered); - } - } + }; - inline bool keepRunning() override - { - if (m_surface->irrecoverable()) - return false; + if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) + { + retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal + m_realFrameIx--; + } - return true; + std::string caption = "[Nabla Engine] Geometry Creator"; + { + caption += ", displaying ["; + 
caption += m_scene->getInitParams().geometryNames[gcIndex];
+ caption += "]";
+ m_window->setCaption(caption);
+ }
+ return retval;
}
-
- inline bool onAppTerminated() override
+
+ protected:
+ const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override
{
- return device_base_t::onAppTerminated();
+ // Subsequent submits don't wait for each other, hence it's important to have External Dependencies which prevent users of the depth attachment overlapping.
+ const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = {
+ // wipe-transition of Color to ATTACHMENT_OPTIMAL and depth
+ {
+ .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External,
+ .dstSubpass = 0,
+ .memoryBarrier = {
+ // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later
+ .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT,
+ // don't want any writes to be available, we'll clear
+ .srcAccessMask = ACCESS_FLAGS::NONE,
+ // destination needs to wait as early as possible
+ // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because it's a logically later stage, see TODO in `ECommonEnums.h`
+ .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
+ // because depth and color get cleared first, no read mask is needed
+ .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT
+ }
+ // leave view offsets and flags default
+ },
+ // color from ATTACHMENT_OPTIMAL to PRESENT_SRC
+ {
+ .srcSubpass = 0,
+ .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External,
+ .memoryBarrier = {
+ // last place where the color can get modified, depth is implicitly earlier
+ .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
+ // only write ops, reads can't be made available
+ .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT
+ // spec says nothing is needed when presentation is the destination
+ }
+ // leave view offsets and flags default
+ },
+ IGPURenderpass::SCreationParams::DependenciesEnd
+ };
+ return dependencies;
}
private:
- smart_refctd_ptr m_window;
- smart_refctd_ptr> m_surface;
+ //
+ smart_refctd_ptr<CGeometryCreatorScene> m_scene;
+ smart_refctd_ptr<CSimpleDebugRenderer> m_renderer;
+ //
smart_refctd_ptr<ISemaphore> m_semaphore;
uint64_t m_realFrameIx = 0;
- std::array<smart_refctd_ptr<IGPUCommandBuffer>, MaxFramesInFlight> m_cmdBufs;
- ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {};
-
- core::smart_refctd_ptr<InputSystem> m_inputSystem;
+ std::array<smart_refctd_ptr<IGPUCommandBuffer>,device_base_t::MaxFramesInFlight> m_cmdBufs;
+ //
InputSystem::ChannelReader<IMouseEventChannel> mouse;
InputSystem::ChannelReader<IKeyboardEventChannel> keyboard;
+ //
Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD());
- video::CDumbPresentationOracle oracle;
- ResourcesBundle resources;
- ObjectDrawHookCpu object;
uint16_t gcIndex = {};
void mouseProcess(const nbl::ui::IMouseEventChannel::range_t& events)
@@ -480,8 +261,11 @@ class GeometryCreatorApp final : public examples::SimpleWindowedApplication
{
auto ev = *eventIt;
- if (ev.type == nbl::ui::SMouseEvent::EET_SCROLL)
- gcIndex = std::clamp(int16_t(gcIndex) + int16_t(core::sign(ev.scrollEvent.verticalScroll)), int64_t(0), int64_t(OT_COUNT - (uint8_t)1u));
+ if (ev.type==nbl::ui::SMouseEvent::EET_SCROLL && m_renderer)
+ {
+ gcIndex += int16_t(core::sign(ev.scrollEvent.verticalScroll));
+ gcIndex = core::clamp(gcIndex,0ull,m_renderer->getGeometries().size()-1);
+ }
}
}
};
diff --git 
a/10_CountingSort/app_resources/prefix_sum_shader.comp.hlsl b/10_CountingSort/app_resources/prefix_sum_shader.comp.hlsl index 1e5d2510e..b0301fc3f 100644 --- a/10_CountingSort/app_resources/prefix_sum_shader.comp.hlsl +++ b/10_CountingSort/app_resources/prefix_sum_shader.comp.hlsl @@ -4,6 +4,7 @@ [[vk::push_constant]] CountingPushData pushData; [numthreads(WorkgroupSize,1,1)] +[shader("compute")] void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) { sort::CountingParameters < uint32_t > params; diff --git a/10_CountingSort/app_resources/scatter_shader.comp.hlsl b/10_CountingSort/app_resources/scatter_shader.comp.hlsl index fa502726f..ddecfca2b 100644 --- a/10_CountingSort/app_resources/scatter_shader.comp.hlsl +++ b/10_CountingSort/app_resources/scatter_shader.comp.hlsl @@ -6,6 +6,7 @@ using DoublePtrAccessor = DoubleBdaAccessor; [numthreads(WorkgroupSize, 1, 1)] +[shader("compute")] void main(uint32_t3 ID : SV_GroupThreadID, uint32_t3 GroupID : SV_GroupID) { sort::CountingParameters params; diff --git a/10_CountingSort/main.cpp b/10_CountingSort/main.cpp index 4d0c93516..d51650919 100644 --- a/10_CountingSort/main.cpp +++ b/10_CountingSort/main.cpp @@ -1,20 +1,21 @@ -#include "nbl/application_templates/MonoDeviceApplication.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" -#include "CommonPCH/PCH.hpp" +#include "nbl/examples/examples.hpp" using namespace nbl; -using namespace core; -using namespace system; -using namespace asset; -using namespace video; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; #include "app_resources/common.hlsl" #include "nbl/builtin/hlsl/bit.hlsl" -class CountingSortApp final : public application_templates::MonoDeviceApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication +class CountingSortApp final : public application_templates::MonoDeviceApplication, public BuiltinResourcesApplication { using device_base_t = application_templates::MonoDeviceApplication; - using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + using asset_base_t = BuiltinResourcesApplication; public: // Yay thanks to multiple inheritance we cannot forward ctors anymore @@ -37,7 +38,7 @@ class CountingSortApp final : public application_templates::MonoDeviceApplicatio const uint32_t bucket_count = std::min((uint32_t)3000, MaxBucketCount); const uint32_t elements_per_thread = ceil((float)ceil((float)element_count / limits.computeUnits) / WorkgroupSize); - auto prepShader = [&](const core::string& path) -> smart_refctd_ptr + auto prepShader = [&](const core::string& path) -> smart_refctd_ptr { // this time we load a shader directly from a file IAssetLoader::SAssetLoadParams lp = {}; @@ -51,7 +52,7 @@ class CountingSortApp final : public application_templates::MonoDeviceApplicatio return nullptr; } - auto source = IAsset::castDown(assets[0]); + auto source = IAsset::castDown(assets[0]); // The down-cast should not fail! 
assert(source); @@ -63,8 +64,8 @@ class CountingSortApp final : public application_templates::MonoDeviceApplicatio WorkgroupSize, bucket_count ); - // this time we skip the use of the asset converter since the ICPUShader->IGPUShader path is quick and simple - auto shader = m_device->createShader(overrideSource.get()); + // this time we skip the use of the asset converter since the IShader->IGPUShader path is quick and simple + auto shader = m_device->compileShader({ overrideSource.get() }); if (!shader) { logFail("Creation of Prefix Sum Shader from CPU Shader source failed!"); @@ -93,8 +94,8 @@ class CountingSortApp final : public application_templates::MonoDeviceApplicatio params.shader.shader = prefixSumShader.get(); params.shader.entryPoint = "main"; params.shader.entries = nullptr; - params.shader.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); + params.shader.requiredSubgroupSize = static_cast(5); + params.cached.requireFullSubgroups = true; if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &prefixSumPipeline)) return logFail("Failed to create compute pipeline!\n"); params.shader.shader = scatterShader.get(); diff --git a/11_FFT/app_resources/shader.comp.hlsl b/11_FFT/app_resources/shader.comp.hlsl index ecbf4f092..7c86f50b4 100644 --- a/11_FFT/app_resources/shader.comp.hlsl +++ b/11_FFT/app_resources/shader.comp.hlsl @@ -14,13 +14,13 @@ uint32_t3 glsl::gl_WorkGroupSize() { return uint32_t3(uint32_t(ConstevalParamete struct SharedMemoryAccessor { - template + template void set(IndexType idx, AccessType value) { sharedmem[idx] = value; } - template + template void get(IndexType idx, NBL_REF_ARG(AccessType) value) { value = sharedmem[idx]; @@ -44,14 +44,14 @@ struct Accessor } // TODO: can't use our own BDA yet, because it doesn't support the types `workgroup::FFT` will invoke these templates with - template - void get(const uint32_t index, NBL_REF_ARG(AccessType) value) + template + void get(const IndexType index, NBL_REF_ARG(AccessType) value) { value = vk::RawBufferLoad(address + index * sizeof(AccessType)); } - template - void set(const uint32_t index, const AccessType value) + template + void set(const IndexType index, const AccessType value) { vk::RawBufferStore(address + index * sizeof(AccessType), value); } @@ -60,6 +60,7 @@ struct Accessor }; [numthreads(ConstevalParameters::WorkgroupSize,1,1)] +[shader("compute")] void main(uint32_t3 ID : SV_DispatchThreadID) { Accessor accessor = Accessor::create(pushConstants.deviceBufferAddress); diff --git a/11_FFT/main.cpp b/11_FFT/main.cpp index 80f5f856c..1886da72a 100644 --- a/11_FFT/main.cpp +++ b/11_FFT/main.cpp @@ -3,17 +3,16 @@ // For conditions of distribution and use, see copyright notice in nabla.h -// I've moved out a tiny part of this example into a shared header for reuse, please open and read it. 
-#include "nbl/application_templates/MonoDeviceApplication.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" - +#include "nbl/examples/examples.hpp" using namespace nbl; -using namespace core; -using namespace system; -using namespace asset; -using namespace video; - +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; #include "app_resources/common.hlsl" #include "nbl/builtin/hlsl/bit.hlsl" @@ -21,10 +20,10 @@ using namespace video; // Simple showcase of how to run FFT on a 1D array -class FFT_Test final : public application_templates::MonoDeviceApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication +class FFT_Test final : public application_templates::MonoDeviceApplication, public BuiltinResourcesApplication { using device_base_t = application_templates::MonoDeviceApplication; - using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + using asset_base_t = BuiltinResourcesApplication; smart_refctd_ptr m_pipeline; @@ -46,13 +45,13 @@ class FFT_Test final : public application_templates::MonoDeviceApplication, publ smart_refctd_ptr m_timeline; uint64_t semaphorValue = 0; - inline core::smart_refctd_ptr createShader( + inline core::smart_refctd_ptr createShader( const char* includeMainName) { std::string prelude = "#include \""; - auto CPUShader = core::make_smart_refctd_ptr((prelude + includeMainName + "\"\n").c_str(), IShader::E_SHADER_STAGE::ESS_COMPUTE, IShader::E_CONTENT_TYPE::ECT_HLSL, includeMainName); - assert(CPUShader); - return m_device->createShader(CPUShader.get()); + auto hlslShader = core::make_smart_refctd_ptr((prelude + includeMainName + "\"\n").c_str(), IShader::E_CONTENT_TYPE::ECT_HLSL, includeMainName); + assert(hlslShader); + return m_device->compileShader({ hlslShader.get() }); } public: @@ -70,7 +69,7 @@ class FFT_Test final : public application_templates::MonoDeviceApplication, publ return false; // this time we load a shader directly from a file - smart_refctd_ptr shader; + smart_refctd_ptr shader; /* { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); @@ -81,14 +80,14 @@ class FFT_Test final : public application_templates::MonoDeviceApplication, publ return logFail("Could not load shader!"); // Cast down the asset to its proper type - auto source = IAsset::castDown(assets[0]); + auto source = IAsset::castDown(assets[0]); // The down-cast should not fail! 
assert(source); - // Compile directly to IGPUShader - shader = m_device->createShader(source.get()); + // Compile directly to SPIR-V Shader + shader = m_device->compileShader({ source.get() }); if (!shader) - return logFail("Creation of a GPU Shader to from CPU Shader source failed!"); + return logFail("Creation of a SPIR-V Shader from HLSL Shader source failed!"); }*/ shader = createShader("app_resources/shader.comp.hlsl"); @@ -96,7 +95,7 @@ class FFT_Test final : public application_templates::MonoDeviceApplication, publ constexpr uint32_t DownstreamBufferSize = sizeof(scalar_t) << 23; constexpr uint32_t UpstreamBufferSize = sizeof(scalar_t) << 23; - m_utils = make_smart_refctd_ptr(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger), DownstreamBufferSize, UpstreamBufferSize); + m_utils = IUtilities::create(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger), DownstreamBufferSize, UpstreamBufferSize); if (!m_utils) return logFail("Failed to create Utilities!"); m_upStreamingBuffer = m_utils->getDefaultUpStreamingBuffer(); @@ -132,8 +131,9 @@ class FFT_Test final : public application_templates::MonoDeviceApplication, publ IGPUComputePipeline::SCreationParams params = {}; params.layout = layout.get(); params.shader.shader = shader.get(); - params.shader.requiredSubgroupSize = static_cast(hlsl::findMSB(m_physicalDevice->getLimits().maxSubgroupSize)); - params.shader.requireFullSubgroups = true; + params.shader.entryPoint = "main"; + params.shader.requiredSubgroupSize = static_cast(hlsl::findMSB(m_physicalDevice->getLimits().maxSubgroupSize)); + params.cached.requireFullSubgroups = true; if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) return logFail("Failed to create compute pipeline!\n"); } diff --git a/12_MeshLoaders/CMakeLists.txt b/12_MeshLoaders/CMakeLists.txt new file mode 100644 index 000000000..d2ea26ef5 --- /dev/null +++ b/12_MeshLoaders/CMakeLists.txt @@ -0,0 +1,21 @@ +set(NBL_INCLUDE_SERACH_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/include" +) +set(NBL_LIBRARIES) + +if (NBL_BUILD_MITSUBA_LOADER) + list(APPEND NBL_INCLUDE_SERACH_DIRECTORIES + "${NBL_EXT_MITSUBA_LOADER_INCLUDE_DIRS}" + ) + list(APPEND NBL_LIBRARIES + "${NBL_EXT_MITSUBA_LOADER_LIB}" + ) +endif() + + # TODO; Arek I removed `NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET` from the last parameter here, doesn't this macro have 4 arguments anyway !? 
+nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}") +# TODO: Arek temporarily disabled cause I haven't figured out how to make this target yet +# LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} nblExamplesGeometrySpirvBRD) + +add_dependencies(${EXECUTABLE_NAME} argparse) +target_include_directories(${EXECUTABLE_NAME} PUBLIC $) \ No newline at end of file diff --git a/12_MeshLoaders/README.md b/12_MeshLoaders/README.md new file mode 100644 index 000000000..6330f4673 --- /dev/null +++ b/12_MeshLoaders/README.md @@ -0,0 +1,2 @@ +https://github.com/user-attachments/assets/6f779700-e6d4-4e11-95fb-7a7fddc47255 + diff --git a/06_MeshLoaders/config.json.template b/12_MeshLoaders/config.json.template similarity index 100% rename from 06_MeshLoaders/config.json.template rename to 12_MeshLoaders/config.json.template diff --git a/12_MeshLoaders/include/common.hpp b/12_MeshLoaders/include/common.hpp new file mode 100644 index 000000000..84cd8118a --- /dev/null +++ b/12_MeshLoaders/include/common.hpp @@ -0,0 +1,18 @@ +#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ + + +#include "nbl/examples/examples.hpp" + +using namespace nbl; +using namespace core; +using namespace hlsl; +using namespace system; +using namespace asset; +using namespace ui; +using namespace video; +using namespace scene; +using namespace nbl::examples; + + +#endif // __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ \ No newline at end of file diff --git a/12_MeshLoaders/main.cpp b/12_MeshLoaders/main.cpp new file mode 100644 index 000000000..d80fa8998 --- /dev/null +++ b/12_MeshLoaders/main.cpp @@ -0,0 +1,505 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#include "argparse/argparse.hpp" +#include "common.hpp" + +#include "../3rdparty/portable-file-dialogs/portable-file-dialogs.h" + +#ifdef NBL_BUILD_MITSUBA_LOADER +#include "nbl/ext/MitsubaLoader/CSerializedLoader.h" +#endif + +class MeshLoadersApp final : public MonoWindowApplication, public BuiltinResourcesApplication +{ + using device_base_t = MonoWindowApplication; + using asset_base_t = BuiltinResourcesApplication; + +public: + inline MeshLoadersApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), + device_base_t({ 1280,720 }, EF_D32_SFLOAT, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) + { + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; +#ifdef NBL_BUILD_MITSUBA_LOADER + m_assetMgr->addAssetLoader(make_smart_refctd_ptr()); +#endif + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + m_saveGeomPrefixPath = localOutputCWD / "saved"; + + // parse args + argparse::ArgumentParser parser("12_meshloaders"); + parser.add_argument("--savegeometry") + .help("Save the mesh on exit or reload") + .flag(); + + parser.add_argument("--savepath") + .nargs(1) + .help("Specify the file to which the mesh will be saved"); + + try + { + parser.parse_args({ argv.data(), argv.data() + argv.size() }); + } + catch (const std::exception& e) + { + return logFail(e.what()); + } + + if (parser["--savegeometry"] == true) + m_saveGeom = true; + + if (parser.present("--savepath")) + { + auto tmp = path(parser.get("--savepath")); + + if (tmp.empty() || !tmp.has_filename()) + return logFail("Invalid path has been specified in --savepath argument"); + + if (!std::filesystem::exists(tmp.parent_path())) + return logFail("Path specified in --savepath argument doesn't exist"); + + m_specifiedGeomSavePath.emplace(std::move(tmp.generic_string())); + } + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + for (auto i = 0u; i < MaxFramesInFlight; i++) + { + if (!pool) + return logFail("Couldn't create Command Pool!"); + if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i,1 })) + return logFail("Couldn't create Command Buffer!"); + } + + + auto scRes = static_cast(m_surface->getSwapchainResources()); + m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(), scRes->getRenderpass(), 0, {}); + if (!m_renderer) + return logFail("Failed to create renderer!"); + + // + if (!reloadModel()) + return false; + + camera.mapKeysToArrows(); + + onAppInitializedFinish(); + return true; + } + + inline IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override + { + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); + + // + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + + auto* const cb = m_cmdBufs.data()[resourceIx].get(); + cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // clear to black for both 
things + { + // begin renderpass + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + auto* framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex); + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {1.f,0.f,1.f,1.f} }; + const IGPUCommandBuffer::SClearDepthStencilValue depthValue = { .depth = 0.f }; + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {framebuffer->getCreationParameters().width,framebuffer->getCreationParameters().height} + }; + const IGPUCommandBuffer::SRenderpassBeginInfo info = + { + .framebuffer = framebuffer, + .colorClearValues = &clearValue, + .depthStencilClearValues = &depthValue, + .renderArea = currentRenderArea + }; + cb->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + + const SViewport viewport = { + .x = static_cast(currentRenderArea.offset.x), + .y = static_cast(currentRenderArea.offset.y), + .width = static_cast(currentRenderArea.extent.width), + .height = static_cast(currentRenderArea.extent.height) + }; + cb->setViewport(0u, 1u, &viewport); + + cb->setScissor(0u, 1u, ¤tRenderArea); + } + // late latch input + { + bool reload = false; + camera.beginInputProcessing(nextPresentationTimestamp); + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); }, m_logger.get()); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + for (const auto& event : events) + if (event.keyCode == E_KEY_CODE::EKC_R && event.action == SKeyboardEvent::ECA_RELEASED) + reload = true; + camera.keyboardProcess(events); + }, + m_logger.get() + ); + camera.endInputProcessing(nextPresentationTimestamp); + if (reload) + reloadModel(); + } + // draw scene + { + float32_t3x4 viewMatrix; + float32_t4x4 viewProjMatrix; + // TODO: get rid of legacy matrices + { + memcpy(&viewMatrix, camera.getViewMatrix().pointer(), sizeof(viewMatrix)); + memcpy(&viewProjMatrix, camera.getConcatenatedMatrix().pointer(), sizeof(viewProjMatrix)); + } + m_renderer->render(cb, CSimpleDebugRenderer::SViewParams(viewMatrix, viewProjMatrix)); + } + cb->endRenderPass(); + } + cb->end(); + + IQueue::SSubmitInfo::SSemaphoreInfo retval = + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cb } + }; + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { + { + .semaphore = device_base_t::getCurrentAcquire().semaphore, + .value = device_base_t::getCurrentAcquire().acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = {&retval,1} + } + }; + + if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) + { + retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal + m_realFrameIx--; + } + + std::string caption = "[Nabla Engine] Mesh Loaders"; + { + caption += ", displaying ["; + caption += m_modelPath; + caption += "]"; + m_window->setCaption(caption); + } + return retval; + } + + inline bool onAppTerminated() override + { + if (m_saveGeomTaskFuture.valid()) + { + m_logger->log("Waiting for geometry writer to finish writing...", ILogger::ELL_INFO); + m_saveGeomTaskFuture.wait(); + } + + return device_base_t::onAppTerminated(); + } + +protected: + const 
video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override
+ {
+ // Subsequent submits don't wait for each other, hence it's important to have External Dependencies which prevent users of the depth attachment overlapping.
+ const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = {
+ // wipe-transition of Color to ATTACHMENT_OPTIMAL and depth
+ {
+ .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External,
+ .dstSubpass = 0,
+ .memoryBarrier = {
+ // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later
+ .srcStageMask = PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT,
+ // don't want any writes to be available, we'll clear
+ .srcAccessMask = ACCESS_FLAGS::NONE,
+ // destination needs to wait as early as possible
+ // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because it's a logically later stage, see TODO in `ECommonEnums.h`
+ .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
+ // because depth and color get cleared first, no read mask is needed
+ .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT
+ }
+ // leave view offsets and flags default
+ },
+ // color from ATTACHMENT_OPTIMAL to PRESENT_SRC
+ {
+ .srcSubpass = 0,
+ .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External,
+ .memoryBarrier = {
+ // last place where the color can get modified, depth is implicitly earlier
+ .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT,
+ // only write ops, reads can't be made available
+ .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT
+ // spec says nothing is needed when presentation is the destination
+ }
+ // leave view offsets and flags default
+ },
+ IGPURenderpass::SCreationParams::DependenciesEnd
+ };
+ return dependencies;
+ }
+
+private:
+ // TODO: standardise this across examples, and take from `argv`
+ bool m_nonInteractiveTest = false;
+
+ bool reloadModel()
+ {
+ if (m_nonInteractiveTest) // TODO: maybe also take from argv and argc
+ m_modelPath = (sharedInputCWD / "ply/Spanner-ply.ply").string();
+ else
+ {
+ pfd::open_file file("Choose a supported Model File", sharedInputCWD.string(),
+ {
+ "All Supported Formats", "*.ply *.stl *.serialized *.obj",
+ "TODO (.ply)", "*.ply",
+ "TODO (.stl)", "*.stl",
+ "Mitsuba 0.6 Serialized (.serialized)", "*.serialized",
+ "Wavefront Object (.obj)", "*.obj"
+ },
+ false
+ );
+ if (file.result().empty())
+ return false;
+ m_modelPath = file.result()[0];
+ }
+
+ // free up
+ m_renderer->m_instances.clear();
+ m_renderer->clearGeometries({ .semaphore = m_semaphore.get(),.value = m_realFrameIx });
+ m_assetMgr->clearAllAssetCache();
+
+ //! 
load the geometry + IAssetLoader::SAssetLoadParams params = {}; + params.logger = m_logger.get(); + auto asset = m_assetMgr->getAsset(m_modelPath, params); + if (asset.getContents().empty()) + return false; + + // + core::vector> geometries; + switch (asset.getAssetType()) + { + case IAsset::E_TYPE::ET_GEOMETRY: + for (const auto& item : asset.getContents()) + if (auto polyGeo = IAsset::castDown(item); polyGeo) + geometries.push_back(polyGeo); + break; + default: + m_logger->log("Asset loaded but not a supported type (ET_GEOMETRY,ET_GEOMETRY_COLLECTION)", ILogger::ELL_ERROR); + break; + } + if (geometries.empty()) + return false; + + if (m_saveGeom) + { + if (m_saveGeomTaskFuture.valid()) + { + m_logger->log("Waiting for previous geometry saving task to complete...", ILogger::ELL_INFO); + m_saveGeomTaskFuture.wait(); + } + + std::string currentGeomSavePath = m_specifiedGeomSavePath.value_or((m_saveGeomPrefixPath / path(m_modelPath).filename()).generic_string()); + m_saveGeomTaskFuture = std::async( + std::launch::async, + [this, geometries, currentGeomSavePath] { writeGeometry( + geometries[0], + currentGeomSavePath + ); } + ); + } + + using aabb_t = hlsl::shapes::AABB<3, double>; + auto printAABB = [&](const aabb_t& aabb, const char* extraMsg = "")->void + { + m_logger->log("%s AABB is (%f,%f,%f) -> (%f,%f,%f)", ILogger::ELL_INFO, extraMsg, aabb.minVx.x, aabb.minVx.y, aabb.minVx.z, aabb.maxVx.x, aabb.maxVx.y, aabb.maxVx.z); + }; + auto bound = aabb_t::create(); + // convert the geometries + { + smart_refctd_ptr converter = CAssetConverter::create({ .device = m_device.get() }); + + const auto transferFamily = getTransferUpQueue()->getFamilyIndex(); + + struct SInputs : CAssetConverter::SInputs + { + virtual inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUBuffer* buffer, const CAssetConverter::patch_t& patch) const + { + return sharedBufferOwnership; + } + + core::vector sharedBufferOwnership; + } inputs = {}; + core::vector> patches(geometries.size(), CSimpleDebugRenderer::DefaultPolygonGeometryPatch); + { + inputs.logger = m_logger.get(); + std::get>(inputs.assets) = { &geometries.front().get(),geometries.size() }; + std::get>(inputs.patches) = patches; + // set up shared ownership so we don't have to + core::unordered_set families; + families.insert(transferFamily); + families.insert(getGraphicsQueue()->getFamilyIndex()); + if (families.size() > 1) + for (const auto fam : families) + inputs.sharedBufferOwnership.push_back(fam); + } + + // reserve + auto reservation = converter->reserve(inputs); + if (!reservation) + { + m_logger->log("Failed to reserve GPU objects for CPU->GPU conversion!", ILogger::ELL_ERROR); + return false; + } + + // convert + { + auto semaphore = m_device->createSemaphore(0u); + + constexpr auto MultiBuffering = 2; + std::array, MultiBuffering> commandBuffers = {}; + { + auto pool = m_device->createCommandPool(transferFamily, IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT | IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, commandBuffers, smart_refctd_ptr(m_logger)); + } + commandBuffers.front()->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + std::array commandBufferSubmits; + for (auto i = 0; i < MultiBuffering; i++) + commandBufferSubmits[i].cmdbuf = commandBuffers[i].get(); + + SIntendedSubmitInfo transfer = {}; + transfer.queue = getTransferUpQueue(); + transfer.scratchCommandBuffers = commandBufferSubmits; + transfer.scratchSemaphore 
= { + .semaphore = semaphore.get(), + .value = 0u, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS + }; + + CAssetConverter::SConvertParams cpar = {}; + cpar.utilities = m_utils.get(); + cpar.transfer = &transfer; + + // basically it records all data uploads and submits them right away + auto future = reservation.convert(cpar); + if (future.copy() != IQueue::RESULT::SUCCESS) + { + m_logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); + return false; + } + } + + auto tmp = hlsl::float32_t4x3( + hlsl::float32_t3(1, 0, 0), + hlsl::float32_t3(0, 1, 0), + hlsl::float32_t3(0, 0, 1), + hlsl::float32_t3(0, 0, 0) + ); + core::vector worldTforms; + const auto& converted = reservation.getGPUObjects(); + for (const auto& geom : converted) + { + const auto promoted = geom.value->getAABB(); + printAABB(promoted, "Geometry"); + tmp[3].x += promoted.getExtent().x; + const auto promotedWorld = hlsl::float64_t3x4(worldTforms.emplace_back(hlsl::transpose(tmp))); + const auto transformed = hlsl::shapes::util::transform(promotedWorld, promoted); + printAABB(transformed, "Transformed"); + bound = hlsl::shapes::util::union_(transformed, bound); + } + printAABB(bound, "Total"); + if (!m_renderer->addGeometries({ &converted.front().get(),converted.size() })) + return false; + + auto worlTformsIt = worldTforms.begin(); + for (const auto& geo : m_renderer->getGeometries()) + m_renderer->m_instances.push_back({ + .world = *(worlTformsIt++), + .packedGeo = &geo + }); + } + + // get scene bounds and reset camera + { + const double distance = 0.05; + const auto diagonal = bound.getExtent(); + { + const auto measure = hlsl::length(diagonal); + const auto aspectRatio = float(m_window->getWidth()) / float(m_window->getHeight()); + camera.setProjectionMatrix(core::matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(1.2f, aspectRatio, distance * measure * 0.1, measure * 4.0)); + camera.setMoveSpeed(measure * 0.04); + } + const auto pos = bound.maxVx + diagonal * distance; + camera.setPosition(vectorSIMDf(pos.x, pos.y, pos.z)); + const auto center = (bound.minVx + bound.maxVx) * 0.5; + camera.setTarget(vectorSIMDf(center.x, center.y, center.z)); + } + + // TODO: write out the geometry + + return true; + } + + void writeGeometry(smart_refctd_ptr geometry, const std::string& savePath) + { + IAsset* assetPtr = const_cast(static_cast(geometry.get())); + IAssetWriter::SAssetWriteParams params{ assetPtr }; + m_logger->log("Saving mesh to %s", ILogger::ELL_INFO, savePath.c_str()); + if (!m_assetMgr->writeAsset(savePath, params)) + m_logger->log("Failed to save %s", ILogger::ELL_ERROR, savePath.c_str()); + m_logger->log("Mesh successfully saved!", ILogger::ELL_INFO); + } + + // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers + constexpr static inline uint32_t MaxFramesInFlight = 3u; + // + smart_refctd_ptr m_renderer; + // + smart_refctd_ptr m_semaphore; + uint64_t m_realFrameIx = 0; + std::array, MaxFramesInFlight> m_cmdBufs; + // + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; + // + Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + // mutables + std::string m_modelPath; + + bool m_saveGeom = false; + std::future m_saveGeomTaskFuture; + std::optional m_specifiedGeomSavePath; + nbl::system::path m_saveGeomPrefixPath; +}; + +NBL_MAIN_FUNC(MeshLoadersApp) \ No newline at end of file diff --git a/old_to_refactor/03_GPU_Mesh/pipeline.groovy 
b/12_MeshLoaders/pipeline.groovy similarity index 88% rename from old_to_refactor/03_GPU_Mesh/pipeline.groovy rename to 12_MeshLoaders/pipeline.groovy index b19625fa7..7b7c9702a 100644 --- a/old_to_refactor/03_GPU_Mesh/pipeline.groovy +++ b/12_MeshLoaders/pipeline.groovy @@ -2,9 +2,9 @@ import org.DevshGraphicsProgramming.Agent import org.DevshGraphicsProgramming.BuilderInfo import org.DevshGraphicsProgramming.IBuilder -class CGPUMeshBuilder extends IBuilder +class CUIBuilder extends IBuilder { - public CGPUMeshBuilder(Agent _agent, _info) + public CUIBuilder(Agent _agent, _info) { super(_agent, _info) } @@ -44,7 +44,7 @@ class CGPUMeshBuilder extends IBuilder def create(Agent _agent, _info) { - return new CGPUMeshBuilder(_agent, _info) + return new CUIBuilder(_agent, _info) } return this \ No newline at end of file diff --git a/21_LRUCacheUnitTest/main.cpp b/21_LRUCacheUnitTest/main.cpp index 1c63fc744..467c6d4e4 100644 --- a/21_LRUCacheUnitTest/main.cpp +++ b/21_LRUCacheUnitTest/main.cpp @@ -5,6 +5,7 @@ // I've moved out a tiny part of this example into a shared header for reuse, please open and read it. #include "nbl/application_templates/MonoSystemMonoLoggerApplication.hpp" +#include using namespace nbl; using namespace core; @@ -180,6 +181,38 @@ class LRUCacheTestApp final : public nbl::application_templates::MonoSystemMonoL cache3.insert(1, "bar"); cache3.clear(); + // Cache iterator test + constexpr uint32_t cache4Size = 10; + ResizableLRUCache cache4(cache4Size); + for (auto i = 0u; i < cache4Size; i++) + { + cache4.insert(i, i); + } + // Default iterator is MRU -> LRU + uint32_t counter = cache4Size - 1; + for (auto& pair : cache4) + { + assert(pair.first == counter && pair.second == counter); + counter--; + } + // Reverse LRU -> MRU traversal + counter = 0u; + for (auto it = cache4.crbegin(); it != cache4.crend(); it++) + { + assert(it->first == counter && it->second == counter); + counter++; + } + + // Cache copy test + ResizableLRUCache cache4Copy(cache4); + for (auto it = cache4.cbegin(), itCopy = cache4Copy.cbegin(); it != cache4.cend(); it++, itCopy++) + { + assert(*it == *itCopy); + // Assert deep copy + assert(it.operator->() != itCopy.operator->()); + + } + // Besides the disposal function that gets called when evicting, we need to check that the Cache properly destroys all resident `Key,Value` pairs when destroyed struct Foo { @@ -208,15 +241,13 @@ class LRUCacheTestApp final : public nbl::application_templates::MonoSystemMonoL int destroyCounter = 0; { - ResizableLRUCache cache4(10u); + ResizableLRUCache cache5(10u); for (int i = 0; i < 10; i++) - cache4.insert(i, Foo(&destroyCounter)); + cache5.insert(i, Foo(&destroyCounter)); int x = 0; } - assert(destroyCounter == 10); - m_logger->log("all good"); m_textureLRUCache = std::unique_ptr(new TextureLRUCache(1024u)); diff --git a/22_CppCompat/CIntrinsicsTester.h b/22_CppCompat/CIntrinsicsTester.h index 77aa2c1ca..d053977c0 100644 --- a/22_CppCompat/CIntrinsicsTester.h +++ b/22_CppCompat/CIntrinsicsTester.h @@ -1,12 +1,13 @@ #ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_C_INTRINSICS_TESTER_INCLUDED_ #define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_C_INTRINSICS_TESTER_INCLUDED_ -#include + +#include "nbl/examples/examples.hpp" + #include "app_resources/common.hlsl" -#include "nbl/application_templates/MonoDeviceApplication.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" #include "ITester.h" + using namespace nbl; class CIntrinsicsTester final : public ITester diff --git 
a/22_CppCompat/CTgmathTester.h b/22_CppCompat/CTgmathTester.h index 6d2b23c73..63b0e483e 100644 --- a/22_CppCompat/CTgmathTester.h +++ b/22_CppCompat/CTgmathTester.h @@ -1,12 +1,13 @@ #ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_C_TGMATH_TESTER_INCLUDED_ #define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_C_TGMATH_TESTER_INCLUDED_ -#include + +#include "nbl/examples/examples.hpp" + #include "app_resources/common.hlsl" -#include "nbl/application_templates/MonoDeviceApplication.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" #include "ITester.h" + using namespace nbl; class CTgmathTester final : public ITester diff --git a/22_CppCompat/ITester.h b/22_CppCompat/ITester.h index a216fbf40..4ecd522b9 100644 --- a/22_CppCompat/ITester.h +++ b/22_CppCompat/ITester.h @@ -1,10 +1,12 @@ #ifndef _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ #define _NBL_EXAMPLES_TESTS_22_CPP_COMPAT_I_TESTER_INCLUDED_ -#include + +#include "nbl/examples/examples.hpp" + #include "app_resources/common.hlsl" -#include "nbl/application_templates/MonoDeviceApplication.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" +#include "nbl/asset/metadata/CHLSLMetadata.h" + using namespace nbl; @@ -45,14 +47,15 @@ class ITester logFail("Failed to create Command Buffers!\n"); // Load shaders, set up pipeline - core::smart_refctd_ptr shader; + core::smart_refctd_ptr shader; + auto shaderStage = ESS_UNKNOWN; { asset::IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); lp.workingDirectory = ""; // virtual root auto assetBundle = m_assetMgr->getAsset(pipleineSetupData.testShaderPath, lp); const auto assets = assetBundle.getContents(); - if (assets.empty()) + if (assets.empty() || assetBundle.getAssetType() != asset::IAsset::ET_SHADER) { logFail("Could not load shader!"); assert(0); @@ -60,24 +63,22 @@ class ITester // It would be super weird if loading a shader from a file produced more than 1 asset assert(assets.size() == 1); - core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); + core::smart_refctd_ptr source = asset::IAsset::castDown(assets[0]); + const auto hlslMetadata = static_cast(assetBundle.getMetadata()); + shaderStage = hlslMetadata->shaderStages->front(); auto* compilerSet = m_assetMgr->getCompilerSet(); asset::IShaderCompiler::SCompilerOptions options = {}; - options.stage = source->getStage(); - options.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; + options.stage = shaderStage; + options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; options.spirvOptimizer = nullptr; options.debugInfoFlags |= asset::IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); options.preprocessorOptions.logger = m_logger.get(); options.preprocessorOptions.includeFinder = compilerSet->getShaderCompiler(source->getContentType())->getDefaultIncludeFinder(); - auto spirv = compilerSet->compileToSPIRV(source.get(), options); - - video::ILogicalDevice::SShaderCreationParameters params{}; - params.cpushader = spirv.get(); - shader = m_device->createShader(params); + shader = compilerSet->compileToSPIRV(source.get(), options); } if (!shader) diff --git a/22_CppCompat/main.cpp b/22_CppCompat/main.cpp index 7fa2556c4..70c8d7b3a 100644 --- a/22_CppCompat/main.cpp +++ b/22_CppCompat/main.cpp @@ -1,26 +1,26 @@ // Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. 
// This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include -#include -#include -#include -#include "nbl/application_templates/MonoDeviceApplication.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" #include "app_resources/common.hlsl" #include "CTgmathTester.h" #include "CIntrinsicsTester.h" +#include +#include +#include + + +using namespace nbl; using namespace nbl::core; using namespace nbl::hlsl; using namespace nbl::system; using namespace nbl::asset; +using namespace nbl::ui; using namespace nbl::video; -using namespace nbl::application_templates; - +using namespace nbl::examples; //using namespace glm; @@ -43,10 +43,10 @@ struct T float32_t4 h; }; -class CompatibilityTest final : public MonoDeviceApplication, public MonoAssetManagerAndBuiltinResourceApplication +class CompatibilityTest final : public application_templates::MonoDeviceApplication, public BuiltinResourcesApplication { - using device_base_t = MonoDeviceApplication; - using asset_base_t = MonoAssetManagerAndBuiltinResourceApplication; + using device_base_t = application_templates::MonoDeviceApplication; + using asset_base_t = BuiltinResourcesApplication; public: CompatibilityTest(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} @@ -84,7 +84,7 @@ class CompatibilityTest final : public MonoDeviceApplication, public MonoAssetMa m_commandPool = m_device->createCommandPool(m_queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); m_commandPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { &m_cmdbuf,1 }, smart_refctd_ptr(m_logger)); - smart_refctd_ptr shader; + smart_refctd_ptr shader; { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); @@ -94,14 +94,12 @@ class CompatibilityTest final : public MonoDeviceApplication, public MonoAssetMa if (assets.empty()) return logFail("Could not load shader!"); - // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto source = IAsset::castDown(assets[0]); + auto source = IAsset::castDown(assets[0]); // The down-cast should not fail! 
assert(source); - assert(source->getStage() == IShader::E_SHADER_STAGE::ESS_COMPUTE); // this time we skip the use of the asset converter since the ICPUShader->IGPUShader path is quick and simple - shader = m_device->createShader(source.get()); + shader = m_device->compileShader({ source.get() }); if (!shader) return logFail("Creation of a GPU Shader to from CPU Shader source failed!"); } @@ -129,6 +127,7 @@ class CompatibilityTest final : public MonoDeviceApplication, public MonoAssetMa IGPUComputePipeline::SCreationParams params = {}; params.layout = layout.get(); params.shader.shader = shader.get(); + params.shader.entryPoint = "main"; if (!m_device->createComputePipelines(nullptr, { ¶ms,1 }, &m_pipeline)) return logFail("Failed to create compute pipeline!\n"); } diff --git a/23_Arithmetic2UnitTest/CMakeLists.txt b/23_Arithmetic2UnitTest/CMakeLists.txt new file mode 100644 index 000000000..a18b7a8c0 --- /dev/null +++ b/23_Arithmetic2UnitTest/CMakeLists.txt @@ -0,0 +1,15 @@ +include(common) + +nbl_create_executable_project("" "" "" "") + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin + TARGET ${EXECUTABLE_NAME}_builtins + LINK_TO ${EXECUTABLE_NAME} + BIND app_resources + BUILTINS + common.hlsl + shaderCommon.hlsl + testSubgroup.comp.hlsl + testWorkgroup.comp.hlsl +) \ No newline at end of file diff --git a/23_ArithmeticUnitTest/app_resources/common.hlsl b/23_Arithmetic2UnitTest/app_resources/common.hlsl similarity index 89% rename from 23_ArithmeticUnitTest/app_resources/common.hlsl rename to 23_Arithmetic2UnitTest/app_resources/common.hlsl index 10892a2b9..6654645cf 100644 --- a/23_ArithmeticUnitTest/app_resources/common.hlsl +++ b/23_Arithmetic2UnitTest/app_resources/common.hlsl @@ -1,15 +1,14 @@ #include "nbl/builtin/hlsl/cpp_compat.hlsl" #include "nbl/builtin/hlsl/functional.hlsl" -template -struct Output +struct PushConstantData { - NBL_CONSTEXPR_STATIC_INLINE uint32_t ScanElementCount = kScanElementCount; - - uint32_t subgroupSize; - uint32_t data[ScanElementCount]; + uint64_t pInputBuf; + uint64_t pOutputBuf[8]; }; +namespace arithmetic +{ // Thanks to our unified HLSL/C++ STD lib we're able to remove a whole load of code template struct bit_and : nbl::hlsl::bit_and @@ -92,5 +91,6 @@ struct ballot : nbl::hlsl::plus static inline constexpr const char* name = "bitcount"; #endif }; +} -#include "nbl/builtin/hlsl/subgroup/basic.hlsl" \ No newline at end of file +#include "nbl/builtin/hlsl/glsl_compat/subgroup_basic.hlsl" diff --git a/23_Arithmetic2UnitTest/app_resources/shaderCommon.hlsl b/23_Arithmetic2UnitTest/app_resources/shaderCommon.hlsl new file mode 100644 index 000000000..5baf9a28d --- /dev/null +++ b/23_Arithmetic2UnitTest/app_resources/shaderCommon.hlsl @@ -0,0 +1,19 @@ +#include "app_resources/common.hlsl" + +using namespace nbl; +using namespace hlsl; + +[[vk::push_constant]] PushConstantData pc; + +struct device_capabilities +{ +#ifdef TEST_NATIVE + NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = true; +#else + NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = false; +#endif +}; + +#ifndef OPERATION +#error "Define OPERATION!" 
+#endif diff --git a/23_Arithmetic2UnitTest/app_resources/testSubgroup.comp.hlsl b/23_Arithmetic2UnitTest/app_resources/testSubgroup.comp.hlsl new file mode 100644 index 000000000..de1e813f1 --- /dev/null +++ b/23_Arithmetic2UnitTest/app_resources/testSubgroup.comp.hlsl @@ -0,0 +1,55 @@ +#pragma shader_stage(compute) + +#define operation_t nbl::hlsl::OPERATION + +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/subgroup_basic.hlsl" +#include "nbl/builtin/hlsl/subgroup2/arithmetic_portability.hlsl" +#include "nbl/builtin/hlsl/subgroup2/arithmetic_params.hlsl" + +#include "app_resources/shaderCommon.hlsl" +#include "nbl/builtin/hlsl/workgroup2/basic.hlsl" + +template +using params_t = SUBGROUP_CONFIG_T; + +typedef vector::base_t, device_capabilities>::ItemsPerInvocation> type_t; + +uint32_t globalIndex() +{ + return glsl::gl_WorkGroupID().x*WORKGROUP_SIZE+workgroup::SubgroupContiguousIndex(); +} + +template +static void subtest(NBL_CONST_REF_ARG(type_t) sourceVal) +{ + const uint64_t outputBufAddr = pc.pOutputBuf[Binop::BindingIndex]; + + assert(glsl::gl_SubgroupSize() == params_t::config_t::Size) + + operation_t > func; + type_t val = func(sourceVal); + + vk::RawBufferStore(outputBufAddr + sizeof(type_t) * globalIndex(), val, sizeof(uint32_t)); +} + +type_t test() +{ + const uint32_t idx = globalIndex(); + type_t sourceVal = vk::RawBufferLoad(pc.pInputBuf + idx * sizeof(type_t)); + + subtest >(sourceVal); + subtest >(sourceVal); + subtest >(sourceVal); + subtest >(sourceVal); + subtest >(sourceVal); + subtest >(sourceVal); + subtest >(sourceVal); + return sourceVal; +} + +[numthreads(WORKGROUP_SIZE,1,1)] +void main() +{ + test(); +} diff --git a/23_Arithmetic2UnitTest/app_resources/testWorkgroup.comp.hlsl b/23_Arithmetic2UnitTest/app_resources/testWorkgroup.comp.hlsl new file mode 100644 index 000000000..664e2f472 --- /dev/null +++ b/23_Arithmetic2UnitTest/app_resources/testWorkgroup.comp.hlsl @@ -0,0 +1,75 @@ +#pragma shader_stage(compute) + +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/subgroup_basic.hlsl" +#include "nbl/builtin/hlsl/subgroup2/arithmetic_portability.hlsl" +#include "nbl/builtin/hlsl/workgroup2/arithmetic.hlsl" + +using config_t = WORKGROUP_CONFIG_T; + +#include "app_resources/shaderCommon.hlsl" + +typedef vector type_t; + +// final (level 1/2) scan needs to fit in one subgroup exactly +groupshared uint32_t scratch[mpl::max_v]; + +#include "nbl/examples/workgroup/DataAccessors.hlsl" +using namespace nbl::hlsl::examples::workgroup; + +static ScratchProxy arithmeticAccessor; + +template +struct operation_t +{ + using binop_base_t = typename Binop::base_t; + using otype_t = typename Binop::type_t; + + // workgroup reduction returns the value of the reduction + // workgroup scans do no return anything, but use the data accessor to do the storing directly + void operator()() + { + using data_proxy_t = PreloadedDataProxy; + data_proxy_t dataAccessor = data_proxy_t::create(pc.pInputBuf, pc.pOutputBuf[Binop::BindingIndex]); + dataAccessor.preload(); +#if IS_REDUCTION + otype_t value = +#endif + OPERATION::template __call(dataAccessor,arithmeticAccessor); + // we barrier before because we alias the accessors for Binop + arithmeticAccessor.workgroupExecutionAndMemoryBarrier(); +#if IS_REDUCTION + [unroll] + for (uint32_t i = 0; i < data_proxy_t::PreloadedDataCount; i++) + dataAccessor.preloaded[i] = value; +#endif + dataAccessor.unload(); + } +}; + + +template +static void subtest() +{ + 
assert(glsl::gl_SubgroupSize() == config_t::SubgroupSize) + + operation_t func; + func(); +} + +void test() +{ + subtest >(); + subtest >(); + subtest >(); + subtest >(); + subtest >(); + subtest >(); + subtest >(); +} + +[numthreads(config_t::WorkgroupSize,1,1)] +void main() +{ + test(); +} \ No newline at end of file diff --git a/23_ArithmeticUnitTest/config.json.template b/23_Arithmetic2UnitTest/config.json.template similarity index 100% rename from 23_ArithmeticUnitTest/config.json.template rename to 23_Arithmetic2UnitTest/config.json.template diff --git a/23_Arithmetic2UnitTest/main.cpp b/23_Arithmetic2UnitTest/main.cpp new file mode 100644 index 000000000..8d70547bc --- /dev/null +++ b/23_Arithmetic2UnitTest/main.cpp @@ -0,0 +1,509 @@ +// TODO: copyright notice + + +#include "nbl/examples/examples.hpp" + +#include "app_resources/common.hlsl" +#include "nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl" +#include "nbl/builtin/hlsl/subgroup2/arithmetic_params.hlsl" + + +using namespace nbl; +using namespace core; +using namespace asset; +using namespace system; +using namespace video; + +// method emulations on the CPU, to verify the results of the GPU methods +template +struct emulatedReduction +{ + using type_t = typename Binop::type_t; + + static inline void impl(type_t* out, const type_t* in, const uint32_t itemCount) + { + const type_t red = std::reduce(in,in+itemCount,Binop::identity,Binop()); + std::fill(out,out+itemCount,red); + } + + static inline constexpr const char* name = "reduction"; +}; +template +struct emulatedScanInclusive +{ + using type_t = typename Binop::type_t; + + static inline void impl(type_t* out, const type_t* in, const uint32_t itemCount) + { + std::inclusive_scan(in,in+itemCount,out,Binop()); + } + static inline constexpr const char* name = "inclusive_scan"; +}; +template +struct emulatedScanExclusive +{ + using type_t = typename Binop::type_t; + + static inline void impl(type_t* out, const type_t* in, const uint32_t itemCount) + { + std::exclusive_scan(in,in+itemCount,out,Binop::identity,Binop()); + } + static inline constexpr const char* name = "exclusive_scan"; +}; + +class Workgroup2ScanTestApp final : public application_templates::BasicMultiQueueApplication, public examples::BuiltinResourcesApplication +{ + using device_base_t = application_templates::BasicMultiQueueApplication; + using asset_base_t = examples::BuiltinResourcesApplication; + +public: + Workgroup2ScanTestApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + system::IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + bool onAppInitialized(smart_refctd_ptr&& system) override + { + if (!device_base_t::onAppInitialized(std::move(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + transferDownQueue = getTransferDownQueue(); + computeQueue = getComputeQueue(); + + // TODO: get the element count from argv + const uint32_t elementCount = 1024 * 1024; + // populate our random data buffer on the CPU and create a GPU copy + inputData = new uint32_t[elementCount]; + smart_refctd_ptr gpuinputDataBuffer; + { + std::mt19937 randGenerator(0xdeadbeefu); + for (uint32_t i = 0u; i < elementCount; i++) + inputData[i] = randGenerator(); // TODO: change to using xoroshiro, then we can skip having the input buffer at all + + IGPUBuffer::SCreationParams inputDataBufferCreationParams = {}; + inputDataBufferCreationParams.size = sizeof(uint32_t) 
* elementCount; + inputDataBufferCreationParams.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + m_utils->createFilledDeviceLocalBufferOnDedMem( + SIntendedSubmitInfo{.queue=getTransferUpQueue()}, + std::move(inputDataBufferCreationParams), + inputData + ).move_into(gpuinputDataBuffer); + } + + // create 8 buffers for 8 operations + for (auto i=0u; igetSize(); + params.usage = bitflag(IGPUBuffer::EUF_STORAGE_BUFFER_BIT) | IGPUBuffer::EUF_TRANSFER_SRC_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + + outputBuffers[i] = m_device->createBuffer(std::move(params)); + auto mreq = outputBuffers[i]->getMemoryReqs(); + mreq.memoryTypeBits &= m_physicalDevice->getDeviceLocalMemoryTypeBits(); + assert(mreq.memoryTypeBits); + + auto bufferMem = m_device->allocate(mreq, outputBuffers[i].get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + assert(bufferMem.isValid()); + } + pc.pInputBuf = gpuinputDataBuffer->getDeviceAddress(); + for (uint32_t i = 0; i < OutputBufferCount; i++) + pc.pOutputBuf[i] = outputBuffers[i]->getDeviceAddress(); + + // create Pipeline Layout + { + SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0,.size = sizeof(PushConstantData) }; + pipelineLayout = m_device->createPipelineLayout({&pcRange, 1}); + } + + const auto spirv_isa_cache_path = localOutputCWD / "spirv_isa_cache.bin"; + // enclose to make sure file goes out of scope and we can reopen it + { + smart_refctd_ptr spirv_isa_cache_input; + // try to load SPIR-V to ISA cache + { + ISystem::future_t> fileCreate; + m_system->createFile(fileCreate, spirv_isa_cache_path, IFile::ECF_READ | IFile::ECF_MAPPABLE | IFile::ECF_COHERENT); + if (auto lock = fileCreate.acquire()) + spirv_isa_cache_input = *lock; + } + // create the cache + { + std::span spirv_isa_cache_data = {}; + if (spirv_isa_cache_input) + spirv_isa_cache_data = { reinterpret_cast(spirv_isa_cache_input->getMappedPointer()),spirv_isa_cache_input->getSize() }; + else + m_logger->log("Failed to load SPIR-V 2 ISA cache!", ILogger::ELL_PERFORMANCE); + // Normally we'd deserialize a `ICPUPipelineCache` properly and pass that instead + m_spirv_isa_cache = m_device->createPipelineCache(spirv_isa_cache_data); + } + } + { + // TODO: rename `deleteDirectory` to just `delete`? and a `IFile::setSize()` ? + m_system->deleteDirectory(spirv_isa_cache_path); + ISystem::future_t> fileCreate; + m_system->createFile(fileCreate, spirv_isa_cache_path, IFile::ECF_WRITE); + // I can be relatively sure I'll succeed to acquire the future, the pointer to created file might be null though. 
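
Note: the `emulatedReduction`, `emulatedScanInclusive` and `emulatedScanExclusive` functors defined near the top of the new `23_Arithmetic2UnitTest/main.cpp` are the CPU reference that the GPU results get checked against. Below is a minimal standalone sketch of what they compute for a 4-element input, using `std::plus` with identity 0 as a stand-in for the `Binop` types from `common.hlsl`.

```cpp
// Illustrative only: same std::reduce / std::inclusive_scan / std::exclusive_scan calls
// as the emulated* functors above, on a tiny hardcoded input.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <functional>
#include <numeric>

int main()
{
    const uint32_t in[4] = {1,2,3,4};
    uint32_t out[4];

    // emulatedReduction: every output element holds the full reduction (1+2+3+4 = 10)
    const uint32_t red = std::reduce(in, in+4, 0u, std::plus<uint32_t>());
    std::fill(out, out+4, red);                                      // {10,10,10,10}

    // emulatedScanInclusive: prefix sum including the current element
    std::inclusive_scan(in, in+4, out, std::plus<uint32_t>());       // {1,3,6,10}

    // emulatedScanExclusive: prefix sum excluding the current element, seeded with the identity
    std::exclusive_scan(in, in+4, out, 0u, std::plus<uint32_t>());   // {0,1,3,6}

    std::printf("%u %u %u %u\n", out[0], out[1], out[2], out[3]);    // prints 0 1 3 6
    return 0;
}
```
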
+ m_spirv_isa_cache_output = *fileCreate.acquire(); + if (!m_spirv_isa_cache_output) + logFail("Failed to Create SPIR-V to ISA cache file."); + } + + // load shader source from file + auto getShaderSource = [&](const char* filePath) -> auto + { + IAssetLoader::SAssetLoadParams lparams = {}; + lparams.logger = m_logger.get(); + lparams.workingDirectory = ""; + auto bundle = m_assetMgr->getAsset(filePath, lparams); + if (bundle.getContents().empty() || bundle.getAssetType()!=IAsset::ET_SHADER) + { + m_logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath); + exit(-1); + } + auto firstAssetInBundle = bundle.getContents()[0]; + return smart_refctd_ptr_static_cast(firstAssetInBundle); + }; + + auto subgroupTestSource = getShaderSource("app_resources/testSubgroup.comp.hlsl"); + auto workgroupTestSource = getShaderSource("app_resources/testWorkgroup.comp.hlsl"); + // now create or retrieve final resources to run our tests + sema = m_device->createSemaphore(timelineValue); + resultsBuffer = ICPUBuffer::create({ outputBuffers[0]->getSize() }); + { + smart_refctd_ptr cmdpool = m_device->createCommandPool(computeQueue->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{&cmdbuf,1})) + { + logFail("Failed to create Command Buffers!\n"); + return false; + } + } + + const auto MaxWorkgroupSize = m_physicalDevice->getLimits().maxComputeWorkGroupInvocations; + const auto MinSubgroupSize = m_physicalDevice->getLimits().minSubgroupSize; + const auto MaxSubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; + for (uint32_t useNative = 0; useNative <= uint32_t(m_physicalDevice->getProperties().limits.shaderSubgroupArithmetic); useNative++) + { + if (useNative) + m_logger->log("Testing with native subgroup arithmetic", ILogger::ELL_INFO); + else + m_logger->log("Testing with emulated subgroup arithmetic", ILogger::ELL_INFO); + + for (auto subgroupSize = MinSubgroupSize; subgroupSize <= MaxSubgroupSize; subgroupSize *= 2u) + { + const uint8_t subgroupSizeLog2 = hlsl::findMSB(subgroupSize); + for (uint32_t workgroupSize = subgroupSize; workgroupSize <= MaxWorkgroupSize; workgroupSize *= 2u) + { + // make sure renderdoc captures everything for debugging + m_api->startCapture(); + m_logger->log("Testing Workgroup Size %u with Subgroup Size %u", ILogger::ELL_INFO, workgroupSize, subgroupSize); + + for (uint32_t j = 0; j < ItemsPerInvocations.size(); j++) + { + const uint32_t itemsPerInvocation = ItemsPerInvocations[j]; + uint32_t itemsPerWG = workgroupSize * itemsPerInvocation; + m_logger->log("Testing Items per Invocation %u", ILogger::ELL_INFO, itemsPerInvocation); + bool passed = true; + passed = runTest(subgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize, bool(useNative), itemsPerWG, itemsPerInvocation) && passed; + logTestOutcome(passed, itemsPerWG); + passed = runTest(subgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize, bool(useNative), itemsPerWG, itemsPerInvocation) && passed; + logTestOutcome(passed, itemsPerWG); + passed = runTest(subgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize, bool(useNative), itemsPerWG, itemsPerInvocation) && passed; + logTestOutcome(passed, itemsPerWG); + + hlsl::workgroup2::SArithmeticConfiguration wgConfig; + wgConfig.init(hlsl::findMSB(workgroupSize), subgroupSizeLog2, itemsPerInvocation); + itemsPerWG = wgConfig.VirtualWorkgroupSize * wgConfig.ItemsPerInvocation_0; + m_logger->log("Testing Item Count %u", 
ILogger::ELL_INFO, itemsPerWG); + passed = runTest(workgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize, bool(useNative), itemsPerWG, itemsPerInvocation) && passed; + logTestOutcome(passed, itemsPerWG); + passed = runTest(workgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize, bool(useNative), itemsPerWG, itemsPerInvocation) && passed; + logTestOutcome(passed, itemsPerWG); + passed = runTest(workgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize, bool(useNative), itemsPerWG, itemsPerInvocation) && passed; + logTestOutcome(passed, itemsPerWG); + } + m_api->endCapture(); + + // save cache every now and then + { + auto cpu = m_spirv_isa_cache->convertToCPUCache(); + // Normally we'd beautifully JSON serialize the thing, allow multiple devices & drivers + metadata + auto bin = cpu->getEntries().begin()->second.bin; + IFile::success_t success; + m_spirv_isa_cache_output->write(success, bin->data(), 0ull, bin->size()); + if (!success) + logFail("Could not write Create SPIR-V to ISA cache to disk!"); + } + } + } + } + + return true; + } + + virtual bool onAppTerminated() override + { + m_logger->log("==========Result==========", ILogger::ELL_INFO); + m_logger->log("Fail Count: %u", ILogger::ELL_INFO, totalFailCount); + delete[] inputData; + return true; + } + + // the unit test is carried out on init + void workLoopBody() override {} + + // + bool keepRunning() override { return false; } + +private: + void logTestOutcome(bool passed, uint32_t workgroupSize) + { + if (passed) + m_logger->log("Passed test #%u", ILogger::ELL_INFO, workgroupSize); + else + { + totalFailCount++; + m_logger->log("Failed test #%u", ILogger::ELL_ERROR, workgroupSize); + } + } + + // create pipeline (specialized every test) [TODO: turn into a future/async] + smart_refctd_ptr createPipeline(const IShader* overridenUnspecialized, const uint8_t subgroupSizeLog2) + { + auto shader = m_device->compileShader({ overridenUnspecialized }); + IGPUComputePipeline::SCreationParams params = {}; + params.layout = pipelineLayout.get(); + params.shader = { + .shader = shader.get(), + .entryPoint = "main", + .requiredSubgroupSize = static_cast(subgroupSizeLog2), + .entries = nullptr, + }; + params.cached.requireFullSubgroups = true; + core::smart_refctd_ptr pipeline; + if (!m_device->createComputePipelines(m_spirv_isa_cache.get(),{¶ms,1},&pipeline)) + return nullptr; + return pipeline; + } + + template class Arithmetic, bool WorkgroupTest> + bool runTest(const smart_refctd_ptr& source, const uint32_t elementCount, const uint8_t subgroupSizeLog2, const uint32_t workgroupSize, bool useNative, uint32_t itemsPerWG, uint32_t itemsPerInvoc = 1u) + { + std::string arith_name = Arithmetic>::name; + const uint32_t workgroupSizeLog2 = hlsl::findMSB(workgroupSize); + + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#else + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; +#endif + options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); + options.preprocessorOptions.logger = m_logger.get(); + + auto* 
includeFinder = compiler->getDefaultIncludeFinder(); + options.preprocessorOptions.includeFinder = includeFinder; + + smart_refctd_ptr overriddenUnspecialized; + if constexpr (WorkgroupTest) + { + hlsl::workgroup2::SArithmeticConfiguration wgConfig; + wgConfig.init(hlsl::findMSB(workgroupSize), subgroupSizeLog2, itemsPerInvoc); + + const std::string definitions[3] = { + "workgroup2::" + arith_name, + wgConfig.getConfigTemplateStructString(), + std::to_string(arith_name=="reduction") + }; + + const IShaderCompiler::SMacroDefinition defines[4] = { + { "OPERATION", definitions[0] }, + { "WORKGROUP_CONFIG_T", definitions[1] }, + { "IS_REDUCTION", definitions[2] }, + { "TEST_NATIVE", "1" } + }; + if (useNative) + options.preprocessorOptions.extraDefines = { defines, defines + 4 }; + else + options.preprocessorOptions.extraDefines = { defines, defines + 3 }; + + overriddenUnspecialized = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); + } + else + { + hlsl::subgroup2::SArithmeticParams sgParams; + sgParams.init(subgroupSizeLog2, itemsPerInvoc); + + const std::string definitions[3] = { + "subgroup2::" + arith_name, + std::to_string(workgroupSize), + sgParams.getParamTemplateStructString() + }; + + const IShaderCompiler::SMacroDefinition defines[4] = { + { "OPERATION", definitions[0] }, + { "WORKGROUP_SIZE", definitions[1] }, + { "SUBGROUP_CONFIG_T", definitions[2] }, + { "TEST_NATIVE", "1" } + }; + if (useNative) + options.preprocessorOptions.extraDefines = { defines, defines + 4 }; + else + options.preprocessorOptions.extraDefines = { defines, defines + 3 }; + + overriddenUnspecialized = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); + } + + auto pipeline = createPipeline(overriddenUnspecialized.get(),subgroupSizeLog2); + + // TODO: overlap dispatches with memory readbacks (requires multiple copies of `buffers`) + uint32_t workgroupCount = 1;// min(elementCount / itemsPerWG, m_physicalDevice->getLimits().maxComputeWorkGroupCount[0]); + + cmdbuf->begin(IGPUCommandBuffer::USAGE::NONE); + cmdbuf->bindComputePipeline(pipeline.get()); + cmdbuf->pushConstants(pipelineLayout.get(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(PushConstantData), &pc); + cmdbuf->dispatch(workgroupCount, 1, 1); + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::buffer_barrier_t memoryBarrier[OutputBufferCount]; + for (auto i=0u; igetSize(),outputBuffers[i]} + }; + } + IGPUCommandBuffer::SPipelineBarrierDependencyInfo info = {.memBarriers={},.bufBarriers=memoryBarrier}; + cmdbuf->pipelineBarrier(asset::E_DEPENDENCY_FLAGS::EDF_NONE,info); + } + cmdbuf->end(); + + const IQueue::SSubmitInfo::SSemaphoreInfo signal[1] = {{.semaphore=sema.get(),.value=++timelineValue}}; + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[1] = {{.cmdbuf=cmdbuf.get()}}; + const IQueue::SSubmitInfo submits[1] = {{.commandBuffers=cmdbufs,.signalSemaphores=signal}}; + computeQueue->submit(submits); + const ISemaphore::SWaitInfo wait[1] = {{.semaphore=sema.get(),.value=timelineValue}}; + m_device->blockForSemaphores(wait); + + const uint32_t subgroupSize = 1u << subgroupSizeLog2; + // check results + bool passed = validateResults, WorkgroupTest>(itemsPerWG, workgroupCount, subgroupSize, itemsPerInvoc); + passed = validateResults, WorkgroupTest>(itemsPerWG, workgroupCount, subgroupSize, itemsPerInvoc) && passed; + passed = validateResults, WorkgroupTest>(itemsPerWG, workgroupCount, subgroupSize, itemsPerInvoc) && passed; + passed = validateResults, 
WorkgroupTest>(itemsPerWG, workgroupCount, subgroupSize, itemsPerInvoc) && passed; + passed = validateResults, WorkgroupTest>(itemsPerWG, workgroupCount, subgroupSize, itemsPerInvoc) && passed; + passed = validateResults, WorkgroupTest>(itemsPerWG, workgroupCount, subgroupSize, itemsPerInvoc) && passed; + passed = validateResults, WorkgroupTest>(itemsPerWG, workgroupCount, subgroupSize, itemsPerInvoc) && passed; + + return passed; + } + + //returns true if result matches + template class Arithmetic, class Binop, bool WorkgroupTest> + bool validateResults(const uint32_t itemsPerWG, const uint32_t workgroupCount, const uint32_t subgroupSize, const uint32_t itemsPerInvoc) + { + bool success = true; + + // download data + const SBufferRange bufferRange = {0u, resultsBuffer->getSize(), outputBuffers[Binop::BindingIndex]}; + m_utils->downloadBufferRangeViaStagingBufferAutoSubmit(SIntendedSubmitInfo{.queue=transferDownQueue},bufferRange,resultsBuffer->getPointer()); + + using type_t = typename Binop::type_t; + const auto testData = reinterpret_cast(resultsBuffer->getPointer()); + + // TODO: parallel for (the temporary values need to be threadlocal or what?) + // now check if the data obtained has valid values + type_t* tmp = new type_t[itemsPerWG]; + for (uint32_t workgroupID = 0u; success && workgroupID < workgroupCount; workgroupID++) + { + if constexpr (WorkgroupTest) + { + const auto workgroupOffset = workgroupID * itemsPerWG; + Arithmetic::impl(tmp, inputData + workgroupOffset, itemsPerWG); + + for (uint32_t localInvocationIndex = 0u; localInvocationIndex < itemsPerWG; localInvocationIndex++) + { + const auto globalInvocationIndex = workgroupOffset + localInvocationIndex; + const auto cpuVal = tmp[localInvocationIndex]; + const auto gpuVal = testData[globalInvocationIndex]; + if (cpuVal != gpuVal) + { + m_logger->log( + "Failed test #%d (%s) (%s) Expected %u got %u for workgroup %d and localinvoc %d", + ILogger::ELL_ERROR, itemsPerWG, WorkgroupTest ? "workgroup" : "subgroup", Binop::name, + cpuVal, gpuVal, workgroupID, localInvocationIndex + ); + success = false; + break; + } + } + } + else + { + const auto workgroupOffset = workgroupID * itemsPerWG; + const auto workgroupSize = itemsPerWG / itemsPerInvoc; + for (uint32_t pseudoSubgroupID = 0u; pseudoSubgroupID < workgroupSize; pseudoSubgroupID += subgroupSize) + Arithmetic::impl(tmp + pseudoSubgroupID * itemsPerInvoc, inputData + workgroupOffset + pseudoSubgroupID * itemsPerInvoc, subgroupSize * itemsPerInvoc); + + for (uint32_t localInvocationIndex = 0u; localInvocationIndex < workgroupSize; localInvocationIndex++) + { + const auto localOffset = localInvocationIndex * itemsPerInvoc; + const auto globalInvocationIndex = workgroupOffset + localOffset; + + for (uint32_t itemInvocationIndex = 0u; itemInvocationIndex < itemsPerInvoc; itemInvocationIndex++) + { + const auto cpuVal = tmp[localOffset + itemInvocationIndex]; + const auto gpuVal = testData[globalInvocationIndex + itemInvocationIndex]; + if (cpuVal != gpuVal) + { + m_logger->log( + "Failed test #%d (%s) (%s) Expected %u got %u for workgroup %d and localinvoc %d and iteminvoc %d", + ILogger::ELL_ERROR, itemsPerWG, WorkgroupTest ? 
"workgroup" : "subgroup", Binop::name, + cpuVal, gpuVal, workgroupID, localInvocationIndex, itemInvocationIndex + ); + success = false; + break; + } + } + } + } + } + delete[] tmp; + + return success; + } + + IQueue* transferDownQueue; + IQueue* computeQueue; + smart_refctd_ptr m_spirv_isa_cache; + smart_refctd_ptr m_spirv_isa_cache_output; + + uint32_t* inputData = nullptr; + constexpr static inline uint32_t OutputBufferCount = 8u; + smart_refctd_ptr outputBuffers[OutputBufferCount]; + smart_refctd_ptr pipelineLayout; + PushConstantData pc; + + smart_refctd_ptr sema; + uint64_t timelineValue = 0; + smart_refctd_ptr cmdbuf; + smart_refctd_ptr resultsBuffer; + + uint32_t totalFailCount = 0; + + constexpr static inline std::array ItemsPerInvocations = { 1, 2, 3, 4 }; +}; + +NBL_MAIN_FUNC(Workgroup2ScanTestApp) \ No newline at end of file diff --git a/23_ArithmeticUnitTest/pipeline.groovy b/23_Arithmetic2UnitTest/pipeline.groovy similarity index 100% rename from 23_ArithmeticUnitTest/pipeline.groovy rename to 23_Arithmetic2UnitTest/pipeline.groovy diff --git a/23_ArithmeticUnitTest/CMakeLists.txt b/23_ArithmeticUnitTest/CMakeLists.txt deleted file mode 100644 index 0724366c9..000000000 --- a/23_ArithmeticUnitTest/CMakeLists.txt +++ /dev/null @@ -1,25 +0,0 @@ - -include(common RESULT_VARIABLE RES) -if(NOT RES) - message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") -endif() - -nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") - -if(NBL_EMBED_BUILTIN_RESOURCES) - set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) - set(RESOURCE_DIR "app_resources") - - get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) - get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) - get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) - - file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") - foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") - endforeach() - - ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") - - LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file diff --git a/23_ArithmeticUnitTest/app_resources/shaderCommon.hlsl b/23_ArithmeticUnitTest/app_resources/shaderCommon.hlsl deleted file mode 100644 index 13ee8d21e..000000000 --- a/23_ArithmeticUnitTest/app_resources/shaderCommon.hlsl +++ /dev/null @@ -1,55 +0,0 @@ -#include "common.hlsl" - -#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" -#include "nbl/builtin/hlsl/subgroup/basic.hlsl" -#include "nbl/builtin/hlsl/subgroup/arithmetic_portability.hlsl" - -#include "nbl/builtin/hlsl/jit/device_capabilities.hlsl" - -// https://github.com/microsoft/DirectXShaderCompiler/issues/6144 -uint32_t3 nbl::hlsl::glsl::gl_WorkGroupSize() {return uint32_t3(WORKGROUP_SIZE,1,1);} - -// unfortunately DXC chokes on descriptors as static members -// https://github.com/microsoft/DirectXShaderCompiler/issues/5940 -[[vk::binding(0, 0)]] StructuredBuffer inputValue; -[[vk::binding(1, 0)]] RWByteAddressBuffer output[8]; - -// because subgroups don't match `gl_LocalInvocationIndex` snake curve addressing, we also can't load inputs 
that way -uint32_t globalIndex(); -// since we test ITEMS_PER_WG class binop> -static void subtest(NBL_CONST_REF_ARG(type_t) sourceVal) -{ - if (globalIndex()==0u) - output[binop::BindingIndex].template Store(0,nbl::hlsl::glsl::gl_SubgroupSize()); - - operation_t::base_t,nbl::hlsl::jit::device_capabilities> func; - if (canStore()) - output[binop::BindingIndex].template Store(sizeof(uint32_t)+sizeof(type_t)*globalIndex(),func(sourceVal)); -} - - -type_t test() -{ - const type_t sourceVal = inputValue[globalIndex()]; - - subtest(sourceVal); - subtest(sourceVal); - subtest(sourceVal); - subtest(sourceVal); - subtest(sourceVal); - subtest(sourceVal); - subtest(sourceVal); - return sourceVal; -} - -#include "nbl/builtin/hlsl/workgroup/basic.hlsl" \ No newline at end of file diff --git a/23_ArithmeticUnitTest/app_resources/testSubgroup.comp.hlsl b/23_ArithmeticUnitTest/app_resources/testSubgroup.comp.hlsl deleted file mode 100644 index 479265d73..000000000 --- a/23_ArithmeticUnitTest/app_resources/testSubgroup.comp.hlsl +++ /dev/null @@ -1,18 +0,0 @@ -#pragma shader_stage(compute) - -#define operation_t nbl::hlsl::OPERATION - -#include "shaderCommon.hlsl" - -uint32_t globalIndex() -{ - return nbl::hlsl::glsl::gl_WorkGroupID().x*WORKGROUP_SIZE+nbl::hlsl::workgroup::SubgroupContiguousIndex(); -} - -bool canStore() {return true;} - -[numthreads(WORKGROUP_SIZE,1,1)] -void main() -{ - test(); -} \ No newline at end of file diff --git a/23_ArithmeticUnitTest/app_resources/testWorkgroup.comp.hlsl b/23_ArithmeticUnitTest/app_resources/testWorkgroup.comp.hlsl deleted file mode 100644 index 9bafae47f..000000000 --- a/23_ArithmeticUnitTest/app_resources/testWorkgroup.comp.hlsl +++ /dev/null @@ -1,107 +0,0 @@ -#pragma shader_stage(compute) - - -#include "nbl/builtin/hlsl/workgroup/scratch_size.hlsl" - -static const uint32_t ArithmeticSz = nbl::hlsl::workgroup::scratch_size_arithmetic::value; -static const uint32_t BallotSz = nbl::hlsl::workgroup::scratch_size_ballot::value; -static const uint32_t ScratchSz = ArithmeticSz+BallotSz; - -// TODO: Can we make it a static variable in the ScratchProxy struct? 
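
Note: both the deleted shaders above and the new `testSubgroup.comp.hlsl` compute `globalIndex()` as `gl_WorkGroupID().x*WORKGROUP_SIZE + SubgroupContiguousIndex()` rather than using `gl_LocalInvocationIndex`, because subgroup packing does not follow the snake-curve local index. A plain-arithmetic sketch of that indexing follows, assuming `SubgroupContiguousIndex()` flattens to `subgroupID*subgroupSize + subgroupInvocationID` (which is what its name implies).

```cpp
// Illustrative only: the concrete values are made up, the point is the index arithmetic.
#include <cstdint>
#include <cstdio>

int main()
{
    const uint32_t WorkgroupSize = 64, SubgroupSize = 16;
    const uint32_t workGroupID = 2;
    const uint32_t subgroupID = 3, subgroupInvocationID = 5; // lane 5 of subgroup 3

    const uint32_t subgroupContiguousIndex = subgroupID*SubgroupSize + subgroupInvocationID; // 53
    const uint32_t globalIndex = workGroupID*WorkgroupSize + subgroupContiguousIndex;        // 2*64+53 = 181
    std::printf("globalIndex = %u\n", globalIndex);

    // Lanes of one subgroup therefore touch a contiguous run of the input buffer,
    // which is exactly the run the CPU-side validateResults() reduces/scans per pseudo-subgroup.
    return 0;
}
```
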
-groupshared uint32_t scratch[ScratchSz]; - - -#include "nbl/builtin/hlsl/workgroup/arithmetic.hlsl" - - -template -struct ScratchProxy -{ - void get(const uint32_t ix, NBL_REF_ARG(uint32_t) value) - { - value = scratch[ix+offset]; - } - void set(const uint32_t ix, const uint32_t value) - { - scratch[ix+offset] = value; - } - - uint32_t atomicOr(const uint32_t ix, const uint32_t value) - { - return nbl::hlsl::glsl::atomicOr(scratch[ix],value); - } - - void workgroupExecutionAndMemoryBarrier() - { - nbl::hlsl::glsl::barrier(); - //nbl::hlsl::glsl::memoryBarrierShared(); implied by the above - } -}; - -static ScratchProxy<0> arithmeticAccessor; - - -#include "nbl/builtin/hlsl/workgroup/broadcast.hlsl" - - -template -struct operation_t -{ - using type_t = typename Binop::type_t; - - type_t operator()(type_t value) - { - type_t retval = nbl::hlsl::OPERATION::template __call >(value,arithmeticAccessor); - // we barrier before because we alias the accessors for Binop - arithmeticAccessor.workgroupExecutionAndMemoryBarrier(); - return retval; - } -}; - - -#include "shaderCommon.hlsl" - -static ScratchProxy ballotAccessor; - - -uint32_t globalIndex() -{ - return nbl::hlsl::glsl::gl_WorkGroupID().x*ITEMS_PER_WG+nbl::hlsl::workgroup::SubgroupContiguousIndex(); -} - -bool canStore() -{ - return nbl::hlsl::workgroup::SubgroupContiguousIndex()::BindingIndex].template Store(0,nbl::hlsl::glsl::gl_SubgroupSize()); - - // we can only ballot booleans, so low bit - nbl::hlsl::workgroup::ballot >(bool(sourceVal & 0x1u), ballotAccessor); - // need to barrier between ballot and usages of a ballot by myself - ballotAccessor.workgroupExecutionAndMemoryBarrier(); - - uint32_t destVal = 0xdeadbeefu; -#define CONSTEXPR_OP_TYPE_TEST(IS_OP) nbl::hlsl::is_same,0x45>,nbl::hlsl::workgroup::IS_OP,0x45> >::value -#define BALLOT_TEMPLATE_ARGS ITEMS_PER_WG,decltype(ballotAccessor),decltype(arithmeticAccessor),nbl::hlsl::jit::device_capabilities - if (CONSTEXPR_OP_TYPE_TEST(reduction)) - destVal = nbl::hlsl::workgroup::ballotBitCount(ballotAccessor,arithmeticAccessor); - else if (CONSTEXPR_OP_TYPE_TEST(inclusive_scan)) - destVal = nbl::hlsl::workgroup::ballotInclusiveBitCount(ballotAccessor,arithmeticAccessor); - else if (CONSTEXPR_OP_TYPE_TEST(exclusive_scan)) - destVal = nbl::hlsl::workgroup::ballotExclusiveBitCount(ballotAccessor,arithmeticAccessor); - else - { - assert(false); - } -#undef BALLOT_TEMPLATE_ARGS -#undef CONSTEXPR_OP_TYPE_TEST - - if (canStore()) - output[ballot::BindingIndex].template Store(sizeof(uint32_t)+sizeof(type_t)*globalIndex(),destVal); -} \ No newline at end of file diff --git a/23_ArithmeticUnitTest/main.cpp b/23_ArithmeticUnitTest/main.cpp deleted file mode 100644 index 147d231e2..000000000 --- a/23_ArithmeticUnitTest/main.cpp +++ /dev/null @@ -1,462 +0,0 @@ -#include "nbl/application_templates/BasicMultiQueueApplication.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" -#include "app_resources/common.hlsl" - -using namespace nbl; -using namespace core; -using namespace asset; -using namespace system; -using namespace video; - -// method emulations on the CPU, to verify the results of the GPU methods -template -struct emulatedReduction -{ - using type_t = typename Binop::type_t; - - static inline void impl(type_t* out, const type_t* in, const uint32_t itemCount) - { - const type_t red = std::reduce(in,in+itemCount,Binop::identity,Binop()); - std::fill(out,out+itemCount,red); - } - - static inline constexpr const char* name = "reduction"; -}; -template 
-struct emulatedScanInclusive -{ - using type_t = typename Binop::type_t; - - static inline void impl(type_t* out, const type_t* in, const uint32_t itemCount) - { - std::inclusive_scan(in,in+itemCount,out,Binop()); - } - static inline constexpr const char* name = "inclusive_scan"; -}; -template -struct emulatedScanExclusive -{ - using type_t = typename Binop::type_t; - - static inline void impl(type_t* out, const type_t* in, const uint32_t itemCount) - { - std::exclusive_scan(in,in+itemCount,out,Binop::identity,Binop()); - } - static inline constexpr const char* name = "exclusive_scan"; -}; - -class ArithmeticUnitTestApp final : public application_templates::BasicMultiQueueApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication -{ - using device_base_t = application_templates::BasicMultiQueueApplication; - using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; - -public: - ArithmeticUnitTestApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : - system::IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} - - bool onAppInitialized(smart_refctd_ptr&& system) override - { - if (!device_base_t::onAppInitialized(std::move(system))) - return false; - if (!asset_base_t::onAppInitialized(std::move(system))) - return false; - - transferDownQueue = getTransferDownQueue(); - computeQueue = getComputeQueue(); - - // TODO: get the element count from argv - const uint32_t elementCount = Output<>::ScanElementCount; - // populate our random data buffer on the CPU and create a GPU copy - inputData = new uint32_t[elementCount]; - smart_refctd_ptr gpuinputDataBuffer; - { - std::mt19937 randGenerator(0xdeadbeefu); - for (uint32_t i = 0u; i < elementCount; i++) - inputData[i] = randGenerator(); // TODO: change to using xoroshiro, then we can skip having the input buffer at all - - IGPUBuffer::SCreationParams inputDataBufferCreationParams = {}; - inputDataBufferCreationParams.size = sizeof(Output<>::data[0]) * elementCount; - inputDataBufferCreationParams.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; - m_utils->createFilledDeviceLocalBufferOnDedMem( - SIntendedSubmitInfo{.queue=getTransferUpQueue()}, - std::move(inputDataBufferCreationParams), - inputData - ).move_into(gpuinputDataBuffer); - } - - // create 8 buffers for 8 operations - for (auto i=0u; igetSize(); - params.usage = bitflag(IGPUBuffer::EUF_STORAGE_BUFFER_BIT) | IGPUBuffer::EUF_TRANSFER_SRC_BIT; - - outputBuffers[i] = m_device->createBuffer(std::move(params)); - auto mreq = outputBuffers[i]->getMemoryReqs(); - mreq.memoryTypeBits &= m_physicalDevice->getDeviceLocalMemoryTypeBits(); - assert(mreq.memoryTypeBits); - - auto bufferMem = m_device->allocate(mreq, outputBuffers[i].get()); - assert(bufferMem.isValid()); - } - - // create Descriptor Set and Pipeline Layout - { - // create Descriptor Set Layout - smart_refctd_ptr dsLayout; - { - IGPUDescriptorSetLayout::SBinding binding[2]; - for (uint32_t i = 0u; i < 2; i++) - binding[i] = {{},i,IDescriptor::E_TYPE::ET_STORAGE_BUFFER,IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE,IShader::E_SHADER_STAGE::ESS_COMPUTE,1u,nullptr }; - binding[1].count = OutputBufferCount; - dsLayout = m_device->createDescriptorSetLayout(binding); - } - - // set and transient pool - auto descPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_NONE,{&dsLayout.get(),1}); - descriptorSet = 
descPool->createDescriptorSet(smart_refctd_ptr(dsLayout)); - { - IGPUDescriptorSet::SDescriptorInfo infos[1+OutputBufferCount]; - infos[0].desc = gpuinputDataBuffer; - infos[0].info.buffer = { 0u,gpuinputDataBuffer->getSize() }; - for (uint32_t i = 1u; i <= OutputBufferCount; i++) - { - auto buff = outputBuffers[i - 1]; - infos[i].info.buffer = { 0u,buff->getSize() }; - infos[i].desc = std::move(buff); // save an atomic in the refcount - - } - - IGPUDescriptorSet::SWriteDescriptorSet writes[2]; - for (uint32_t i=0u; i<2; i++) - writes[i] = {descriptorSet.get(),i,0u,1u,infos+i}; - writes[1].count = OutputBufferCount; - - m_device->updateDescriptorSets(2, writes, 0u, nullptr); - } - - pipelineLayout = m_device->createPipelineLayout({},std::move(dsLayout)); - } - - const auto spirv_isa_cache_path = localOutputCWD/"spirv_isa_cache.bin"; - // enclose to make sure file goes out of scope and we can reopen it - { - smart_refctd_ptr spirv_isa_cache_input; - // try to load SPIR-V to ISA cache - { - ISystem::future_t> fileCreate; - m_system->createFile(fileCreate,spirv_isa_cache_path,IFile::ECF_READ|IFile::ECF_MAPPABLE|IFile::ECF_COHERENT); - if (auto lock=fileCreate.acquire()) - spirv_isa_cache_input = *lock; - } - // create the cache - { - std::span spirv_isa_cache_data = {}; - if (spirv_isa_cache_input) - spirv_isa_cache_data = {reinterpret_cast(spirv_isa_cache_input->getMappedPointer()),spirv_isa_cache_input->getSize()}; - else - m_logger->log("Failed to load SPIR-V 2 ISA cache!",ILogger::ELL_PERFORMANCE); - // Normally we'd deserialize a `ICPUPipelineCache` properly and pass that instead - m_spirv_isa_cache = m_device->createPipelineCache(spirv_isa_cache_data); - } - } - { - // TODO: rename `deleteDirectory` to just `delete`? and a `IFile::setSize()` ? - m_system->deleteDirectory(spirv_isa_cache_path); - ISystem::future_t> fileCreate; - m_system->createFile(fileCreate,spirv_isa_cache_path,IFile::ECF_WRITE); - // I can be relatively sure I'll succeed to acquire the future, the pointer to created file might be null though. 
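
Note: the SPIR-V→ISA pipeline-cache handling above (the same block exists in both the old and the new `main.cpp`) boils down to: map the cache file if it exists, feed the raw bytes to `createPipelineCache`, and after the test run serialize the cache back out. Below is a generic stand-in for that round-trip using plain `<fstream>` instead of Nabla's `ISystem`/`IFile` futures, purely for illustration.

```cpp
// Illustrative only: not the engine API, just the file-handling idea.
#include <cstdint>
#include <fstream>
#include <iterator>
#include <vector>

std::vector<uint8_t> loadCacheBlob(const char* path)
{
    std::ifstream in(path, std::ios::binary);
    if (!in)
        return {}; // corresponds to the "Failed to load SPIR-V 2 ISA cache!" branch above
    return std::vector<uint8_t>(std::istreambuf_iterator<char>(in), std::istreambuf_iterator<char>());
}

void saveCacheBlob(const char* path, const std::vector<uint8_t>& blob)
{
    // the code above deletes and recreates the file first, so no stale bytes linger
    std::ofstream out(path, std::ios::binary | std::ios::trunc);
    out.write(reinterpret_cast<const char*>(blob.data()), static_cast<std::streamsize>(blob.size()));
}
```
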
- m_spirv_isa_cache_output=*fileCreate.acquire(); - if (!m_spirv_isa_cache_output) - logFail("Failed to Create SPIR-V to ISA cache file."); - } - - // load shader source from file - auto getShaderSource = [&](const char* filePath) -> auto - { - IAssetLoader::SAssetLoadParams lparams = {}; - lparams.logger = m_logger.get(); - lparams.workingDirectory = ""; - auto bundle = m_assetMgr->getAsset(filePath, lparams); - if (bundle.getContents().empty() || bundle.getAssetType()!=IAsset::ET_SHADER) - { - m_logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath); - exit(-1); - } - auto firstAssetInBundle = bundle.getContents()[0]; - return smart_refctd_ptr_static_cast(firstAssetInBundle); - }; - - auto subgroupTestSource = getShaderSource("app_resources/testSubgroup.comp.hlsl"); - auto workgroupTestSource = getShaderSource("app_resources/testWorkgroup.comp.hlsl"); - // now create or retrieve final resources to run our tests - sema = m_device->createSemaphore(timelineValue); - resultsBuffer = ICPUBuffer::create({ outputBuffers[0]->getSize() }); - { - smart_refctd_ptr cmdpool = m_device->createCommandPool(computeQueue->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - if (!cmdpool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{&cmdbuf,1})) - { - logFail("Failed to create Command Buffers!\n"); - return false; - } - } - - const auto MaxWorkgroupSize = m_physicalDevice->getLimits().maxComputeWorkGroupInvocations; - const auto MinSubgroupSize = m_physicalDevice->getLimits().minSubgroupSize; - const auto MaxSubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; - for (auto subgroupSize=MinSubgroupSize; subgroupSize <= MaxSubgroupSize; subgroupSize *= 2u) - { - const uint8_t subgroupSizeLog2 = hlsl::findMSB(subgroupSize); - for (uint32_t workgroupSize = subgroupSize; workgroupSize <= MaxWorkgroupSize; workgroupSize += subgroupSize) - { - // make sure renderdoc captures everything for debugging - m_api->startCapture(); - m_logger->log("Testing Workgroup Size %u with Subgroup Size %u", ILogger::ELL_INFO, workgroupSize, subgroupSize); - - bool passed = true; - // TODO async the testing - passed = runTest(subgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize) && passed; - logTestOutcome(passed, workgroupSize); - passed = runTest(subgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize) && passed; - logTestOutcome(passed, workgroupSize); - passed = runTest(subgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize) && passed; - logTestOutcome(passed, workgroupSize); - for (uint32_t itemsPerWG = workgroupSize; itemsPerWG > workgroupSize - subgroupSize; itemsPerWG--) - { - m_logger->log("Testing Item Count %u", ILogger::ELL_INFO, itemsPerWG); - passed = runTest(workgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize, itemsPerWG) && passed; - logTestOutcome(passed, itemsPerWG); - passed = runTest(workgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize, itemsPerWG) && passed; - logTestOutcome(passed, itemsPerWG); - passed = runTest(workgroupTestSource, elementCount, subgroupSizeLog2, workgroupSize, itemsPerWG) && passed; - logTestOutcome(passed, itemsPerWG); - } - m_api->endCapture(); - - // save cache every now and then - { - auto cpu = m_spirv_isa_cache->convertToCPUCache(); - // Normally we'd beautifully JSON serialize the thing, allow multiple devices & drivers + metadata - auto bin = cpu->getEntries().begin()->second.bin; - IFile::success_t success; - 
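
Note: the deleted test above swept workgroup sizes additively (`workgroupSize += subgroupSize`) and varied only the item count per workgroup, while the new `23_Arithmetic2UnitTest` sweeps both sizes by powers of two and additionally varies items-per-invocation from the `ItemsPerInvocations` array. A small sketch of the new enumeration with made-up device limits, also showing why `findMSB(subgroupSize)` yields the log2 that gets passed as `requiredSubgroupSize`:

```cpp
// Illustrative only: MinSubgroupSize/MaxSubgroupSize/MaxWorkgroupSize are example values.
#include <array>
#include <bit>
#include <cstdint>
#include <cstdio>

int main()
{
    const uint32_t MinSubgroupSize = 8, MaxSubgroupSize = 64, MaxWorkgroupSize = 256;
    const std::array<uint32_t,4> ItemsPerInvocations = {1,2,3,4};

    for (uint32_t subgroupSize = MinSubgroupSize; subgroupSize <= MaxSubgroupSize; subgroupSize *= 2u)
    {
        // for a power of two, bit_width(x)-1 == log2(x) == findMSB(x)
        const uint32_t subgroupSizeLog2 = std::bit_width(subgroupSize) - 1u;
        for (uint32_t workgroupSize = subgroupSize; workgroupSize <= MaxWorkgroupSize; workgroupSize *= 2u)
            for (const uint32_t itemsPerInvocation : ItemsPerInvocations)
                std::printf("subgroup %u (log2 %u) | workgroup %u | items/invocation %u\n",
                    subgroupSize, subgroupSizeLog2, workgroupSize, itemsPerInvocation);
    }
    return 0;
}
```
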
m_spirv_isa_cache_output->write(success,bin->data(),0ull,bin->size()); - if (!success) - logFail("Could not write Create SPIR-V to ISA cache to disk!"); - } - } - } - - return true; - } - - virtual bool onAppTerminated() override - { - m_logger->log("==========Result==========", ILogger::ELL_INFO); - m_logger->log("Fail Count: %u", ILogger::ELL_INFO, totalFailCount); - delete[] inputData; - return true; - } - - // the unit test is carried out on init - void workLoopBody() override {} - - // - bool keepRunning() override { return false; } - -private: - void logTestOutcome(bool passed, uint32_t workgroupSize) - { - if (passed) - m_logger->log("Passed test #%u", ILogger::ELL_INFO, workgroupSize); - else - { - totalFailCount++; - m_logger->log("Failed test #%u", ILogger::ELL_ERROR, workgroupSize); - } - } - - // create pipeline (specialized every test) [TODO: turn into a future/async] - smart_refctd_ptr createPipeline(const ICPUShader* overridenUnspecialized, const uint8_t subgroupSizeLog2) - { - auto shader = m_device->createShader(overridenUnspecialized); - IGPUComputePipeline::SCreationParams params = {}; - params.layout = pipelineLayout.get(); - params.shader = { - .entryPoint = "main", - .shader = shader.get(), - .entries = nullptr, - .requiredSubgroupSize = static_cast(subgroupSizeLog2), - .requireFullSubgroups = true - }; - core::smart_refctd_ptr pipeline; - if (!m_device->createComputePipelines(m_spirv_isa_cache.get(),{¶ms,1},&pipeline)) - return nullptr; - return pipeline; - } - - /*template class Arithmetic, bool WorkgroupTest> - bool runTest(const smart_refctd_ptr& source, const uint32_t elementCount, const uint32_t workgroupSize, uint32_t itemsPerWG = ~0u) - { - return true; - }*/ - - template class Arithmetic, bool WorkgroupTest> - bool runTest(const smart_refctd_ptr& source, const uint32_t elementCount, const uint8_t subgroupSizeLog2, const uint32_t workgroupSize, uint32_t itemsPerWG = ~0u) - { - std::string arith_name = Arithmetic>::name; - - smart_refctd_ptr overridenUnspecialized; - if constexpr (WorkgroupTest) - { - overridenUnspecialized = CHLSLCompiler::createOverridenCopy( - source.get(), "#define OPERATION %s\n#define WORKGROUP_SIZE %d\n#define ITEMS_PER_WG %d\n", - (("workgroup::") + arith_name).c_str(), workgroupSize, itemsPerWG - ); - } - else - { - itemsPerWG = workgroupSize; - overridenUnspecialized = CHLSLCompiler::createOverridenCopy( - source.get(), "#define OPERATION %s\n#define WORKGROUP_SIZE %d\n", - (("subgroup::") + arith_name).c_str(), workgroupSize - ); - } - auto pipeline = createPipeline(overridenUnspecialized.get(),subgroupSizeLog2); - - // TODO: overlap dispatches with memory readbacks (requires multiple copies of `buffers`) - const uint32_t workgroupCount = elementCount / itemsPerWG; - cmdbuf->begin(IGPUCommandBuffer::USAGE::NONE); - cmdbuf->bindComputePipeline(pipeline.get()); - cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &descriptorSet.get()); - cmdbuf->dispatch(workgroupCount, 1, 1); - { - IGPUCommandBuffer::SPipelineBarrierDependencyInfo::buffer_barrier_t memoryBarrier[OutputBufferCount]; - for (auto i=0u; igetSize(),outputBuffers[i]} - }; - } - IGPUCommandBuffer::SPipelineBarrierDependencyInfo info = {.memBarriers={},.bufBarriers=memoryBarrier}; - cmdbuf->pipelineBarrier(asset::E_DEPENDENCY_FLAGS::EDF_NONE,info); - } - cmdbuf->end(); - - const IQueue::SSubmitInfo::SSemaphoreInfo signal[1] = {{.semaphore=sema.get(),.value=++timelineValue}}; - const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufs[1] = 
{{.cmdbuf=cmdbuf.get()}}; - const IQueue::SSubmitInfo submits[1] = {{.commandBuffers=cmdbufs,.signalSemaphores=signal}}; - computeQueue->submit(submits); - const ISemaphore::SWaitInfo wait[1] = {{.semaphore=sema.get(),.value=timelineValue}}; - m_device->blockForSemaphores(wait); - - // check results - bool passed = validateResults, WorkgroupTest>(itemsPerWG, workgroupCount); - passed = validateResults, WorkgroupTest>(itemsPerWG, workgroupCount) && passed; - passed = validateResults, WorkgroupTest>(itemsPerWG, workgroupCount) && passed; - passed = validateResults, WorkgroupTest>(itemsPerWG, workgroupCount) && passed; - passed = validateResults, WorkgroupTest>(itemsPerWG, workgroupCount) && passed; - passed = validateResults, WorkgroupTest>(itemsPerWG, workgroupCount) && passed; - passed = validateResults, WorkgroupTest>(itemsPerWG, workgroupCount) && passed; - if constexpr (WorkgroupTest) - passed = validateResults, WorkgroupTest>(itemsPerWG, workgroupCount) && passed; - - return passed; - } - - //returns true if result matches - template class Arithmetic, class Binop, bool WorkgroupTest> - bool validateResults(const uint32_t itemsPerWG, const uint32_t workgroupCount) - { - bool success = true; - - // download data - const SBufferRange bufferRange = {0u, resultsBuffer->getSize(), outputBuffers[Binop::BindingIndex]}; - m_utils->downloadBufferRangeViaStagingBufferAutoSubmit(SIntendedSubmitInfo{.queue=transferDownQueue},bufferRange,resultsBuffer->getPointer()); - - using type_t = typename Binop::type_t; - const auto dataFromBuffer = reinterpret_cast(resultsBuffer->getPointer()); - const auto subgroupSize = dataFromBuffer[0]; - if (subgroupSizenbl::hlsl::subgroup::MaxSubgroupSize) - { - m_logger->log("Unexpected Subgroup Size %u", ILogger::ELL_ERROR, subgroupSize); - return false; - } - - const auto testData = reinterpret_cast(dataFromBuffer + 1); - // TODO: parallel for (the temporary values need to be threadlocal or what?) - // now check if the data obtained has valid values - type_t* tmp = new type_t[itemsPerWG]; - type_t* ballotInput = new type_t[itemsPerWG]; - for (uint32_t workgroupID = 0u; success && workgroupID < workgroupCount; workgroupID++) - { - const auto workgroupOffset = workgroupID * itemsPerWG; - - if constexpr (WorkgroupTest) - { - if constexpr (std::is_same_v, Binop>) - { - for (auto i = 0u; i < itemsPerWG; i++) - ballotInput[i] = inputData[i + workgroupOffset] & 0x1u; - Arithmetic::impl(tmp, ballotInput, itemsPerWG); - } - else - Arithmetic::impl(tmp, inputData + workgroupOffset, itemsPerWG); - } - else - { - for (uint32_t pseudoSubgroupID = 0u; pseudoSubgroupID < itemsPerWG; pseudoSubgroupID += subgroupSize) - Arithmetic::impl(tmp + pseudoSubgroupID, inputData + workgroupOffset + pseudoSubgroupID, subgroupSize); - } - - for (uint32_t localInvocationIndex = 0u; localInvocationIndex < itemsPerWG; localInvocationIndex++) - { - const auto globalInvocationIndex = workgroupOffset + localInvocationIndex; - const auto cpuVal = tmp[localInvocationIndex]; - const auto gpuVal = testData[globalInvocationIndex]; - if (cpuVal != gpuVal) - { - m_logger->log( - "Failed test #%d (%s) (%s) Expected %u got %u for workgroup %d and localinvoc %d", - ILogger::ELL_ERROR, itemsPerWG, WorkgroupTest ? 
"workgroup" : "subgroup", Binop::name, - cpuVal, gpuVal, workgroupID, localInvocationIndex - ); - success = false; - break; - } - } - } - delete[] ballotInput; - delete[] tmp; - - return success; - } - - IQueue* transferDownQueue; - IQueue* computeQueue; - smart_refctd_ptr m_spirv_isa_cache; - smart_refctd_ptr m_spirv_isa_cache_output; - - uint32_t* inputData = nullptr; - constexpr static inline uint32_t OutputBufferCount = 8u; - smart_refctd_ptr outputBuffers[OutputBufferCount]; - smart_refctd_ptr descriptorSet; - smart_refctd_ptr pipelineLayout; - - smart_refctd_ptr sema; - uint64_t timelineValue = 0; - smart_refctd_ptr cmdbuf; - smart_refctd_ptr resultsBuffer; - - uint32_t totalFailCount = 0; -}; - -NBL_MAIN_FUNC(ArithmeticUnitTestApp) \ No newline at end of file diff --git a/24_ColorSpaceTest/main.cpp b/24_ColorSpaceTest/main.cpp index 844f058fe..84c55ef3a 100644 --- a/24_ColorSpaceTest/main.cpp +++ b/24_ColorSpaceTest/main.cpp @@ -1,10 +1,8 @@ // Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" -#include "SimpleWindowedApplication.hpp" +#include "nbl/examples/examples.hpp" -#include "nbl/video/surface/CSurfaceVulkan.h" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" #include "nlohmann/json.hpp" @@ -19,14 +17,15 @@ using namespace system; using namespace asset; using namespace ui; using namespace video; +using namespace nbl::examples; // defines for sampler tests can be found in the file below #include "app_resources/push_constants.hlsl" -class ColorSpaceTestSampleApp final : public examples::SimpleWindowedApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication +class ColorSpaceTestSampleApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { - using device_base_t = examples::SimpleWindowedApplication; - using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; using clock_t = std::chrono::steady_clock; using perf_clock_resolution_t = std::chrono::milliseconds; @@ -161,7 +160,7 @@ class ColorSpaceTestSampleApp final : public examples::SimpleWindowedApplication return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); // Load Custom Shader - auto loadCompileAndCreateShader = [&](const std::string& relPath) -> smart_refctd_ptr + auto loadCompileAndCreateShader = [&](const std::string& relPath) -> smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); @@ -172,11 +171,11 @@ class ColorSpaceTestSampleApp final : public examples::SimpleWindowedApplication return nullptr; // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto source = IAsset::castDown(assets[0]); + auto source = IAsset::castDown(assets[0]); if (!source) return nullptr; - return m_device->createShader(source.get()); + return m_device->compileShader({ source.get() }); }; auto fragmentShader = loadCompileAndCreateShader("app_resources/present.frag.hlsl"); if (!fragmentShader) @@ -255,14 +254,14 @@ class ColorSpaceTestSampleApp final : public examples::SimpleWindowedApplication // Now create the pipeline { const asset::SPushConstantRange range = { - .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .stageFlags 
= ESS_FRAGMENT, .offset = 0, .size = sizeof(push_constants_t) }; auto layout = m_device->createPipelineLayout({ &range,1 }, nullptr, nullptr, nullptr, core::smart_refctd_ptr(dsLayout)); - const IGPUShader::SSpecInfo fragSpec = { + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = fragmentShader.get(), .entryPoint = "main", - .shader = fragmentShader.get() }; m_pipeline = fsTriProtoPPln.createPipeline(fragSpec, layout.get(), scResources->getRenderpass()/*,default is subpass 0*/); if (!m_pipeline) @@ -796,7 +795,7 @@ class ColorSpaceTestSampleApp final : public examples::SimpleWindowedApplication cmdbuf->beginRenderPass(info,IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); } cmdbuf->bindGraphicsPipeline(m_pipeline.get()); - cmdbuf->pushConstants(m_pipeline->getLayout(),IGPUShader::E_SHADER_STAGE::ESS_FRAGMENT,0,sizeof(push_constants_t),&pc); + cmdbuf->pushConstants(m_pipeline->getLayout(),hlsl::ShaderStage::ESS_FRAGMENT,0,sizeof(push_constants_t),&pc); cmdbuf->bindDescriptorSets(nbl::asset::EPBP_GRAPHICS,m_pipeline->getLayout(),3,1,&ds); ext::FullScreenTriangle::recordDrawCall(cmdbuf); cmdbuf->endRenderPass(); diff --git a/25_FilterTest/main.cpp b/25_FilterTest/main.cpp index a66227225..4ce68d66c 100644 --- a/25_FilterTest/main.cpp +++ b/25_FilterTest/main.cpp @@ -868,7 +868,7 @@ class BlitFilterTestApp final : public virtual application_templates::BasicMulti logger->log("Failed to fit the preload region in shared memory even for 1x1x1 workgroup!",ILogger::ELL_ERROR); return false; } - cmdbuf->pushConstants(layout,IGPUShader::E_SHADER_STAGE::ESS_COMPUTE,0,sizeof(params),¶ms); + cmdbuf->pushConstants(layout,hlsl::ShaderStage::ESS_COMPUTE,0,sizeof(params),¶ms); cmdbuf->dispatch(params.perWG.getWorkgroupCount(outExtent16)); if (m_alphaSemantic==IBlitUtilities::EAS_REFERENCE_OR_COVERAGE) { diff --git a/26_Blur/app_resources/shader.comp.hlsl b/26_Blur/app_resources/shader.comp.hlsl index 94baa8d2a..99e876ccc 100644 --- a/26_Blur/app_resources/shader.comp.hlsl +++ b/26_Blur/app_resources/shader.comp.hlsl @@ -131,6 +131,7 @@ struct ScanSharedMemoryProxy }; [numthreads(WORKGROUP_SIZE, 1, 1)] +[shader("compute")] void main() { ScanSharedMemoryProxy scanSmemAccessor; diff --git a/26_Blur/main.cpp b/26_Blur/main.cpp index 8217c4e51..83cf140d6 100644 --- a/26_Blur/main.cpp +++ b/26_Blur/main.cpp @@ -1,28 +1,29 @@ // Copyright (C) 2024-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h + + +#include "nbl/examples/examples.hpp" + #include #include -#include "nabla.h" -#include "SimpleWindowedApplication.hpp" -#include "InputSystem.hpp" -#include "CEventCallback.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" - using namespace nbl; using namespace nbl::core; using namespace nbl::system; using namespace nbl::asset; using namespace nbl::ui; using namespace nbl::video; +using namespace nbl::examples; #include "app_resources/common.hlsl" -class BlurApp final : public examples::SimpleWindowedApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication + + +class BlurApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { - using device_base_t = examples::SimpleWindowedApplication; - using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; using clock_t = std::chrono::steady_clock; public: @@ -225,7 +226,7 @@ class BlurApp final : public examples::SimpleWindowedApplication, public applica if (!m_vertImg || !m_device->allocate(reqs, m_vertImg.get()).isValid()) return logFail("Could not create HDR Image"); - smart_refctd_ptr shader; + smart_refctd_ptr shader; { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); @@ -236,10 +237,10 @@ class BlurApp final : public examples::SimpleWindowedApplication, public applica return logFail("Failed to load shader from disk"); // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto sourceRaw = IAsset::castDown(assets[0]); + auto sourceRaw = IAsset::castDown(assets[0]); if (!sourceRaw) return logFail("Failed to load shader from disk"); - smart_refctd_ptr source = CHLSLCompiler::createOverridenCopy( + smart_refctd_ptr source = CHLSLCompiler::createOverridenCopy( sourceRaw.get(), "static const uint16_t WORKGROUP_SIZE = %d;\n" "static const uint16_t MAX_SCANLINE_SIZE = %d;\n" @@ -262,9 +263,9 @@ class BlurApp final : public examples::SimpleWindowedApplication, public applica ISPIRVOptimizer::EOP_LOCAL_MULTI_STORE_ELIM }; auto opt = make_smart_refctd_ptr(optPasses); - shader = m_device->createShader(source.get(), opt.get()); + shader = m_device->compileShader({ source.get(),opt.get() }); #else - shader = m_device->createShader(source.get()); + shader = m_device->compileShader({ source.get() }); #endif if (!shader) return false; @@ -272,26 +273,18 @@ class BlurApp final : public examples::SimpleWindowedApplication, public applica { const asset::SPushConstantRange ranges[] = { { - .stageFlags = IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, + .stageFlags = hlsl::ShaderStage::ESS_COMPUTE, .offset = 0, .size = sizeof(PushConstants) } }; auto layout = m_device->createPipelineLayout(ranges, smart_refctd_ptr(dsLayout)); - const IGPUComputePipeline::SCreationParams params[] = { { - { - .layout = layout.get() - }, - {}, - IGPUComputePipeline::SCreationParams::FLAGS::NONE, - { - .entryPoint = "main", - .shader = shader.get(), - .entries = nullptr, - .requiredSubgroupSize = static_cast(hlsl::findMSB(m_physicalDevice->getLimits().maxSubgroupSize)), - .requireFullSubgroups = true - } - }}; - if (!m_device->createComputePipelines(nullptr, params, &m_ppln)) + + IGPUComputePipeline::SCreationParams params = {}; + params.layout = layout.get(); + params.shader.shader = shader.get(); + params.shader.entryPoint = "main"; + 
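
Note: the `CHLSLCompiler::createOverridenCopy` call above bakes `WORKGROUP_SIZE` and `MAX_SCANLINE_SIZE` into the shader source before compilation. Conceptually it printf-formats a snippet and splices it into a copy of the HLSL text (the real helper also preserves the shader's metadata and file-path hint); a simplified, string-only sketch:

```cpp
// Illustrative only: shows the string handling, not the IShader plumbing.
#include <cstdio>
#include <string>

std::string overrideSource(const std::string& hlsl, unsigned workgroupSize, unsigned maxScanlineSize)
{
    char prefix[128];
    std::snprintf(prefix, sizeof(prefix),
        "static const uint16_t WORKGROUP_SIZE = %u;\n"
        "static const uint16_t MAX_SCANLINE_SIZE = %u;\n",
        workgroupSize, maxScanlineSize);
    return std::string(prefix) + hlsl; // compile the returned string instead of the original
}
```
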
params.cached.requireFullSubgroups = true; + if (!m_device->createComputePipelines(nullptr, { &params, 1 }, &m_ppln)) return logFail("Failed to create Pipeline"); } @@ -626,7 +619,7 @@ class BlurApp final : public examples::SimpleWindowedApplication, public applica cb->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .memBarriers = {}, .bufBarriers = {},.imgBarriers = {&vertImgBarrier,1} }); cb->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_COMPUTE, layout, 0, 1, &m_ds0.get()); PushConstants pc = { .radius = blurRadius, .activeAxis = 0, .edgeWrapMode = blurEdgeWrapMode }; - cb->pushConstants(layout, IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(pc), &pc); + cb->pushConstants(layout, hlsl::ShaderStage::ESS_COMPUTE, 0, sizeof(pc), &pc); cb->dispatch(image_params.extent.height, 1, 1); image_memory_barrier_t horzImgBarrier = { @@ -646,7 +639,7 @@ class BlurApp final : public examples::SimpleWindowedApplication, public applica cb->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .memBarriers = {}, .bufBarriers = {},.imgBarriers = {&horzImgBarrier,1} }); cb->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_COMPUTE, layout, 0, 1, &m_ds1.get()); pc.activeAxis = 1; - cb->pushConstants(layout, IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(pc), &pc); + cb->pushConstants(layout, hlsl::ShaderStage::ESS_COMPUTE, 0, sizeof(pc), &pc); cb->dispatch(image_params.extent.width, 1, 1); } diff --git a/27_MPMCScheduler/CMakeLists.txt b/27_MPMCScheduler/CMakeLists.txt index a434ff32a..92531a8d5 100644 --- a/27_MPMCScheduler/CMakeLists.txt +++ b/27_MPMCScheduler/CMakeLists.txt @@ -1,24 +1,46 @@ -include(common RESULT_VARIABLE RES) -if(NOT RES) - message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") -endif() +include(common) -nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") +nbl_create_executable_project("" "" "" "") -if(NBL_EMBED_BUILTIN_RESOURCES) - set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) - set(RESOURCE_DIR "app_resources") +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(DEPENDS + app_resources/common.hlsl + app_resources/mpmc_queue.hlsl + app_resources/schedulers/mpmc.hlsl + app_resources/shader.comp.hlsl + app_resources/workgroup/pool_allocator.hlsl + app_resources/workgroup/stack.hlsl +) - get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) - get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) - get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) +set(JSON [=[ +[ + { + "INPUT": "app_resources/shader.comp.hlsl", + "KEY": "shader", + "COMPILE_OPTIONS": ["-T", "cs_6_8"], + "DEPENDS": [], + "CAPS": [] + } +] +]=]) - file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") - foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) - LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") - endforeach() +NBL_CREATE_NSC_COMPILE_RULES( + TARGET ${EXECUTABLE_NAME}SPIRV + LINK_TO ${EXECUTABLE_NAME} + DEPENDS ${DEPENDS} + BINARY_DIR ${OUTPUT_DIRECTORY} + MOUNT_POINT_DEFINE NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + COMMON_OPTIONS -I ${CMAKE_CURRENT_SOURCE_DIR} + OUTPUT_VAR KEYS + INCLUDE nbl/this_example/builtin/build/spirv/keys.hpp + NAMESPACE nbl::this_example::builtin::build + INPUTS ${JSON} +) - ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}"
"nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") - - LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) -endif() \ No newline at end of file +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin::build + TARGET ${EXECUTABLE_NAME}_builtinsBuild + LINK_TO ${EXECUTABLE_NAME} + BIND ${OUTPUT_DIRECTORY} + BUILTINS ${KEYS} +) \ No newline at end of file diff --git a/27_MPMCScheduler/app_resources/schedulers/mpmc.hlsl b/27_MPMCScheduler/app_resources/schedulers/mpmc.hlsl index 184f3702a..836c91576 100644 --- a/27_MPMCScheduler/app_resources/schedulers/mpmc.hlsl +++ b/27_MPMCScheduler/app_resources/schedulers/mpmc.hlsl @@ -1,8 +1,8 @@ #ifndef _NBL_HLSL_SCHEDULERS_MPMC_HLSL_ #define _NBL_HLSL_SCHEDULERS_MPMC_HLSL_ -//#include "../workgroup/stack.hlsl" -//#include "mpmc_queue.hlsl" +//#include "app_resources/workgroup/stack.hlsl" +//#include "app_resources/mpmc_queue.hlsl" #include "nbl/builtin/hlsl/workgroup/scratch_size.hlsl" #include "nbl/builtin/hlsl/workgroup/arithmetic.hlsl" diff --git a/27_MPMCScheduler/app_resources/shader.comp.hlsl b/27_MPMCScheduler/app_resources/shader.comp.hlsl index c49ad018c..3055ad618 100644 --- a/27_MPMCScheduler/app_resources/shader.comp.hlsl +++ b/27_MPMCScheduler/app_resources/shader.comp.hlsl @@ -1,6 +1,6 @@ //#include "nbl/builtin/hlsl/memory_accessor.hlsl" -#include "common.hlsl" +#include "app_resources/common.hlsl" #include "nbl/builtin/hlsl/limits.hlsl" #include "nbl/builtin/hlsl/numbers.hlsl" @@ -156,7 +156,7 @@ struct SharedAccessor }; // -#include "schedulers/mpmc.hlsl" +#include "app_resources/schedulers/mpmc.hlsl" struct SubgroupCaps { NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = true; @@ -305,6 +305,7 @@ uint32_t3 gl_WorkGroupSize() {return uint32_t3(WorkgroupSizeX*WorkgroupSizeY,1,1 } } [numthreads(WorkgroupSizeX*WorkgroupSizeY,1,1)] +[shader("compute")] void main() { // manually push an explicit workload diff --git a/27_MPMCScheduler/app_resources/workgroup/pool_allocator.hlsl b/27_MPMCScheduler/app_resources/workgroup/pool_allocator.hlsl index 6685fd5fc..e1532f945 100644 --- a/27_MPMCScheduler/app_resources/workgroup/pool_allocator.hlsl +++ b/27_MPMCScheduler/app_resources/workgroup/pool_allocator.hlsl @@ -1,7 +1,7 @@ #ifndef _NBL_HLSL_WORKGROUP_POOL_ALLOCATOR_HLSL_ #define _NBL_HLSL_WORKGROUP_POOL_ALLOCATOR_HLSL_ -#include "workgroup/stack.hlsl" +#include "app_resources/workgroup/stack.hlsl" namespace nbl { diff --git a/27_MPMCScheduler/main.cpp b/27_MPMCScheduler/main.cpp index c380bf3c6..0963f86e5 100644 --- a/27_MPMCScheduler/main.cpp +++ b/27_MPMCScheduler/main.cpp @@ -1,9 +1,10 @@ // Copyright (C) 2024-2025 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h -#include "nabla.h" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" -#include "SimpleWindowedApplication.hpp" + + +#include "nbl/examples/examples.hpp" +#include "nbl/this_example/builtin/build/spirv/keys.hpp" using namespace nbl; using namespace nbl::core; @@ -11,13 +12,15 @@ using namespace nbl::system; using namespace nbl::asset; using namespace nbl::ui; using namespace nbl::video; +using namespace nbl::examples; #include "app_resources/common.hlsl" -class MPMCSchedulerApp final : public examples::SimpleWindowedApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication + +class MPMCSchedulerApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { - using device_base_t = examples::SimpleWindowedApplication; - using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; using clock_t = std::chrono::steady_clock; constexpr static inline uint32_t WIN_W = 1280, WIN_H = 720; @@ -69,24 +72,31 @@ class MPMCSchedulerApp final : public examples::SimpleWindowedApplication, publi if (!asset_base_t::onAppInitialized(std::move(system))) return false; - smart_refctd_ptr shader; + smart_refctd_ptr shader; { - IAssetLoader::SAssetLoadParams lp = {}; - lp.logger = m_logger.get(); - lp.workingDirectory = ""; // virtual root - auto assetBundle = m_assetMgr->getAsset("app_resources/shader.comp.hlsl", lp); - const auto assets = assetBundle.getContents(); - if (assets.empty()) - return logFail("Failed to load shader from disk"); - - // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto source = IAsset::castDown(assets[0]); - if (!source) - return logFail("Failed to load shader from disk"); - - shader = m_device->createShader(source.get()); - if (!shader) - return false; + // load shader + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = ""; + + auto key = "app_resources/" + nbl::this_example::builtin::build::get_spirv_key<"shader">(m_device.get()); + const auto bundle = m_assetMgr->getAsset(key.data(), lp); + + const auto contents = bundle.getContents(); + + if (contents.empty()) + return logFail("Failed to load shader from disk"); + + if (bundle.getAssetType() != IAsset::ET_SHADER) + return logFail("Loaded asset has wrong type!"); + + shader = IAsset::castDown(contents[0]); + + if (!shader) + return false; + } + + } smart_refctd_ptr dsLayout; @@ -106,26 +116,17 @@ class MPMCSchedulerApp final : public examples::SimpleWindowedApplication, publi { const asset::SPushConstantRange ranges[] = {{ - .stageFlags = IGPUShader::E_SHADER_STAGE::ESS_COMPUTE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0, .size = sizeof(PushConstants) }}; auto layout = m_device->createPipelineLayout(ranges,smart_refctd_ptr(dsLayout)); - const IGPUComputePipeline::SCreationParams params[] = { { - { - .layout = layout.get() - }, - {}, - IGPUComputePipeline::SCreationParams::FLAGS::NONE, - { - .entryPoint = "main", - .shader = shader.get(), - .entries = nullptr, - .requiredSubgroupSize = IGPUShader::SSpecInfo::SUBGROUP_SIZE::UNKNOWN, - .requireFullSubgroups = true - } - }}; - if (!m_device->createComputePipelines(nullptr,params,&m_ppln)) + IGPUComputePipeline::SCreationParams params; + params.layout = layout.get(); + params.shader.shader = shader.get(); +
params.shader.entryPoint = "main"; + params.cached.requireFullSubgroups = true; + if (!m_device->createComputePipelines(nullptr, { &params, 1 }, &m_ppln)) return logFail("Failed to create Pipeline"); } @@ -306,7 +307,7 @@ class MPMCSchedulerApp final : public examples::SimpleWindowedApplication, publi .sharedAcceptableIdleCount = 0, .globalAcceptableIdleCount = 0 }; - cb->pushConstants(layout,IGPUShader::E_SHADER_STAGE::ESS_COMPUTE,0,sizeof(pc),&pc); + cb->pushConstants(layout,hlsl::ShaderStage::ESS_COMPUTE,0,sizeof(pc),&pc); cb->dispatch(WIN_W/WorkgroupSizeX,WIN_H/WorkgroupSizeY,1); } diff --git a/27_PLYSTLDemo/CMakeLists.txt b/27_PLYSTLDemo/CMakeLists.txt deleted file mode 100644 index a476b6203..000000000 --- a/27_PLYSTLDemo/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ - -include(common RESULT_VARIABLE RES) -if(NOT RES) - message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") -endif() - -nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") \ No newline at end of file diff --git a/27_PLYSTLDemo/config.json.template b/27_PLYSTLDemo/config.json.template deleted file mode 100644 index cb1b3b7a7..000000000 --- a/27_PLYSTLDemo/config.json.template +++ /dev/null @@ -1,28 +0,0 @@ -{ - "enableParallelBuild": true, - "threadsPerBuildProcess" : 2, - "isExecuted": false, - "scriptPath": "", - "cmake": { - "configurations": [ "Release", "Debug", "RelWithDebInfo" ], - "buildModes": [], - "requiredOptions": [ "NBL_BUILD_MITSUBA_LOADER", "NBL_BUILD_OPTIX" ] - }, - "profiles": [ - { - "backend": "vulkan", - "platform": "windows", - "buildModes": [], - "runConfiguration": "Release", - "gpuArchitectures": [] - } - ], - "dependencies": [], - "data": [ - { - "dependencies": [], - "command": [""], - "outputs": [] - } - ] -} \ No newline at end of file diff --git a/27_PLYSTLDemo/main.cpp b/27_PLYSTLDemo/main.cpp deleted file mode 100644 index 1e6d470e2..000000000 --- a/27_PLYSTLDemo/main.cpp +++ /dev/null @@ -1,579 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine".
-// For conditions of distribution and use, see copyright notice in nabla.h - -#define _NBL_STATIC_LIB_ -#include -#include -#include - -#include "CCamera.hpp" -#include "../common/CommonAPI.h" -#include "nbl/ext/ScreenShot/ScreenShot.h" - -using namespace nbl; -using namespace core; - -/* - Uncomment for more detailed logging -*/ - -// #define NBL_MORE_LOGS - -/* - Uncomment for writing assets -*/ - -#define WRITE_ASSETS - -class PLYSTLDemo : public ApplicationBase -{ - static constexpr uint32_t WIN_W = 1280; - static constexpr uint32_t WIN_H = 720; - static constexpr uint32_t SC_IMG_COUNT = 3u; - static constexpr uint32_t FRAMES_IN_FLIGHT = 5u; - static constexpr uint64_t MAX_TIMEOUT = 99999999999999ull; - static constexpr size_t NBL_FRAMES_TO_AVERAGE = 100ull; - static_assert(FRAMES_IN_FLIGHT > SC_IMG_COUNT); - - using RENDERPASS_INDEPENDENT_PIPELINE_ADRESS = size_t; - using GPU_PIPELINE_HASH_CONTAINER = std::map>; - using DependentDrawData = std::tuple, core::smart_refctd_ptr, core::smart_refctd_ptr, uint32_t, const asset::IRenderpassIndependentPipelineMetadata*>; - -public: - nbl::core::smart_refctd_ptr windowManager; - nbl::core::smart_refctd_ptr window; - nbl::core::smart_refctd_ptr windowCallback; - nbl::core::smart_refctd_ptr gl; - nbl::core::smart_refctd_ptr surface; - nbl::core::smart_refctd_ptr utilities; - nbl::core::smart_refctd_ptr logicalDevice; - nbl::video::IPhysicalDevice* gpuPhysicalDevice; - std::array queues = { nullptr, nullptr, nullptr, nullptr }; - nbl::core::smart_refctd_ptr swapchain; - nbl::core::smart_refctd_ptr renderpass; - nbl::core::smart_refctd_dynamic_array> fbos; - std::array, CommonAPI::InitOutput::MaxFramesInFlight>, CommonAPI::InitOutput::MaxQueuesCount> commandPools; - nbl::core::smart_refctd_ptr system; - nbl::core::smart_refctd_ptr assetManager; - nbl::video::IGPUObjectFromAssetConverter::SParams cpu2gpuParams; - nbl::core::smart_refctd_ptr logger; - nbl::core::smart_refctd_ptr inputSystem; - - nbl::core::smart_refctd_ptr gpuTransferFence; - nbl::core::smart_refctd_ptr gpuComputeFence; - nbl::video::IGPUObjectFromAssetConverter cpu2gpu; - - uint32_t acquiredNextFBO = {}; - int resourceIx = -1; - - core::smart_refctd_ptr commandBuffers[FRAMES_IN_FLIGHT]; - - core::smart_refctd_ptr frameComplete[FRAMES_IN_FLIGHT] = { nullptr }; - core::smart_refctd_ptr imageAcquire[FRAMES_IN_FLIGHT] = { nullptr }; - core::smart_refctd_ptr renderFinished[FRAMES_IN_FLIGHT] = { nullptr }; - - nbl::video::ISwapchain::SCreationParams m_swapchainCreationParams; - - std::chrono::system_clock::time_point lastTime; - bool frameDataFilled = false; - size_t frame_count = 0ull; - double time_sum = 0; - double dtList[NBL_FRAMES_TO_AVERAGE] = {}; - - CommonAPI::InputSystem::ChannelReader mouse; - CommonAPI::InputSystem::ChannelReader keyboard; - - Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); - - GPU_PIPELINE_HASH_CONTAINER gpuPipelinesPly; - GPU_PIPELINE_HASH_CONTAINER gpuPipelinesStl; - - DependentDrawData plyDrawData; - DependentDrawData stlDrawData; - - void setWindow(core::smart_refctd_ptr&& wnd) override - { - window = std::move(wnd); - } - nbl::ui::IWindow* getWindow() override - { - return window.get(); - } - void setSystem(core::smart_refctd_ptr&& s) override - { - system = std::move(s); - } - video::IAPIConnection* getAPIConnection() override - { - return gl.get(); - } - video::ILogicalDevice* getLogicalDevice() override - { - return logicalDevice.get(); - } - video::IGPURenderpass* getRenderpass() override - { - 
return renderpass.get(); - } - void setSurface(core::smart_refctd_ptr&& s) override - { - surface = std::move(s); - } - void setFBOs(std::vector>& f) override - { - for (int i = 0; i < f.size(); i++) - { - fbos->begin()[i] = core::smart_refctd_ptr(f[i]); - } - } - void setSwapchain(core::smart_refctd_ptr&& s) override - { - swapchain = std::move(s); - } - uint32_t getSwapchainImageCount() override - { - return swapchain->getImageCount(); - } - virtual nbl::asset::E_FORMAT getDepthFormat() override - { - return nbl::asset::EF_D32_SFLOAT; - } - -APP_CONSTRUCTOR(PLYSTLDemo) - - void onAppInitialized_impl() override - { - const auto swapchainImageUsage = static_cast(asset::IImage::EUF_COLOR_ATTACHMENT_BIT); - CommonAPI::InitParams initParams; - initParams.window = core::smart_refctd_ptr(window); - initParams.apiType = video::EAT_VULKAN; - initParams.appName = { _NBL_APP_NAME_ }; - initParams.framesInFlight = FRAMES_IN_FLIGHT; - initParams.windowWidth = WIN_W; - initParams.windowHeight = WIN_H; - initParams.swapchainImageCount = SC_IMG_COUNT; - initParams.swapchainImageUsage = swapchainImageUsage; - initParams.depthFormat = nbl::asset::EF_D32_SFLOAT; - auto initOutput = CommonAPI::InitWithDefaultExt(std::move(initParams)); - - window = std::move(initParams.window); - gl = std::move(initOutput.apiConnection); - surface = std::move(initOutput.surface); - gpuPhysicalDevice = std::move(initOutput.physicalDevice); - logicalDevice = std::move(initOutput.logicalDevice); - queues = std::move(initOutput.queues); - renderpass = std::move(initOutput.renderToSwapchainRenderpass); - commandPools = std::move(initOutput.commandPools); - assetManager = std::move(initOutput.assetManager); - logger = std::move(initOutput.logger); - inputSystem = std::move(initOutput.inputSystem); - system = std::move(initOutput.system); - windowCallback = std::move(initParams.windowCb); - utilities = std::move(initOutput.utilities); - m_swapchainCreationParams = std::move(initOutput.swapchainCreationParams); - - CommonAPI::createSwapchain(std::move(logicalDevice), m_swapchainCreationParams, WIN_W, WIN_H, swapchain); - assert(swapchain); - fbos = CommonAPI::createFBOWithSwapchainImages( - swapchain->getImageCount(), WIN_W, WIN_H, - logicalDevice, swapchain, renderpass, - nbl::asset::EF_D32_SFLOAT - ); - - auto defaultComputeCommandPool = commandPools[CommonAPI::InitOutput::EQT_COMPUTE][0]; - auto defaultTransferUpCommandPool = commandPools[CommonAPI::InitOutput::EQT_TRANSFER_UP][0]; - - nbl::video::IGPUObjectFromAssetConverter cpu2gpu; - nbl::video::IGPUObjectFromAssetConverter::SParams cpu2gpuParams; - - nbl::core::smart_refctd_ptr gpuTransferFence; - nbl::core::smart_refctd_ptr gpuTransferSemaphore; - - nbl::core::smart_refctd_ptr gpuComputeFence; - nbl::core::smart_refctd_ptr gpuComputeSemaphore; - - { - gpuTransferFence = logicalDevice->createFence(static_cast(0)); - gpuTransferSemaphore = logicalDevice->createSemaphore(); - - gpuComputeFence = logicalDevice->createFence(static_cast(0)); - gpuComputeSemaphore = logicalDevice->createSemaphore(); - - cpu2gpuParams.utilities = utilities.get(); - cpu2gpuParams.device = logicalDevice.get(); - cpu2gpuParams.assetManager = assetManager.get(); - cpu2gpuParams.pipelineCache = nullptr; - cpu2gpuParams.limits = gpuPhysicalDevice->getLimits(); - cpu2gpuParams.finalQueueFamIx = queues[decltype(initOutput)::EQT_GRAPHICS]->getFamilyIndex(); - - 
logicalDevice->createCommandBuffers(defaultTransferUpCommandPool.get(),video::IGPUCommandBuffer::EL_PRIMARY,1u,&cpu2gpuParams.perQueue[nbl::video::IGPUObjectFromAssetConverter::EQU_TRANSFER].cmdbuf); - cpu2gpuParams.perQueue[nbl::video::IGPUObjectFromAssetConverter::EQU_TRANSFER].queue = queues[decltype(initOutput)::EQT_TRANSFER_UP]; - cpu2gpuParams.perQueue[nbl::video::IGPUObjectFromAssetConverter::EQU_TRANSFER].semaphore = &gpuTransferSemaphore; - - logicalDevice->createCommandBuffers(defaultComputeCommandPool.get(),video::IGPUCommandBuffer::EL_PRIMARY,1u,&cpu2gpuParams.perQueue[nbl::video::IGPUObjectFromAssetConverter::EQU_COMPUTE].cmdbuf); - cpu2gpuParams.perQueue[nbl::video::IGPUObjectFromAssetConverter::EQU_COMPUTE].queue = queues[decltype(initOutput)::EQT_COMPUTE]; - cpu2gpuParams.perQueue[nbl::video::IGPUObjectFromAssetConverter::EQU_COMPUTE].semaphore = &gpuComputeSemaphore; - - cpu2gpuParams.beginCommandBuffers(); - } - - auto loadAndGetCpuMesh = [&](system::path path) -> std::pair, const asset::IAssetMetadata*> - { - auto meshes_bundle = assetManager->getAsset(path.string(), {}); - { - bool status = !meshes_bundle.getContents().empty(); - assert(status); - } - - auto mesh = core::smart_refctd_ptr_static_cast(meshes_bundle.getContents().begin()[0]); - auto metadata = meshes_bundle.getMetadata(); - return std::make_pair(mesh, metadata); - //return std::make_pair(core::smart_refctd_ptr_static_cast(meshes_bundle.getContents().begin()[0]), meshes_bundle.getMetadata()); - }; - - auto cpuBundlePLYData = loadAndGetCpuMesh(sharedInputCWD / "ply/Spanner-ply.ply"); - auto cpuBundleSTLData = loadAndGetCpuMesh(sharedInputCWD / "extrusionLogo_TEST_fixed.stl"); - - core::smart_refctd_ptr cpuMeshPly = cpuBundlePLYData.first; - auto metadataPly = cpuBundlePLYData.second->selfCast(); - - core::smart_refctd_ptr cpuMeshStl = cpuBundleSTLData.first; - auto metadataStl = cpuBundleSTLData.second->selfCast(); - -#ifdef WRITE_ASSETS - { - asset::IAssetWriter::SAssetWriteParams wp(cpuMeshPly.get()); - bool status = assetManager->writeAsset("Spanner_ply.ply", wp); - assert(status); - } - - { - asset::IAssetWriter::SAssetWriteParams wp(cpuMeshStl.get()); - bool status = assetManager->writeAsset("extrusionLogo_TEST_fixedTest.stl", wp); - assert(status); - } -#endif // WRITE_ASSETS - - /* - For the testing puposes we can safely assume all meshbuffers within mesh loaded from PLY & STL has same DS1 layout (used for camera-specific data) - */ - - auto getMeshDependentDrawData = [&](core::smart_refctd_ptr cpuMesh, bool isPLY) -> DependentDrawData - { - const asset::ICPUMeshBuffer* const firstMeshBuffer = cpuMesh->getMeshBuffers().begin()[0]; - const asset::ICPUDescriptorSetLayout* ds1layout = firstMeshBuffer->getPipeline()->getLayout()->getDescriptorSetLayout(1u); //! 
DS1 - const asset::IRenderpassIndependentPipelineMetadata* pipelineMetadata; - { - if (isPLY) - pipelineMetadata = metadataPly->getAssetSpecificMetadata(firstMeshBuffer->getPipeline()); - else - pipelineMetadata = metadataStl->getAssetSpecificMetadata(firstMeshBuffer->getPipeline()); - } - - /* - So we can create just one DescriptorSet - */ - - const uint32_t ds1UboBinding = ds1layout->getDescriptorRedirect(asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER).getBinding(asset::ICPUDescriptorSetLayout::CBindingRedirect::storage_range_index_t{ 0 }).data; - - auto getNeededDS1UboByteSize = [&]() - { - size_t neededDS1UboSize = 0ull; - { - for (const auto& shaderInputs : pipelineMetadata->m_inputSemantics) - if (shaderInputs.descriptorSection.type == asset::IRenderpassIndependentPipelineMetadata::ShaderInput::E_TYPE::ET_UNIFORM_BUFFER && shaderInputs.descriptorSection.uniformBufferObject.set == 1u && shaderInputs.descriptorSection.uniformBufferObject.binding == ds1UboBinding) - neededDS1UboSize = std::max(neededDS1UboSize, shaderInputs.descriptorSection.uniformBufferObject.relByteoffset + shaderInputs.descriptorSection.uniformBufferObject.bytesize); - } - return neededDS1UboSize; - }; - - const uint64_t uboDS1ByteSize = getNeededDS1UboByteSize(); - - core::smart_refctd_ptr gpuds1layout; - { - auto gpu_array = cpu2gpu.getGPUObjectsFromAssets(&ds1layout, &ds1layout + 1, cpu2gpuParams); - if (!gpu_array || gpu_array->size() < 1u || !(*gpu_array)[0]) - assert(false); - - gpuds1layout = (*gpu_array)[0]; - } - - const uint32_t setCount = 1; - auto gpuUBODescriptorPool = logicalDevice->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, &gpuds1layout.get(), &gpuds1layout.get()+1ull, &setCount); - - video::IGPUBuffer::SCreationParams creationParams; - creationParams.usage = asset::IBuffer::E_USAGE_FLAGS(asset::IBuffer::EUF_UNIFORM_BUFFER_BIT | asset::IBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF); - creationParams.queueFamilyIndices = 0u; - creationParams.queueFamilyIndices = nullptr; - creationParams.size = uboDS1ByteSize; - - auto gpuubo = logicalDevice->createBuffer(std::move(creationParams)); - auto gpuuboMemReqs = gpuubo->getMemoryReqs(); - gpuuboMemReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - logicalDevice->allocate(gpuuboMemReqs, gpuubo.get()); - - auto gpuds1 = gpuUBODescriptorPool->createDescriptorSet(std::move(gpuds1layout)); - { - video::IGPUDescriptorSet::SWriteDescriptorSet write; - write.dstSet = gpuds1.get(); - write.binding = ds1UboBinding; - write.count = 1u; - write.arrayElement = 0u; - write.descriptorType = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER; - video::IGPUDescriptorSet::SDescriptorInfo info; - { - info.desc = gpuubo; - info.info.buffer.offset = 0ull; - info.info.buffer.size = uboDS1ByteSize; - } - write.info = &info; - logicalDevice->updateDescriptorSets(1u, &write, 0u, nullptr); - } - - core::smart_refctd_ptr gpumesh; - { - auto gpu_array = cpu2gpu.getGPUObjectsFromAssets(&cpuMesh.get(), &cpuMesh.get() + 1, cpu2gpuParams); - cpu2gpuParams.waitForCreationToComplete(true); - cpu2gpuParams.beginCommandBuffers(); - if (!gpu_array || gpu_array->size() < 1u || !(*gpu_array)[0]) - assert(false); - - gpumesh = (*gpu_array)[0]; - } - - return std::make_tuple(gpumesh, gpuubo, gpuds1, ds1UboBinding, pipelineMetadata); - }; - - plyDrawData = getMeshDependentDrawData(cpuMeshPly, true); - stlDrawData = getMeshDependentDrawData(cpuMeshStl, false); - - { - auto fillGpuPipeline = [&](GPU_PIPELINE_HASH_CONTAINER& container, 
video::IGPUMesh* gpuMesh) - { - for (size_t i = 0; i < gpuMesh->getMeshBuffers().size(); ++i) - { - auto gpuIndependentPipeline = gpuMesh->getMeshBuffers().begin()[i]->getPipeline(); - - nbl::video::IGPUGraphicsPipeline::SCreationParams graphicsPipelineParams; - graphicsPipelineParams.renderpassIndependent = core::smart_refctd_ptr(const_cast(gpuIndependentPipeline)); - graphicsPipelineParams.renderpass = core::smart_refctd_ptr(renderpass); - - const RENDERPASS_INDEPENDENT_PIPELINE_ADRESS adress = reinterpret_cast(graphicsPipelineParams.renderpassIndependent.get()); - container[adress] = logicalDevice->createGraphicsPipeline(nullptr, std::move(graphicsPipelineParams)); - } - }; - - fillGpuPipeline(gpuPipelinesPly, std::get>(plyDrawData).get()); - fillGpuPipeline(gpuPipelinesStl, std::get>(stlDrawData).get()); - } - - core::vectorSIMDf cameraPosition(0, 5, -10); - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), video::ISurface::getTransformedAspectRatio(swapchain->getPreTransform(), WIN_W, WIN_H), 0.001, 1000); - camera = Camera(cameraPosition, core::vectorSIMDf(0, 0, 0), projectionMatrix, 0.01f, 1.f); - lastTime = std::chrono::system_clock::now(); - - for (size_t i = 0ull; i < NBL_FRAMES_TO_AVERAGE; ++i) - dtList[i] = 0.0; - - const auto& graphicsCommandPools = commandPools[CommonAPI::InitOutput::EQT_GRAPHICS]; - for (uint32_t i = 0u; i < FRAMES_IN_FLIGHT; i++) - { - logicalDevice->createCommandBuffers(graphicsCommandPools[i].get(), video::IGPUCommandBuffer::EL_PRIMARY, 1, commandBuffers+i); - imageAcquire[i] = logicalDevice->createSemaphore(); - renderFinished[i] = logicalDevice->createSemaphore(); - } - } - - void onAppTerminated_impl() override - { - const auto& fboCreationParams = fbos->begin()[acquiredNextFBO]->getCreationParameters(); - auto gpuSourceImageView = fboCreationParams.attachments[0]; - - //TODO: - bool status = ext::ScreenShot::createScreenShot( - logicalDevice.get(), - queues[CommonAPI::InitOutput::EQT_TRANSFER_UP], - renderFinished[resourceIx].get(), - gpuSourceImageView.get(), - assetManager.get(), - "ScreenShot.png", - asset::IImage::EL_PRESENT_SRC, - asset::EAF_NONE); - assert(status); - } - - void workLoopBody() override - { - ++resourceIx; - if (resourceIx >= FRAMES_IN_FLIGHT) - resourceIx = 0; - - auto& commandBuffer = commandBuffers[resourceIx]; - auto& fence = frameComplete[resourceIx]; - - if (fence) - while (logicalDevice->waitForFences(1u, &fence.get(), false, MAX_TIMEOUT) == video::IGPUFence::ES_TIMEOUT) {} - else - fence = logicalDevice->createFence(static_cast(0)); - - auto renderStart = std::chrono::system_clock::now(); - const auto renderDt = std::chrono::duration_cast(renderStart - lastTime).count(); - lastTime = renderStart; - { // Calculate Simple Moving Average for FrameTime - time_sum -= dtList[frame_count]; - time_sum += renderDt; - dtList[frame_count] = renderDt; - frame_count++; - if (frame_count >= NBL_FRAMES_TO_AVERAGE) - { - frameDataFilled = true; - frame_count = 0; - } - - } - const double averageFrameTime = frameDataFilled ? 
(time_sum / (double)NBL_FRAMES_TO_AVERAGE) : (time_sum / frame_count); - -#ifdef NBL_MORE_LOGS - logger->log("renderDt = %f ------ averageFrameTime = %f", system::ILogger::ELL_INFO, renderDt, averageFrameTime); -#endif // NBL_MORE_LOGS - - auto averageFrameTimeDuration = std::chrono::duration(averageFrameTime); - auto nextPresentationTime = renderStart + averageFrameTimeDuration; - auto nextPresentationTimeStamp = std::chrono::duration_cast(nextPresentationTime.time_since_epoch()); - - inputSystem->getDefaultMouse(&mouse); - inputSystem->getDefaultKeyboard(&keyboard); - - camera.beginInputProcessing(nextPresentationTimeStamp); - mouse.consumeEvents([&](const ui::IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); }, logger.get()); - keyboard.consumeEvents([&](const ui::IKeyboardEventChannel::range_t& events) -> void { camera.keyboardProcess(events); }, logger.get()); - camera.endInputProcessing(nextPresentationTimeStamp); - - const auto& viewMatrix = camera.getViewMatrix(); - const auto& viewProjectionMatrix = matrix4SIMD::concatenateBFollowedByAPrecisely( - video::ISurface::getSurfaceTransformationMatrix(swapchain->getPreTransform()), - camera.getConcatenatedMatrix() - ); - - commandBuffer->reset(nbl::video::IGPUCommandBuffer::ERF_RELEASE_RESOURCES_BIT); - commandBuffer->begin(video::IGPUCommandBuffer::EU_ONE_TIME_SUBMIT_BIT); // TODO: Reset Frame's CommandPool - - asset::SViewport viewport; - viewport.minDepth = 1.f; - viewport.maxDepth = 0.f; - viewport.x = 0u; - viewport.y = 0u; - viewport.width = WIN_W; - viewport.height = WIN_H; - commandBuffer->setViewport(0u, 1u, &viewport); - - swapchain->acquireNextImage(MAX_TIMEOUT, imageAcquire[resourceIx].get(), nullptr, &acquiredNextFBO); - - nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo beginInfo; - { - VkRect2D area; - area.offset = { 0,0 }; - area.extent = { WIN_W, WIN_H }; - asset::SClearValue clear[2] = {}; - clear[0].color.float32[0] = 1.f; - clear[0].color.float32[1] = 1.f; - clear[0].color.float32[2] = 1.f; - clear[0].color.float32[3] = 1.f; - clear[1].depthStencil.depth = 0.f; - - beginInfo.clearValueCount = 2u; - beginInfo.framebuffer = fbos->begin()[acquiredNextFBO]; - beginInfo.renderpass = renderpass; - beginInfo.renderArea = area; - beginInfo.clearValues = clear; - } - - commandBuffer->beginRenderPass(&beginInfo, nbl::asset::ESC_INLINE); - - auto renderMesh = [&](GPU_PIPELINE_HASH_CONTAINER& gpuPipelines, DependentDrawData& drawData, uint32_t index) - { - auto gpuMesh = std::get>(drawData); - auto gpuubo = std::get>(drawData); - auto gpuds1 = std::get>(drawData); - auto ds1UboBinding = std::get(drawData); - const auto* pipelineMetadata = std::get(drawData); - - core::matrix3x4SIMD modelMatrix; - - if (index == 1) - modelMatrix.setScale(core::vectorSIMDf(10, 10, 10)); - modelMatrix.setTranslation(nbl::core::vectorSIMDf(index * 150, 0, 0, 0)); - - core::matrix4SIMD mvp = core::concatenateBFollowedByA(viewProjectionMatrix, modelMatrix); - - core::vector uboData(gpuubo->getSize()); - for (const auto& shaderInputs : pipelineMetadata->m_inputSemantics) - { - if (shaderInputs.descriptorSection.type == asset::IRenderpassIndependentPipelineMetadata::ShaderInput::E_TYPE::ET_UNIFORM_BUFFER && shaderInputs.descriptorSection.uniformBufferObject.set == 1u && shaderInputs.descriptorSection.uniformBufferObject.binding == ds1UboBinding) - { - switch (shaderInputs.type) - { - case asset::IRenderpassIndependentPipelineMetadata::ECSI_WORLD_VIEW_PROJ: - { - memcpy(uboData.data() + 
shaderInputs.descriptorSection.uniformBufferObject.relByteoffset, mvp.pointer(), shaderInputs.descriptorSection.uniformBufferObject.bytesize); - } break; - - case asset::IRenderpassIndependentPipelineMetadata::ECSI_WORLD_VIEW: - { - memcpy(uboData.data() + shaderInputs.descriptorSection.uniformBufferObject.relByteoffset, viewMatrix.pointer(), shaderInputs.descriptorSection.uniformBufferObject.bytesize); - } break; - - case asset::IRenderpassIndependentPipelineMetadata::ECSI_WORLD_VIEW_INVERSE_TRANSPOSE: - { - memcpy(uboData.data() + shaderInputs.descriptorSection.uniformBufferObject.relByteoffset, viewMatrix.pointer(), shaderInputs.descriptorSection.uniformBufferObject.bytesize); - } break; - } - } - } - - commandBuffer->updateBuffer(gpuubo.get(), 0ull, gpuubo->getSize(), uboData.data()); - - for (auto gpuMeshBuffer : gpuMesh->getMeshBuffers()) - { - auto gpuGraphicsPipeline = gpuPipelines[reinterpret_cast(gpuMeshBuffer->getPipeline())]; - - const video::IGPURenderpassIndependentPipeline* gpuRenderpassIndependentPipeline = gpuMeshBuffer->getPipeline(); - const video::IGPUDescriptorSet* ds3 = gpuMeshBuffer->getAttachedDescriptorSet(); - - commandBuffer->bindGraphicsPipeline(gpuGraphicsPipeline.get()); - - const video::IGPUDescriptorSet* gpuds1_ptr = gpuds1.get(); - commandBuffer->bindDescriptorSets(asset::EPBP_GRAPHICS, gpuRenderpassIndependentPipeline->getLayout(), 1u, 1u, &gpuds1_ptr, 0u); - const video::IGPUDescriptorSet* gpuds3_ptr = gpuMeshBuffer->getAttachedDescriptorSet(); - - if (gpuds3_ptr) - commandBuffer->bindDescriptorSets(asset::EPBP_GRAPHICS, gpuRenderpassIndependentPipeline->getLayout(), 3u, 1u, &gpuds3_ptr, 0u); - if (gpuRenderpassIndependentPipeline->getLayout()->m_pushConstantRanges) - commandBuffer->pushConstants(gpuRenderpassIndependentPipeline->getLayout(), video::IGPUShader::ESS_FRAGMENT, 0u, gpuMeshBuffer->MAX_PUSH_CONSTANT_BYTESIZE, gpuMeshBuffer->getPushConstantsDataPtr()); - - commandBuffer->drawMeshBuffer(gpuMeshBuffer); - } - }; - - /* - Record PLY and STL rendering commands - */ - - renderMesh(gpuPipelinesPly, plyDrawData, 0); - renderMesh(gpuPipelinesStl, stlDrawData, 1); - - commandBuffer->endRenderPass(); - commandBuffer->end(); - - CommonAPI::Submit(logicalDevice.get(), commandBuffer.get(), queues[CommonAPI::InitOutput::EQT_GRAPHICS], imageAcquire[resourceIx].get(), renderFinished[resourceIx].get(), fence.get()); - CommonAPI::Present(logicalDevice.get(), swapchain.get(), queues[CommonAPI::InitOutput::EQT_GRAPHICS], renderFinished[resourceIx].get(), acquiredNextFBO); - } - - bool keepRunning() override - { - return windowCallback->isWindowOpen(); - } -}; - -NBL_COMMON_API_MAIN(PLYSTLDemo) \ No newline at end of file diff --git a/28_FFTBloom/app_resources/fft_common.hlsl b/28_FFTBloom/app_resources/fft_common.hlsl index 41f8821cc..9f2be1432 100644 --- a/28_FFTBloom/app_resources/fft_common.hlsl +++ b/28_FFTBloom/app_resources/fft_common.hlsl @@ -5,13 +5,13 @@ groupshared uint32_t sharedmem[FFTParameters::SharedMemoryDWORDs]; struct SharedMemoryAccessor { - template + template void set(IndexType idx, AccessType value) { sharedmem[idx] = value; } - template + template void get(IndexType idx, NBL_REF_ARG(AccessType) value) { value = sharedmem[idx]; @@ -36,14 +36,14 @@ struct PreloadedAccessorCommonBase struct PreloadedAccessorBase : PreloadedAccessorCommonBase { - template - void set(uint32_t idx, AccessType value) + template + void set(IndexType idx, AccessType value) { preloaded[idx >> WorkgroupSizeLog2] = value; } - template - void get(uint32_t idx, 
NBL_REF_ARG(AccessType) value) + template + void get(IndexType idx, NBL_REF_ARG(AccessType) value) { value = preloaded[idx >> WorkgroupSizeLog2]; } @@ -54,14 +54,14 @@ struct PreloadedAccessorBase : PreloadedAccessorCommonBase // In the case for preloading all channels at once we make it stateful so we track which channel we're running FFT on struct MultiChannelPreloadedAccessorBase : PreloadedAccessorCommonBase { - template - void set(uint32_t idx, AccessType value) + template + void set(IndexType idx, AccessType value) { preloaded[currentChannel][idx >> WorkgroupSizeLog2] = value; } - template - void get(uint32_t idx, NBL_REF_ARG(AccessType) value) + template + void get(IndexType idx, NBL_REF_ARG(AccessType) value) { value = preloaded[currentChannel][idx >> WorkgroupSizeLog2]; } diff --git a/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl b/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl index 73d9d7850..07c2ec8cf 100644 --- a/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl +++ b/28_FFTBloom/app_resources/fft_convolve_ifft.hlsl @@ -223,6 +223,7 @@ NBL_CONSTEXPR_STATIC_INLINE float32_t2 PreloadedSecondAxisAccessor::KernelHalfPi NBL_CONSTEXPR_STATIC_INLINE vector PreloadedSecondAxisAccessor::One = {1.0f, 0.f}; [numthreads(FFTParameters::WorkgroupSize, 1, 1)] +[shader("compute")] void main(uint32_t3 ID : SV_DispatchThreadID) { SharedMemoryAccessor sharedmemAccessor; diff --git a/28_FFTBloom/app_resources/image_fft_first_axis.hlsl b/28_FFTBloom/app_resources/image_fft_first_axis.hlsl index 864c64b1e..f1478a8d6 100644 --- a/28_FFTBloom/app_resources/image_fft_first_axis.hlsl +++ b/28_FFTBloom/app_resources/image_fft_first_axis.hlsl @@ -76,6 +76,7 @@ struct PreloadedFirstAxisAccessor : MultiChannelPreloadedAccessorBase }; [numthreads(FFTParameters::WorkgroupSize, 1, 1)] +[shader("compute")] void main(uint32_t3 ID : SV_DispatchThreadID) { SharedMemoryAccessor sharedmemAccessor; diff --git a/28_FFTBloom/app_resources/image_ifft_first_axis.hlsl b/28_FFTBloom/app_resources/image_ifft_first_axis.hlsl index 9146073dd..b3bef3510 100644 --- a/28_FFTBloom/app_resources/image_ifft_first_axis.hlsl +++ b/28_FFTBloom/app_resources/image_ifft_first_axis.hlsl @@ -136,6 +136,7 @@ struct PreloadedFirstAxisAccessor : MultiChannelPreloadedAccessorMirrorTradeBase }; [numthreads(FFTParameters::WorkgroupSize, 1, 1)] +[shader("compute")] void main(uint32_t3 ID : SV_DispatchThreadID) { SharedMemoryAccessor sharedmemAccessor; diff --git a/28_FFTBloom/app_resources/kernel_fft_first_axis.hlsl b/28_FFTBloom/app_resources/kernel_fft_first_axis.hlsl index 51f514c4a..741bac7db 100644 --- a/28_FFTBloom/app_resources/kernel_fft_first_axis.hlsl +++ b/28_FFTBloom/app_resources/kernel_fft_first_axis.hlsl @@ -68,6 +68,7 @@ struct PreloadedFirstAxisAccessor : MultiChannelPreloadedAccessorBase }; [numthreads(FFTParameters::WorkgroupSize, 1, 1)] +[shader("compute")] void main(uint32_t3 ID : SV_DispatchThreadID) { SharedMemoryAccessor sharedmemAccessor; diff --git a/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl b/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl index ab7216da2..eaecb5d0f 100644 --- a/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl +++ b/28_FFTBloom/app_resources/kernel_fft_second_axis.hlsl @@ -200,6 +200,7 @@ struct PreloadedSecondAxisAccessor : MultiChannelPreloadedAccessorMirrorTradeBas }; [numthreads(FFTParameters::WorkgroupSize, 1, 1)] +[shader("compute")] void main(uint32_t3 ID : SV_DispatchThreadID) { SharedMemoryAccessor sharedmemAccessor; diff --git 
a/28_FFTBloom/app_resources/kernel_spectrum_normalize.hlsl b/28_FFTBloom/app_resources/kernel_spectrum_normalize.hlsl index f2ef207d3..efe406301 100644 --- a/28_FFTBloom/app_resources/kernel_spectrum_normalize.hlsl +++ b/28_FFTBloom/app_resources/kernel_spectrum_normalize.hlsl @@ -2,6 +2,7 @@ [[vk::binding(2, 0)]] RWTexture2DArray kernelChannels; [numthreads(8, 8, 1)] +[shader("compute")] void main(uint32_t3 ID : SV_DispatchThreadID) { const scalar_t powerReciprocal = vk::RawBufferLoad(pushConstants.rowMajorBufferAddress); diff --git a/28_FFTBloom/main.cpp b/28_FFTBloom/main.cpp index cc312c3be..85d746b75 100644 --- a/28_FFTBloom/main.cpp +++ b/28_FFTBloom/main.cpp @@ -2,27 +2,31 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include "SimpleWindowedApplication.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" + +#include "nbl/examples/examples.hpp" using namespace nbl; -using namespace core; -using namespace system; -using namespace asset; -using namespace video; -using namespace ui; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; #include "app_resources/common.hlsl" #include "nbl/builtin/hlsl/bit.hlsl" + + // Defaults that match this example's image constexpr uint32_t WIN_W = 1280; constexpr uint32_t WIN_H = 720; -class FFTBloomApp final : public examples::SimpleWindowedApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication +class FFTBloomApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { - using device_base_t = examples::SimpleWindowedApplication; - using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; using clock_t = std::chrono::steady_clock; // Windowed App members @@ -169,7 +173,7 @@ class FFTBloomApp final : public examples::SimpleWindowedApplication, public app float32_t totalSizeReciprocal; }; - inline core::smart_refctd_ptr createShader(const char* includeMainName, const SShaderConstevalParameters& shaderConstants) + inline core::smart_refctd_ptr createShader(const char* includeMainName, const SShaderConstevalParameters& shaderConstants) { // The annoying "const static member field must be initialized outside of struct" bug strikes again std::ostringstream kernelHalfPixelSizeStream; @@ -204,18 +208,17 @@ class FFTBloomApp final : public examples::SimpleWindowedApplication, public app - auto CPUShader = core::make_smart_refctd_ptr((prelude+"\n#include \"" + includeMainName + "\"\n").c_str(), - IShader::E_SHADER_STAGE::ESS_COMPUTE, + auto HLSLShader = core::make_smart_refctd_ptr((prelude+"\n#include \"" + includeMainName + "\"\n").c_str(), IShader::E_CONTENT_TYPE::ECT_HLSL, includeMainName); - assert(CPUShader); + assert(HLSLShader); #ifndef _NBL_DEBUG ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); - return m_device->createShader({ CPUShader.get(), opt.get(), m_readCache.get(), m_writeCache.get()}); + return m_device->compileShader({ HLSLShader.get(), opt.get(), m_readCache.get(), m_writeCache.get()}); #else - return m_device->createShader({ CPUShader.get(), nullptr, m_readCache.get(), m_writeCache.get() }); + 
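// Two things worth flagging about the two compileShader() calls in this #ifndef _NBL_DEBUG block: the
// brace-init appears to be positional - {source, optional SPIR-V optimizer, read cache, write cache} -
// with a null optimizer being the only difference between the two paths; and the shader stage no longer
// rides along with the source object (the old ESS_COMPUTE constructor argument above is gone), since the
// stage is presumably now taken from the source itself via the [shader("compute")] attributes or
// #pragma shader_stage directives added elsewhere in this patch.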
return m_device->compileShader({ HLSLShader.get(), nullptr, m_readCache.get(), m_writeCache.get() }); #endif } @@ -461,7 +464,7 @@ class FFTBloomApp final : public examples::SimpleWindowedApplication, public app assert(m_kerImageView); // Going to need an IUtils to perform uploads/downloads - m_utils = make_smart_refctd_ptr(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger)); + m_utils = IUtilities::create(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger)); // Now convert uploads // Get graphics queue for image transfer @@ -709,7 +712,7 @@ class FFTBloomApp final : public examples::SimpleWindowedApplication, public app // Normalization shader needs this info uint16_t secondAxisFFTHalfLengthLog2 = elementsPerInvocationLog2 + workgroupSizeLog2 - 1; // Create shaders - smart_refctd_ptr shaders[3]; + smart_refctd_ptr shaders[3]; uint16_t2 kernelDimensions = { kerDim.width, kerDim.height }; SShaderConstevalParameters::SShaderConstevalParametersCreateInfo shaderConstevalInfo = { .useHalfFloats = m_useHalfFloats, .elementsPerInvocationLog2 = elementsPerInvocationLog2, .workgroupSizeLog2 = workgroupSizeLog2, .numWorkgroupsLog2 = secondAxisFFTHalfLengthLog2, .previousWorkgroupSizeLog2 = workgroupSizeLog2 }; SShaderConstevalParameters shaderConstevalParameters(shaderConstevalInfo); @@ -722,11 +725,11 @@ class FFTBloomApp final : public examples::SimpleWindowedApplication, public app for (auto i = 0u; i < 3; i++) { params[i].layout = pipelineLayout.get(); - params[i].shader.entryPoint = "main"; params[i].shader.shader = shaders[i].get(); + params[i].shader.entryPoint = "main"; // Normalization doesn't require full subgroups - params[i].shader.requireFullSubgroups = bool(2-i); - params[i].shader.requiredSubgroupSize = static_cast(hlsl::findMSB(deviceLimits.maxSubgroupSize)); + params[i].cached.requireFullSubgroups = bool(2-i); + params[i].shader.requiredSubgroupSize = static_cast(hlsl::findMSB(deviceLimits.maxSubgroupSize)); } smart_refctd_ptr pipelines[3]; @@ -884,7 +887,7 @@ class FFTBloomApp final : public examples::SimpleWindowedApplication, public app uint16_t firstAxisFFTHalfLengthLog2; uint16_t firstAxisFFTElementsPerInvocationLog2; uint16_t firstAxisFFTWorkgroupSizeLog2; - smart_refctd_ptr shaders[3]; + smart_refctd_ptr shaders[3]; { auto [elementsPerInvocationLog2, workgroupSizeLog2] = workgroup::fft::optimalFFTParameters(deviceLimits.maxOptimallyResidentWorkgroupInvocations, m_marginSrcDim.height, deviceLimits.maxSubgroupSize); SShaderConstevalParameters::SShaderConstevalParametersCreateInfo shaderConstevalInfo = { .useHalfFloats = m_useHalfFloats, .elementsPerInvocationLog2 = elementsPerInvocationLog2, .workgroupSizeLog2 = workgroupSizeLog2 }; @@ -926,10 +929,10 @@ class FFTBloomApp final : public examples::SimpleWindowedApplication, public app IGPUComputePipeline::SCreationParams params[3] = {}; for (auto i = 0u; i < 3; i++) { params[i].layout = pipelineLayout.get(); - params[i].shader.entryPoint = "main"; params[i].shader.shader = shaders[i].get(); - params[i].shader.requiredSubgroupSize = static_cast(hlsl::findMSB(deviceLimits.maxSubgroupSize)); - params[i].shader.requireFullSubgroups = true; + params[i].shader.entryPoint = "main"; + params[i].shader.requiredSubgroupSize = static_cast(hlsl::findMSB(deviceLimits.maxSubgroupSize)); + params[i].cached.requireFullSubgroups = true; } smart_refctd_ptr pipelines[3]; diff --git a/29_Arithmetic2Bench/CMakeLists.txt b/29_Arithmetic2Bench/CMakeLists.txt new file mode 100644 index 000000000..99c51769c --- /dev/null +++ 
b/29_Arithmetic2Bench/CMakeLists.txt @@ -0,0 +1,15 @@ +include(common) + +nbl_create_executable_project("" "" "" "") + +NBL_CREATE_RESOURCE_ARCHIVE( + NAMESPACE nbl::this_example::builtin + TARGET ${EXECUTABLE_NAME}_builtins + LINK_TO ${EXECUTABLE_NAME} + BIND app_resources + BUILTINS + benchmarkSubgroup.comp.hlsl + benchmarkWorkgroup.comp.hlsl + common.hlsl + shaderCommon.hlsl +) \ No newline at end of file diff --git a/29_Arithmetic2Bench/app_resources/benchmarkSubgroup.comp.hlsl b/29_Arithmetic2Bench/app_resources/benchmarkSubgroup.comp.hlsl new file mode 100644 index 000000000..018672386 --- /dev/null +++ b/29_Arithmetic2Bench/app_resources/benchmarkSubgroup.comp.hlsl @@ -0,0 +1,57 @@ +#pragma shader_stage(compute) + +#define operation_t nbl::hlsl::OPERATION + +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/subgroup_basic.hlsl" +#include "nbl/builtin/hlsl/subgroup2/arithmetic_params.hlsl" +#include "nbl/builtin/hlsl/subgroup2/arithmetic_portability.hlsl" +#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" + +#include "app_resources/shaderCommon.hlsl" +#include "nbl/builtin/hlsl/workgroup2/basic.hlsl" + +template +using params_t = SUBGROUP_CONFIG_T; + +NBL_CONSTEXPR_STATIC_INLINE uint32_t ItemsPerInvocation = params_t::base_t, device_capabilities>::ItemsPerInvocation; + +typedef vector type_t; + +uint32_t globalIndex() +{ + return glsl::gl_WorkGroupID().x*WORKGROUP_SIZE+workgroup::SubgroupContiguousIndex(); +} + +template +static void subbench(NBL_CONST_REF_ARG(type_t) sourceVal) +{ + type_t value = sourceVal; + + const uint64_t outputBufAddr = pc.pOutputBuf[Binop::BindingIndex]; + + operation_t > func; + // [unroll] + for (uint32_t i = 0; i < NUM_LOOPS; i++) + value = func(value); + + vk::RawBufferStore(outputBufAddr + sizeof(type_t) * globalIndex(), value, sizeof(uint32_t)); +} + +void benchmark() +{ + const uint32_t invocationIndex = globalIndex(); + type_t sourceVal; + Xoroshiro64Star xoroshiro = Xoroshiro64Star::construct(uint32_t2(invocationIndex,invocationIndex+1)); + [unroll] + for (uint16_t i = 0; i < ItemsPerInvocation; i++) + sourceVal[i] = xoroshiro(); + + subbench >(sourceVal); +} + +[numthreads(WORKGROUP_SIZE,1,1)] +void main() +{ + benchmark(); +} diff --git a/29_Arithmetic2Bench/app_resources/benchmarkWorkgroup.comp.hlsl b/29_Arithmetic2Bench/app_resources/benchmarkWorkgroup.comp.hlsl new file mode 100644 index 000000000..8442ecc38 --- /dev/null +++ b/29_Arithmetic2Bench/app_resources/benchmarkWorkgroup.comp.hlsl @@ -0,0 +1,125 @@ +#pragma shader_stage(compute) + +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/subgroup_basic.hlsl" +#include "nbl/builtin/hlsl/subgroup2/arithmetic_portability.hlsl" +#include "nbl/builtin/hlsl/workgroup2/arithmetic.hlsl" +#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" + +using config_t = WORKGROUP_CONFIG_T; + +#include "app_resources/shaderCommon.hlsl" + +typedef vector type_t; + +// final (level 1/2) scan needs to fit in one subgroup exactly +groupshared uint32_t scratch[mpl::max_v]; + +#include "nbl/examples/workgroup/DataAccessors.hlsl" +using namespace nbl::hlsl::examples::workgroup; + +template +struct RandomizedInputDataProxy +{ + using dtype_t = vector; + + NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = uint16_t(1u) << WorkgroupSizeLog2; + NBL_CONSTEXPR_STATIC_INLINE uint16_t PreloadedDataCount = VirtualWorkgroupSize / WorkgroupSize; + + static RandomizedInputDataProxy create(uint64_t inputBuf, uint64_t outputBuf) + { + 
RandomizedInputDataProxy retval; + retval.data = DataProxy::create(inputBuf, outputBuf); + return retval; + } + + template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + value = preloaded[ix>>WorkgroupSizeLog2]; + } + template + void set(const IndexType ix, const AccessType value) + { + preloaded[ix>>WorkgroupSizeLog2] = value; + } + + void preload() + { + const uint16_t invocationIndex = workgroup::SubgroupContiguousIndex(); + Xoroshiro64Star xoroshiro = Xoroshiro64Star::construct(uint32_t2(invocationIndex,invocationIndex+1)); + [unroll] + for (uint16_t idx = 0; idx < PreloadedDataCount; idx++) + [unroll] + for (uint16_t i = 0; i < ItemsPerInvocation; i++) + preloaded[idx][i] = xoroshiro(); + } + void unload() + { + const uint16_t invocationIndex = workgroup::SubgroupContiguousIndex(); + [unroll] + for (uint16_t idx = 0; idx < PreloadedDataCount; idx++) + data.template set(idx * WorkgroupSize + invocationIndex, preloaded[idx]); + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + //glsl::memoryBarrierShared(); implied by the above + } + + DataProxy data; + dtype_t preloaded[PreloadedDataCount]; +}; + +static ScratchProxy arithmeticAccessor; + +using data_proxy_t = RandomizedInputDataProxy; + +template +struct operation_t +{ + using binop_base_t = typename Binop::base_t; + using otype_t = typename Binop::type_t; + + void operator()(data_proxy_t dataAccessor) + { +#if IS_REDUCTION + otype_t value = +#endif + OPERATION::template __call(dataAccessor,arithmeticAccessor); + // we barrier before because we alias the accessors for Binop + arithmeticAccessor.workgroupExecutionAndMemoryBarrier(); +#if IS_REDUCTION + [unroll] + for (uint32_t i = 0; i < data_proxy_t::PreloadedDataCount; i++) + dataAccessor.preloaded[i] = value; +#endif + } +}; + +template +static void subbench() +{ + data_proxy_t dataAccessor = data_proxy_t::create(0, pc.pOutputBuf[Binop::BindingIndex]); + dataAccessor.preload(); + + operation_t func; + for (uint32_t i = 0; i < NUM_LOOPS; i++) + func(dataAccessor); + + dataAccessor.unload(); +} + +void benchmark() +{ + // only benchmark plus op + subbench >(); +} + + +[numthreads(config_t::WorkgroupSize,1,1)] +void main() +{ + benchmark(); +} diff --git a/29_Arithmetic2Bench/app_resources/common.hlsl b/29_Arithmetic2Bench/app_resources/common.hlsl new file mode 100644 index 000000000..cca5af987 --- /dev/null +++ b/29_Arithmetic2Bench/app_resources/common.hlsl @@ -0,0 +1,34 @@ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/functional.hlsl" + +struct PushConstantData +{ + uint64_t pOutputBuf[2]; +}; + +namespace arithmetic +{ +template +struct plus : nbl::hlsl::plus +{ + using base_t = nbl::hlsl::plus; + + NBL_CONSTEXPR_STATIC_INLINE uint16_t BindingIndex = 0; +#ifndef __HLSL_VERSION + static inline constexpr const char* name = "plus"; +#endif +}; + +template +struct ballot : nbl::hlsl::plus +{ + using base_t = nbl::hlsl::plus; + + NBL_CONSTEXPR_STATIC_INLINE uint16_t BindingIndex = 1; +#ifndef __HLSL_VERSION + static inline constexpr const char* name = "bitcount"; +#endif +}; +} + +#include "nbl/builtin/hlsl/glsl_compat/subgroup_basic.hlsl" diff --git a/29_Arithmetic2Bench/app_resources/shaderCommon.hlsl b/29_Arithmetic2Bench/app_resources/shaderCommon.hlsl new file mode 100644 index 000000000..ec5824a21 --- /dev/null +++ b/29_Arithmetic2Bench/app_resources/shaderCommon.hlsl @@ -0,0 +1,26 @@ +#include "app_resources/common.hlsl" + +using namespace nbl; +using namespace hlsl; + +[[vk::push_constant]] PushConstantData 
pc; + +struct device_capabilities +{ +#ifdef TEST_NATIVE + NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = true; +#else + NBL_CONSTEXPR_STATIC_INLINE bool shaderSubgroupArithmetic = false; +#endif +}; + +#ifndef OPERATION +#error "Define OPERATION!" +#endif + +#ifndef NUM_LOOPS +#error "Define NUM_LOOPS!" +#endif + +// NOTE added dummy output image to be able to profile with Nsight, which still doesn't support profiling headless compute shaders +[[vk::binding(2, 0)]] RWTexture2D outImage; // dummy diff --git a/29_SpecializationConstants/config.json.template b/29_Arithmetic2Bench/config.json.template similarity index 100% rename from 29_SpecializationConstants/config.json.template rename to 29_Arithmetic2Bench/config.json.template diff --git a/29_Arithmetic2Bench/main.cpp b/29_Arithmetic2Bench/main.cpp new file mode 100644 index 000000000..5809c4a9a --- /dev/null +++ b/29_Arithmetic2Bench/main.cpp @@ -0,0 +1,689 @@ +#include "nbl/examples/examples.hpp" +#include "app_resources/common.hlsl" +#include "nbl/builtin/hlsl/workgroup2/arithmetic_config.hlsl" +#include "nbl/builtin/hlsl/subgroup2/arithmetic_params.hlsl" + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; + + +template requires std::is_base_of_v +class CExplicitSurfaceFormatResizeSurface final : public ISimpleManagedSurface +{ +public: + using this_t = CExplicitSurfaceFormatResizeSurface; + + // Factory method so we can fail, requires a `_surface` created from a window and with a callback that inherits from `ICallback` declared just above + template requires std::is_base_of_v, Surface> + static inline core::smart_refctd_ptr create(core::smart_refctd_ptr&& _surface) + { + if (!_surface) + return nullptr; + + auto _window = _surface->getWindow(); + ICallback* cb = nullptr; + if (_window) + cb = dynamic_cast(_window->getEventCallback()); + + return core::smart_refctd_ptr(new this_t(std::move(_surface), cb), core::dont_grab); + } + + // Factory method so we can fail, requires a `_surface` created from a native surface + template requires std::is_base_of_v, Surface> + static inline core::smart_refctd_ptr create(core::smart_refctd_ptr&& _surface, ICallback* cb) + { + if (!_surface) + return nullptr; + + return core::smart_refctd_ptr(new this_t(std::move(_surface), cb), core::dont_grab); + } + + // + inline bool init(CThreadSafeQueueAdapter* queue, std::unique_ptr&& scResources, const ISwapchain::SSharedCreationParams& sharedParams = {}) + { + if (!scResources || !base_init(queue)) + return init_fail(); + + m_sharedParams = sharedParams; + if (!m_sharedParams.deduce(queue->getOriginDevice()->getPhysicalDevice(), getSurface())) + return init_fail(); + + m_swapchainResources = std::move(scResources); + return true; + } + + // Can be public because we don't need to worry about mutexes unlike the Smooth Resize class + inline ISwapchainResources* getSwapchainResources() override { return m_swapchainResources.get(); } + + // need to see if the swapchain is invalidated (e.g. 
because we're starting from 0-area old Swapchain) and try to recreate the swapchain + inline SAcquireResult acquireNextImage() + { + if (!isWindowOpen()) + { + becomeIrrecoverable(); + return {}; + } + + if (!m_swapchainResources || (m_swapchainResources->getStatus() != ISwapchainResources::STATUS::USABLE && !recreateSwapchain(m_surfaceFormat))) + return {}; + + return ISimpleManagedSurface::acquireNextImage(); + } + + // its enough to just foward though + inline bool present(const uint8_t imageIndex, const std::span waitSemaphores) + { + return ISimpleManagedSurface::present(imageIndex, waitSemaphores); + } + + // + inline bool recreateSwapchain(const ISurface::SFormat& explicitSurfaceFormat) + { + assert(m_swapchainResources); + // dont assign straight to `m_swapchainResources` because of complex refcounting and cycles + core::smart_refctd_ptr newSwapchain; + // TODO: This block of code could be rolled up into `ISimpleManagedSurface::ISwapchainResources` eventually + { + auto* surface = getSurface(); + auto device = const_cast(getAssignedQueue()->getOriginDevice()); + // 0s are invalid values, so they indicate we want them deduced + m_sharedParams.width = 0; + m_sharedParams.height = 0; + // Question: should we re-query the supported queues, formats, present modes, etc. just-in-time?? + auto* swapchain = m_swapchainResources->getSwapchain(); + if (swapchain ? swapchain->deduceRecreationParams(m_sharedParams) : m_sharedParams.deduce(device->getPhysicalDevice(), surface)) + { + // super special case, we can't re-create the swapchain but its possible to recover later on + if (m_sharedParams.width == 0 || m_sharedParams.height == 0) + { + // we need to keep the old-swapchain around, but can drop the rest + m_swapchainResources->invalidate(); + return false; + } + // now lets try to create a new swapchain + if (swapchain) + newSwapchain = swapchain->recreate(m_sharedParams); + else + { + ISwapchain::SCreationParams params = { + .surface = core::smart_refctd_ptr(surface), + .surfaceFormat = explicitSurfaceFormat, + .sharedParams = m_sharedParams + // we're not going to support concurrent sharing in this simple class + }; + m_surfaceFormat = explicitSurfaceFormat; + newSwapchain = CVulkanSwapchain::create(core::smart_refctd_ptr(device), std::move(params)); + } + } + else // parameter deduction failed + return false; + } + + if (newSwapchain) + { + m_swapchainResources->invalidate(); + return m_swapchainResources->onCreateSwapchain(getAssignedQueue()->getFamilyIndex(), std::move(newSwapchain)); + } + else + becomeIrrecoverable(); + + return false; + } + +protected: + using ISimpleManagedSurface::ISimpleManagedSurface; + + // + inline void deinit_impl() override final + { + becomeIrrecoverable(); + } + + // + inline void becomeIrrecoverable() override { m_swapchainResources = nullptr; } + + // gets called when OUT_OF_DATE upon an acquire + inline SAcquireResult handleOutOfDate() override final + { + // recreate swapchain and try to acquire again + if (recreateSwapchain(m_surfaceFormat)) + return ISimpleManagedSurface::acquireNextImage(); + return {}; + } + +private: + // Because the surface can start minimized (extent={0,0}) we might not be able to create the swapchain right away, so store creation parameters until we can create it. + ISwapchain::SSharedCreationParams m_sharedParams = {}; + // The swapchain might not be possible to create or recreate right away, so this might be + // either nullptr before the first successful acquire or the old to-be-retired swapchain. 
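// The explicit format handed to recreateSwapchain() is likewise cached below (m_surfaceFormat) so that
// handleOutOfDate() and acquireNextImage() can rebuild the swapchain with the same format later on,
// without the application having to supply it again.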
+ std::unique_ptr m_swapchainResources = {}; + + ISurface::SFormat m_surfaceFormat = {}; +}; + +// NOTE added swapchain + drawing frames to be able to profile with Nsight, which still doesn't support profiling headless compute shaders +class ArithmeticBenchApp final : public examples::SimpleWindowedApplication, public examples::BuiltinResourcesApplication +{ + using device_base_t = examples::SimpleWindowedApplication; + using asset_base_t = examples::BuiltinResourcesApplication; + + constexpr static inline uint32_t WIN_W = 1280; + constexpr static inline uint32_t WIN_H = 720; + constexpr static inline uint32_t MaxFramesInFlight = 5; + +public: + ArithmeticBenchApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : + system::IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + inline core::vector getSurfaces() const override + { + if (!m_surface) + { + { + auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); + IWindow::SCreationParams params = {}; + params.callback = core::make_smart_refctd_ptr(); + params.width = WIN_W; + params.height = WIN_H; + params.x = 32; + params.y = 32; + params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; + params.windowCaption = "ArithmeticBenchApp"; + params.callback = windowCallback; + const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); + } + + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); + const_cast&>(m_surface) = CExplicitSurfaceFormatResizeSurface::create(std::move(surface)); + } + + if (m_surface) + return { {m_surface->getSurface()/*,EQF_NONE*/} }; + + return {}; + } + + bool onAppInitialized(smart_refctd_ptr&& system) override + { + m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); + + if (!device_base_t::onAppInitialized(std::move(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + ISwapchain::SCreationParams swapchainParams = { .surface = m_surface->getSurface() }; + asset::E_FORMAT preferredFormats[] = { asset::EF_R8G8B8A8_UNORM }; + if (!swapchainParams.deduceFormat(m_physicalDevice, preferredFormats)) + return logFail("Could not choose a Surface Format for the Swapchain!"); + + swapchainParams.sharedParams.imageUsage = IGPUImage::E_USAGE_FLAGS::EUF_RENDER_ATTACHMENT_BIT | IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT; + + auto graphicsQueue = getGraphicsQueue(); + if (!m_surface || !m_surface->init(graphicsQueue, std::make_unique(), swapchainParams.sharedParams)) + return logFail("Could not create Window & Surface or initialize the Surface!"); + + auto pool = m_device->createCommandPool(graphicsQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + + for (auto i = 0u; i < MaxFramesInFlight; i++) + { + if (!pool) + return logFail("Couldn't create Command Pool!"); + if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i, 1 })) + return logFail("Couldn't create Command Buffer!"); + } + + m_winMgr->setWindowSize(m_window.get(), WIN_W, WIN_H); + m_surface->recreateSwapchain(swapchainParams.surfaceFormat); + + transferDownQueue = getTransferDownQueue(); + computeQueue = getComputeQueue(); 
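[editor's note] The MaxFramesInFlight command buffers and the timeline semaphore set up above implement the usual CPU-GPU frame pacing that workLoopBody relies on further down: submit N signals value N+1, and before reusing slot N % MaxFramesInFlight the CPU blocks until the submit from framesInFlight frames ago has signalled. A minimal standalone sketch of just that arithmetic (plain C++, no Nabla types; maxAcquiresInFlight is a made-up stand-in for what m_surface->getMaxAcquiresInFlight() would report at runtime):

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main()
{
	// stand-ins for MaxFramesInFlight and the surface's max acquires in flight
	const uint32_t maxFramesInFlight = 5;
	const uint32_t maxAcquiresInFlight = 3; // assumed value, for illustration only
	const uint32_t framesInFlight = std::min(maxFramesInFlight, maxAcquiresInFlight);

	for (uint64_t realFrameIx = 0; realFrameIx < 8; realFrameIx++)
	{
		// which command buffer slot gets reused this frame
		const uint32_t resourceIx = uint32_t(realFrameIx % maxFramesInFlight);
		if (realFrameIx >= framesInFlight)
		{
			// same expression as the blockForSemaphores wait value in workLoopBody
			const uint64_t waitValue = realFrameIx + 1 - framesInFlight;
			printf("frame %llu: reuse slot %u once the timeline semaphore reaches %llu\n",
				(unsigned long long)realFrameIx, resourceIx, (unsigned long long)waitValue);
		}
		else
			printf("frame %llu: slot %u has never been used, no wait needed\n",
				(unsigned long long)realFrameIx, resourceIx);
	}
	return 0;
}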
+ + // create 2 buffers for 2 operations + for (auto i=0u; icreateBuffer(std::move(params)); + auto mreq = outputBuffers[i]->getMemoryReqs(); + mreq.memoryTypeBits &= m_physicalDevice->getDeviceLocalMemoryTypeBits(); + assert(mreq.memoryTypeBits); + + auto bufferMem = m_device->allocate(mreq, outputBuffers[i].get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + assert(bufferMem.isValid()); + } + for (auto i = 0u; i < OutputBufferCount; i++) + pc.pOutputBuf[i] = outputBuffers[i]->getDeviceAddress(); + + // create image views for swapchain images + for (uint32_t i = 0; i < ISwapchain::MaxImages; i++) + { + IGPUImage* scImg = m_surface->getSwapchainResources()->getImage(i); + if (scImg == nullptr) + continue; + IGPUImageView::SCreationParams viewParams = { + .flags = IGPUImageView::ECF_NONE, + .subUsages = IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT, + .image = smart_refctd_ptr(scImg), + .viewType = IGPUImageView::ET_2D, + .format = scImg->getCreationParameters().format + }; + swapchainImageViews[i] = m_device->createImageView(std::move(viewParams)); + } + + // create Descriptor Sets and Pipeline Layouts + smart_refctd_ptr benchPplnLayout; + { + // set and transient pool + smart_refctd_ptr benchLayout; + { + IGPUDescriptorSetLayout::SBinding binding[1]; + binding[0] = { {},2,IDescriptor::E_TYPE::ET_STORAGE_IMAGE,IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT,IShader::E_SHADER_STAGE::ESS_COMPUTE,1u,nullptr }; + benchLayout = m_device->createDescriptorSetLayout(binding); + } + + const uint32_t setCount = ISwapchain::MaxImages; + benchPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, { &benchLayout.get(),1 }, &setCount); + for (auto i = 0u; i < ISwapchain::MaxImages; i++) + { + benchDs[i] = benchPool->createDescriptorSet(smart_refctd_ptr(benchLayout)); + if (!benchDs[i]) + return logFail("Could not create Descriptor Set!"); + } + + SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0,.size = sizeof(PushConstantData) }; + benchPplnLayout = m_device->createPipelineLayout({ &pcRange, 1 }, std::move(benchLayout)); + } + if (UseNativeArithmetic && !m_physicalDevice->getProperties().limits.shaderSubgroupArithmetic) + { + logFail("UseNativeArithmetic is true but device does not support shaderSubgroupArithmetic!"); + return false; + } + + IGPUDescriptorSet::SWriteDescriptorSet dsWrites[ISwapchain::MaxImages]; + for (auto i = 0u; i < ISwapchain::MaxImages; i++) + { + if (swapchainImageViews[i].get() == nullptr) + continue; + + video::IGPUDescriptorSet::SDescriptorInfo dsInfo; + dsInfo.info.image.imageLayout = IImage::LAYOUT::GENERAL; + dsInfo.desc = swapchainImageViews[i]; + + dsWrites[i] = + { + .dstSet = benchDs[i].get(), + .binding = 2u, + .arrayElement = 0u, + .count = 1u, + .info = &dsInfo, + }; + m_device->updateDescriptorSets(1u, &dsWrites[i], 0u, nullptr); + } + + + // load shader source from file + auto getShaderSource = [&](const char* filePath) -> auto + { + IAssetLoader::SAssetLoadParams lparams = {}; + lparams.logger = m_logger.get(); + lparams.workingDirectory = ""; + auto bundle = m_assetMgr->getAsset(filePath, lparams); + if (bundle.getContents().empty() || bundle.getAssetType()!=IAsset::ET_SHADER) + { + m_logger->log("Shader %s not found!", ILogger::ELL_ERROR, filePath); + exit(-1); + } + auto firstAssetInBundle = bundle.getContents()[0]; + return smart_refctd_ptr_static_cast(firstAssetInBundle); + }; + + // for each workgroup size (manually adjust items per invoc, 
operation else uses up a lot of ram) + const auto MaxSubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; + smart_refctd_ptr shaderSource; + if constexpr (DoWorkgroupBenchmarks) + shaderSource = getShaderSource("app_resources/benchmarkWorkgroup.comp.hlsl"); + else + shaderSource = getShaderSource("app_resources/benchmarkSubgroup.comp.hlsl"); + + for (uint32_t op = 0; op < arithmeticOperations.size(); op++) + for (uint32_t i = 0; i < workgroupSizes.size(); i++) + benchSets[op*workgroupSizes.size()+i] = createBenchmarkPipelines(shaderSource, benchPplnLayout.get(), ElementCount, arithmeticOperations[op], hlsl::findMSB(MaxSubgroupSize), workgroupSizes[i], ItemsPerInvocation, NumLoops); + + m_winMgr->show(m_window.get()); + + return true; + } + + virtual bool onAppTerminated() override + { + return true; + } + + // the unit test is carried out on init + void workLoopBody() override + { + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + + const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); + + if (m_realFrameIx >= framesInFlight) + { + const ISemaphore::SWaitInfo cbDonePending[] = + { + { + .semaphore = m_semaphore.get(), + .value = m_realFrameIx + 1 - framesInFlight + } + }; + if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) + return; + } + + m_currentImageAcquire = m_surface->acquireNextImage(); + if (!m_currentImageAcquire) + return; + + auto* const cmdbuf = m_cmdBufs.data()[resourceIx].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + const auto MaxSubgroupSize = m_physicalDevice->getLimits().maxSubgroupSize; + const auto SubgroupSizeLog2 = hlsl::findMSB(MaxSubgroupSize); + + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, benchSets[0].pipeline->getLayout(), 0u, 1u, &benchDs[m_currentImageAcquire.imageIndex].get()); + cmdbuf->pushConstants(benchSets[0].pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(PushConstantData), &pc); + + for (uint32_t i = 0; i < benchSets.size(); i++) + runBenchmark(cmdbuf, benchSets[i], ElementCount, SubgroupSizeLog2); + + // barrier transition to PRESENT + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[1]; + imageBarriers[0].barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::NONE, + .dstAccessMask = ACCESS_FLAGS::NONE + } + }; + imageBarriers[0].image = m_surface->getSwapchainResources()->getImage(m_currentImageAcquire.imageIndex); + imageBarriers[0].subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }; + imageBarriers[0].oldLayout = IImage::LAYOUT::UNDEFINED; + imageBarriers[0].newLayout = IImage::LAYOUT::PRESENT_SRC; + + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imageBarriers }); + } + + cmdbuf->end(); + + // submit + { + auto* queue = getGraphicsQueue(); + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = + { + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS + } + }; + { + { + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cmdbuf } + }; + + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = + { + { + .semaphore = m_currentImageAcquire.semaphore, + .value = 
m_currentImageAcquire.acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; + + if (queue->submit(infos) == IQueue::RESULT::SUCCESS) + { + const nbl::video::ISemaphore::SWaitInfo waitInfos[] = + { { + .semaphore = m_semaphore.get(), + .value = m_realFrameIx + } }; + + m_device->blockForSemaphores(waitInfos); // this is not solution, quick wa to not throw validation errors + } + else + --m_realFrameIx; + } + } + + m_surface->present(m_currentImageAcquire.imageIndex, rendered); + } + + numSubmits++; + } + + // + bool keepRunning() override { return numSubmits < MaxNumSubmits; } + +private: + // create pipeline (specialized every test) [TODO: turn into a future/async] + smart_refctd_ptr createPipeline(const IShader* overridenUnspecialized, const IGPUPipelineLayout* layout, const uint8_t subgroupSizeLog2) + { + auto shader = m_device->compileShader({ overridenUnspecialized }); + IGPUComputePipeline::SCreationParams params = {}; + params.layout = layout; + params.shader = { + .shader = shader.get(), + .entryPoint = "main", + .requiredSubgroupSize = static_cast(subgroupSizeLog2), + .entries = nullptr, + }; + params.cached.requireFullSubgroups = true; + core::smart_refctd_ptr pipeline; + if (!m_device->createComputePipelines(nullptr,{¶ms,1},&pipeline)) + return nullptr; + return pipeline; + } + + struct BenchmarkSet + { + smart_refctd_ptr pipeline; + uint32_t workgroupSize; + uint32_t itemsPerInvocation; + }; + + template + BenchmarkSet createBenchmarkPipelines(const smart_refctd_ptr&source, const IGPUPipelineLayout* layout, const uint32_t elementCount, const std::string& arith_name, const uint8_t subgroupSizeLog2, const uint32_t workgroupSize, uint32_t itemsPerInvoc = 1u, uint32_t numLoops = 8u) + { + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#else + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; +#endif + options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); + options.preprocessorOptions.logger = m_logger.get(); + + auto* includeFinder = compiler->getDefaultIncludeFinder(); + options.preprocessorOptions.includeFinder = includeFinder; + + const uint32_t subgroupSize = 0x1u << subgroupSizeLog2; + const uint32_t workgroupSizeLog2 = hlsl::findMSB(workgroupSize); + hlsl::workgroup2::SArithmeticConfiguration wgConfig; + wgConfig.init(workgroupSizeLog2, subgroupSizeLog2, itemsPerInvoc); + const uint32_t itemsPerWG = wgConfig.VirtualWorkgroupSize * wgConfig.ItemsPerInvocation_0; + smart_refctd_ptr overriddenUnspecialized; + if constexpr (WorkgroupBench) + { + const std::string definitions[4] = { + "workgroup2::" + arith_name, + wgConfig.getConfigTemplateStructString(), + std::to_string(numLoops), + std::to_string(arith_name=="reduction") + }; + + const IShaderCompiler::SMacroDefinition defines[5] = { + { "OPERATION", definitions[0] }, + { "WORKGROUP_CONFIG_T", definitions[1] }, + { "NUM_LOOPS", definitions[2] }, + { 
"IS_REDUCTION", definitions[3] }, + { "TEST_NATIVE", "1" } + }; + if (UseNativeArithmetic) + options.preprocessorOptions.extraDefines = { defines, defines + 5 }; + else + options.preprocessorOptions.extraDefines = { defines, defines + 4 }; + + overriddenUnspecialized = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); + } + else + { + hlsl::subgroup2::SArithmeticParams sgParams; + sgParams.init(subgroupSizeLog2, itemsPerInvoc); + + const std::string definitions[4] = { + "subgroup2::" + arith_name, + std::to_string(workgroupSize), + sgParams.getParamTemplateStructString(), + std::to_string(numLoops) + }; + + const IShaderCompiler::SMacroDefinition defines[5] = { + { "OPERATION", definitions[0] }, + { "WORKGROUP_SIZE", definitions[1] }, + { "SUBGROUP_CONFIG_T", definitions[2] }, + { "NUM_LOOPS", definitions[3] }, + { "TEST_NATIVE", "1" } + }; + if (UseNativeArithmetic) + options.preprocessorOptions.extraDefines = { defines, defines + 5 }; + else + options.preprocessorOptions.extraDefines = { defines, defines + 4 }; + + overriddenUnspecialized = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); + } + + BenchmarkSet set; + set.pipeline = createPipeline(overriddenUnspecialized.get(), layout, subgroupSizeLog2); + if constexpr (WorkgroupBench) + { + set.workgroupSize = itemsPerWG; + } + else + { + set.workgroupSize = workgroupSize; + } + set.itemsPerInvocation = itemsPerInvoc; + + return set; + }; + + template + void runBenchmark(IGPUCommandBuffer* cmdbuf, const BenchmarkSet& set, const uint32_t elementCount, const uint8_t subgroupSizeLog2) + { + uint32_t workgroupCount; + if constexpr (WorkgroupBench) + workgroupCount = elementCount / set.workgroupSize; + else + workgroupCount = elementCount / (set.workgroupSize * set.itemsPerInvocation); + + cmdbuf->bindComputePipeline(set.pipeline.get()); + cmdbuf->dispatch(workgroupCount, 1, 1); + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::buffer_barrier_t memoryBarrier[OutputBufferCount]; + for (auto i = 0u; i < OutputBufferCount; i++) + { + memoryBarrier[i] = { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + // in theory we don't need the HOST BITS cause we block on a semaphore but might as well add them + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT | PIPELINE_STAGE_FLAGS::HOST_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS | ACCESS_FLAGS::HOST_READ_BIT + } + }, + .range = {0ull,outputBuffers[i]->getSize(),outputBuffers[i]} + }; + } + IGPUCommandBuffer::SPipelineBarrierDependencyInfo info = { .memBarriers = {},.bufBarriers = memoryBarrier }; + cmdbuf->pipelineBarrier(asset::E_DEPENDENCY_FLAGS::EDF_NONE, info); + } + } + + IQueue* transferDownQueue; + IQueue* computeQueue; + + smart_refctd_ptr m_window; + smart_refctd_ptr> m_surface; + smart_refctd_ptr m_semaphore; + uint64_t m_realFrameIx = 0; + std::array, MaxFramesInFlight> m_cmdBufs; + ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + + smart_refctd_ptr m_inputSystem; + + std::array, ISwapchain::MaxImages> swapchainImageViews; + + constexpr static inline uint32_t MaxNumSubmits = 30; + uint32_t numSubmits = 0; + constexpr static inline uint32_t ElementCount = 1024 * 1024; + + /* PARAMETERS TO CHANGE FOR DIFFERENT BENCHMARKS */ + constexpr static inline bool DoWorkgroupBenchmarks = true; + constexpr static inline bool UseNativeArithmetic = true; + uint32_t ItemsPerInvocation = 4u; + constexpr 
static inline uint32_t NumLoops = 1000u; + constexpr static inline uint32_t NumBenchmarks = 6u; + std::array workgroupSizes = { 32, 64, 128, 256, 512, 1024 }; + std::array arithmeticOperations = { "reduction", "inclusive_scan", "exclusive_scan" }; + + + std::array benchSets; + smart_refctd_ptr benchPool; + std::array, ISwapchain::MaxImages> benchDs; + + constexpr static inline uint32_t OutputBufferCount = 2u; + smart_refctd_ptr outputBuffers[OutputBufferCount]; + smart_refctd_ptr gpuOutputAddressesBuffer; + PushConstantData pc; + + uint64_t timelineValue = 0; +}; + +NBL_MAIN_FUNC(ArithmeticBenchApp) \ No newline at end of file diff --git a/56_RayQuery/pipeline.groovy b/29_Arithmetic2Bench/pipeline.groovy similarity index 85% rename from 56_RayQuery/pipeline.groovy rename to 29_Arithmetic2Bench/pipeline.groovy index beba797c3..7ea9947e0 100644 --- a/56_RayQuery/pipeline.groovy +++ b/29_Arithmetic2Bench/pipeline.groovy @@ -2,9 +2,9 @@ import org.DevshGraphicsProgramming.Agent import org.DevshGraphicsProgramming.BuilderInfo import org.DevshGraphicsProgramming.IBuilder -class CRayQueryBuilder extends IBuilder +class CArithemticUnitTestBuilder extends IBuilder { - public CRayQueryBuilder(Agent _agent, _info) + public CArithemticUnitTestBuilder(Agent _agent, _info) { super(_agent, _info) } @@ -44,7 +44,7 @@ class CRayQueryBuilder extends IBuilder def create(Agent _agent, _info) { - return new CRayQueryBuilder(_agent, _info) + return new CArithemticUnitTestBuilder(_agent, _info) } return this \ No newline at end of file diff --git a/29_SpecializationConstants/CMakeLists.txt b/29_SpecializationConstants/CMakeLists.txt deleted file mode 100644 index a476b6203..000000000 --- a/29_SpecializationConstants/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ - -include(common RESULT_VARIABLE RES) -if(NOT RES) - message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") -endif() - -nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") \ No newline at end of file diff --git a/29_SpecializationConstants/main.cpp b/29_SpecializationConstants/main.cpp deleted file mode 100644 index 11b73a330..000000000 --- a/29_SpecializationConstants/main.cpp +++ /dev/null @@ -1,566 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#define _NBL_STATIC_LIB_ -#include - -#include "../common/CommonAPI.h" -using namespace nbl; -using namespace core; -using namespace ui; - -struct UBOCompute -{ - //xyz - gravity point, w - dt - core::vectorSIMDf gravPointAndDt; -}; - -class SpecializationConstantsSampleApp : public ApplicationBase -{ - constexpr static uint32_t WIN_W = 1280u; - constexpr static uint32_t WIN_H = 720u; - constexpr static uint32_t SC_IMG_COUNT = 3u; - constexpr static uint32_t FRAMES_IN_FLIGHT = 5u; - static constexpr uint64_t MAX_TIMEOUT = 99999999999999ull; - static_assert(FRAMES_IN_FLIGHT > SC_IMG_COUNT); - - core::smart_refctd_ptr window; - core::smart_refctd_ptr system; - core::smart_refctd_ptr windowCb; - core::smart_refctd_ptr api; - core::smart_refctd_ptr surface; - core::smart_refctd_ptr utils; - core::smart_refctd_ptr device; - video::IPhysicalDevice* gpu; - std::array queues; - core::smart_refctd_ptr swapchain; - core::smart_refctd_ptr renderpass; - nbl::core::smart_refctd_dynamic_array> fbo; - std::array, CommonAPI::InitOutput::MaxFramesInFlight>, CommonAPI::InitOutput::MaxQueuesCount> commandPools; - core::smart_refctd_ptr filesystem; - core::smart_refctd_ptr assetManager; - video::IGPUObjectFromAssetConverter::SParams cpu2gpuParams; - core::smart_refctd_ptr logger; - core::smart_refctd_ptr inputSystem; - video::IGPUObjectFromAssetConverter cpu2gpu; - - constexpr static uint32_t COMPUTE_SET = 0u; - constexpr static uint32_t PARTICLE_BUF_BINDING = 0u; - constexpr static uint32_t COMPUTE_DATA_UBO_BINDING = 1u; - constexpr static uint32_t WORKGROUP_SIZE = 256u; - constexpr static uint32_t PARTICLE_COUNT = 1u << 21; - constexpr static uint32_t PARTICLE_COUNT_PER_AXIS = 1u << 7; - constexpr static uint32_t POS_BUF_IX = 0u; - constexpr static uint32_t VEL_BUF_IX = 1u; - constexpr static uint32_t BUF_COUNT = 2u; - constexpr static uint32_t GRAPHICS_SET = 0u; - constexpr static uint32_t GRAPHICS_DATA_UBO_BINDING = 0u; - - std::chrono::high_resolution_clock::time_point m_lastTime; - int32_t m_resourceIx = -1; - core::smart_refctd_ptr m_cmdbuf[FRAMES_IN_FLIGHT]; - core::smart_refctd_ptr m_frameComplete[FRAMES_IN_FLIGHT] = { nullptr }; - core::smart_refctd_ptr m_imageAcquire[FRAMES_IN_FLIGHT] = { nullptr }; - core::smart_refctd_ptr m_renderFinished[FRAMES_IN_FLIGHT] = { nullptr }; - core::vectorSIMDf m_cameraPosition; - core::vectorSIMDf m_camFront; - UBOCompute m_uboComputeData; - asset::SBufferRange m_computeUBORange; - asset::SBufferRange m_graphicsUBORange; - core::smart_refctd_ptr m_gpuComputePipeline; - core::smart_refctd_ptr m_graphicsPipeline; - core::smart_refctd_ptr m_gpuds0Compute; - core::smart_refctd_ptr m_gpuds0Graphics; - asset::SBasicViewParameters m_viewParams; - core::matrix4SIMD m_viewProj; - core::smart_refctd_ptr m_gpuParticleBuf; - core::smart_refctd_ptr m_rpIndependentPipeline; - nbl::video::ISwapchain::SCreationParams m_swapchainCreationParams; - -public: - - void setWindow(core::smart_refctd_ptr&& wnd) override - { - window = std::move(wnd); - } - void setSystem(core::smart_refctd_ptr&& s) override - { - system = std::move(s); - } - nbl::ui::IWindow* getWindow() override - { - return window.get(); - } - video::IAPIConnection* getAPIConnection() override - { - return api.get(); - } - video::ILogicalDevice* getLogicalDevice() override - { - return device.get(); - } - video::IGPURenderpass* getRenderpass() override - { - return renderpass.get(); - } - void setSurface(core::smart_refctd_ptr&& s) override - { 
- surface = std::move(s); - } - void setFBOs(std::vector>& f) override - { - for (int i = 0; i < f.size(); i++) - { - fbo->begin()[i] = core::smart_refctd_ptr(f[i]); - } - } - void setSwapchain(core::smart_refctd_ptr&& s) override - { - swapchain = std::move(s); - } - uint32_t getSwapchainImageCount() override - { - return swapchain->getImageCount(); - } - virtual nbl::asset::E_FORMAT getDepthFormat() override - { - return nbl::asset::EF_UNKNOWN; - } - - APP_CONSTRUCTOR(SpecializationConstantsSampleApp); - - void onAppInitialized_impl() override - { - const auto swapchainImageUsage = static_cast(asset::IImage::EUF_COLOR_ATTACHMENT_BIT | asset::IImage::EUF_STORAGE_BIT); - const asset::E_FORMAT depthFormat = asset::EF_UNKNOWN; - CommonAPI::InitParams initParams; - initParams.window = core::smart_refctd_ptr(window); - initParams.apiType = video::EAT_VULKAN; - initParams.appName = { _NBL_APP_NAME_ }; - initParams.framesInFlight = FRAMES_IN_FLIGHT; - initParams.windowWidth = WIN_W; - initParams.windowHeight = WIN_H; - initParams.swapchainImageCount = SC_IMG_COUNT; - initParams.swapchainImageUsage = swapchainImageUsage; - initParams.depthFormat = depthFormat; - initParams.physicalDeviceFilter.minimumLimits.workgroupSizeFromSpecConstant = true; - auto initOutp = CommonAPI::InitWithDefaultExt(std::move(initParams)); - - window = std::move(initParams.window); - system = std::move(initOutp.system); - windowCb = std::move(initParams.windowCb); - api = std::move(initOutp.apiConnection); - surface = std::move(initOutp.surface); - device = std::move(initOutp.logicalDevice); - gpu = std::move(initOutp.physicalDevice); - queues = std::move(initOutp.queues); - renderpass = std::move(initOutp.renderToSwapchainRenderpass); - commandPools = std::move(initOutp.commandPools); - assetManager = std::move(initOutp.assetManager); - filesystem = std::move(initOutp.system); - cpu2gpuParams = std::move(initOutp.cpu2gpuParams); - utils = std::move(initOutp.utilities); - m_swapchainCreationParams = std::move(initOutp.swapchainCreationParams); - - CommonAPI::createSwapchain(std::move(device), m_swapchainCreationParams, WIN_W, WIN_H, swapchain); - assert(swapchain); - fbo = CommonAPI::createFBOWithSwapchainImages( - swapchain->getImageCount(), WIN_W, WIN_H, - device, swapchain, renderpass, - depthFormat - ); - - video::IGPUObjectFromAssetConverter CPU2GPU; - m_cameraPosition = core::vectorSIMDf(0, 0, -10); - matrix4SIMD proj = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(90.0f), video::ISurface::getTransformedAspectRatio(swapchain->getPreTransform(), WIN_W, WIN_H), 0.01, 100); - matrix3x4SIMD view = matrix3x4SIMD::buildCameraLookAtMatrixRH(m_cameraPosition, core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 1, 0)); - m_viewProj = matrix4SIMD::concatenateBFollowedByAPrecisely( - video::ISurface::getSurfaceTransformationMatrix(swapchain->getPreTransform()), - matrix4SIMD::concatenateBFollowedByA(proj, matrix4SIMD(view)) - ); - m_camFront = view[2]; - - // auto glslExts = device->getSupportedGLSLExtensions(); - asset::CSPIRVIntrospector introspector; - - const char* pathToCompShader = "../particles.comp"; - auto compilerSet = assetManager->getCompilerSet(); - core::smart_refctd_ptr computeUnspec = nullptr; - core::smart_refctd_ptr computeUnspecSPIRV = nullptr; - { - auto csBundle = assetManager->getAsset(pathToCompShader, {}); - auto csContents = csBundle.getContents(); - if (csContents.empty()) - assert(false); - - asset::ICPUSpecializedShader* csSpec = static_cast(csContents.begin()->get()); - 
computeUnspec = core::smart_refctd_ptr(csSpec->getUnspecialized()); - - auto compiler = compilerSet->getShaderCompiler(computeUnspec->getContentType()); - - asset::IShaderCompiler::SPreprocessorOptions preprocessOptions = {}; - preprocessOptions.sourceIdentifier = pathToCompShader; - preprocessOptions.includeFinder = compiler->getDefaultIncludeFinder(); - computeUnspec = compilerSet->preprocessShader(computeUnspec.get(), preprocessOptions); - } - - core::smart_refctd_ptr introspection = nullptr; - { - //! This example first preprocesses and then compiles the shader, although it could've been done by calling compileToSPIRV with setting compilerOptions.preprocessorOptions - asset::IShaderCompiler::SCompilerOptions compilerOptions = {}; - // compilerOptions.entryPoint = "main"; - compilerOptions.stage = computeUnspec->getStage(); - compilerOptions.debugInfoFlags = asset::IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; // should be DIF_SOURCE_BIT for introspection - compilerOptions.preprocessorOptions.sourceIdentifier = computeUnspec->getFilepathHint(); // already preprocessed but for logging it's best to fill sourceIdentifier - computeUnspecSPIRV = compilerSet->compileToSPIRV(computeUnspec.get(), compilerOptions); - - asset::CSPIRVIntrospector::SIntrospectionParams params = { "main", computeUnspecSPIRV }; - introspection = introspector.introspect(params); - } - - asset::ISpecializedShader::SInfo specInfo; - { - struct SpecConstants - { - int32_t wg_size; - int32_t particle_count; - int32_t pos_buf_ix; - int32_t vel_buf_ix; - int32_t buf_count; - }; - SpecConstants swapchain{ WORKGROUP_SIZE, PARTICLE_COUNT, POS_BUF_IX, VEL_BUF_IX, BUF_COUNT }; - - auto it_particleBufDescIntro = std::find_if(introspection->descriptorSetBindings[COMPUTE_SET].begin(), introspection->descriptorSetBindings[COMPUTE_SET].end(), - [=](auto b) { return b.binding == PARTICLE_BUF_BINDING; } - ); - assert(it_particleBufDescIntro->descCountIsSpecConstant); - const uint32_t buf_count_specID = it_particleBufDescIntro->count_specID; - auto& particleDataArrayIntro = it_particleBufDescIntro->get().members.array[0]; - assert(particleDataArrayIntro.countIsSpecConstant); - const uint32_t particle_count_specID = particleDataArrayIntro.count_specID; - - auto backbuf = asset::ICPUBuffer::create({ sizeof(swapchain) }); - memcpy(backbuf->getPointer(), &swapchain, sizeof(swapchain)); - auto entries = core::make_refctd_dynamic_array>(5u); - (*entries)[0] = { 0u,offsetof(SpecConstants,wg_size),sizeof(int32_t) };//currently local_size_{x|y|z}_id is not queryable via introspection API - (*entries)[1] = { particle_count_specID,offsetof(SpecConstants,particle_count),sizeof(int32_t) }; - (*entries)[2] = { 2u,offsetof(SpecConstants,pos_buf_ix),sizeof(int32_t) }; - (*entries)[3] = { 3u,offsetof(SpecConstants,vel_buf_ix),sizeof(int32_t) }; - (*entries)[4] = { buf_count_specID,offsetof(SpecConstants,buf_count),sizeof(int32_t) }; - - specInfo = asset::ISpecializedShader::SInfo(std::move(entries), std::move(backbuf), "main"); - } - - auto compute = core::make_smart_refctd_ptr(std::move(computeUnspecSPIRV), std::move(specInfo)); - - auto computePipeline = introspector.createApproximateComputePipelineFromIntrospection(compute.get()); - auto computeLayout = core::make_smart_refctd_ptr(nullptr, nullptr, core::smart_refctd_ptr(computePipeline->getLayout()->getDescriptorSetLayout(0))); - computePipeline->setLayout(core::smart_refctd_ptr(computeLayout)); - - // These conversions don't require command buffers - m_gpuComputePipeline = 
CPU2GPU.getGPUObjectsFromAssets(&computePipeline.get(), &computePipeline.get() + 1, cpu2gpuParams)->front(); - auto* ds0layoutCompute = computeLayout->getDescriptorSetLayout(0); - core::smart_refctd_ptr gpuDs0layoutCompute = CPU2GPU.getGPUObjectsFromAssets(&ds0layoutCompute, &ds0layoutCompute + 1, cpu2gpuParams)->front(); - - core::vector particlePosAndVel; - particlePosAndVel.reserve(PARTICLE_COUNT * 2); - for (int32_t i = 0; i < PARTICLE_COUNT_PER_AXIS; ++i) - for (int32_t j = 0; j < PARTICLE_COUNT_PER_AXIS; ++j) - for (int32_t k = 0; k < PARTICLE_COUNT_PER_AXIS; ++k) - particlePosAndVel.push_back(core::vector3df_SIMD(i, j, k) * 0.5f); - - for (int32_t i = 0; i < PARTICLE_COUNT; ++i) - particlePosAndVel.push_back(core::vector3df_SIMD(0.0f)); - - constexpr size_t BUF_SZ = 4ull * sizeof(float) * PARTICLE_COUNT; - video::IGPUBuffer::SCreationParams bufferCreationParams = {}; - bufferCreationParams.usage = static_cast(asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_STORAGE_BUFFER_BIT | asset::IBuffer::EUF_VERTEX_BUFFER_BIT); - bufferCreationParams.size = 2ull * BUF_SZ; - m_gpuParticleBuf = device->createBuffer(std::move(bufferCreationParams)); - m_gpuParticleBuf->setObjectDebugName("m_gpuParticleBuf"); - auto particleBufMemReqs = m_gpuParticleBuf->getMemoryReqs(); - particleBufMemReqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - device->allocate(particleBufMemReqs, m_gpuParticleBuf.get()); - asset::SBufferRange range; - range.buffer = m_gpuParticleBuf; - range.offset = 0ull; - range.size = BUF_SZ * 2ull; - utils->updateBufferRangeViaStagingBufferAutoSubmit(range, particlePosAndVel.data(), queues[CommonAPI::InitOutput::EQT_GRAPHICS]); - particlePosAndVel.clear(); - - video::IGPUBuffer::SCreationParams uboComputeCreationParams = {}; - uboComputeCreationParams.usage = static_cast(asset::IBuffer::EUF_UNIFORM_BUFFER_BIT | asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF); - uboComputeCreationParams.size = core::roundUp(sizeof(UBOCompute), 64ull); - auto gpuUboCompute = device->createBuffer(std::move(uboComputeCreationParams)); - auto gpuUboComputeMemReqs = gpuUboCompute->getMemoryReqs(); - gpuUboComputeMemReqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - device->allocate(gpuUboComputeMemReqs, gpuUboCompute.get()); - - asset::SBufferBinding vtxBindings[video::IGPUMeshBuffer::MAX_ATTR_BUF_BINDING_COUNT]; - vtxBindings[0].buffer = m_gpuParticleBuf; - vtxBindings[0].offset = 0u; - //auto meshbuffer = core::make_smart_refctd_ptr(nullptr, nullptr, vtxBindings, asset::SBufferBinding{}); - //meshbuffer->setIndexCount(PARTICLE_COUNT); - //meshbuffer->setIndexType(asset::EIT_UNKNOWN); - - - auto createSpecShader = [&](const char* filepath, asset::IShader::E_SHADER_STAGE stage) - { - auto shaderBundle = assetManager->getAsset(filepath, {}); - auto shaderContents = shaderBundle.getContents(); - if (shaderContents.empty()) - assert(false); - - auto specializedShader = static_cast(shaderContents.begin()->get()); - auto unspecShader = specializedShader->getUnspecialized(); - - auto compiler = compilerSet->getShaderCompiler(computeUnspec->getContentType()); - asset::IShaderCompiler::SCompilerOptions compilerOptions = {}; - // compilerOptions.entryPoint = specializedShader->getSpecializationInfo().entryPoint; - compilerOptions.stage = unspecShader->getStage(); - compilerOptions.debugInfoFlags = asset::IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; - 
compilerOptions.preprocessorOptions.sourceIdentifier = unspecShader->getFilepathHint(); // already preprocessed but for logging it's best to fill sourceIdentifier - compilerOptions.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); - auto unspecSPIRV = compilerSet->compileToSPIRV(unspecShader, compilerOptions); - - return core::make_smart_refctd_ptr(std::move(unspecSPIRV), asset::ISpecializedShader::SInfo(specializedShader->getSpecializationInfo())); - }; - auto vs = createSpecShader("../particles.vert", asset::IShader::ESS_VERTEX); - auto fs = createSpecShader("../particles.frag", asset::IShader::ESS_FRAGMENT); - - asset::ICPUSpecializedShader* shaders[2] = { vs.get(),fs.get() }; - auto pipeline = introspector.createApproximateRenderpassIndependentPipelineFromIntrospection({ shaders, shaders + 2 }); - { - auto& vtxParams = pipeline->getVertexInputParams(); - vtxParams.attributes[0].binding = 0u; - vtxParams.attributes[0].format = asset::EF_R32G32B32_SFLOAT; - vtxParams.attributes[0].relativeOffset = 0u; - vtxParams.bindings[0].inputRate = asset::EVIR_PER_VERTEX; - vtxParams.bindings[0].stride = 4u * sizeof(float); - - pipeline->getPrimitiveAssemblyParams().primitiveType = asset::EPT_POINT_LIST; - - auto& blendParams = pipeline->getBlendParams(); - blendParams.logicOpEnable = false; - blendParams.logicOp = nbl::asset::ELO_NO_OP; - } - auto gfxLayout = core::make_smart_refctd_ptr(nullptr, nullptr, core::smart_refctd_ptr(pipeline->getLayout()->getDescriptorSetLayout(0))); - pipeline->setLayout(core::smart_refctd_ptr(gfxLayout)); - - m_rpIndependentPipeline = CPU2GPU.getGPUObjectsFromAssets(&pipeline.get(), &pipeline.get() + 1, cpu2gpuParams)->front(); - auto* ds0layoutGraphics = gfxLayout->getDescriptorSetLayout(0); - core::smart_refctd_ptr gpuDs0layoutGraphics = CPU2GPU.getGPUObjectsFromAssets(&ds0layoutGraphics, &ds0layoutGraphics + 1, cpu2gpuParams)->front(); - - video::IGPUDescriptorSetLayout* gpuDSLayouts_raw[2] = { gpuDs0layoutCompute.get(), gpuDs0layoutGraphics.get() }; - const uint32_t setCount[2] = { 1u, 1u }; - auto dscPool = device->createDescriptorPoolForDSLayouts(video::IDescriptorPool::ECF_NONE, gpuDSLayouts_raw, gpuDSLayouts_raw + 2ull, setCount); - - m_gpuds0Compute = dscPool->createDescriptorSet(std::move(gpuDs0layoutCompute)); - { - video::IGPUDescriptorSet::SDescriptorInfo i[3]; - video::IGPUDescriptorSet::SWriteDescriptorSet w[2]; - w[0].arrayElement = 0u; - w[0].binding = PARTICLE_BUF_BINDING; - w[0].count = BUF_COUNT; - w[0].descriptorType = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER; - w[0].dstSet = m_gpuds0Compute.get(); - w[0].info = i; - w[1].arrayElement = 0u; - w[1].binding = COMPUTE_DATA_UBO_BINDING; - w[1].count = 1u; - w[1].descriptorType = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER; - w[1].dstSet = m_gpuds0Compute.get(); - w[1].info = i + 2u; - i[0].desc = m_gpuParticleBuf; - i[0].info.buffer.offset = 0ull; - i[0].info.buffer.size = BUF_SZ; - i[1].desc = m_gpuParticleBuf; - i[1].info.buffer.offset = BUF_SZ; - i[1].info.buffer.size = BUF_SZ; - i[2].desc = gpuUboCompute; - i[2].info.buffer.offset = 0ull; - i[2].info.buffer.size = gpuUboCompute->getSize(); - - device->updateDescriptorSets(2u, w, 0u, nullptr); - } - - - m_gpuds0Graphics = dscPool->createDescriptorSet(std::move(gpuDs0layoutGraphics)); - - video::IGPUGraphicsPipeline::SCreationParams gp_params; - gp_params.rasterizationSamples = asset::IImage::ESCF_1_BIT; - gp_params.renderpass = core::smart_refctd_ptr(renderpass); - gp_params.renderpassIndependent = 
core::smart_refctd_ptr(m_rpIndependentPipeline); - gp_params.subpassIx = 0u; - - m_graphicsPipeline = device->createGraphicsPipeline(nullptr, std::move(gp_params)); - - video::IGPUBuffer::SCreationParams gfxUboCreationParams = {}; - gfxUboCreationParams.usage = static_cast(asset::IBuffer::EUF_UNIFORM_BUFFER_BIT | asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF); - gfxUboCreationParams.size = sizeof(m_viewParams); - auto gpuUboGraphics = device->createBuffer(std::move(gfxUboCreationParams)); - auto gpuUboGraphicsMemReqs = gpuUboGraphics->getMemoryReqs(); - gpuUboGraphicsMemReqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - - device->allocate(gpuUboGraphicsMemReqs, gpuUboGraphics.get()); - { - video::IGPUDescriptorSet::SWriteDescriptorSet w; - video::IGPUDescriptorSet::SDescriptorInfo i; - w.arrayElement = 0u; - w.binding = GRAPHICS_DATA_UBO_BINDING; - w.count = 1u; - w.descriptorType = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER; - w.dstSet = m_gpuds0Graphics.get(); - w.info = &i; - i.desc = gpuUboGraphics; - i.info.buffer.offset = 0u; - i.info.buffer.size = gpuUboGraphics->getSize(); // gpuUboGraphics->getSize(); - - device->updateDescriptorSets(1u, &w, 0u, nullptr); - } - - m_lastTime = std::chrono::high_resolution_clock::now(); - constexpr uint32_t FRAME_COUNT = 500000u; - constexpr uint64_t MAX_TIMEOUT = 99999999999999ull; - m_computeUBORange = { 0, gpuUboCompute->getSize(), gpuUboCompute }; - m_graphicsUBORange = { 0, gpuUboGraphics->getSize(), gpuUboGraphics }; - - const auto& graphicsCommandPools = commandPools[CommonAPI::InitOutput::EQT_GRAPHICS]; - for (uint32_t i = 0u; i < FRAMES_IN_FLIGHT; i++) - { - device->createCommandBuffers(graphicsCommandPools[i].get(), video::IGPUCommandBuffer::EL_PRIMARY, 1, m_cmdbuf+i); - m_imageAcquire[i] = device->createSemaphore(); - m_renderFinished[i] = device->createSemaphore(); - } - } - - void onAppTerminated_impl() override - { - device->waitIdle(); - } - - void workLoopBody() override - { - m_resourceIx++; - if (m_resourceIx >= FRAMES_IN_FLIGHT) - m_resourceIx = 0; - - auto& cb = m_cmdbuf[m_resourceIx]; - auto& fence = m_frameComplete[m_resourceIx]; - if (fence) - { - auto retval = device->waitForFences(1u, &fence.get(), false, MAX_TIMEOUT); - assert(retval == video::IGPUFence::ES_TIMEOUT || retval == video::IGPUFence::ES_SUCCESS); - device->resetFences(1u, &fence.get()); - } - else - { - fence = device->createFence(static_cast(0)); - } - - // safe to proceed - cb->begin(video::IGPUCommandBuffer::EU_ONE_TIME_SUBMIT_BIT); // TODO: Reset Frame's CommandPool - - { - auto time = std::chrono::high_resolution_clock::now(); - core::vector3df_SIMD gravPoint = m_cameraPosition + m_camFront * 250.f; - m_uboComputeData.gravPointAndDt = gravPoint; - m_uboComputeData.gravPointAndDt.w = std::chrono::duration_cast(time - m_lastTime).count() * 1e-4; - - m_lastTime = time; - cb->updateBuffer(m_computeUBORange.buffer.get(), m_computeUBORange.offset, m_computeUBORange.size, &m_uboComputeData); - } - cb->bindComputePipeline(m_gpuComputePipeline.get()); - cb->bindDescriptorSets(asset::EPBP_COMPUTE, - m_gpuComputePipeline->getLayout(), - COMPUTE_SET, - 1u, - &m_gpuds0Compute.get(), - 0u); - cb->dispatch(PARTICLE_COUNT / WORKGROUP_SIZE, 1u, 1u); - - asset::SMemoryBarrier memBarrier; - memBarrier.srcAccessMask = asset::EAF_SHADER_WRITE_BIT; - memBarrier.dstAccessMask = asset::EAF_VERTEX_ATTRIBUTE_READ_BIT; - cb->pipelineBarrier( - asset::EPSF_COMPUTE_SHADER_BIT, - asset::EPSF_VERTEX_INPUT_BIT, - 
static_cast(0u), - 1, &memBarrier, - 0, nullptr, - 0, nullptr); - - { - memcpy(m_viewParams.MVP, &m_viewProj, sizeof(m_viewProj)); - cb->updateBuffer(m_graphicsUBORange.buffer.get(), m_graphicsUBORange.offset, m_graphicsUBORange.size, &m_viewParams); - } - { - asset::SViewport vp; - vp.minDepth = 1.f; - vp.maxDepth = 0.f; - vp.x = 0u; - vp.y = 0u; - vp.width = WIN_W; - vp.height = WIN_H; - cb->setViewport(0u, 1u, &vp); - - VkRect2D scissor; - scissor.offset = { 0, 0 }; - scissor.extent = { WIN_W, WIN_H }; - cb->setScissor(0u, 1u, &scissor); - } - // renderpass - uint32_t imgnum = 0u; - swapchain->acquireNextImage(MAX_TIMEOUT, m_imageAcquire[m_resourceIx].get(), nullptr, &imgnum); - { - video::IGPUCommandBuffer::SRenderpassBeginInfo info; - asset::SClearValue clear; - clear.color.float32[0] = 0.f; - clear.color.float32[1] = 0.f; - clear.color.float32[2] = 0.f; - clear.color.float32[3] = 1.f; - info.renderpass = renderpass; - info.framebuffer = fbo->begin()[imgnum]; - info.clearValueCount = 1u; - info.clearValues = &clear; - info.renderArea.offset = { 0, 0 }; - info.renderArea.extent = { WIN_W, WIN_H }; - cb->beginRenderPass(&info, asset::ESC_INLINE); - } - // individual draw - { - cb->bindGraphicsPipeline(m_graphicsPipeline.get()); - size_t vbOffset = 0; - cb->bindVertexBuffers(0, 1, &m_gpuParticleBuf.get(), &vbOffset); - cb->bindDescriptorSets(asset::EPBP_GRAPHICS, m_rpIndependentPipeline->getLayout(), GRAPHICS_SET, 1u, &m_gpuds0Graphics.get(), 0u); - cb->draw(PARTICLE_COUNT, 1, 0, 0); - } - cb->endRenderPass(); - cb->end(); - - CommonAPI::Submit( - device.get(), - cb.get(), - queues[CommonAPI::InitOutput::EQT_GRAPHICS], - m_imageAcquire[m_resourceIx].get(), - m_renderFinished[m_resourceIx].get(), - fence.get()); - - CommonAPI::Present( - device.get(), - swapchain.get(), - queues[CommonAPI::InitOutput::EQT_GRAPHICS], - m_renderFinished[m_resourceIx].get(), - imgnum); - } - - bool keepRunning() override - { - return windowCb->isWindowOpen(); - } -}; - -NBL_COMMON_API_MAIN(SpecializationConstantsSampleApp) - -extern "C" { _declspec(dllexport) DWORD NvOptimusEnablement = 0x00000001; } \ No newline at end of file diff --git a/29_SpecializationConstants/particles.comp b/29_SpecializationConstants/particles.comp deleted file mode 100644 index 5889af74c..000000000 --- a/29_SpecializationConstants/particles.comp +++ /dev/null @@ -1,39 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#version 430 core - -layout (constant_id = 1) const int PARTICLE_COUNT = 256; -layout (constant_id = 2) const int POS_BUF_IX = 0; -layout (constant_id = 3) const int VEL_BUF_IX = 1; -layout (constant_id = 4) const int BUF_COUNT = 2; - -layout (local_size_x_id = 0) in; - -layout (set = 0, binding = 0, std430) restrict buffer PARTICLE_DATA -{ - vec3 p[PARTICLE_COUNT]; -} data[BUF_COUNT]; -layout (set = 0, binding = 1, std140) uniform UBO -{ - vec3 gravP; - float dt; -} ubo; - -void main() -{ - uint GID = gl_GlobalInvocationID.x; - - vec3 p = data[POS_BUF_IX].p[GID]; - vec3 v = data[VEL_BUF_IX].p[GID]; - - v *= 1.0 - 0.99*ubo.dt; - float d = distance(ubo.gravP,p); - float a = 10000.0 / max(1.0, 0.01*pow(d,1.5)); - v += (ubo.gravP-p)/d * a * ubo.dt; - p += v*ubo.dt; - - data[POS_BUF_IX].p[GID] = p; - data[VEL_BUF_IX].p[GID] = v; -} \ No newline at end of file diff --git a/29_SpecializationConstants/particles.frag b/29_SpecializationConstants/particles.frag deleted file mode 100644 index c03ba9afc..000000000 --- a/29_SpecializationConstants/particles.frag +++ /dev/null @@ -1,12 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#version 430 core - -layout (location = 0) out vec4 Color; - -void main() -{ - Color = vec4(1.0); -} \ No newline at end of file diff --git a/29_SpecializationConstants/particles.vert b/29_SpecializationConstants/particles.vert deleted file mode 100644 index f87486cac..000000000 --- a/29_SpecializationConstants/particles.vert +++ /dev/null @@ -1,21 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#version 430 core - -layout (location = 0) in vec3 vPos; - -#include -#include - -layout (set = 0, binding = 0, row_major, std140) uniform UBO -{ - nbl_glsl_SBasicViewParameters params; -} CamData; - -void main() -{ - gl_PointSize = 1; - gl_Position = nbl_glsl_pseudoMul4x4with3x1(CamData.params.MVP, vPos); -} \ No newline at end of file diff --git a/29_SpecializationConstants/pipeline.groovy b/29_SpecializationConstants/pipeline.groovy deleted file mode 100644 index d61a3c808..000000000 --- a/29_SpecializationConstants/pipeline.groovy +++ /dev/null @@ -1,50 +0,0 @@ -import org.DevshGraphicsProgramming.Agent -import org.DevshGraphicsProgramming.BuilderInfo -import org.DevshGraphicsProgramming.IBuilder - -class CSpecializationConstantsBuilder extends IBuilder -{ - public CSpecializationConstantsBuilder(Agent _agent, _info) - { - super(_agent, _info) - } - - @Override - public boolean prepare(Map axisMapping) - { - return true - } - - @Override - public boolean build(Map axisMapping) - { - IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") - IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") - - def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) - def nameOfConfig = getNameOfConfig(config) - - agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") - - return true - } - - @Override - public boolean test(Map axisMapping) - { - return true - } - - @Override - public boolean install(Map axisMapping) - { - return true - } -} - -def create(Agent _agent, _info) -{ - return new CSpecializationConstantsBuilder(_agent, _info) -} - -return this \ No newline at end of file diff --git a/30_ComputeShaderPathTracer/app_resources/common.glsl b/30_ComputeShaderPathTracer/app_resources/common.glsl index 2463f82cf..65ed0609e 100644 --- a/30_ComputeShaderPathTracer/app_resources/common.glsl +++ b/30_ComputeShaderPathTracer/app_resources/common.glsl @@ -352,9 +352,9 @@ struct Payload_t vec3 accumulation; float otherTechniqueHeuristic; vec3 throughput; - #ifdef KILL_DIFFUSE_SPECULAR_PATHS +#ifdef KILL_DIFFUSE_SPECULAR_PATHS bool hasDiffuse; - #endif +#endif }; struct Ray_t @@ -491,6 +491,7 @@ layout (constant_id = 1) const int MAX_SAMPLES_LOG2 = 10; #include +// TODO: use PCG hash + XOROSHIRO and don't read any textures mat2x3 rand3d(in uint protoDimension, in uint _sample, inout nbl_glsl_xoroshiro64star_state_t scramble_state) { mat2x3 retval; @@ -552,6 +553,7 @@ nbl_glsl_LightSample nbl_glsl_light_generate_and_remainder_and_pdf(out vec3 rema } uint getBSDFLightIDAndDetermineNormal(out vec3 normal, in uint objectID, in vec3 intersection); +// returns whether to stop tracing bool closestHitProgram(in uint depth, in uint _sample, inout Ray_t ray, inout nbl_glsl_xoroshiro64star_state_t scramble_state) { const MutableRay_t _mutable = ray._mutable; @@ -594,7 +596,7 @@ bool closestHitProgram(in uint depth, in uint _sample, inout Ray_t ray, inout nb if (BSDFNode_isNotDiffuse(bsdf)) { if (ray._payload.hasDiffuse) - return true; + return false; } else ray._payload.hasDiffuse = true; @@ -602,7 +604,7 @@ bool closestHitProgram(in uint depth, in uint _sample, inout Ray_t ray, inout nb const bool isBSDF = BSDFNode_isBSDF(bsdf); //rand - mat2x3 epsilon = rand3d(depth,_sample,scramble_state); + mat2x3 epsilon = rand3d(depth*2,_sample,scramble_state); // thresholds const float 
bsdfPdfThreshold = 0.0001; @@ -611,47 +613,55 @@ bool closestHitProgram(in uint depth, in uint _sample, inout Ray_t ray, inout nb const float monochromeEta = dot(throughputCIE_Y,BSDFNode_getEta(bsdf)[0])/(throughputCIE_Y.r+throughputCIE_Y.g+throughputCIE_Y.b); // do NEE - const float neeProbability = 1.0;// BSDFNode_getNEEProb(bsdf); +#ifndef NEE_ONLY + // to turn off NEE, set this to 0 + const float neeProbability = BSDFNode_getNEEProb(bsdf); float rcpChoiceProb; - if (!nbl_glsl_partitionRandVariable(neeProbability,epsilon[0].z,rcpChoiceProb) && depth<2u) + if (!nbl_glsl_partitionRandVariable(neeProbability,epsilon[0].z,rcpChoiceProb)) { +#endif vec3 neeContrib; float lightPdf, t; nbl_glsl_LightSample nee_sample = nbl_glsl_light_generate_and_remainder_and_pdf( neeContrib, lightPdf, t, intersection, interaction, isBSDF, epsilon[0], depth ); - // We don't allow non watertight transmitters in this renderer + // We don't allow non watertight transmitters in this renderer & scene, one cannot reach a light from the backface (optimization) bool validPath = nee_sample.NdotL>nbl_glsl_FLT_MIN; // but if we allowed non-watertight transmitters (single water surface), it would make sense just to apply this line by itself nbl_glsl_AnisotropicMicrofacetCache _cache; validPath = validPath && nbl_glsl_calcAnisotropicMicrofacetCache(_cache, interaction, nee_sample, monochromeEta); + // infinite PDF would mean a point light or a thin line, but our lights have finite radiance per steradian (area lights) if (lightPdflumaContributionThreshold && traceRay(t,intersection+nee_sample.L*t*getStartTolerance(depth),nee_sample.L)==-1) - ray._payload.accumulation += neeContrib; - }} + if (bsdfPdflumaContributionThreshold && traceRay(t,intersection+nee_sample.L*t*getStartTolerance(depth),nee_sample.L)==-1) + ray._payload.accumulation += neeContrib; + } + } +#ifndef NEE_ONLY } -#if NEE_ONLY - return false; -#endif + // sample BSDF float bsdfPdf; vec3 bsdfSampleL; { @@ -680,6 +690,7 @@ bool closestHitProgram(in uint depth, in uint _sample, inout Ray_t ray, inout nb #endif return true; } +#endif } return false; } @@ -748,15 +759,15 @@ void main() ray._payload.accumulation = vec3(0.0); ray._payload.otherTechniqueHeuristic = 0.0; // needed for direct eye-light paths ray._payload.throughput = vec3(1.0); - #ifdef KILL_DIFFUSE_SPECULAR_PATHS +#ifdef KILL_DIFFUSE_SPECULAR_PATHS ray._payload.hasDiffuse = false; - #endif +#endif } // bounces { bool hit = true; bool rayAlive = true; - for (int d=1; d<=PTPushConstant.depth && hit && rayAlive; d+=2) + for (int d=1; d<=PTPushConstant.depth && hit && rayAlive; d++) { ray._mutable.intersectionT = nbl_glsl_FLT_MAX; ray._mutable.objectID = traceRay(ray._mutable.intersectionT,ray._immutable.origin,ray._immutable.direction); diff --git a/30_ComputeShaderPathTracer/include/nbl/this_example/common.hpp b/30_ComputeShaderPathTracer/include/nbl/this_example/common.hpp index ff3dd8095..3745ca512 100644 --- a/30_ComputeShaderPathTracer/include/nbl/this_example/common.hpp +++ b/30_ComputeShaderPathTracer/include/nbl/this_example/common.hpp @@ -1,17 +1,11 @@ -#ifndef __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ -#define __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ +#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ -#include - -// common api -#include "CCamera.hpp" -#include "SimpleWindowedApplication.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" -#include "CEventCallback.hpp" +#include "nbl/examples/examples.hpp" // 
example's own headers -#include "nbl/ui/ICursorControl.h" +#include "nbl/ui/ICursorControl.h" // TODO: why not in nabla.h ? #include "nbl/ext/ImGui/ImGui.h" #include "imgui/imgui_internal.h" -#endif // __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ \ No newline at end of file +#endif // _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ \ No newline at end of file diff --git a/30_ComputeShaderPathTracer/main.cpp b/30_ComputeShaderPathTracer/main.cpp index 26d673002..54bc64495 100644 --- a/30_ComputeShaderPathTracer/main.cpp +++ b/30_ComputeShaderPathTracer/main.cpp @@ -2,31 +2,37 @@ // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h -#include "nbl/this_example/common.hpp" -#include "nbl/asset/interchange/IImageAssetHandlerBase.h" + +#include "nbl/examples/examples.hpp" + #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" #include "nbl/builtin/hlsl/surface_transform.h" -using namespace nbl; -using namespace core; -using namespace hlsl; -using namespace system; -using namespace asset; -using namespace ui; -using namespace video; +#include "nbl/this_example/common.hpp" + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; + +// TODO: share push constants struct PTPushConstant { matrix4SIMD invMVP; int sampleCount; int depth; }; -// TODO: Add a QueryPool for timestamping once its ready +// TODO: Add a QueryPool for timestamping once its ready (actually add IMGUI mspf plotter) // TODO: Do buffer creation using assConv -class ComputeShaderPathtracer final : public examples::SimpleWindowedApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication +class ComputeShaderPathtracer final : public SimpleWindowedApplication, public BuiltinResourcesApplication { - using device_base_t = examples::SimpleWindowedApplication; - using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; using clock_t = std::chrono::steady_clock; enum E_LIGHT_GEOMETRY : uint8_t @@ -313,12 +319,11 @@ class ComputeShaderPathtracer final : public examples::SimpleWindowedApplication std::exit(-1); } - auto source = IAsset::castDown(assets[0]); + auto source = IAsset::castDown(assets[0]); // The down-cast should not fail! 
assert(source); - // this time we skip the use of the asset converter since the ICPUShader->IGPUShader path is quick and simple - auto shader = m_device->createShader(source.get()); + auto shader = m_device->compileShader({ .source = source.get(), .stage = ESS_COMPUTE }); if (!shader) { m_logger->log("Shader creationed failed: %s!", ILogger::ELL_ERROR, pathToShader); @@ -353,8 +358,8 @@ class ComputeShaderPathtracer final : public examples::SimpleWindowedApplication params.shader.shader = ptShader.get(); params.shader.entryPoint = "main"; params.shader.entries = nullptr; - params.shader.requireFullSubgroups = true; - params.shader.requiredSubgroupSize = static_cast(5); + params.cached.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTPipelines.data() + index)) { return logFail("Failed to create compute pipeline!\n"); } @@ -373,9 +378,9 @@ class ComputeShaderPathtracer final : public examples::SimpleWindowedApplication if (!fragmentShader) return logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); - const IGPUShader::SSpecInfo fragSpec = { + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = fragmentShader.get(), .entryPoint = "main", - .shader = fragmentShader.get() }; auto presentLayout = m_device->createPipelineLayout( @@ -533,6 +538,9 @@ class ComputeShaderPathtracer final : public examples::SimpleWindowedApplication region.imageExtent = scrambleMapCPU->getCreationParameters().extent; scrambleMapCPU->setBufferAndRegions(std::move(texelBuffer), regions); + + // programmatically user-created IPreHashed need to have their hash computed (loaders do it while loading) + scrambleMapCPU->setContentHash(scrambleMapCPU->computeContentHash()); } std::array cpuImgs = { envMapCPU.get(), scrambleMapCPU.get()}; @@ -859,7 +867,7 @@ class ComputeShaderPathtracer final : public examples::SimpleWindowedApplication ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f); ImGui::ListBox("Shader", &PTPipline, shaderNames, E_LIGHT_GEOMETRY::ELG_COUNT); ImGui::SliderInt("SPP", &spp, 1, MaxBufferSamples); - ImGui::SliderInt("Depth", &depth, 1, MaxBufferDimensions / 3); + ImGui::SliderInt("Depth", &depth, 1, MaxBufferDimensions / 6); ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); diff --git a/31_HLSLPathTracer/CMakeLists.txt b/31_HLSLPathTracer/CMakeLists.txt new file mode 100644 index 000000000..07b0fd396 --- /dev/null +++ b/31_HLSLPathTracer/CMakeLists.txt @@ -0,0 +1,37 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. 
Should be in {repo_root}/cmake directory") +endif() + +if(NBL_BUILD_IMGUI) + set(NBL_INCLUDE_SERACH_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/include" + ) + + list(APPEND NBL_LIBRARIES + imtestengine + "${NBL_EXT_IMGUI_UI_LIB}" + ) + + nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + + if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) + endif() +endif() + + diff --git a/42_FragmentShaderPathTracer/common.glsl b/31_HLSLPathTracer/app_resources/glsl/common.glsl similarity index 93% rename from 42_FragmentShaderPathTracer/common.glsl rename to 31_HLSLPathTracer/app_resources/glsl/common.glsl index 20f7a7359..6b6e96710 100644 --- a/42_FragmentShaderPathTracer/common.glsl +++ b/31_HLSLPathTracer/app_resources/glsl/common.glsl @@ -2,27 +2,27 @@ // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h -// basic settings -#define MAX_DEPTH 3 -#define SAMPLES 128 - // firefly and variance reduction techniques //#define KILL_DIFFUSE_SPECULAR_PATHS //#define VISUALIZE_HIGH_VARIANCE -layout(set = 2, binding = 0) uniform sampler2D envMap; +// debug +//#define NEE_ONLY + +layout(set = 2, binding = 0) uniform sampler2D envMap; layout(set = 2, binding = 1) uniform usamplerBuffer sampleSequence; layout(set = 2, binding = 2) uniform usampler2D scramblebuf; layout(set=0, binding=0, rgba16f) uniform image2D outImage; #ifndef _NBL_GLSL_WORKGROUP_SIZE_ -#define _NBL_GLSL_WORKGROUP_SIZE_ 16 -layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=_NBL_GLSL_WORKGROUP_SIZE_, local_size_z=1) in; +#define _NBL_GLSL_WORKGROUP_SIZE_ 512 +layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in; #endif ivec2 getCoordinates() { - return ivec2(gl_GlobalInvocationID.xy); + ivec2 imageSize = imageSize(outImage); + return ivec2(gl_GlobalInvocationID.x % imageSize.x, gl_GlobalInvocationID.x / imageSize.x); } vec2 getTexCoords() { @@ -35,15 +35,18 @@ vec2 getTexCoords() { #include #include #include -#include +#ifdef PERSISTENT_WORKGROUPS +#include +#endif #include -layout(set = 1, binding = 0, row_major, std140) uniform UBO +layout(push_constant, row_major) uniform constants { - nbl_glsl_SBasicViewParameters params; -} cameraData; - + mat4 invMVP; + int sampleCount; + int depth; +} PTPushConstant; #define INVALID_ID_16BIT 0xffffu struct Sphere @@ -51,7 +54,7 @@ struct Sphere vec3 position; float radius2; uint bsdfLightIDs; -}; +}; Sphere Sphere_Sphere(in vec3 position, in float radius, in uint bsdfID, in uint lightID) { @@ -188,7 +191,7 @@ void Rectangle_getNormalBasis(in Rectangle rect, out mat3 basis, out vec2 extent basis[0] = rect.edge0/extents[0]; basis[1] = rect.edge1/extents[1]; basis[2] = normalize(cross(basis[0],basis[1])); -} +} // return intersection distance if found, nbl_glsl_FLT_NAN otherwise float Rectangle_intersect(in Rectangle rect, in vec3 origin, in vec3 direction) @@ -222,7 +225,7 @@ vec3 Rectangle_getNormalTimesArea(in Rectangle rect) #define OP_BITS_OFFSET 0 #define OP_BITS_SIZE 2 struct BSDFNode -{ +{ uvec4 data[2]; }; @@ -386,13 +389,13 @@ vec2 SampleSphericalMap(vec3 v) { vec2 uv = vec2(atan(v.z, v.x), asin(v.y)); uv *= nbl_glsl_RECIPROCAL_PI*0.5; - uv += 0.5; + uv += 0.5; return uv; } void missProgram(in ImmutableRay_t _immutable, inout Payload_t _payload) { - vec3 finalContribution = _payload.throughput; + vec3 finalContribution = _payload.throughput; // #define USE_ENVMAP #ifdef USE_ENVMAP vec2 uv = SampleSphericalMap(_immutable.direction); @@ -415,7 +418,7 @@ nbl_glsl_LightSample nbl_glsl_bsdf_cos_generate(in nbl_glsl_AnisotropicViewSurfa { const float a = BSDFNode_getRoughness(bsdf); const mat2x3 ior = BSDFNode_getEta(bsdf); - + // fresnel stuff for dielectrics float orientedEta, rcpOrientedEta; const bool viewerInsideMedium = nbl_glsl_getOrientedEtas(orientedEta,rcpOrientedEta,interaction.isotropic.NdotV,monochromeEta); @@ -519,7 +522,7 @@ int traceRay(inout float intersectionT, in vec3 origin, in vec3 direction) intersectionT = closerIntersection ? t : intersectionT; objectID = closerIntersection ? i:objectID; - + // allowing early out results in a performance regression, WTF!? 
//if (anyHit && closerIntersection) //break; @@ -543,7 +546,7 @@ nbl_glsl_LightSample nbl_glsl_light_generate_and_remainder_and_pdf(out vec3 rema { // normally we'd pick from set of lights, using `xi.z` const Light light = lights[0]; - + vec3 L = nbl_glsl_light_generate_and_pdf(pdf,newRayMaxT,origin,interaction,isBSDF,xi,Light_getObjectID(light)); newRayMaxT *= getEndTolerance(depth); @@ -640,7 +643,7 @@ bool closestHitProgram(in uint depth, in uint _sample, inout Ray_t ray, inout nb float bsdfPdf; neeContrib *= nbl_glsl_bsdf_cos_remainder_and_pdf(bsdfPdf,nee_sample,interaction,bsdf,monochromeEta,_cache)*throughput; const float otherGenOverChoice = bsdfPdf*rcpChoiceProb; -#if 0 +#ifndef NEE_ONLY const float otherGenOverLightAndChoice = otherGenOverChoice/lightPdf; neeContrib *= otherGenOverChoice/(1.f+otherGenOverLightAndChoice*otherGenOverLightAndChoice); // MIS weight #else @@ -650,7 +653,7 @@ bool closestHitProgram(in uint depth, in uint _sample, inout Ray_t ray, inout nb ray._payload.accumulation += neeContrib; }} } -#if 1 +#if NEE_ONLY return false; #endif // sample BSDF @@ -663,7 +666,7 @@ bool closestHitProgram(in uint depth, in uint _sample, inout Ray_t ray, inout nb // bsdfSampleL = bsdf_sample.L; } - + // additional threshold const float lumaThroughputThreshold = lumaContributionThreshold; if (bsdfPdf>bsdfPdfThreshold && getLuma(throughput)>lumaThroughputThreshold) @@ -671,7 +674,7 @@ bool closestHitProgram(in uint depth, in uint _sample, inout Ray_t ray, inout nb ray._payload.throughput = throughput; ray._payload.otherTechniqueHeuristic = neeProbability/bsdfPdf; // numerically stable, don't touch ray._payload.otherTechniqueHeuristic *= ray._payload.otherTechniqueHeuristic; - + // trace new ray ray._immutable.origin = intersection+bsdfSampleL*(1.0/*kSceneSize*/)*getStartTolerance(depth); ray._immutable.direction = bsdfSampleL; @@ -688,27 +691,45 @@ bool closestHitProgram(in uint depth, in uint _sample, inout Ray_t ray, inout nb void main() { const ivec2 imageExtents = imageSize(outImage); + +#ifdef PERSISTENT_WORKGROUPS + uint virtualThreadIndex; + for (uint virtualThreadBase = gl_WorkGroupID.x * _NBL_GLSL_WORKGROUP_SIZE_; virtualThreadBase < 1920*1080; virtualThreadBase += gl_NumWorkGroups.x * _NBL_GLSL_WORKGROUP_SIZE_) // not sure why 1280*720 doesn't cover draw surface + { + virtualThreadIndex = virtualThreadBase + gl_LocalInvocationIndex.x; + const ivec2 coords = ivec2(nbl_glsl_morton_decode2d32b(virtualThreadIndex)); +#else const ivec2 coords = getCoordinates(); +#endif + vec2 texCoord = vec2(coords) / vec2(imageExtents); texCoord.y = 1.0 - texCoord.y; if (false == (all(lessThanEqual(ivec2(0),coords)) && all(greaterThan(imageExtents,coords)))) { +#ifdef PERSISTENT_WORKGROUPS + continue; +#else return; +#endif } - if (((MAX_DEPTH-1)>>MAX_DEPTH_LOG2)>0 || ((SAMPLES-1)>>MAX_SAMPLES_LOG2)>0) + if (((PTPushConstant.depth-1)>>MAX_DEPTH_LOG2)>0 || ((PTPushConstant.sampleCount-1)>>MAX_SAMPLES_LOG2)>0) { vec4 pixelCol = vec4(1.0,0.0,0.0,1.0); imageStore(outImage, coords, pixelCol); +#ifdef PERSISTENT_WORKGROUPS + continue; +#else return; +#endif } - nbl_glsl_xoroshiro64star_state_t scramble_start_state = texelFetch(scramblebuf,coords,0).rg; + nbl_glsl_xoroshiro64star_state_t scramble_start_state = texelFetch(scramblebuf,coords,0).rg; const vec2 pixOffsetParam = vec2(1.0)/vec2(textureSize(scramblebuf,0)); - const mat4 invMVP = inverse(cameraData.params.MVP); - + const mat4 invMVP = PTPushConstant.invMVP; + vec4 NDC = vec4(texCoord*vec2(2.0,-2.0)+vec2(-1.0,1.0),0.0,1.0); vec3 camPos; { 
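// note: invMVP above now arrives pre-inverted through PTPushConstant, so the per-invocation inverse(cameraData.params.MVP) of the old UBO path is gone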
@@ -719,8 +740,8 @@ void main() vec3 color = vec3(0.0); float meanLumaSquared = 0.0; - // TODO: if we collapse the nested for loop, then all GPUs will get `MAX_DEPTH` factor speedup, not just NV with separate PC - for (int i=0; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ + +template // TODO make type T Spectrum +struct Payload +{ + using this_t = Payload; + using scalar_type = T; + using vector3_type = vector; + + vector3_type accumulation; + scalar_type otherTechniqueHeuristic; + vector3_type throughput; + // #ifdef KILL_DIFFUSE_SPECULAR_PATHS + // bool hasDiffuse; + // #endif +}; + +enum ProceduralShapeType : uint16_t +{ + PST_NONE = 0, + PST_SPHERE, + PST_TRIANGLE, + PST_RECTANGLE +}; + +struct ObjectID +{ + static ObjectID create(uint32_t id, uint32_t mode, ProceduralShapeType shapeType) + { + ObjectID retval; + retval.id = id; + retval.mode = mode; + retval.shapeType = shapeType; + return retval; + } + + uint32_t id; + uint32_t mode; + ProceduralShapeType shapeType; +}; + +template +struct Ray +{ + using this_t = Ray; + using scalar_type = T; + using vector3_type = vector; + + // immutable + vector3_type origin; + vector3_type direction; + + // polygon method == PPM_APPROX_PROJECTED_SOLID_ANGLE + vector3_type normalAtOrigin; + bool wasBSDFAtOrigin; + + // mutable + scalar_type intersectionT; + ObjectID objectID; + + Payload payload; +}; + +template +struct Light +{ + using spectral_type = Spectrum; + + NBL_CONSTEXPR_STATIC_INLINE uint32_t INVALID_ID = 0xffffu; + + static Light create(NBL_CONST_REF_ARG(spectral_type) radiance, uint32_t objId, uint32_t mode, ProceduralShapeType shapeType) + { + Light retval; + retval.radiance = radiance; + retval.objectID = ObjectID::create(objId, mode, shapeType); + return retval; + } + + static Light create(NBL_CONST_REF_ARG(spectral_type) radiance, NBL_CONST_REF_ARG(ObjectID) objectID) + { + Light retval; + retval.radiance = radiance; + retval.objectID = objectID; + return retval; + } + + spectral_type radiance; + ObjectID objectID; +}; + +template +struct BxDFNode +{ + using spectral_type = Spectrum; + using params_type = bxdf::SBxDFCreationParams; + + NBL_CONSTEXPR_STATIC_INLINE uint32_t INVALID_ID = 0xffffu; + + // for diffuse bxdfs + static BxDFNode create(uint32_t materialType, bool isAniso, NBL_CONST_REF_ARG(float32_t2) A, NBL_CONST_REF_ARG(spectral_type) albedo) + { + BxDFNode retval; + retval.albedo = albedo; + retval.materialType = materialType; + retval.params.is_aniso = isAniso; + retval.params.A = hlsl::max(A, (float32_t2)1e-4); + retval.params.ior0 = (spectral_type)1.0; + retval.params.ior1 = (spectral_type)1.0; + return retval; + } + + // for conductor + dielectric + static BxDFNode create(uint32_t materialType, bool isAniso, NBL_CONST_REF_ARG(float32_t2) A, NBL_CONST_REF_ARG(spectral_type) ior0, NBL_CONST_REF_ARG(spectral_type) ior1) + { + BxDFNode retval; + retval.albedo = (spectral_type)1.0; + retval.materialType = materialType; + retval.params.is_aniso = isAniso; + retval.params.A = hlsl::max(A, (float32_t2)1e-4); + retval.params.ior0 = ior0; + retval.params.ior1 = ior1; + return retval; + } + + spectral_type albedo; + uint32_t materialType; + params_type params; +}; + +template +struct Tolerance +{ + NBL_CONSTEXPR_STATIC_INLINE float INTERSECTION_ERROR_BOUND_LOG2 = -8.0; + + static T __common(uint32_t depth) + { + float depthRcp = 1.0 / float(depth); + return INTERSECTION_ERROR_BOUND_LOG2; + } + + static T 
getStart(uint32_t depth) + { + return nbl::hlsl::exp2(__common(depth)); + } + + static T getEnd(uint32_t depth) + { + return 1.0 - nbl::hlsl::exp2(__common(depth) + 1.0); + } +}; + +enum PTPolygonMethod : uint16_t +{ + PPM_AREA, + PPM_SOLID_ANGLE, + PPM_APPROX_PROJECTED_SOLID_ANGLE +}; + +enum IntersectMode : uint32_t +{ + IM_RAY_QUERY, + IM_RAY_TRACING, + IM_PROCEDURAL +}; + +template +struct Shape; + +template<> +struct Shape +{ + static Shape create(NBL_CONST_REF_ARG(float32_t3) position, float32_t radius2, uint32_t bsdfLightIDs) + { + Shape retval; + retval.position = position; + retval.radius2 = radius2; + retval.bsdfLightIDs = bsdfLightIDs; + return retval; + } + + static Shape create(NBL_CONST_REF_ARG(float32_t3) position, float32_t radius, uint32_t bsdfID, uint32_t lightID) + { + uint32_t bsdfLightIDs = glsl::bitfieldInsert(bsdfID, lightID, 16, 16); + return create(position, radius * radius, bsdfLightIDs); + } + + // return intersection distance if found, nan otherwise + float intersect(NBL_CONST_REF_ARG(float32_t3) origin, NBL_CONST_REF_ARG(float32_t3) direction) + { + float32_t3 relOrigin = origin - position; + float relOriginLen2 = hlsl::dot(relOrigin, relOrigin); + + float dirDotRelOrigin = hlsl::dot(direction, relOrigin); + float det = radius2 - relOriginLen2 + dirDotRelOrigin * dirDotRelOrigin; + + // do some speculative math here + float detsqrt = hlsl::sqrt(det); + return -dirDotRelOrigin + (relOriginLen2 > radius2 ? (-detsqrt) : detsqrt); + } + + float32_t3 getNormal(NBL_CONST_REF_ARG(float32_t3) hitPosition) + { + const float radiusRcp = hlsl::rsqrt(radius2); + return (hitPosition - position) * radiusRcp; + } + + float getSolidAngle(NBL_CONST_REF_ARG(float32_t3) origin) + { + float32_t3 dist = position - origin; + float cosThetaMax = hlsl::sqrt(1.0 - radius2 / hlsl::dot(dist, dist)); + return 2.0 * numbers::pi * (1.0 - cosThetaMax); + } + + NBL_CONSTEXPR_STATIC_INLINE uint32_t ObjSize = 5; + + float32_t3 position; + float32_t radius2; + uint32_t bsdfLightIDs; +}; + +template<> +struct Shape +{ + static Shape create(NBL_CONST_REF_ARG(float32_t3) vertex0, NBL_CONST_REF_ARG(float32_t3) vertex1, NBL_CONST_REF_ARG(float32_t3) vertex2, uint32_t bsdfLightIDs) + { + Shape retval; + retval.vertex0 = vertex0; + retval.vertex1 = vertex1; + retval.vertex2 = vertex2; + retval.bsdfLightIDs = bsdfLightIDs; + return retval; + } + + static Shape create(NBL_CONST_REF_ARG(float32_t3) vertex0, NBL_CONST_REF_ARG(float32_t3) vertex1, NBL_CONST_REF_ARG(float32_t3) vertex2, uint32_t bsdfID, uint32_t lightID) + { + uint32_t bsdfLightIDs = glsl::bitfieldInsert(bsdfID, lightID, 16, 16); + return create(vertex0, vertex1, vertex2, bsdfLightIDs); + } + + float intersect(NBL_CONST_REF_ARG(float32_t3) origin, NBL_CONST_REF_ARG(float32_t3) direction) + { + const float32_t3 edges[2] = { vertex1 - vertex0, vertex2 - vertex0 }; + + const float32_t3 h = hlsl::cross(direction, edges[1]); + const float a = hlsl::dot(edges[0], h); + + const float32_t3 relOrigin = origin - vertex0; + + const float u = hlsl::dot(relOrigin, h) / a; + + const float32_t3 q = hlsl::cross(relOrigin, edges[0]); + const float v = hlsl::dot(direction, q) / a; + + const float t = hlsl::dot(edges[1], q) / a; + + const bool intersection = t > 0.f && u >= 0.f && v >= 0.f && (u + v) <= 1.f; + return intersection ? 
t : bit_cast(numeric_limits::infinity); + } + + float32_t3 getNormalTimesArea() + { + const float32_t3 edges[2] = { vertex1 - vertex0, vertex2 - vertex0 }; + return hlsl::cross(edges[0], edges[1]) * 0.5f; + } + + NBL_CONSTEXPR_STATIC_INLINE uint32_t ObjSize = 10; + + float32_t3 vertex0; + float32_t3 vertex1; + float32_t3 vertex2; + uint32_t bsdfLightIDs; +}; + +template<> +struct Shape +{ + static Shape create(NBL_CONST_REF_ARG(float32_t3) offset, NBL_CONST_REF_ARG(float32_t3) edge0, NBL_CONST_REF_ARG(float32_t3) edge1, uint32_t bsdfLightIDs) + { + Shape retval; + retval.offset = offset; + retval.edge0 = edge0; + retval.edge1 = edge1; + retval.bsdfLightIDs = bsdfLightIDs; + return retval; + } + + static Shape create(NBL_CONST_REF_ARG(float32_t3) offset, NBL_CONST_REF_ARG(float32_t3) edge0, NBL_CONST_REF_ARG(float32_t3) edge1, uint32_t bsdfID, uint32_t lightID) + { + uint32_t bsdfLightIDs = glsl::bitfieldInsert(bsdfID, lightID, 16, 16); + return create(offset, edge0, edge1, bsdfLightIDs); + } + + float intersect(NBL_CONST_REF_ARG(float32_t3) origin, NBL_CONST_REF_ARG(float32_t3) direction) + { + const float32_t3 h = hlsl::cross(direction, edge1); + const float a = hlsl::dot(edge0, h); + + const float32_t3 relOrigin = origin - offset; + + const float u = hlsl::dot(relOrigin,h)/a; + + const float32_t3 q = hlsl::cross(relOrigin, edge0); + const float v = hlsl::dot(direction, q) / a; + + const float t = hlsl::dot(edge1, q) / a; + + const bool intersection = t > 0.f && u >= 0.f && v >= 0.f && u <= 1.f && v <= 1.f; + return intersection ? t : bit_cast(numeric_limits::infinity); + } + + float32_t3 getNormalTimesArea() + { + return hlsl::cross(edge0, edge1); + } + + void getNormalBasis(NBL_REF_ARG(float32_t3x3) basis, NBL_REF_ARG(float32_t2) extents) + { + extents = float32_t2(nbl::hlsl::length(edge0), nbl::hlsl::length(edge1)); + basis[0] = edge0 / extents[0]; + basis[1] = edge1 / extents[1]; + basis[2] = normalize(cross(basis[0],basis[1])); + } + + NBL_CONSTEXPR_STATIC_INLINE uint32_t ObjSize = 10; + + float32_t3 offset; + float32_t3 edge0; + float32_t3 edge1; + uint32_t bsdfLightIDs; +}; + +} +} +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/intersector.hlsl b/31_HLSLPathTracer/app_resources/hlsl/intersector.hlsl new file mode 100644 index 000000000..e59fdc2c3 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/intersector.hlsl @@ -0,0 +1,88 @@ +#ifndef _NBL_HLSL_EXT_INTERSECTOR_INCLUDED_ +#define _NBL_HLSL_EXT_INTERSECTOR_INCLUDED_ + +#include "common.hlsl" +#include "scene.hlsl" +#include + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ +namespace Intersector +{ + +template +struct Comprehensive +{ + using scalar_type = typename Ray::scalar_type; + using vector3_type = vector; + using ray_type = Ray; + + using light_type = Light; + using bxdfnode_type = BxdfNode; + using scene_type = Scene; + + static ObjectID traceRay(NBL_REF_ARG(ray_type) ray, NBL_CONST_REF_ARG(scene_type) scene) + { + ObjectID objectID; + objectID.id = -1; + + // prodedural shapes + for (int i = 0; i < scene.sphereCount; i++) + { + float t = scene.spheres[i].intersect(ray.origin, ray.direction); + + bool closerIntersection = t > 0.0 && t < ray.intersectionT; + + if (closerIntersection) + { + ray.intersectionT = t; + objectID.id = i; + objectID.mode = IM_PROCEDURAL; + objectID.shapeType = PST_SPHERE; + } + } + for (int i = 0; i < scene.triangleCount; i++) + { + float t = scene.triangles[i].intersect(ray.origin, ray.direction); + + bool closerIntersection = t > 0.0 && t < ray.intersectionT; + + if 
(closerIntersection) + { + ray.intersectionT = t; + objectID.id = i; + objectID.mode = IM_PROCEDURAL; + objectID.shapeType = PST_TRIANGLE; + } + } + for (int i = 0; i < scene.rectangleCount; i++) + { + float t = scene.rectangles[i].intersect(ray.origin, ray.direction); + + bool closerIntersection = t > 0.0 && t < ray.intersectionT; + + if (closerIntersection) + { + ray.intersectionT = t; + objectID.id = i; + objectID.mode = IM_PROCEDURAL; + objectID.shapeType = PST_RECTANGLE; + } + } + + // TODO: trace AS + + return objectID; + } +}; + +} +} +} +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/material_system.hlsl b/31_HLSLPathTracer/app_resources/hlsl/material_system.hlsl new file mode 100644 index 000000000..4e2fdc5a0 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/material_system.hlsl @@ -0,0 +1,205 @@ +#ifndef _NBL_HLSL_EXT_MATERIAL_SYSTEM_INCLUDED_ +#define _NBL_HLSL_EXT_MATERIAL_SYSTEM_INCLUDED_ + +#include +#include +#include + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ +namespace MaterialSystem +{ + +enum MaterialType : uint32_t // enum class? +{ + DIFFUSE, + CONDUCTOR, + DIELECTRIC +}; + +template +struct MaterialParams +{ + using this_t = MaterialParams; + using sample_type = typename DiffuseBxDF::sample_type; + using anisotropic_interaction_type = typename DiffuseBxDF::anisotropic_interaction_type; + using isotropic_interaction_type = typename anisotropic_interaction_type::isotropic_interaction_type; + using anisocache_type = typename ConductorBxDF::anisocache_type; + using isocache_type = typename anisocache_type::isocache_type; + + using diffuse_params_type = typename DiffuseBxDF::params_isotropic_t; + using conductor_params_type = typename ConductorBxDF::params_isotropic_t; + using dielectric_params_type = typename DielectricBxDF::params_isotropic_t; + + // we're only doing isotropic for this example + static this_t create(sample_type _sample, isotropic_interaction_type _interaction, isocache_type _cache, bxdf::BxDFClampMode _clamp) + { + this_t retval; + retval._Sample = _sample; + retval.interaction = _interaction; + retval.cache = _cache; + retval.clampMode = _clamp; + return retval; + } + + diffuse_params_type getDiffuseParams() + { + return diffuse_params_type::create(_Sample, interaction, clampMode); + } + + conductor_params_type getConductorParams() + { + return conductor_params_type::create(_Sample, interaction, cache, clampMode); + } + + dielectric_params_type getDielectricParams() + { + return dielectric_params_type::create(_Sample, interaction, cache, clampMode); + } + + sample_type _Sample; + isotropic_interaction_type interaction; + isocache_type cache; + bxdf::BxDFClampMode clampMode; +}; + +template // NOTE: these bxdfs should match the ones in Scene BxDFNode +struct System +{ + using this_t = System; + using scalar_type = typename DiffuseBxDF::scalar_type; // types should be same across all 3 bxdfs + using vector2_type = vector; + using vector3_type = vector; + using measure_type = typename DiffuseBxDF::spectral_type; + using sample_type = typename DiffuseBxDF::sample_type; + using ray_dir_info_type = typename sample_type::ray_dir_info_type; + using quotient_pdf_type = typename DiffuseBxDF::quotient_pdf_type; + using anisotropic_interaction_type = typename DiffuseBxDF::anisotropic_interaction_type; + using isotropic_interaction_type = typename anisotropic_interaction_type::isotropic_interaction_type; + using anisocache_type = typename ConductorBxDF::anisocache_type; + using isocache_type = typename
anisocache_type::isocache_type; + using params_t = MaterialParams; + using create_params_t = bxdf::SBxDFCreationParams; + + using diffuse_op_type = DiffuseBxDF; + using conductor_op_type = ConductorBxDF; + using dielectric_op_type = DielectricBxDF; + + static this_t create(NBL_CONST_REF_ARG(create_params_t) diffuseParams, NBL_CONST_REF_ARG(create_params_t) conductorParams, NBL_CONST_REF_ARG(create_params_t) dielectricParams) + { + this_t retval; + retval.diffuseBxDF = diffuse_op_type::create(diffuseParams); + retval.conductorBxDF = conductor_op_type::create(conductorParams); + retval.dielectricBxDF = dielectric_op_type::create(dielectricParams); + return retval; + } + + measure_type eval(uint32_t material, NBL_CONST_REF_ARG(create_params_t) cparams, NBL_CONST_REF_ARG(params_t) params) + { + switch(material) + { + case MaterialType::DIFFUSE: + { + diffuseBxDF.init(cparams); + return (measure_type)diffuseBxDF.eval(params.getDiffuseParams()); + } + break; + case MaterialType::CONDUCTOR: + { + conductorBxDF.init(cparams); + return conductorBxDF.eval(params.getConductorParams()); + } + break; + case MaterialType::DIELECTRIC: + { + dielectricBxDF.init(cparams); + return dielectricBxDF.eval(params.getDielectricParams()); + } + break; + default: + return (measure_type)0.0; + } + } + + sample_type generate(uint32_t material, NBL_CONST_REF_ARG(create_params_t) cparams, NBL_CONST_REF_ARG(anisotropic_interaction_type) interaction, NBL_CONST_REF_ARG(vector3_type) u, NBL_REF_ARG(anisocache_type) _cache) + { + switch(material) + { + case MaterialType::DIFFUSE: + { + diffuseBxDF.init(cparams); + return diffuseBxDF.generate(interaction, u.xy); + } + break; + case MaterialType::CONDUCTOR: + { + conductorBxDF.init(cparams); + return conductorBxDF.generate(interaction, u.xy, _cache); + } + break; + case MaterialType::DIELECTRIC: + { + dielectricBxDF.init(cparams); + return dielectricBxDF.generate(interaction, u, _cache); + } + break; + default: + { + ray_dir_info_type L; + L.direction = (vector3_type)0; + return sample_type::create(L, 0, (vector3_type)0); + } + } + + ray_dir_info_type L; + L.direction = (vector3_type)0; + return sample_type::create(L, 0, (vector3_type)0); + } + + quotient_pdf_type quotient_and_pdf(uint32_t material, NBL_CONST_REF_ARG(create_params_t) cparams, NBL_CONST_REF_ARG(params_t) params) + { + const float minimumProjVectorLen = 0.00000001; + if (params.interaction.getNdotV() > minimumProjVectorLen && params._Sample.getNdotL() > minimumProjVectorLen) + { + switch(material) + { + case MaterialType::DIFFUSE: + { + diffuseBxDF.init(cparams); + return diffuseBxDF.quotient_and_pdf(params.getDiffuseParams()); + } + break; + case MaterialType::CONDUCTOR: + { + conductorBxDF.init(cparams); + return conductorBxDF.quotient_and_pdf(params.getConductorParams()); + } + break; + case MaterialType::DIELECTRIC: + { + dielectricBxDF.init(cparams); + return dielectricBxDF.quotient_and_pdf(params.getDielectricParams()); + } + break; + default: + return quotient_pdf_type::create((measure_type)0.0, 0.0); + } + } + return quotient_pdf_type::create((measure_type)0.0, 0.0); + } + + DiffuseBxDF diffuseBxDF; + ConductorBxDF conductorBxDF; + DielectricBxDF dielectricBxDF; +}; + +} +} +} +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl b/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl new file mode 100644 index 000000000..ac74b1abf --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/next_event_estimator.hlsl @@ -0,0 +1,446 @@ +#ifndef 
_NBL_HLSL_EXT_NEXT_EVENT_ESTIMATOR_INCLUDED_ +#define _NBL_HLSL_EXT_NEXT_EVENT_ESTIMATOR_INCLUDED_ + +#include "common.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ +namespace NextEventEstimator +{ + +template +struct ShapeSampling; + +template +struct ShapeSampling +{ + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) sphere) + { + ShapeSampling retval; + retval.sphere = sphere; + return retval; + } + + template + float deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + return 1.0 / sphere.getSolidAngle(ray.origin); + } + + template + float32_t3 generate_and_pdf(NBL_REF_ARG(float32_t) pdf, NBL_REF_ARG(float32_t) newRayMaxT, NBL_CONST_REF_ARG(float32_t3) origin, NBL_CONST_REF_ARG(Aniso) interaction, bool isBSDF, NBL_CONST_REF_ARG(float32_t3) xi) + { + float32_t3 Z = sphere.position - origin; + const float distanceSQ = hlsl::dot(Z,Z); + const float cosThetaMax2 = 1.0 - sphere.radius2 / distanceSQ; + if (cosThetaMax2 > 0.0) + { + const float rcpDistance = 1.0 / hlsl::sqrt(distanceSQ); + Z *= rcpDistance; + + const float cosThetaMax = hlsl::sqrt(cosThetaMax2); + const float cosTheta = hlsl::mix(1.0, cosThetaMax, xi.x); + + float32_t3 L = Z * cosTheta; + + const float cosTheta2 = cosTheta * cosTheta; + const float sinTheta = hlsl::sqrt(1.0 - cosTheta2); + float sinPhi, cosPhi; + math::sincos(2.0 * numbers::pi * xi.y - numbers::pi, sinPhi, cosPhi); + float32_t3 X, Y; + math::frisvad(Z, X, Y); + + L += (X * cosPhi + Y * sinPhi) * sinTheta; + + newRayMaxT = (cosTheta - hlsl::sqrt(cosTheta2 - cosThetaMax2)) / rcpDistance; + pdf = 1.0 / (2.0 * numbers::pi * (1.0 - cosThetaMax)); + return L; + } + pdf = 0.0; + return float32_t3(0.0,0.0,0.0); + } + + Shape sphere; +}; + +template<> +struct ShapeSampling +{ + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) tri) + { + ShapeSampling retval; + retval.tri = tri; + return retval; + } + + template + float deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + const float dist = ray.intersectionT; + const float32_t3 L = ray.direction; + return dist * dist / hlsl::abs(hlsl::dot(tri.getNormalTimesArea(), L)); + } + + template + float32_t3 generate_and_pdf(NBL_REF_ARG(float32_t) pdf, NBL_REF_ARG(float32_t) newRayMaxT, NBL_CONST_REF_ARG(float32_t3) origin, NBL_CONST_REF_ARG(Aniso) interaction, bool isBSDF, NBL_CONST_REF_ARG(float32_t3) xi) + { + const float32_t3 edge0 = tri.vertex1 - tri.vertex0; + const float32_t3 edge1 = tri.vertex2 - tri.vertex0; + const float sqrtU = hlsl::sqrt(xi.x); + float32_t3 pnt = tri.vertex0 + edge0 * (1.0 - sqrtU) + edge1 * sqrtU * xi.y; + float32_t3 L = pnt - origin; + + const float distanceSq = hlsl::dot(L,L); + const float rcpDistance = 1.0 / hlsl::sqrt(distanceSq); + L *= rcpDistance; + + pdf = distanceSq / hlsl::abs(hlsl::dot(hlsl::cross(edge0, edge1) * 0.5f, L)); + newRayMaxT = 1.0 / rcpDistance; + return L; + } + + Shape tri; +}; + +template<> +struct ShapeSampling +{ + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) tri) + { + ShapeSampling retval; + retval.tri = tri; + return retval; + } + + template + float deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + shapes::SphericalTriangle st = shapes::SphericalTriangle::create(tri.vertex0, tri.vertex1, tri.vertex2, ray.origin); + const float rcpProb = st.solidAngleOfTriangle(); + // if `rcpProb` is NAN then the triangle's solid angle was close to 0.0 + return rcpProb > numeric_limits::min ? 
(1.0 / rcpProb) : numeric_limits::max; + } + + template + float32_t3 generate_and_pdf(NBL_REF_ARG(float32_t) pdf, NBL_REF_ARG(float32_t) newRayMaxT, NBL_CONST_REF_ARG(float32_t3) origin, NBL_CONST_REF_ARG(Aniso) interaction, bool isBSDF, NBL_CONST_REF_ARG(float32_t3) xi) + { + float rcpPdf; + shapes::SphericalTriangle st = shapes::SphericalTriangle::create(tri.vertex0, tri.vertex1, tri.vertex2, origin); + sampling::SphericalTriangle sst = sampling::SphericalTriangle::create(st); + + const float32_t3 L = sst.generate(rcpPdf, xi.xy); + + pdf = rcpPdf > numeric_limits::min ? (1.0 / rcpPdf) : numeric_limits::max; + + const float32_t3 N = tri.getNormalTimesArea(); + newRayMaxT = hlsl::dot(N, tri.vertex0 - origin) / hlsl::dot(N, L); + return L; + } + + Shape tri; +}; + +template<> +struct ShapeSampling +{ + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) tri) + { + ShapeSampling retval; + retval.tri = tri; + return retval; + } + + template + float deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + const float32_t3 L = ray.direction; + shapes::SphericalTriangle st = shapes::SphericalTriangle::create(tri.vertex0, tri.vertex1, tri.vertex2, ray.origin); + sampling::ProjectedSphericalTriangle pst = sampling::ProjectedSphericalTriangle::create(st); + const float pdf = pst.pdf(ray.normalAtOrigin, ray.wasBSDFAtOrigin, L); + // if `pdf` is NAN then the triangle's projected solid angle was close to 0.0, if its close to INF then the triangle was very small + return pdf < numeric_limits::max ? pdf : numeric_limits::max; + } + + template + float32_t3 generate_and_pdf(NBL_REF_ARG(float32_t) pdf, NBL_REF_ARG(float32_t) newRayMaxT, NBL_CONST_REF_ARG(float32_t3) origin, NBL_CONST_REF_ARG(Aniso) interaction, bool isBSDF, NBL_CONST_REF_ARG(float32_t3) xi) + { + float rcpPdf; + shapes::SphericalTriangle st = shapes::SphericalTriangle::create(tri.vertex0, tri.vertex1, tri.vertex2, origin); + sampling::ProjectedSphericalTriangle sst = sampling::ProjectedSphericalTriangle::create(st); + + const float32_t3 L = sst.generate(rcpPdf, interaction.isotropic.N, isBSDF, xi.xy); + + pdf = rcpPdf > numeric_limits::min ? 
(1.0 / rcpPdf) : numeric_limits::max; + + const float32_t3 N = tri.getNormalTimesArea(); + newRayMaxT = hlsl::dot(N, tri.vertex0 - origin) / hlsl::dot(N, L); + return L; + } + + Shape tri; +}; + +template<> +struct ShapeSampling +{ + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) rect) + { + ShapeSampling retval; + retval.rect = rect; + return retval; + } + + template + float deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + const float dist = ray.intersectionT; + const float32_t3 L = ray.direction; + return dist * dist / hlsl::abs(hlsl::dot(rect.getNormalTimesArea(), L)); + } + + template + float32_t3 generate_and_pdf(NBL_REF_ARG(float32_t) pdf, NBL_REF_ARG(float32_t) newRayMaxT, NBL_CONST_REF_ARG(float32_t3) origin, NBL_CONST_REF_ARG(Aniso) interaction, bool isBSDF, NBL_CONST_REF_ARG(float32_t3) xi) + { + const float32_t3 N = rect.getNormalTimesArea(); + const float32_t3 origin2origin = rect.offset - origin; + + float32_t3 L = origin2origin + rect.edge0 * xi.x + rect.edge1 * xi.y; + const float distSq = hlsl::dot(L, L); + const float rcpDist = 1.0 / hlsl::sqrt(distSq); + L *= rcpDist; + pdf = distSq / hlsl::abs(hlsl::dot(N, L)); + newRayMaxT = 1.0 / rcpDist; + return L; + } + + Shape rect; +}; + +template<> +struct ShapeSampling +{ + static ShapeSampling create(NBL_CONST_REF_ARG(Shape) rect) + { + ShapeSampling retval; + retval.rect = rect; + return retval; + } + + template + float deferredPdf(NBL_CONST_REF_ARG(Ray) ray) + { + float pdf; + float32_t3x3 rectNormalBasis; + float32_t2 rectExtents; + rect.getNormalBasis(rectNormalBasis, rectExtents); + shapes::SphericalRectangle sphR0 = shapes::SphericalRectangle::create(ray.origin, rect.offset, rectNormalBasis); + float solidAngle = sphR0.solidAngleOfRectangle(rectExtents); + if (solidAngle > numeric_limits::min) + pdf = 1.f / solidAngle; + else + pdf = bit_cast(numeric_limits::infinity); + return pdf; + } + + template + float32_t3 generate_and_pdf(NBL_REF_ARG(float32_t) pdf, NBL_REF_ARG(float32_t) newRayMaxT, NBL_CONST_REF_ARG(float32_t3) origin, NBL_CONST_REF_ARG(Aniso) interaction, bool isBSDF, NBL_CONST_REF_ARG(float32_t3) xi) + { + const float32_t3 N = rect.getNormalTimesArea(); + const float32_t3 origin2origin = rect.offset - origin; + + float32_t3x3 rectNormalBasis; + float32_t2 rectExtents; + rect.getNormalBasis(rectNormalBasis, rectExtents); + shapes::SphericalRectangle sphR0 = shapes::SphericalRectangle::create(origin, rect.offset, rectNormalBasis); + float32_t3 L = (float32_t3)0.0; + float solidAngle = sphR0.solidAngleOfRectangle(rectExtents); + + sampling::SphericalRectangle ssph = sampling::SphericalRectangle::create(sphR0); + float32_t2 sphUv = ssph.generate(rectExtents, xi.xy, solidAngle); + if (solidAngle > numeric_limits::min) + { + float32_t3 sph_sample = sphUv[0] * rect.edge0 + sphUv[1] * rect.edge1 + rect.offset; + L = sph_sample - origin; + L = hlsl::mix(nbl::hlsl::normalize(L), (float32_t3)0.0, hlsl::abs(L) > (float32_t3)numeric_limits::min); // TODO? 
sometimes L is vec3(0), find cause + pdf = 1.f / solidAngle; + } + else + pdf = bit_cast(numeric_limits::infinity); + + newRayMaxT = hlsl::dot(N, origin2origin) / hlsl::dot(N, L); + return L; + } + + Shape rect; +}; + +// PPM_APPROX_PROJECTED_SOLID_ANGLE not available for PST_TRIANGLE + + +template +struct Estimator; + +template +struct Estimator +{ + using scalar_type = typename Ray::scalar_type; + using vector3_type = vector; + using ray_type = Ray; + using scene_type = Scene; + using light_type = typename Scene::light_type; + using spectral_type = typename light_type::spectral_type; + using interaction_type = Aniso; + using quotient_pdf_type = sampling::quotient_and_pdf; + using sample_type = LightSample; + using ray_dir_info_type = typename sample_type::ray_dir_info_type; + + // affected by https://github.com/microsoft/DirectXShaderCompiler/issues/7007 + // NBL_CONSTEXPR_STATIC_INLINE PTPolygonMethod PolygonMethod = PPM; + enum : uint16_t { PolygonMethod = PPM }; + + static spectral_type deferredEvalAndPdf(NBL_REF_ARG(scalar_type) pdf, NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightID, NBL_CONST_REF_ARG(ray_type) ray) + { + pdf = 1.0 / scene.lightCount; + const light_type light = scene.lights[lightID]; + const Shape sphere = scene.spheres[light.objectID.id]; + const ShapeSampling sampling = ShapeSampling::create(sphere); + pdf *= sampling.template deferredPdf(ray); + + return light.radiance; + } + + static sample_type generate_and_quotient_and_pdf(NBL_REF_ARG(quotient_pdf_type) quotient_pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightID, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(interaction_type) interaction, bool isBSDF, NBL_CONST_REF_ARG(vector3_type) xi, uint32_t depth) + { + const light_type light = scene.lights[lightID]; + const Shape sphere = scene.spheres[light.objectID.id]; + const ShapeSampling sampling = ShapeSampling::create(sphere); + + scalar_type pdf; + const vector3_type sampleL = sampling.template generate_and_pdf(pdf, newRayMaxT, origin, interaction, isBSDF, xi); + const vector3_type V = interaction.isotropic.V.getDirection(); + const scalar_type VdotL = nbl::hlsl::dot(V, sampleL); + ray_dir_info_type rayL; + rayL.direction = sampleL; + sample_type L = sample_type::create(rayL,VdotL,interaction.T,interaction.B,interaction.isotropic.N); + + newRayMaxT *= Tolerance::getEnd(depth); + pdf *= 1.0 / scalar_type(scene.lightCount); + spectral_type quo = light.radiance / pdf; + quotient_pdf = quotient_pdf_type::create(quo, pdf); + + return L; + } +}; + +template +struct Estimator +{ + using scalar_type = typename Ray::scalar_type; + using vector3_type = vector; + using ray_type = Ray; + using scene_type = Scene; + using light_type = typename Scene::light_type; + using spectral_type = typename light_type::spectral_type; + using interaction_type = Aniso; + using quotient_pdf_type = sampling::quotient_and_pdf; + using sample_type = LightSample; + using ray_dir_info_type = typename sample_type::ray_dir_info_type; + + // NBL_CONSTEXPR_STATIC_INLINE PTPolygonMethod PolygonMethod = PPM; + enum : uint16_t { PolygonMethod = PPM }; + + static spectral_type deferredEvalAndPdf(NBL_REF_ARG(scalar_type) pdf, NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightID, NBL_CONST_REF_ARG(ray_type) ray) + { + pdf = 1.0 / scene.lightCount; + const light_type light = scene.lights[lightID]; + const Shape tri = scene.triangles[light.objectID.id]; + const ShapeSampling sampling = ShapeSampling::create(tri); + pdf *= sampling.template 
deferredPdf(ray); + + return light.radiance; + } + + static sample_type generate_and_quotient_and_pdf(NBL_REF_ARG(quotient_pdf_type) quotient_pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightID, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(interaction_type) interaction, bool isBSDF, NBL_CONST_REF_ARG(vector3_type) xi, uint32_t depth) + { + const light_type light = scene.lights[lightID]; + const Shape tri = scene.triangles[light.objectID.id]; + const ShapeSampling sampling = ShapeSampling::create(tri); + + scalar_type pdf; + const vector3_type sampleL = sampling.template generate_and_pdf(pdf, newRayMaxT, origin, interaction, isBSDF, xi); + const vector3_type V = interaction.isotropic.V.getDirection(); + const scalar_type VdotL = nbl::hlsl::dot(V, sampleL); + ray_dir_info_type rayL; + rayL.direction = sampleL; + sample_type L = sample_type::create(rayL,VdotL,interaction.T,interaction.B,interaction.isotropic.N); + + newRayMaxT *= Tolerance::getEnd(depth); + pdf *= 1.0 / scalar_type(scene.lightCount); + spectral_type quo = light.radiance / pdf; + quotient_pdf = quotient_pdf_type::create(quo, pdf); + + return L; + } +}; + +template +struct Estimator +{ + using scalar_type = typename Ray::scalar_type; + using vector3_type = vector; + using ray_type = Ray; + using scene_type = Scene; + using light_type = typename Scene::light_type; + using spectral_type = typename light_type::spectral_type; + using interaction_type = Aniso; + using quotient_pdf_type = sampling::quotient_and_pdf; + using sample_type = LightSample; + using ray_dir_info_type = typename sample_type::ray_dir_info_type; + + // NBL_CONSTEXPR_STATIC_INLINE PTPolygonMethod PolygonMethod = PPM; + enum : uint16_t { PolygonMethod = PPM }; + + static spectral_type deferredEvalAndPdf(NBL_REF_ARG(scalar_type) pdf, NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightID, NBL_CONST_REF_ARG(ray_type) ray) + { + pdf = 1.0 / scene.lightCount; + const light_type light = scene.lights[lightID]; + const Shape rect = scene.rectangles[light.objectID.id]; + const ShapeSampling sampling = ShapeSampling::create(rect); + pdf *= sampling.template deferredPdf(ray); + + return light.radiance; + } + + static sample_type generate_and_quotient_and_pdf(NBL_REF_ARG(quotient_pdf_type) quotient_pdf, NBL_REF_ARG(scalar_type) newRayMaxT, NBL_CONST_REF_ARG(scene_type) scene, uint32_t lightID, NBL_CONST_REF_ARG(vector3_type) origin, NBL_CONST_REF_ARG(interaction_type) interaction, bool isBSDF, NBL_CONST_REF_ARG(vector3_type) xi, uint32_t depth) + { + const light_type light = scene.lights[lightID]; + const Shape rect = scene.rectangles[light.objectID.id]; + const ShapeSampling sampling = ShapeSampling::create(rect); + + scalar_type pdf; + const vector3_type sampleL = sampling.template generate_and_pdf(pdf, newRayMaxT, origin, interaction, isBSDF, xi); + const vector3_type V = interaction.isotropic.V.getDirection(); + const scalar_type VdotL = nbl::hlsl::dot(V, sampleL); + ray_dir_info_type rayL; + rayL.direction = sampleL; + sample_type L = sample_type::create(rayL,VdotL,interaction.T,interaction.B,interaction.isotropic.N); + + newRayMaxT *= Tolerance::getEnd(depth); + pdf *= 1.0 / scalar_type(scene.lightCount); + spectral_type quo = light.radiance / pdf; + quotient_pdf = quotient_pdf_type::create(quo, pdf); + + return L; + } +}; + +} +} +} +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl new file mode 100644 index 000000000..add1eb8a9 
--- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/pathtracer.hlsl @@ -0,0 +1,320 @@ +#ifndef _NBL_HLSL_EXT_PATHTRACER_INCLUDED_ +#define _NBL_HLSL_EXT_PATHTRACER_INCLUDED_ + +#include +#include +#include +#include + +#include "rand_gen.hlsl" +#include "ray_gen.hlsl" +#include "intersector.hlsl" +#include "material_system.hlsl" +#include "next_event_estimator.hlsl" +#include "scene.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ +namespace PathTracer +{ + +template +struct PathTracerCreationParams +{ + // rng gen + uint32_t2 rngState; + + // ray gen + vector pixOffsetParam; + vector camPos; + vector NDC; + matrix invMVP; + + // mat + BxDFCreation diffuseParams; + BxDFCreation conductorParams; + BxDFCreation dielectricParams; +}; + +template +struct Unidirectional +{ + using this_t = Unidirectional; + using randgen_type = RandGen; + using raygen_type = RayGen; + using intersector_type = Intersector; + using material_system_type = MaterialSystem; + using nee_type = NextEventEstimator; + + using scalar_type = typename MaterialSystem::scalar_type; + using vector3_type = vector; + using measure_type = typename MaterialSystem::measure_type; + using sample_type = typename NextEventEstimator::sample_type; + using ray_dir_info_type = typename sample_type::ray_dir_info_type; + using ray_type = typename RayGen::ray_type; + using light_type = Light; + using bxdfnode_type = BxDFNode; + using anisotropic_interaction_type = typename MaterialSystem::anisotropic_interaction_type; + using isotropic_interaction_type = typename anisotropic_interaction_type::isotropic_interaction_type; + using anisocache_type = typename MaterialSystem::anisocache_type; + using isocache_type = typename anisocache_type::isocache_type; + using quotient_pdf_type = typename NextEventEstimator::quotient_pdf_type; + using params_type = typename MaterialSystem::params_t; + using create_params_type = typename MaterialSystem::create_params_t; + using scene_type = Scene; + + using diffuse_op_type = typename MaterialSystem::diffuse_op_type; + using conductor_op_type = typename MaterialSystem::conductor_op_type; + using dielectric_op_type = typename MaterialSystem::dielectric_op_type; + + static this_t create(NBL_CONST_REF_ARG(PathTracerCreationParams) params) + { + this_t retval; + retval.randGen = randgen_type::create(params.rngState); + retval.rayGen = raygen_type::create(params.pixOffsetParam, params.camPos, params.NDC, params.invMVP); + retval.materialSystem = material_system_type::create(params.diffuseParams, params.conductorParams, params.dielectricParams); + return retval; + } + + vector3_type rand3d(uint32_t protoDimension, uint32_t _sample, uint32_t i) + { + uint32_t address = glsl::bitfieldInsert(protoDimension, _sample, MAX_DEPTH_LOG2, MAX_SAMPLES_LOG2); + uint32_t3 seqVal = sampleSequence[address + i].xyz; + seqVal ^= randGen(); + return vector3_type(seqVal) * bit_cast(0x2f800004u); + } + + scalar_type getLuma(NBL_CONST_REF_ARG(vector3_type) col) + { + return hlsl::dot(hlsl::transpose(colorspace::scRGBtoXYZ)[1], col); + } + + // TODO: probably will only work with procedural shapes, do the other ones + bool closestHitProgram(uint32_t depth, uint32_t _sample, NBL_REF_ARG(ray_type) ray, NBL_CONST_REF_ARG(scene_type) scene) + { + const ObjectID objectID = ray.objectID; + const vector3_type intersection = ray.origin + ray.direction * ray.intersectionT; + + uint32_t bsdfLightIDs; + anisotropic_interaction_type interaction; + isotropic_interaction_type iso_interaction; + uint32_t mode = objectID.mode; + switch 
(mode) + { + // TODO + case IM_RAY_QUERY: + case IM_RAY_TRACING: + break; + case IM_PROCEDURAL: + { + bsdfLightIDs = scene.getBsdfLightIDs(objectID); + vector3_type N = scene.getNormal(objectID, intersection); + N = nbl::hlsl::normalize(N); + ray_dir_info_type V; + V.direction = -ray.direction; + isotropic_interaction_type iso_interaction = isotropic_interaction_type::create(V, N); + interaction = anisotropic_interaction_type::create(iso_interaction); + } + break; + default: + break; + } + + vector3_type throughput = ray.payload.throughput; + + // emissive + const uint32_t lightID = glsl::bitfieldExtract(bsdfLightIDs, 16, 16); + if (lightID != light_type::INVALID_ID) + { + float _pdf; + ray.payload.accumulation += nee.deferredEvalAndPdf(_pdf, scene, lightID, ray) * throughput / (1.0 + _pdf * _pdf * ray.payload.otherTechniqueHeuristic); + } + + const uint32_t bsdfID = glsl::bitfieldExtract(bsdfLightIDs, 0, 16); + if (bsdfID == bxdfnode_type::INVALID_ID) + return false; + + bxdfnode_type bxdf = scene.bxdfs[bsdfID]; + + // TODO: ifdef kill diffuse specular paths + + const bool isBSDF = (bxdf.materialType == ext::MaterialSystem::MaterialType::DIFFUSE) ? bxdf::traits::type == bxdf::BT_BSDF : + (bxdf.materialType == ext::MaterialSystem::MaterialType::CONDUCTOR) ? bxdf::traits::type == bxdf::BT_BSDF : + bxdf::traits::type == bxdf::BT_BSDF; + + vector3_type eps0 = rand3d(depth, _sample, 0u); + vector3_type eps1 = rand3d(depth, _sample, 1u); + + // thresholds + const scalar_type bxdfPdfThreshold = 0.0001; + const scalar_type lumaContributionThreshold = getLuma(colorspace::eotf::sRGB((vector3_type)1.0 / 255.0)); // OETF smallest perceptible value + const vector3_type throughputCIE_Y = hlsl::transpose(colorspace::sRGBtoXYZ)[1] * throughput; // TODO: this only works if spectral_type is dim 3 + const measure_type eta = bxdf.params.ior0 / bxdf.params.ior1; // assume it's real, not imaginary? + const scalar_type monochromeEta = hlsl::dot(throughputCIE_Y, eta) / (throughputCIE_Y.r + throughputCIE_Y.g + throughputCIE_Y.b); // TODO: imaginary eta? 
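+        // note: monochromeEta collapses the per-channel eta into a single scalar by weighting each channel with the CIE-Y response of the current throughput, +        // i.e. eta_mono = dot(Y, eta) / (Y.r + Y.g + Y.b) with Y = transpose(sRGBtoXYZ)[1] * throughput, +        // so the Fresnel / microfacet-cache code below only deals with one index of refraction per path vertex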
+ + // sample lights + const scalar_type neeProbability = 1.0; // BSDFNode_getNEEProb(bsdf); + scalar_type rcpChoiceProb; + if (!math::partitionRandVariable(neeProbability, eps0.z, rcpChoiceProb) && depth < 2u) + { + uint32_t randLightID = uint32_t(float32_t(randGen().x) / numeric_limits::max) * scene.lightCount; + quotient_pdf_type neeContrib_pdf; + scalar_type t; + sample_type nee_sample = nee.generate_and_quotient_and_pdf( + neeContrib_pdf, t, + scene, randLightID, intersection, interaction, + isBSDF, eps0, depth + ); + + // We don't allow non watertight transmitters in this renderer + bool validPath = nee_sample.getNdotL() > numeric_limits::min; + // but if we allowed non-watertight transmitters (single water surface), it would make sense just to apply this line by itself + bxdf::fresnel::OrientedEtas orientedEta = bxdf::fresnel::OrientedEtas::create(interaction.getNdotV(), monochromeEta); + anisocache_type _cache = anisocache_type::template create(interaction, nee_sample, orientedEta); + validPath = validPath && _cache.getNdotH() >= 0.0; + bxdf.params.eta = monochromeEta; + + if (neeContrib_pdf.pdf < numeric_limits::max) + { + if (nbl::hlsl::any(isnan(nee_sample.getL().getDirection()))) + ray.payload.accumulation += vector3_type(1000.f, 0.f, 0.f); + else if (nbl::hlsl::all((vector3_type)69.f == nee_sample.getL().getDirection())) + ray.payload.accumulation += vector3_type(0.f, 1000.f, 0.f); + else if (validPath) + { + bxdf::BxDFClampMode _clamp; + _clamp = (bxdf.materialType == ext::MaterialSystem::MaterialType::DIELECTRIC) ? bxdf::BxDFClampMode::BCM_ABS : bxdf::BxDFClampMode::BCM_MAX; + // example only uses isotropic bxdfs + params_type params = params_type::create(nee_sample, interaction.isotropic, _cache.iso_cache, _clamp); + + quotient_pdf_type bsdf_quotient_pdf = materialSystem.quotient_and_pdf(bxdf.materialType, bxdf.params, params); + neeContrib_pdf.quotient *= bxdf.albedo * throughput * bsdf_quotient_pdf.quotient; + const scalar_type otherGenOverChoice = bsdf_quotient_pdf.pdf * rcpChoiceProb; + const scalar_type otherGenOverLightAndChoice = otherGenOverChoice / bsdf_quotient_pdf.pdf; + neeContrib_pdf.quotient *= otherGenOverChoice / (1.f + otherGenOverLightAndChoice * otherGenOverLightAndChoice); // balance heuristic + + // TODO: ifdef NEE only + // neeContrib_pdf.quotient *= otherGenOverChoice; + + ray_type nee_ray; + nee_ray.origin = intersection + nee_sample.getL().getDirection() * t * Tolerance::getStart(depth); + nee_ray.direction = nee_sample.getL().getDirection(); + nee_ray.intersectionT = t; + if (bsdf_quotient_pdf.pdf < numeric_limits::max && getLuma(neeContrib_pdf.quotient) > lumaContributionThreshold && intersector_type::traceRay(nee_ray, scene).id == -1) + ray.payload.accumulation += neeContrib_pdf.quotient; + } + } + } + + // return false; // NEE only + + // sample BSDF + scalar_type bxdfPdf; + vector3_type bxdfSample; + { + anisocache_type _cache; + sample_type bsdf_sample = materialSystem.generate(bxdf.materialType, bxdf.params, interaction, eps1, _cache); + + bxdf::BxDFClampMode _clamp; + _clamp = (bxdf.materialType == ext::MaterialSystem::MaterialType::DIELECTRIC) ? 
bxdf::BxDFClampMode::BCM_ABS : bxdf::BxDFClampMode::BCM_MAX; + // example only uses isotropic bxdfs + params_type params = params_type::create(bsdf_sample, interaction.isotropic, _cache.iso_cache, _clamp); + + // the value of the bsdf divided by the probability of the sample being generated + quotient_pdf_type bsdf_quotient_pdf = materialSystem.quotient_and_pdf(bxdf.materialType, bxdf.params, params); + throughput *= bxdf.albedo * bsdf_quotient_pdf.quotient; + bxdfPdf = bsdf_quotient_pdf.pdf; + bxdfSample = bsdf_sample.getL().getDirection(); + } + + // additional threshold + const float lumaThroughputThreshold = lumaContributionThreshold; + if (bxdfPdf > bxdfPdfThreshold && getLuma(throughput) > lumaThroughputThreshold) + { + ray.payload.throughput = throughput; + scalar_type otherTechniqueHeuristic = neeProbability / bxdfPdf; // numerically stable, don't touch + ray.payload.otherTechniqueHeuristic = otherTechniqueHeuristic * otherTechniqueHeuristic; + + // trace new ray + ray.origin = intersection + bxdfSample * (1.0/*kSceneSize*/) * Tolerance::getStart(depth); + ray.direction = bxdfSample; + if ((PTPolygonMethod)nee_type::PolygonMethod == PPM_APPROX_PROJECTED_SOLID_ANGLE) + { + ray.normalAtOrigin = interaction.getN(); + ray.wasBSDFAtOrigin = isBSDF; + } + return true; + } + + return false; + } + + void missProgram(NBL_REF_ARG(ray_type) ray) + { + vector3_type finalContribution = ray.payload.throughput; + // #ifdef USE_ENVMAP + // vec2 uv = SampleSphericalMap(_immutable.direction); + // finalContribution *= textureLod(envMap, uv, 0.0).rgb; + // #else + const vector3_type kConstantEnvLightRadiance = vector3_type(0.15, 0.21, 0.3); // TODO: match spectral_type + finalContribution *= kConstantEnvLightRadiance; + ray.payload.accumulation += finalContribution; + // #endif + } + + // Li + measure_type getMeasure(uint32_t numSamples, uint32_t depth, NBL_CONST_REF_ARG(scene_type) scene) + { + measure_type Li = (measure_type)0.0; + scalar_type meanLumaSq = 0.0; + for (uint32_t i = 0; i < numSamples; i++) + { + vector3_type uvw = rand3d(0u, i, randGen.rng()); // TODO: take from scramblebuf? + ray_type ray = rayGen.generate(uvw); + + // bounces + bool hit = true; + bool rayAlive = true; + for (int d = 1; (d <= depth) && hit && rayAlive; d += 2) + { + ray.intersectionT = numeric_limits::max; + ray.objectID = intersector_type::traceRay(ray, scene); + + hit = ray.objectID.id != -1; + if (hit) + rayAlive = closestHitProgram(1, i, ray, scene); + } + if (!hit) + missProgram(ray); + + measure_type accumulation = ray.payload.accumulation; + scalar_type rcpSampleSize = 1.0 / (i + 1); + Li += (accumulation - Li) * rcpSampleSize; + + // TODO: visualize high variance + + // TODO: russian roulette early exit? + } + + return Li; + } + + NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_DEPTH_LOG2 = 4u; + NBL_CONSTEXPR_STATIC_INLINE uint32_t MAX_SAMPLES_LOG2 = 10u; + + randgen_type randGen; + raygen_type rayGen; + material_system_type materialSystem; + nee_type nee; +}; + +} +} +} +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl b/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl new file mode 100644 index 000000000..22695657c --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/present.frag.hlsl @@ -0,0 +1,19 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#pragma wave shader_stage(fragment) + +// vertex shader is provided by the fullScreenTriangle extension +#include +using namespace nbl::hlsl; +using namespace ext::FullScreenTriangle; + +// binding 0 set 0 +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D texture; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState; + +[[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 +{ + return float32_t4(texture.Sample(samplerState, vxAttr.uv).rgb, 1.0f); +} \ No newline at end of file diff --git a/31_HLSLPathTracer/app_resources/hlsl/rand_gen.hlsl b/31_HLSLPathTracer/app_resources/hlsl/rand_gen.hlsl new file mode 100644 index 000000000..4f5302fea --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/rand_gen.hlsl @@ -0,0 +1,38 @@ +#ifndef _NBL_HLSL_EXT_RANDGEN_INCLUDED_ +#define _NBL_HLSL_EXT_RANDGEN_INCLUDED_ + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ +namespace RandGen +{ + +template +struct Uniform3D +{ + using rng_type = RNG; + + static Uniform3D create(uint32_t2 seed) + { + Uniform3D retval; + retval.rng = rng_type::construct(seed); + return retval; + } + + uint32_t3 operator()() + { + return uint32_t3(rng(), rng(), rng()); + } + + rng_type rng; +}; + +} +} +} +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/ray_gen.hlsl b/31_HLSLPathTracer/app_resources/hlsl/ray_gen.hlsl new file mode 100644 index 000000000..0759b1cd3 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/ray_gen.hlsl @@ -0,0 +1,82 @@ +#ifndef _NBL_HLSL_EXT_RAYGEN_INCLUDED_ +#define _NBL_HLSL_EXT_RAYGEN_INCLUDED_ + +#include + +#include "common.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ +namespace RayGen +{ + +template +struct Basic +{ + using this_t = Basic; + using ray_type = Ray; + using scalar_type = typename Ray::scalar_type; + using vector3_type = typename Ray::vector3_type; + + using vector2_type = vector; + using vector4_type = vector; + using matrix4x4_type = matrix; + + static this_t create(NBL_CONST_REF_ARG(vector2_type) pixOffsetParam, NBL_CONST_REF_ARG(vector3_type) camPos, NBL_CONST_REF_ARG(vector4_type) NDC, NBL_CONST_REF_ARG(matrix4x4_type) invMVP) + { + this_t retval; + retval.pixOffsetParam = pixOffsetParam; + retval.camPos = camPos; + retval.NDC = NDC; + retval.invMVP = invMVP; + return retval; + } + + ray_type generate(NBL_CONST_REF_ARG(vector3_type) randVec) + { + ray_type ray; + ray.origin = camPos; + + vector4_type tmp = NDC; + // apply stochastic reconstruction filter + const float gaussianFilterCutoff = 2.5; + const float truncation = nbl::hlsl::exp(-0.5 * gaussianFilterCutoff * gaussianFilterCutoff); + vector2_type remappedRand = randVec.xy; + remappedRand.x *= 1.0 - truncation; + remappedRand.x += truncation; + tmp.xy += pixOffsetParam * nbl::hlsl::boxMullerTransform(remappedRand, 1.5); + // for depth of field we could do another stochastic point-pick + tmp = nbl::hlsl::mul(invMVP, tmp); + ray.direction = nbl::hlsl::normalize(tmp.xyz / tmp.w - camPos); + + // #if POLYGON_METHOD==2 + // ray._immutable.normalAtOrigin = vec3(0.0,0.0,0.0); + // ray._immutable.wasBSDFAtOrigin = false; + // #endif + + ray.payload.accumulation = (vector3_type)0.0; + ray.payload.otherTechniqueHeuristic = 0.0; // needed for direct eye-light paths + ray.payload.throughput = (vector3_type)1.0; + // #ifdef KILL_DIFFUSE_SPECULAR_PATHS + // ray._payload.hasDiffuse = false; + // #endif + + return ray; + } + + vector2_type pixOffsetParam; + 
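// scales the stochastic sub-pixel jitter from boxMullerTransform into NDC units; the GLSL path computes it as vec2(1.0)/vec2(textureSize(scramblebuf,0)) +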
vector3_type camPos; + vector4_type NDC; + matrix4x4_type invMVP; +}; + +} +} +} +} + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl new file mode 100644 index 000000000..a40eb3dd0 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/render.comp.hlsl @@ -0,0 +1,226 @@ +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/random/pcg.hlsl" +#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" +#ifdef PERSISTENT_WORKGROUPS +#include "nbl/builtin/hlsl/math/morton.hlsl" +#endif + +#include "nbl/builtin/hlsl/bxdf/reflection.hlsl" +#include "nbl/builtin/hlsl/bxdf/transmission.hlsl" + +// add these defines (one at a time) using -D argument to dxc +// #define SPHERE_LIGHT +// #define TRIANGLE_LIGHT +// #define RECTANGLE_LIGHT + +#ifdef SPHERE_LIGHT +#define SPHERE_COUNT 9 +#define TRIANGLE_COUNT 0 +#define RECTANGLE_COUNT 0 +#endif + +#ifdef TRIANGLE_LIGHT +#define TRIANGLE_COUNT 1 +#define SPHERE_COUNT 8 +#define RECTANGLE_COUNT 0 +#endif + +#ifdef RECTANGLE_LIGHT +#define RECTANGLE_COUNT 1 +#define SPHERE_COUNT 8 +#define TRIANGLE_COUNT 0 +#endif + +#define LIGHT_COUNT 1 +#define BXDF_COUNT 7 + +#include "render_common.hlsl" +#include "pathtracer.hlsl" + +using namespace nbl; +using namespace hlsl; + +NBL_CONSTEXPR uint32_t WorkgroupSize = 512; +NBL_CONSTEXPR uint32_t MAX_DEPTH_LOG2 = 4; +NBL_CONSTEXPR uint32_t MAX_SAMPLES_LOG2 = 10; + +#ifdef SPHERE_LIGHT +NBL_CONSTEXPR ext::ProceduralShapeType LIGHT_TYPE = ext::PST_SPHERE; +#endif +#ifdef TRIANGLE_LIGHT +NBL_CONSTEXPR ext::ProceduralShapeType LIGHT_TYPE = ext::PST_TRIANGLE; +#endif +#ifdef RECTANGLE_LIGHT +NBL_CONSTEXPR ext::ProceduralShapeType LIGHT_TYPE = ext::PST_RECTANGLE; +#endif + +NBL_CONSTEXPR ext::PTPolygonMethod POLYGON_METHOD = ext::PPM_SOLID_ANGLE; + +int32_t2 getCoordinates() +{ + uint32_t width, height; + outImage.GetDimensions(width, height); + return int32_t2(glsl::gl_GlobalInvocationID().x % width, glsl::gl_GlobalInvocationID().x / width); +} + +float32_t2 getTexCoords() +{ + uint32_t width, height; + outImage.GetDimensions(width, height); + int32_t2 iCoords = getCoordinates(); + return float32_t2(float(iCoords.x) / width, 1.0 - float(iCoords.y) / height); +} + +using ray_dir_info_t = bxdf::ray_dir_info::SBasic; +using iso_interaction = bxdf::surface_interactions::SIsotropic; +using aniso_interaction = bxdf::surface_interactions::SAnisotropic; +using sample_t = bxdf::SLightSample; +using iso_cache = bxdf::SIsotropicMicrofacetCache; +using aniso_cache = bxdf::SAnisotropicMicrofacetCache; +using quotient_pdf_t = sampling::quotient_and_pdf; +using spectral_t = vector; +using create_params_t = bxdf::SBxDFCreationParams; + +using diffuse_bxdf_type = bxdf::reflection::SOrenNayarBxDF; +using conductor_bxdf_type = bxdf::reflection::SGGXBxDF; +using dielectric_bxdf_type = bxdf::transmission::SGGXDielectricBxDF; + +using ray_type = ext::Ray; +using light_type = ext::Light; +using bxdfnode_type = ext::BxDFNode; +using scene_type = ext::Scene; +using randgen_type = ext::RandGen::Uniform3D; +using raygen_type = ext::RayGen::Basic; +using intersector_type = ext::Intersector::Comprehensive; +using material_system_type = ext::MaterialSystem::System; +using nee_type = ext::NextEventEstimator::Estimator; +using pathtracer_type = ext::PathTracer::Unidirectional; + +static const ext::Shape spheres[SPHERE_COUNT] = { + ext::Shape::create(float3(0.0, -100.5, -1.0), 100.0, 0u, 
light_type::INVALID_ID), + ext::Shape::create(float3(2.0, 0.0, -1.0), 0.5, 1u, light_type::INVALID_ID), + ext::Shape::create(float3(0.0, 0.0, -1.0), 0.5, 2u, light_type::INVALID_ID), + ext::Shape::create(float3(-2.0, 0.0, -1.0), 0.5, 3u, light_type::INVALID_ID), + ext::Shape::create(float3(2.0, 0.0, 1.0), 0.5, 4u, light_type::INVALID_ID), + ext::Shape::create(float3(0.0, 0.0, 1.0), 0.5, 4u, light_type::INVALID_ID), + ext::Shape::create(float3(-2.0, 0.0, 1.0), 0.5, 5u, light_type::INVALID_ID), + ext::Shape::create(float3(0.5, 1.0, 0.5), 0.5, 6u, light_type::INVALID_ID) +#ifdef SPHERE_LIGHT + ,ext::Shape::create(float3(-1.5, 1.5, 0.0), 0.3, bxdfnode_type::INVALID_ID, 0u) +#endif +}; + +#ifdef TRIANGLE_LIGHT +static const ext::Shape triangles[TRIANGLE_COUNT] = { + ext::Shape::create(float3(-1.8,0.35,0.3) * 10.0, float3(-1.2,0.35,0.0) * 10.0, float3(-1.5,0.8,-0.3) * 10.0, bxdfnode_type::INVALID_ID, 0u) +}; +#else +static const ext::Shape triangles[1]; +#endif + +#ifdef RECTANGLE_LIGHT +static const ext::Shape rectangles[RECTANGLE_COUNT] = { + ext::Shape::create(float3(-3.8,0.35,1.3), normalize(float3(2,0,-1))*7.0, normalize(float3(2,-5,4))*0.1, bxdfnode_type::INVALID_ID, 0u) +}; +#else +static const ext::Shape rectangles[1]; +#endif + +static const light_type lights[LIGHT_COUNT] = { + light_type::create(spectral_t(30.0,25.0,15.0), +#ifdef SPHERE_LIGHT + 8u, +#else + 0u, +#endif + ext::IntersectMode::IM_PROCEDURAL, LIGHT_TYPE) +}; + +static const bxdfnode_type bxdfs[BXDF_COUNT] = { + bxdfnode_type::create(ext::MaterialSystem::MaterialType::DIFFUSE, false, float2(0,0), spectral_t(0.8,0.8,0.8)), + bxdfnode_type::create(ext::MaterialSystem::MaterialType::DIFFUSE, false, float2(0,0), spectral_t(0.8,0.4,0.4)), + bxdfnode_type::create(ext::MaterialSystem::MaterialType::DIFFUSE, false, float2(0,0), spectral_t(0.4,0.8,0.4)), + bxdfnode_type::create(ext::MaterialSystem::MaterialType::CONDUCTOR, false, float2(0,0), spectral_t(1.02,1.02,1.3), spectral_t(1.0,1.0,2.0)), + bxdfnode_type::create(ext::MaterialSystem::MaterialType::CONDUCTOR, false, float2(0,0), spectral_t(1.02,1.3,1.02), spectral_t(1.0,2.0,1.0)), + bxdfnode_type::create(ext::MaterialSystem::MaterialType::CONDUCTOR, false, float2(0.15,0.15), spectral_t(1.02,1.3,1.02), spectral_t(1.0,2.0,1.0)), + bxdfnode_type::create(ext::MaterialSystem::MaterialType::DIELECTRIC, false, float2(0.0625,0.0625), spectral_t(1,1,1), spectral_t(0.71,0.69,0.67)) +}; + +static const ext::Scene scene = ext::Scene::create( + spheres, triangles, rectangles, + SPHERE_COUNT, TRIANGLE_COUNT, RECTANGLE_COUNT, + lights, LIGHT_COUNT, bxdfs, BXDF_COUNT +); + +[numthreads(WorkgroupSize, 1, 1)] +void main(uint32_t3 threadID : SV_DispatchThreadID) +{ + uint32_t width, height; + outImage.GetDimensions(width, height); +#ifdef PERSISTENT_WORKGROUPS + uint32_t virtualThreadIndex; + [loop] + for (uint32_t virtualThreadBase = glsl::gl_WorkGroupID().x * WorkgroupSize; virtualThreadBase < 1920*1080; virtualThreadBase += glsl::gl_NumWorkGroups().x * WorkgroupSize) // not sure why 1280*720 doesn't cover draw surface + { + virtualThreadIndex = virtualThreadBase + glsl::gl_LocalInvocationIndex().x; + const int32_t2 coords = (int32_t2)math::Morton::decode2d(virtualThreadIndex); +#else + const int32_t2 coords = getCoordinates(); +#endif + float32_t2 texCoord = float32_t2(coords) / float32_t2(width, height); + texCoord.y = 1.0 - texCoord.y; + + if (false == (all((int32_t2)0 < coords)) && all(int32_t2(width, height) < coords)) { +#ifdef PERSISTENT_WORKGROUPS + continue; +#else + return; +#endif + 
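+        // Under PERSISTENT_WORKGROUPS every launched thread loops over a range of virtual thread
+        // indices (Morton-decoded into 2D pixel coordinates above for locality), so each early-out in
+        // the loop body has to `continue` to the next virtual pixel rather than `return`; without the
+        // define the dispatch is one thread per pixel and returning is fine.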
} + + if (((pc.depth - 1) >> MAX_DEPTH_LOG2) > 0 || ((pc.sampleCount - 1) >> MAX_SAMPLES_LOG2) > 0) + { + float32_t4 pixelCol = float32_t4(1.0,0.0,0.0,1.0); + outImage[coords] = pixelCol; +#ifdef PERSISTENT_WORKGROUPS + continue; +#else + return; +#endif + } + + int flatIdx = glsl::gl_GlobalInvocationID().y * glsl::gl_NumWorkGroups().x * WorkgroupSize + glsl::gl_GlobalInvocationID().x; + + // set up path tracer + ext::PathTracer::PathTracerCreationParams ptCreateParams; + ptCreateParams.rngState = scramblebuf[coords].rg; + + uint2 scrambleDim; + scramblebuf.GetDimensions(scrambleDim.x, scrambleDim.y); + ptCreateParams.pixOffsetParam = (float2)1.0 / float2(scrambleDim); + + float4 NDC = float4(texCoord * float2(2.0, -2.0) + float2(-1.0, 1.0), 0.0, 1.0); + { + float4 tmp = mul(pc.invMVP, NDC); + ptCreateParams.camPos = tmp.xyz / tmp.w; + NDC.z = 1.0; + } + + ptCreateParams.NDC = NDC; + ptCreateParams.invMVP = pc.invMVP; + + ptCreateParams.diffuseParams = bxdfs[0].params; + ptCreateParams.conductorParams = bxdfs[3].params; + ptCreateParams.dielectricParams = bxdfs[6].params; + + pathtracer_type pathtracer = pathtracer_type::create(ptCreateParams); + + float32_t3 color = pathtracer.getMeasure(pc.sampleCount, pc.depth, scene); + float32_t4 pixCol = float32_t4(color, 1.0); + outImage[coords] = pixCol; + +#ifdef PERSISTENT_WORKGROUPS + } +#endif +} diff --git a/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl new file mode 100644 index 000000000..5e5cf89da --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/render_common.hlsl @@ -0,0 +1,23 @@ +#ifndef _NBL_HLSL_PATHTRACER_RENDER_COMMON_INCLUDED_ +#define _NBL_HLSL_PATHTRACER_RENDER_COMMON_INCLUDED_ + +struct SPushConstants +{ + float32_t4x4 invMVP; + int sampleCount; + int depth; +}; + +[[vk::push_constant]] SPushConstants pc; + +[[vk::combinedImageSampler]][[vk::binding(0, 2)]] Texture2D envMap; // unused +[[vk::combinedImageSampler]][[vk::binding(0, 2)]] SamplerState envSampler; + +[[vk::binding(1, 2)]] Buffer sampleSequence; + +[[vk::combinedImageSampler]][[vk::binding(2, 2)]] Texture2D scramblebuf; // unused +[[vk::combinedImageSampler]][[vk::binding(2, 2)]] SamplerState scrambleSampler; + +[[vk::image_format("rgba16f")]][[vk::binding(0, 0)]] RWTexture2D outImage; + +#endif diff --git a/31_HLSLPathTracer/app_resources/hlsl/scene.hlsl b/31_HLSLPathTracer/app_resources/hlsl/scene.hlsl new file mode 100644 index 000000000..40fb01057 --- /dev/null +++ b/31_HLSLPathTracer/app_resources/hlsl/scene.hlsl @@ -0,0 +1,111 @@ +#ifndef _NBL_HLSL_EXT_PATHTRACING_SCENE_INCLUDED_ +#define _NBL_HLSL_EXT_PATHTRACING_SCENE_INCLUDED_ + +#include "common.hlsl" + +namespace nbl +{ +namespace hlsl +{ +namespace ext +{ + +template +struct Scene +{ + using light_type = Light; + using bxdfnode_type = BxdfNode; + using this_t = Scene; + + // NBL_CONSTEXPR_STATIC_INLINE uint32_t maxSphereCount = 25; + // NBL_CONSTEXPR_STATIC_INLINE uint32_t maxTriangleCount = 12; + // NBL_CONSTEXPR_STATIC_INLINE uint32_t maxRectangleCount = 12; + +#if SPHERE_COUNT < 1 +#define SCENE_SPHERE_COUNT 1 +#else +#define SCENE_SPHERE_COUNT SPHERE_COUNT +#endif + +#if TRIANGLE_COUNT < 1 +#define SCENE_TRIANGLE_COUNT 1 +#else +#define SCENE_TRIANGLE_COUNT TRIANGLE_COUNT +#endif + +#if RECTANGLE_COUNT < 1 +#define SCENE_RECTANGLE_COUNT 1 +#else +#define SCENE_RECTANGLE_COUNT RECTANGLE_COUNT +#endif + + Shape spheres[SCENE_SPHERE_COUNT]; + Shape triangles[SCENE_TRIANGLE_COUNT]; + Shape rectangles[SCENE_RECTANGLE_COUNT]; + + 
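+    // Zero-sized arrays are not legal HLSL, so the SCENE_*_COUNT macros above pad each array to at
+    // least one element whenever the corresponding *_COUNT define is 0; the counts stored below hold
+    // the real (possibly zero) element counts.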
uint32_t sphereCount; + uint32_t triangleCount; + uint32_t rectangleCount; + + // NBL_CONSTEXPR_STATIC_INLINE uint32_t maxLightCount = 4; + + light_type lights[LIGHT_COUNT]; + uint32_t lightCount; + + // NBL_CONSTEXPR_STATIC_INLINE uint32_t maxBxdfCount = 16; + + bxdfnode_type bxdfs[BXDF_COUNT]; + uint32_t bxdfCount; + + // AS ases; + + static this_t create( + NBL_CONST_REF_ARG(Shape) spheres[SCENE_SPHERE_COUNT], + NBL_CONST_REF_ARG(Shape) triangles[SCENE_TRIANGLE_COUNT], + NBL_CONST_REF_ARG(Shape) rectangles[SCENE_RECTANGLE_COUNT], + uint32_t sphereCount, uint32_t triangleCount, uint32_t rectangleCount, + NBL_CONST_REF_ARG(light_type) lights[LIGHT_COUNT], uint32_t lightCount, + NBL_CONST_REF_ARG(bxdfnode_type) bxdfs[BXDF_COUNT], uint32_t bxdfCount) + { + this_t retval; + retval.spheres = spheres; + retval.triangles = triangles; + retval.rectangles = rectangles; + retval.sphereCount = sphereCount; + retval.triangleCount = triangleCount; + retval.rectangleCount = rectangleCount; + + retval.lights = lights; + retval.lightCount = lightCount; + + retval.bxdfs = bxdfs; + retval.bxdfCount = bxdfCount; + return retval; + } + +#undef SCENE_SPHERE_COUNT +#undef SCENE_TRIANGLE_COUNT +#undef SCENE_RECTANGLE_COUNT + + // TODO: get these to work with AS types as well + uint32_t getBsdfLightIDs(NBL_CONST_REF_ARG(ObjectID) objectID) + { + return (objectID.shapeType == PST_SPHERE) ? spheres[objectID.id].bsdfLightIDs : + (objectID.shapeType == PST_TRIANGLE) ? triangles[objectID.id].bsdfLightIDs : + (objectID.shapeType == PST_RECTANGLE) ? rectangles[objectID.id].bsdfLightIDs : -1; + } + + float32_t3 getNormal(NBL_CONST_REF_ARG(ObjectID) objectID, NBL_CONST_REF_ARG(float32_t3) intersection) + { + return (objectID.shapeType == PST_SPHERE) ? spheres[objectID.id].getNormal(intersection) : + (objectID.shapeType == PST_TRIANGLE) ? triangles[objectID.id].getNormalTimesArea() : + (objectID.shapeType == PST_RECTANGLE) ? rectangles[objectID.id].getNormalTimesArea() : + (float32_t3)0.0; + } +}; + +} +} +} + +#endif diff --git a/60_ClusteredRendering/config.json.template b/31_HLSLPathTracer/config.json.template similarity index 99% rename from 60_ClusteredRendering/config.json.template rename to 31_HLSLPathTracer/config.json.template index f961745c1..24adf54fb 100644 --- a/60_ClusteredRendering/config.json.template +++ b/31_HLSLPathTracer/config.json.template @@ -25,4 +25,4 @@ "outputs": [] } ] -} \ No newline at end of file +} diff --git a/31_HLSLPathTracer/include/nbl/this_example/common.hpp b/31_HLSLPathTracer/include/nbl/this_example/common.hpp new file mode 100644 index 000000000..db051bb3e --- /dev/null +++ b/31_HLSLPathTracer/include/nbl/this_example/common.hpp @@ -0,0 +1,17 @@ +#ifndef __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ +#define __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ + +#include + +// common api +#include "nbl/examples/common/SimpleWindowedApplication.hpp" +#include "nbl/examples/examples.hpp" +#include "nbl/examples/cameras/CCamera.hpp" +#include "nbl/examples/common/CEventCallback.hpp" + +// example's own headers +#include "nbl/ui/ICursorControl.h" +#include "nbl/ext/ImGui/ImGui.h" +#include "imgui/imgui_internal.h" + +#endif // __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ \ No newline at end of file diff --git a/31_HLSLPathTracer/main.cpp b/31_HLSLPathTracer/main.cpp new file mode 100644 index 000000000..2e139af8d --- /dev/null +++ b/31_HLSLPathTracer/main.cpp @@ -0,0 +1,1425 @@ +// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
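+// The PTPushConstant struct below mirrors SPushConstants in app_resources/hlsl/render_common.hlsl
+// (a 4x4 float matrix followed by the sample count and path depth); the two must stay in sync since
+// the struct is pushed verbatim via pushConstants().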
+// For conditions of distribution and use, see copyright notice in nabla.h + +#include "nbl/this_example/common.hpp" +#include "nbl/asset/interchange/IImageAssetHandlerBase.h" +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" +#include "nbl/builtin/hlsl/surface_transform.h" + +using namespace nbl; +using namespace core; +using namespace hlsl; +using namespace system; +using namespace asset; +using namespace ui; +using namespace video; +using namespace nbl::examples; + +struct PTPushConstant { + matrix4SIMD invMVP; + int sampleCount; + int depth; +}; + +// TODO: Add a QueryPool for timestamping once its ready +// TODO: Do buffer creation using assConv +class HLSLComputePathtracer final : public SimpleWindowedApplication, public BuiltinResourcesApplication +{ + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; + using clock_t = std::chrono::steady_clock; + + enum E_LIGHT_GEOMETRY : uint8_t + { + ELG_SPHERE, + ELG_TRIANGLE, + ELG_RECTANGLE, + ELG_COUNT + }; + + enum E_RENDER_MODE : uint8_t + { + ERM_GLSL, + ERM_HLSL, + // ERM_CHECKERED, + ERM_COUNT + }; + + constexpr static inline uint32_t2 WindowDimensions = { 1280, 720 }; + constexpr static inline uint32_t MaxFramesInFlight = 5; + constexpr static inline clock_t::duration DisplayImageDuration = std::chrono::milliseconds(900); + constexpr static inline uint32_t DefaultWorkGroupSize = 512u; + constexpr static inline uint32_t MaxDescriptorCount = 256u; + constexpr static inline uint32_t MaxDepthLog2 = 4u; // 5 + constexpr static inline uint32_t MaxSamplesLog2 = 10u; // 18 + constexpr static inline uint32_t MaxBufferDimensions = 3u << MaxDepthLog2; + constexpr static inline uint32_t MaxBufferSamples = 1u << MaxSamplesLog2; + constexpr static inline uint8_t MaxUITextureCount = 1u; + static inline std::string DefaultImagePathsFile = "envmap/envmap_0.exr"; + static inline std::string OwenSamplerFilePath = "owen_sampler_buffer.bin"; + static inline std::array PTGLSLShaderPaths = { "app_resources/glsl/litBySphere.comp", "app_resources/glsl/litByTriangle.comp", "app_resources/glsl/litByRectangle.comp" }; + static inline std::string PTHLSLShaderPath = "app_resources/hlsl/render.comp.hlsl"; + static inline std::array PTHLSLShaderVariants = { "SPHERE_LIGHT", "TRIANGLE_LIGHT", "RECTANGLE_LIGHT" }; + static inline std::string PresentShaderPath = "app_resources/hlsl/present.frag.hlsl"; + + const char* shaderNames[E_LIGHT_GEOMETRY::ELG_COUNT] = { + "ELG_SPHERE", + "ELG_TRIANGLE", + "ELG_RECTANGLE" + }; + + const char* shaderTypes[E_RENDER_MODE::ERM_COUNT] = { + "ERM_GLSL", + "ERM_HLSL" + }; + + public: + inline HLSLComputePathtracer(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} + + inline bool isComputeOnly() const override { return false; } + + inline core::vector getSurfaces() const override + { + if (!m_surface) + { + { + auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); + IWindow::SCreationParams params = {}; + params.callback = core::make_smart_refctd_ptr(); + params.width = WindowDimensions.x; + params.height = WindowDimensions.y; + params.x = 32; + params.y = 32; + params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; + params.windowCaption = "ComputeShaderPathtracer"; + params.callback = windowCallback; + const_cast&>(m_window) 
= m_winMgr->createWindow(std::move(params)); + } + + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); + const_cast&>(m_surface) = nbl::video::CSimpleResizeSurface::create(std::move(surface)); + } + + if (m_surface) + return { {m_surface->getSurface()/*,EQF_NONE*/} }; + + return {}; + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + // Init systems + { + m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); + + // Remember to call the base class initialization! + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + if (!asset_base_t::onAppInitialized(std::move(system))) + return false; + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + + if (!m_semaphore) + return logFail("Failed to create semaphore!"); + } + + // Create renderpass and init surface + nbl::video::IGPURenderpass* renderpass; + { + ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; + if (!swapchainParams.deduceFormat(m_physicalDevice)) + return logFail("Could not choose a Surface Format for the Swapchain!"); + + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = + { + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = + { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = + { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + + auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); + renderpass = scResources->getRenderpass(); + + if (!renderpass) + return logFail("Failed to create Renderpass!"); + + auto gQueue = getGraphicsQueue(); + if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) + return logFail("Could not create Window & Surface or initialize the Surface!"); + } + + // image upload utils + { + m_scratchSemaphore = m_device->createSemaphore(0); + if (!m_scratchSemaphore) + return logFail("Could not create Scratch Semaphore"); + m_scratchSemaphore->setObjectDebugName("Scratch Semaphore"); + // we don't want to overcomplicate the example with multi-queue + m_intendedSubmit.queue = getGraphicsQueue(); + // wait for nothing before upload + m_intendedSubmit.waitSemaphores = {}; + m_intendedSubmit.waitSemaphores = {}; + // fill later + m_intendedSubmit.scratchCommandBuffers = {}; + m_intendedSubmit.scratchSemaphore = { + .semaphore = m_scratchSemaphore.get(), + .value = 0, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS + }; + } + + // Create command pool and buffers + { + auto gQueue = getGraphicsQueue(); + m_cmdPool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!m_cmdPool) + return logFail("Couldn't create Command Pool!"); + + if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data(), MaxFramesInFlight })) + return 
logFail("Couldn't create Command Buffer!"); + } + + ISampler::SParams samplerParams = { + .AnisotropicFilter = 0 + }; + auto defaultSampler = m_device->createSampler(samplerParams); + + // Create descriptors and pipeline for the pathtracer + { + auto convertDSLayoutCPU2GPU = [&](smart_refctd_ptr cpuLayout) { + auto converter = CAssetConverter::create({ .device = m_device.get() }); + CAssetConverter::SInputs inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + CAssetConverter::SConvertParams params = {}; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = { &cpuLayout.get(),1 }; + // don't need to assert that we don't need to provide patches since layouts are not patchable + //assert(true); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuLayout = reservation.getGPUObjects().front().value; + if (!gpuLayout) { + m_logger->log("Failed to convert %s into an IGPUDescriptorSetLayout handle", ILogger::ELL_ERROR); + std::exit(-1); + } + + return gpuLayout; + }; + auto convertDSCPU2GPU = [&](smart_refctd_ptr cpuDS) { + auto converter = CAssetConverter::create({ .device = m_device.get() }); + CAssetConverter::SInputs inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + CAssetConverter::SConvertParams params = {}; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = { &cpuDS.get(), 1 }; + // don't need to assert that we don't need to provide patches since layouts are not patchable + //assert(true); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuDS = reservation.getGPUObjects().front().value; + if (!gpuDS) { + m_logger->log("Failed to convert %s into an IGPUDescriptorSet handle", ILogger::ELL_ERROR); + std::exit(-1); + } + + return gpuDS; + }; + + std::array descriptorSet0Bindings = {}; + std::array descriptorSet3Bindings = {}; + std::array presentDescriptorSetBindings; + + descriptorSet0Bindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSet3Bindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSet3Bindings[1] = { + .binding = 1u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + descriptorSet3Bindings[2] = { + .binding = 2u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1u, + .immutableSamplers = nullptr + }; + presentDescriptorSetBindings[0] = { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = 1u, + 
.immutableSamplers = &defaultSampler + }; + + auto cpuDescriptorSetLayout0 = make_smart_refctd_ptr(descriptorSet0Bindings); + auto cpuDescriptorSetLayout2 = make_smart_refctd_ptr(descriptorSet3Bindings); + + auto gpuDescriptorSetLayout0 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout0); + auto gpuDescriptorSetLayout2 = convertDSLayoutCPU2GPU(cpuDescriptorSetLayout2); + auto gpuPresentDescriptorSetLayout = m_device->createDescriptorSetLayout(presentDescriptorSetBindings); + + auto cpuDescriptorSet0 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout0)); + auto cpuDescriptorSet2 = make_smart_refctd_ptr(std::move(cpuDescriptorSetLayout2)); + + m_descriptorSet0 = convertDSCPU2GPU(cpuDescriptorSet0); + m_descriptorSet2 = convertDSCPU2GPU(cpuDescriptorSet2); + + smart_refctd_ptr presentDSPool; + { + const video::IGPUDescriptorSetLayout* const layouts[] = { gpuPresentDescriptorSetLayout.get() }; + const uint32_t setCounts[] = { 1u }; + presentDSPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); + } + m_presentDescriptorSet = presentDSPool->createDescriptorSet(gpuPresentDescriptorSetLayout); + + // Create Shaders + auto loadAndCompileGLSLShader = [&](const std::string& pathToShader, bool persistentWorkGroups = false) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.workingDirectory = localInputCWD; + auto assetBundle = m_assetMgr->getAsset(pathToShader, lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + m_logger->log("Could not load shader: ", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + auto source = smart_refctd_ptr_static_cast(assets[0]); + // The down-cast should not fail! + assert(source); + + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + CGLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; // should be compute + options.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#endif + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; + options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); + options.preprocessorOptions.logger = m_logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + const IShaderCompiler::SMacroDefinition persistentDefine = { "PERSISTENT_WORKGROUPS", "1" }; + if (persistentWorkGroups) + options.preprocessorOptions.extraDefines = { &persistentDefine, &persistentDefine + 1 }; + + source = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); + + // this time we skip the use of the asset converter since the ICPUShader->IGPUShader path is quick and simple + auto shader = m_device->compileShader({ source.get(), nullptr, nullptr, nullptr }); + if (!shader) + { + m_logger->log("GLSL shader creationed failed: %s!", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + return shader; + }; + + auto loadAndCompileHLSLShader = [&](const std::string& pathToShader, const std::string& defineMacro = "", bool persistentWorkGroups = false) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.workingDirectory = localInputCWD; + auto assetBundle = m_assetMgr->getAsset(pathToShader, lp); + 
const auto assets = assetBundle.getContents(); + if (assets.empty()) + { + m_logger->log("Could not load shader: ", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + auto source = smart_refctd_ptr_static_cast(assets[0]); + // The down-cast should not fail! + assert(source); + + auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + CHLSLCompiler::SOptions options = {}; + options.stage = IShader::E_SHADER_STAGE::ESS_COMPUTE; + options.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; + options.spirvOptimizer = nullptr; +#ifndef _NBL_DEBUG + ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; + auto opt = make_smart_refctd_ptr(std::span(&optPasses, 1)); + options.spirvOptimizer = opt.get(); +#endif + options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_LINE_BIT; + options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); + options.preprocessorOptions.logger = m_logger.get(); + options.preprocessorOptions.includeFinder = compiler->getDefaultIncludeFinder(); + + const IShaderCompiler::SMacroDefinition defines[2] = { {defineMacro, ""}, { "PERSISTENT_WORKGROUPS", "1" } }; + if (!defineMacro.empty() && persistentWorkGroups) + options.preprocessorOptions.extraDefines = { defines, defines + 2 }; + else if (!defineMacro.empty() && !persistentWorkGroups) + options.preprocessorOptions.extraDefines = { defines, defines + 1 }; + + source = compiler->compileToSPIRV((const char*)source->getContent()->getPointer(), options); + + auto shader = m_device->compileShader({ source.get(), nullptr, nullptr, nullptr }); + if (!shader) + { + m_logger->log("HLSL shader creationed failed: %s!", ILogger::ELL_ERROR, pathToShader); + std::exit(-1); + } + + return shader; + }; + + // Create compute pipelines + { + for (int index = 0; index < E_LIGHT_GEOMETRY::ELG_COUNT; index++) { + const nbl::asset::SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, + .offset = 0, + .size = sizeof(PTPushConstant) + }; + auto ptPipelineLayout = m_device->createPipelineLayout( + { &pcRange, 1 }, + core::smart_refctd_ptr(gpuDescriptorSetLayout0), + nullptr, + core::smart_refctd_ptr(gpuDescriptorSetLayout2), + nullptr + ); + if (!ptPipelineLayout) { + return logFail("Failed to create Pathtracing pipeline layout"); + } + + { + auto ptShader = loadAndCompileGLSLShader(PTGLSLShaderPaths[index]); + + IGPUComputePipeline::SCreationParams params = {}; + params.layout = ptPipelineLayout.get(); + params.shader.shader = ptShader.get(); + params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.cached.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTGLSLPipelines.data() + index)) + return logFail("Failed to create GLSL compute pipeline!\n"); + } + { + auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index]); + + IGPUComputePipeline::SCreationParams params = {}; + params.layout = ptPipelineLayout.get(); + params.shader.shader = ptShader.get(); + params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.cached.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPipelines.data() + index)) + return logFail("Failed to create HLSL compute pipeline!\n"); + } + + // persistent wg pipelines + { + auto ptShader = 
loadAndCompileGLSLShader(PTGLSLShaderPaths[index], true); + + IGPUComputePipeline::SCreationParams params = {}; + params.layout = ptPipelineLayout.get(); + params.shader.shader = ptShader.get(); + params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.cached.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTGLSLPersistentWGPipelines.data() + index)) + return logFail("Failed to create GLSL PersistentWG compute pipeline!\n"); + } + { + auto ptShader = loadAndCompileHLSLShader(PTHLSLShaderPath, PTHLSLShaderVariants[index], true); + + IGPUComputePipeline::SCreationParams params = {}; + params.layout = ptPipelineLayout.get(); + params.shader.shader = ptShader.get(); + params.shader.entryPoint = "main"; + params.shader.entries = nullptr; + params.cached.requireFullSubgroups = true; + params.shader.requiredSubgroupSize = static_cast(5); + if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, m_PTHLSLPersistentWGPipelines.data() + index)) + return logFail("Failed to create HLSL PersistentWG compute pipeline!\n"); + } + } + } + + // Create graphics pipeline + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); + if (!fsTriProtoPPln) + return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); + + // Load Fragment Shader + auto fragmentShader = loadAndCompileHLSLShader(PresentShaderPath); + if (!fragmentShader) + return logFail("Failed to Load and Compile Fragment Shader: lumaMeterShader!"); + + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = fragmentShader.get(), + .entryPoint = "main" + }; + + auto presentLayout = m_device->createPipelineLayout( + {}, + core::smart_refctd_ptr(gpuPresentDescriptorSetLayout), + nullptr, + nullptr, + nullptr + ); + m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); + if (!m_presentPipeline) + return logFail("Could not create Graphics Pipeline!"); + + } + } + + // load CPUImages and convert to GPUImages + smart_refctd_ptr envMap, scrambleMap; + { + auto convertImgCPU2GPU = [&](std::span cpuImgs) { + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + std::array commandBufferInfo = { cmdbuf }; + core::smart_refctd_ptr imgFillSemaphore = m_device->createSemaphore(0); + imgFillSemaphore->setObjectDebugName("Image Fill Semaphore"); + + auto converter = CAssetConverter::create({ .device = m_device.get() }); + // We don't want to generate mip-maps for these images, to ensure that we must override the default callbacks. 
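+			// The overrides below do exactly that: getMipLevelCount() keeps whatever mip count the CPU image
+			// already has, needToRecomputeMips() returns 0 so the converter regenerates nothing, and the
+			// shared-ownership override only applies when more than one queue family is actually involved.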
+ struct SInputs final : CAssetConverter::SInputs + { + // we also need to override this to have concurrent sharing + inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUImage* buffer, const CAssetConverter::patch_t& patch) const override + { + if (familyIndices.size() > 1) + return familyIndices; + return {}; + } + + inline uint8_t getMipLevelCount(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return image->getCreationParameters().mipLevels; + } + inline uint16_t needToRecomputeMips(const size_t groupCopyID, const ICPUImage* image, const CAssetConverter::patch_t& patch) const override + { + return 0b0u; + } + + std::vector familyIndices; + } inputs = {}; + inputs.readCache = converter.get(); + inputs.logger = m_logger.get(); + { + const core::set uniqueFamilyIndices = { queue->getFamilyIndex(), queue->getFamilyIndex() }; + inputs.familyIndices = { uniqueFamilyIndices.begin(),uniqueFamilyIndices.end() }; + } + // scratch command buffers for asset converter transfer commands + SIntendedSubmitInfo transfer = { + .queue = queue, + .waitSemaphores = {}, + .prevCommandBuffers = {}, + .scratchCommandBuffers = commandBufferInfo, + .scratchSemaphore = { + .semaphore = imgFillSemaphore.get(), + .value = 0, + // because of layout transitions + .stageMask = PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS + } + }; + // as per the `SIntendedSubmitInfo` one commandbuffer must be begun + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // Normally we'd have to inherit and override the `getFinalOwnerQueueFamily` callback to ensure that the + // compute queue becomes the owner of the buffers and images post-transfer, but in this example we use concurrent sharing + CAssetConverter::SConvertParams params = {}; + params.transfer = &transfer; + params.utilities = m_utils.get(); + + std::get>(inputs.assets) = cpuImgs; + // assert that we don't need to provide patches + assert(cpuImgs[0]->getImageUsageFlags().hasFlags(ICPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT)); + auto reservation = converter->reserve(inputs); + // the `.value` is just a funny way to make the `smart_refctd_ptr` copyable + auto gpuImgs = reservation.getGPUObjects(); + for (auto& gpuImg : gpuImgs) { + if (!gpuImg) { + m_logger->log("Failed to convert %s into an IGPUImage handle", ILogger::ELL_ERROR, DefaultImagePathsFile); + std::exit(-1); + } + } + + // and launch the conversions + m_api->startCapture(); + auto result = reservation.convert(params); + m_api->endCapture(); + if (!result.blocking() && result.copy() != IQueue::RESULT::SUCCESS) { + m_logger->log("Failed to record or submit conversions", ILogger::ELL_ERROR); + std::exit(-1); + } + + envMap = gpuImgs[0].value; + scrambleMap = gpuImgs[1].value; + }; + + smart_refctd_ptr envMapCPU, scrambleMapCPU; + { + IAssetLoader::SAssetLoadParams lp; + lp.workingDirectory = this->sharedInputCWD; + SAssetBundle bundle = m_assetMgr->getAsset(DefaultImagePathsFile, lp); + if (bundle.getContents().empty()) { + m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + std::exit(-1); + } + + envMapCPU = IAsset::castDown(bundle.getContents()[0]); + if (!envMapCPU) { + m_logger->log("Couldn't load an asset.", ILogger::ELL_ERROR); + std::exit(-1); + } + }; + { + asset::ICPUImage::SCreationParams info; + info.format = asset::E_FORMAT::EF_R32G32_UINT; + info.type = asset::ICPUImage::ET_2D; + auto extent = envMapCPU->getCreationParameters().extent; + info.extent.width = extent.width; + 
info.extent.height = extent.height; + info.extent.depth = 1u; + info.mipLevels = 1u; + info.arrayLayers = 1u; + info.samples = asset::ICPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT; + info.flags = static_cast(0u); + info.usage = asset::IImage::EUF_TRANSFER_SRC_BIT | asset::IImage::EUF_SAMPLED_BIT; + + scrambleMapCPU = ICPUImage::create(std::move(info)); + const uint32_t texelFormatByteSize = getTexelOrBlockBytesize(scrambleMapCPU->getCreationParameters().format); + const uint32_t texelBufferSize = scrambleMapCPU->getImageDataSizeInBytes(); + auto texelBuffer = ICPUBuffer::create({ texelBufferSize }); + + core::RandomSampler rng(0xbadc0ffeu); + auto out = reinterpret_cast(texelBuffer->getPointer()); + for (auto index = 0u; index < texelBufferSize / 4; index++) { + out[index] = rng.nextSample(); + } + + auto regions = core::make_refctd_dynamic_array>(1u); + ICPUImage::SBufferCopy& region = regions->front(); + region.imageSubresource.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + region.imageSubresource.mipLevel = 0u; + region.imageSubresource.baseArrayLayer = 0u; + region.imageSubresource.layerCount = 1u; + region.bufferOffset = 0u; + region.bufferRowLength = IImageAssetHandlerBase::calcPitchInBlocks(extent.width, texelFormatByteSize); + region.bufferImageHeight = 0u; + region.imageOffset = { 0u, 0u, 0u }; + region.imageExtent = scrambleMapCPU->getCreationParameters().extent; + + scrambleMapCPU->setBufferAndRegions(std::move(texelBuffer), regions); + } + + std::array cpuImgs = { envMapCPU.get(), scrambleMapCPU.get()}; + convertImgCPU2GPU(cpuImgs); + } + + // create views for textures + { + auto createHDRIImage = [this](const asset::E_FORMAT colorFormat, const uint32_t width, const uint32_t height) -> smart_refctd_ptr { + IGPUImage::SCreationParams imgInfo; + imgInfo.format = colorFormat; + imgInfo.type = IGPUImage::ET_2D; + imgInfo.extent.width = width; + imgInfo.extent.height = height; + imgInfo.extent.depth = 1u; + imgInfo.mipLevels = 1u; + imgInfo.arrayLayers = 1u; + imgInfo.samples = IGPUImage::ESCF_1_BIT; + imgInfo.flags = static_cast(0u); + imgInfo.usage = asset::IImage::EUF_STORAGE_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_SAMPLED_BIT; + + auto image = m_device->createImage(std::move(imgInfo)); + auto imageMemReqs = image->getMemoryReqs(); + imageMemReqs.memoryTypeBits &= m_device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); + m_device->allocate(imageMemReqs, image.get()); + + return image; + }; + auto createHDRIImageView = [this](smart_refctd_ptr img) -> smart_refctd_ptr + { + auto format = img->getCreationParameters().format; + IGPUImageView::SCreationParams imgViewInfo; + imgViewInfo.image = std::move(img); + imgViewInfo.format = format; + imgViewInfo.viewType = IGPUImageView::ET_2D; + imgViewInfo.flags = static_cast(0u); + imgViewInfo.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + imgViewInfo.subresourceRange.baseArrayLayer = 0u; + imgViewInfo.subresourceRange.baseMipLevel = 0u; + imgViewInfo.subresourceRange.layerCount = 1u; + imgViewInfo.subresourceRange.levelCount = 1u; + + return m_device->createImageView(std::move(imgViewInfo)); + }; + + auto params = envMap->getCreationParameters(); + auto extent = params.extent; + envMap->setObjectDebugName("Env Map"); + m_envMapView = createHDRIImageView(envMap); + m_envMapView->setObjectDebugName("Env Map View"); + scrambleMap->setObjectDebugName("Scramble Map"); + m_scrambleView = createHDRIImageView(scrambleMap); + m_scrambleView->setObjectDebugName("Scramble Map View"); + auto 
outImg = createHDRIImage(asset::E_FORMAT::EF_R16G16B16A16_SFLOAT, WindowDimensions.x, WindowDimensions.y); + outImg->setObjectDebugName("Output Image"); + m_outImgView = createHDRIImageView(outImg); + m_outImgView->setObjectDebugName("Output Image View"); + } + + // create sequence buffer view + { + // TODO: do this better use asset manager to get the ICPUBuffer from `.bin` + auto createBufferFromCacheFile = [this]( + system::path filename, + size_t bufferSize, + void *data, + smart_refctd_ptr& buffer + ) -> std::pair, bool> + { + ISystem::future_t> owenSamplerFileFuture; + ISystem::future_t owenSamplerFileReadFuture; + size_t owenSamplerFileBytesRead; + + m_system->createFile(owenSamplerFileFuture, localOutputCWD / filename, IFile::ECF_READ); + smart_refctd_ptr owenSamplerFile; + + if (owenSamplerFileFuture.wait()) + { + owenSamplerFileFuture.acquire().move_into(owenSamplerFile); + if (!owenSamplerFile) + return { nullptr, false }; + + owenSamplerFile->read(owenSamplerFileReadFuture, data, 0, bufferSize); + if (owenSamplerFileReadFuture.wait()) + { + owenSamplerFileReadFuture.acquire().move_into(owenSamplerFileBytesRead); + + if (owenSamplerFileBytesRead < bufferSize) + { + buffer = asset::ICPUBuffer::create({ sizeof(uint32_t) * bufferSize }); + return { owenSamplerFile, false }; + } + + buffer = asset::ICPUBuffer::create({ { sizeof(uint32_t) * bufferSize }, data }); + } + } + + return { owenSamplerFile, true }; + }; + auto writeBufferIntoCacheFile = [this](smart_refctd_ptr file, size_t bufferSize, void* data) + { + ISystem::future_t owenSamplerFileWriteFuture; + size_t owenSamplerFileBytesWritten; + + file->write(owenSamplerFileWriteFuture, data, 0, bufferSize); + if (owenSamplerFileWriteFuture.wait()) + owenSamplerFileWriteFuture.acquire().move_into(owenSamplerFileBytesWritten); + }; + + constexpr size_t bufferSize = MaxBufferDimensions * MaxBufferSamples; + std::array data = {}; + smart_refctd_ptr sampleSeq; + + auto cacheBufferResult = createBufferFromCacheFile(sharedOutputCWD/OwenSamplerFilePath, bufferSize, data.data(), sampleSeq); + if (!cacheBufferResult.second) + { + core::OwenSampler sampler(MaxBufferDimensions, 0xdeadbeefu); + + ICPUBuffer::SCreationParams params = {}; + params.size = MaxBufferDimensions*MaxBufferSamples*sizeof(uint32_t); + sampleSeq = ICPUBuffer::create(std::move(params)); + + auto out = reinterpret_cast(sampleSeq->getPointer()); + for (auto dim = 0u; dim < MaxBufferDimensions; dim++) + for (uint32_t i = 0; i < MaxBufferSamples; i++) + { + out[i * MaxBufferDimensions + dim] = sampler.sample(dim, i); + } + if (cacheBufferResult.first) + writeBufferIntoCacheFile(cacheBufferResult.first, bufferSize, out); + } + + IGPUBuffer::SCreationParams params = {}; + params.usage = asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_UNIFORM_TEXEL_BUFFER_BIT; + params.size = sampleSeq->getSize(); + + // we don't want to overcomplicate the example with multi-queue + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[0].get(); + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = { cmdbuf }; + m_intendedSubmit.scratchCommandBuffers = { &cmdbufInfo, 1 }; + + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + m_api->startCapture(); + auto bufferFuture = m_utils->createFilledDeviceLocalBufferOnDedMem( + m_intendedSubmit, + std::move(params), + sampleSeq->getPointer() + ); + m_api->endCapture(); + bufferFuture.wait(); + auto buffer = bufferFuture.get(); + + m_sequenceBufferView = 
m_device->createBufferView({ 0u, buffer->get()->getSize(), *buffer }, asset::E_FORMAT::EF_R32G32B32_UINT); + m_sequenceBufferView->setObjectDebugName("Sequence Buffer"); + } + + // Update Descriptors + { + ISampler::SParams samplerParams0 = { + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::ETBC_FLOAT_OPAQUE_BLACK, + ISampler::ETF_LINEAR, + ISampler::ETF_LINEAR, + ISampler::ESMM_LINEAR, + 0u, + false, + ECO_ALWAYS + }; + auto sampler0 = m_device->createSampler(samplerParams0); + ISampler::SParams samplerParams1 = { + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_EDGE, + ISampler::ETBC_INT_OPAQUE_BLACK, + ISampler::ETF_NEAREST, + ISampler::ETF_NEAREST, + ISampler::ESMM_NEAREST, + 0u, + false, + ECO_ALWAYS + }; + auto sampler1 = m_device->createSampler(samplerParams1); + + std::array writeDSInfos = {}; + writeDSInfos[0].desc = m_outImgView; + writeDSInfos[0].info.image.imageLayout = IImage::LAYOUT::GENERAL; + writeDSInfos[1].desc = m_envMapView; + // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; + writeDSInfos[1].info.combinedImageSampler.sampler = sampler0; + writeDSInfos[1].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[2].desc = m_sequenceBufferView; + writeDSInfos[3].desc = m_scrambleView; + // ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_INT_OPAQUE_BLACK, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ESMM_NEAREST, 0u, false, ECO_ALWAYS }; + writeDSInfos[3].info.combinedImageSampler.sampler = sampler1; + writeDSInfos[3].info.combinedImageSampler.imageLayout = asset::IImage::LAYOUT::READ_ONLY_OPTIMAL; + writeDSInfos[4].desc = m_outImgView; + writeDSInfos[4].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + std::array writeDescriptorSets = {}; + writeDescriptorSets[0] = { + .dstSet = m_descriptorSet0.get(), + .binding = 0, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[0] + }; + writeDescriptorSets[1] = { + .dstSet = m_descriptorSet2.get(), + .binding = 0, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[1] + }; + writeDescriptorSets[2] = { + .dstSet = m_descriptorSet2.get(), + .binding = 1, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[2] + }; + writeDescriptorSets[3] = { + .dstSet = m_descriptorSet2.get(), + .binding = 2, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[3] + }; + writeDescriptorSets[4] = { + .dstSet = m_presentDescriptorSet.get(), + .binding = 0, + .arrayElement = 0u, + .count = 1u, + .info = &writeDSInfos[4] + }; + + m_device->updateDescriptorSets(writeDescriptorSets, {}); + } + + // Create ui descriptors + { + using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; + { + IGPUSampler::SParams params; + params.AnisotropicFilter = 1u; + params.TextureWrapU = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; + params.TextureWrapV = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; + params.TextureWrapW = ISampler::E_TEXTURE_CLAMP::ETC_REPEAT; + + m_ui.samplers.gui = m_device->createSampler(params); + m_ui.samplers.gui->setObjectDebugName("Nabla IMGUI UI Sampler"); + 
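+			// Every slot of the immutable-sampler array filled just below defaults to m_ui.samplers.scene and
+			// only the font-atlas slot (nbl::ext::imgui::UI::FontAtlasTexId) receives the GUI sampler created
+			// above, which is why updateGUIDescriptorSet() later writes no sampler for the font atlas.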
} + + std::array, 69u> immutableSamplers; + for (auto& it : immutableSamplers) + it = smart_refctd_ptr(m_ui.samplers.scene); + + immutableSamplers[nbl::ext::imgui::UI::FontAtlasTexId] = smart_refctd_ptr(m_ui.samplers.gui); + + nbl::ext::imgui::UI::SCreationParameters params; + + params.resources.texturesInfo = { .setIx = 0u, .bindingIx = 0u }; + params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; + params.assetManager = m_assetMgr; + params.pipelineCache = nullptr; + params.pipelineLayout = nbl::ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxUITextureCount); + params.renderpass = smart_refctd_ptr(renderpass); + params.streamingBuffer = nullptr; + params.subpassIx = 0u; + params.transfer = getTransferUpQueue(); + params.utilities = m_utils; + { + m_ui.manager = ext::imgui::UI::create(std::move(params)); + + // note that we use default layout provided by our extension, but you are free to create your own by filling nbl::ext::imgui::UI::S_CREATION_PARAMETERS::resources + const auto* descriptorSetLayout = m_ui.manager->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + const auto& params = m_ui.manager->getCreationParameters(); + + IDescriptorPool::SCreateInfo descriptorPoolInfo = {}; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLER)] = (uint32_t)nbl::ext::imgui::UI::DefaultSamplerIx::COUNT; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE)] = MaxUITextureCount; + descriptorPoolInfo.maxSets = 1u; + descriptorPoolInfo.flags = IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT; + + m_guiDescriptorSetPool = m_device->createDescriptorPool(std::move(descriptorPoolInfo)); + assert(m_guiDescriptorSetPool); + + m_guiDescriptorSetPool->createDescriptorSets(1u, &descriptorSetLayout, &m_ui.descriptorSet); + assert(m_ui.descriptorSet); + } + } + m_ui.manager->registerListener( + [this]() -> void { + ImGuiIO& io = ImGui::GetIO(); + + m_camera.setProjectionMatrix([&]() + { + static matrix4SIMD projection; + + projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + + return projection; + }()); + + ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); + + // create a window and insert the inspector + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); + ImGui::Begin("Controls"); + + ImGui::SameLine(); + + ImGui::Text("Camera"); + + ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Fov", &fov, 20.f, 150.f); + ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f); + ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f); + ImGui::Combo("Shader", &PTPipeline, shaderNames, E_LIGHT_GEOMETRY::ELG_COUNT); + ImGui::Combo("Render Mode", &renderMode, shaderTypes, E_RENDER_MODE::ERM_COUNT); + ImGui::SliderInt("SPP", &spp, 1, MaxBufferSamples); + ImGui::SliderInt("Depth", &depth, 1, MaxBufferDimensions / 3); + ImGui::Checkbox("Persistent WorkGroups", &usePersistentWorkGroups); + + ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + + ImGui::End(); + } + ); + + // Set Camera + { + core::vectorSIMDf cameraPosition(0, 5, -10); + matrix4SIMD proj = 
matrix4SIMD::buildProjectionMatrixPerspectiveFovRH( + core::radians(60.0f), + WindowDimensions.x / WindowDimensions.y, + 0.01f, + 500.0f + ); + m_camera = Camera(cameraPosition, core::vectorSIMDf(0, 0, 0), proj); + } + + m_winMgr->setWindowSize(m_window.get(), WindowDimensions.x, WindowDimensions.y); + m_surface->recreateSwapchain(); + m_winMgr->show(m_window.get()); + m_oracle.reportBeginFrameRecord(); + m_camera.mapKeysToWASD(); + + return true; + } + + bool updateGUIDescriptorSet() + { + // texture atlas, note we don't create info & write pair for the font sampler because UI extension's is immutable and baked into DS layout + static std::array descriptorInfo; + static IGPUDescriptorSet::SWriteDescriptorSet writes[MaxUITextureCount]; + + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].desc = smart_refctd_ptr(m_ui.manager->getFontAtlasView()); + + for (uint32_t i = 0; i < descriptorInfo.size(); ++i) + { + writes[i].dstSet = m_ui.descriptorSet.get(); + writes[i].binding = 0u; + writes[i].arrayElement = i; + writes[i].count = 1u; + } + writes[nbl::ext::imgui::UI::FontAtlasTexId].info = descriptorInfo.data() + nbl::ext::imgui::UI::FontAtlasTexId; + + return m_device->updateDescriptorSets(writes, {}); + } + + inline void workLoopBody() override + { + // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. + const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); + // We block for semaphores for 2 reasons here: + // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight] + // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. 
[MaxAcquiresInFlight] + if (m_realFrameIx >= framesInFlight) + { + const ISemaphore::SWaitInfo cbDonePending[] = + { + { + .semaphore = m_semaphore.get(), + .value = m_realFrameIx + 1 - framesInFlight + } + }; + if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) + return; + } + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + + m_api->startCapture(); + + // CPU events + update(); + + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[resourceIx].get(); + + if (!keepRunning()) + return; + + // render whole scene to offline frame buffer & submit + { + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::NONE); + // disregard surface/swapchain transformation for now + const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); + PTPushConstant pc; + viewProjectionMatrix.getInverseTransform(pc.invMVP); + pc.sampleCount = spp; + pc.depth = depth; + + // safe to proceed + // upload buffer data + cmdbuf->beginDebugMarker("ComputeShaderPathtracer IMGUI Frame"); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + // TRANSITION m_outImgView to GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::UNDEFINED, + .newLayout = IImage::LAYOUT::GENERAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + + // cube envmap handle + { + IGPUComputePipeline* pipeline; + if (usePersistentWorkGroups) + pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? m_PTHLSLPersistentWGPipelines[PTPipeline].get() : m_PTGLSLPersistentWGPipelines[PTPipeline].get(); + else + pipeline = renderMode == E_RENDER_MODE::ERM_HLSL ? 
m_PTHLSLPipelines[PTPipeline].get() : m_PTGLSLPipelines[PTPipeline].get(); + cmdbuf->bindComputePipeline(pipeline); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 0u, 1u, &m_descriptorSet0.get()); + cmdbuf->bindDescriptorSets(EPBP_COMPUTE, pipeline->getLayout(), 2u, 1u, &m_descriptorSet2.get()); + cmdbuf->pushConstants(pipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_COMPUTE, 0, sizeof(PTPushConstant), &pc); + if (usePersistentWorkGroups) + { + uint32_t dispatchSize = m_physicalDevice->getLimits().computeOptimalPersistentWorkgroupDispatchSize(WindowDimensions.x * WindowDimensions.y, DefaultWorkGroupSize); + cmdbuf->dispatch(dispatchSize, 1u, 1u); + } + else + cmdbuf->dispatch(1 + (WindowDimensions.x * WindowDimensions.y - 1) / DefaultWorkGroupSize, 1u, 1u); + } + + // TRANSITION m_outImgView to READ (because of descriptorSets0 -> ComputeShader Writes into the image) + { + const IGPUCommandBuffer::SImageMemoryBarrier imgBarriers[] = { + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + } + }, + .image = m_outImgView->getCreationParameters().image.get(), + .subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }, + .oldLayout = IImage::LAYOUT::GENERAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL + } + }; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imgBarriers }); + } + + // TODO: tone mapping and stuff + } + + asset::SViewport viewport; + { + viewport.minDepth = 1.f; + viewport.maxDepth = 0.f; + viewport.x = 0u; + viewport.y = 0u; + viewport.width = WindowDimensions.x; + viewport.height = WindowDimensions.y; + } + cmdbuf->setViewport(0u, 1u, &viewport); + + + VkRect2D defaultScisors[] = { {.offset = {(int32_t)viewport.x, (int32_t)viewport.y}, .extent = {(uint32_t)viewport.width, (uint32_t)viewport.height}} }; + cmdbuf->setScissor(defaultScisors); + + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + }; + auto scRes = static_cast(m_surface->getSwapchainResources()); + + // Upload m_outImg to swapchain + UI + { + const IGPUCommandBuffer::SRenderpassBeginInfo info = + { + .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .colorClearValues = &clearColor, + .depthStencilClearValues = nullptr, + .renderArea = currentRenderArea + }; + nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; + + cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + + cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, m_presentPipeline->getLayout(), 0, 1u, &m_presentDescriptorSet.get()); + ext::FullScreenTriangle::recordDrawCall(cmdbuf); + + const auto uiParams = m_ui.manager->getCreationParameters(); + auto* uiPipeline = m_ui.manager->getPipeline(); + cmdbuf->bindGraphicsPipeline(uiPipeline); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, uiPipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, &m_ui.descriptorSet.get()); + m_ui.manager->render(cmdbuf, waitInfo); + + cmdbuf->endRenderPass(); + } + + cmdbuf->end(); + { + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = + { + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + 
.stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT + } + }; + { + { + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cmdbuf } + }; + + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = + { + { + .semaphore = m_currentImageAcquire.semaphore, + .value = m_currentImageAcquire.acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; + + updateGUIDescriptorSet(); + + if (queue->submit(infos) != IQueue::RESULT::SUCCESS) + m_realFrameIx--; + } + } + + m_window->setCaption("[Nabla Engine] HLSL Compute Path Tracer"); + m_surface->present(m_currentImageAcquire.imageIndex, rendered); + } + m_api->endCapture(); + } + + inline bool keepRunning() override + { + if (m_surface->irrecoverable()) + return false; + + return true; + } + + inline bool onAppTerminated() override + { + return device_base_t::onAppTerminated(); + } + + inline void update() + { + m_camera.setMoveSpeed(moveSpeed); + m_camera.setRotateSpeed(rotateSpeed); + + static std::chrono::microseconds previousEventTimestamp{}; + + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); + + auto updatePresentationTimestamp = [&]() + { + m_currentImageAcquire = m_surface->acquireNextImage(); + + m_oracle.reportEndFrameRecord(); + const auto timestamp = m_oracle.getNextPresentationTimeStamp(); + m_oracle.reportBeginFrameRecord(); + + return timestamp; + }; + + const auto nextPresentationTimestamp = updatePresentationTimestamp(); + + struct + { + std::vector mouse{}; + std::vector keyboard{}; + } capturedEvents; + + m_camera.beginInputProcessing(nextPresentationTimestamp); + { + const auto& io = ImGui::GetIO(); + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void + { + if (!io.WantCaptureMouse) + m_camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + capturedEvents.mouse.emplace_back(e); + + if (e.type == nbl::ui::SMouseEvent::EET_SCROLL) + gcIndex = std::clamp(int16_t(gcIndex) + int16_t(core::sign(e.scrollEvent.verticalScroll)), int64_t(0), int64_t(ELG_COUNT - (uint8_t)1u)); + } + }, m_logger.get()); + + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + if (!io.WantCaptureKeyboard) + m_camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + capturedEvents.keyboard.emplace_back(e); + } + }, m_logger.get()); + } + m_camera.endInputProcessing(nextPresentationTimestamp); + + const core::SRange mouseEvents(capturedEvents.mouse.data(), capturedEvents.mouse.data() + capturedEvents.mouse.size()); + const core::SRange keyboardEvents(capturedEvents.keyboard.data(), capturedEvents.keyboard.data() + capturedEvents.keyboard.size()); + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + const auto mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()); + + const ext::imgui::UI::SUpdateParameters params = + { + .mousePosition = mousePosition, + .displaySize = { m_window->getWidth(), 
m_window->getHeight() }, + .mouseEvents = mouseEvents, + .keyboardEvents = keyboardEvents + }; + + m_ui.manager->update(params); + } + + private: + smart_refctd_ptr m_window; + smart_refctd_ptr> m_surface; + + // gpu resources + smart_refctd_ptr m_cmdPool; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTGLSLPipelines; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPipelines; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTGLSLPersistentWGPipelines; + std::array, E_LIGHT_GEOMETRY::ELG_COUNT> m_PTHLSLPersistentWGPipelines; + smart_refctd_ptr m_presentPipeline; + uint64_t m_realFrameIx = 0; + std::array, MaxFramesInFlight> m_cmdBufs; + ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + smart_refctd_ptr m_descriptorSet0, m_descriptorSet2, m_presentDescriptorSet; + + core::smart_refctd_ptr m_guiDescriptorSetPool; + + // system resources + core::smart_refctd_ptr m_inputSystem; + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; + + // pathtracer resources + smart_refctd_ptr m_envMapView, m_scrambleView; + smart_refctd_ptr m_sequenceBufferView; + smart_refctd_ptr m_outImgView; + + // sync + smart_refctd_ptr m_semaphore; + + // image upload resources + smart_refctd_ptr m_scratchSemaphore; + SIntendedSubmitInfo m_intendedSubmit; + + struct C_UI + { + nbl::core::smart_refctd_ptr manager; + + struct + { + core::smart_refctd_ptr gui, scene; + } samplers; + + core::smart_refctd_ptr descriptorSet; + } m_ui; + + Camera m_camera; + + video::CDumbPresentationOracle m_oracle; + + uint16_t gcIndex = {}; // note: this is dirty, but since I assume only a single object in the scene I can leave it for now; once this example is upgraded to support multiple objects this needs to change + + float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; + float viewWidth = 10.f; + float camYAngle = 165.f / 180.f * 3.14159f; + float camXAngle = 32.f / 180.f * 3.14159f; + int PTPipeline = E_LIGHT_GEOMETRY::ELG_SPHERE; + int renderMode = E_RENDER_MODE::ERM_HLSL; + int spp = 32; + int depth = 3; + bool usePersistentWorkGroups = false; + + bool m_firstFrame = true; + IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} }; +}; + +NBL_MAIN_FUNC(HLSLComputePathtracer) diff --git a/27_PLYSTLDemo/pipeline.groovy b/31_HLSLPathTracer/pipeline.groovy similarity index 85% rename from 27_PLYSTLDemo/pipeline.groovy rename to 31_HLSLPathTracer/pipeline.groovy index 9a89cc786..955e77cec 100644 --- a/27_PLYSTLDemo/pipeline.groovy +++ b/31_HLSLPathTracer/pipeline.groovy @@ -2,9 +2,9 @@ import org.DevshGraphicsProgramming.Agent import org.DevshGraphicsProgramming.BuilderInfo import org.DevshGraphicsProgramming.IBuilder -class CPLYSTLDemoBuilder extends IBuilder +class CHLSLPathTracerBuilder extends IBuilder { - public CPLYSTLDemoBuilder(Agent _agent, _info) + public CHLSLPathTracerBuilder(Agent _agent, _info) { super(_agent, _info) } @@ -44,7 +44,7 @@ class CPLYSTLDemoBuilder extends IBuilder def create(Agent _agent, _info) { - return new CPLYSTLDemoBuilder(_agent, _info) + return new CHLSLPathTracerBuilder(_agent, _info) } -return this \ No newline at end of file +return this diff --git a/42_FragmentShaderPathTracer/CMakeLists.txt b/42_FragmentShaderPathTracer/CMakeLists.txt deleted file mode 100644 index a476b6203..000000000 --- a/42_FragmentShaderPathTracer/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ - -include(common RESULT_VARIABLE RES) -if(NOT RES) - message(FATAL_ERROR "common.cmake not found.
Should be in {repo_root}/cmake directory") -endif() - -nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") \ No newline at end of file diff --git a/42_FragmentShaderPathTracer/main.cpp b/42_FragmentShaderPathTracer/main.cpp deleted file mode 100644 index f8505b8d1..000000000 --- a/42_FragmentShaderPathTracer/main.cpp +++ /dev/null @@ -1,693 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#define _NBL_STATIC_LIB_ -#include - -#include "../common/CommonAPI.h" -#include "CCamera.hpp" -#include "nbl/ext/ScreenShot/ScreenShot.h" -#include "nbl/video/utilities/CDumbPresentationOracle.h" - -using namespace nbl; -using namespace core; -using namespace ui; - - -using namespace nbl; -using namespace core; -using namespace asset; -using namespace video; - -smart_refctd_ptr createHDRImageView(nbl::core::smart_refctd_ptr device, asset::E_FORMAT colorFormat, uint32_t width, uint32_t height) -{ - smart_refctd_ptr gpuImageViewColorBuffer; - { - IGPUImage::SCreationParams imgInfo; - imgInfo.format = colorFormat; - imgInfo.type = IGPUImage::ET_2D; - imgInfo.extent.width = width; - imgInfo.extent.height = height; - imgInfo.extent.depth = 1u; - imgInfo.mipLevels = 1u; - imgInfo.arrayLayers = 1u; - imgInfo.samples = asset::ICPUImage::ESCF_1_BIT; - imgInfo.flags = static_cast(0u); - imgInfo.usage = core::bitflag(asset::IImage::EUF_STORAGE_BIT) | asset::IImage::EUF_TRANSFER_SRC_BIT; - - auto image = device->createImage(std::move(imgInfo)); - auto imageMemReqs = image->getMemoryReqs(); - imageMemReqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - device->allocate(imageMemReqs, image.get()); - - IGPUImageView::SCreationParams imgViewInfo; - imgViewInfo.image = std::move(image); - imgViewInfo.format = colorFormat; - imgViewInfo.viewType = IGPUImageView::ET_2D; - imgViewInfo.flags = static_cast(0u); - imgViewInfo.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; - imgViewInfo.subresourceRange.baseArrayLayer = 0u; - imgViewInfo.subresourceRange.baseMipLevel = 0u; - imgViewInfo.subresourceRange.layerCount = 1u; - imgViewInfo.subresourceRange.levelCount = 1u; - - gpuImageViewColorBuffer = device->createImageView(std::move(imgViewInfo)); - } - - return gpuImageViewColorBuffer; -} - -struct ShaderParameters -{ - const uint32_t MaxDepthLog2 = 4; //5 - const uint32_t MaxSamplesLog2 = 10; //18 -} kShaderParameters; - -enum E_LIGHT_GEOMETRY -{ - ELG_SPHERE, - ELG_TRIANGLE, - ELG_RECTANGLE -}; - -struct DispatchInfo_t -{ - uint32_t workGroupCount[3]; -}; - -_NBL_STATIC_INLINE_CONSTEXPR uint32_t DEFAULT_WORK_GROUP_SIZE = 16u; - -DispatchInfo_t getDispatchInfo(uint32_t imgWidth, uint32_t imgHeight) { - DispatchInfo_t ret = {}; - ret.workGroupCount[0] = (uint32_t)core::ceil((float)imgWidth / (float)DEFAULT_WORK_GROUP_SIZE); - ret.workGroupCount[1] = (uint32_t)core::ceil((float)imgHeight / (float)DEFAULT_WORK_GROUP_SIZE); - ret.workGroupCount[2] = 1; - return ret; -} - -int main() -{ - system::IApplicationFramework::GlobalsInit(); - - constexpr uint32_t WIN_W = 1280; - constexpr uint32_t WIN_H = 720; - constexpr uint32_t FBO_COUNT = 2u; - constexpr uint32_t FRAMES_IN_FLIGHT = 5u; - constexpr bool LOG_TIMESTAMP = false; - static_assert(FRAMES_IN_FLIGHT>FBO_COUNT); - - const auto swapchainImageUsage = static_cast(asset::IImage::EUF_COLOR_ATTACHMENT_BIT | asset::IImage::EUF_TRANSFER_DST_BIT); 
- CommonAPI::InitParams initParams; - initParams.apiType = video::EAT_VULKAN; - initParams.appName = { "Compute Shader PathTracer" }; - initParams.framesInFlight = FRAMES_IN_FLIGHT; - initParams.windowWidth = WIN_W; - initParams.windowHeight = WIN_H; - initParams.swapchainImageCount = FBO_COUNT; - initParams.swapchainImageUsage = swapchainImageUsage; - initParams.depthFormat = asset::EF_D32_SFLOAT; - auto initOutput = CommonAPI::InitWithDefaultExt(std::move(initParams)); - - auto system = std::move(initOutput.system); - auto window = std::move(initParams.window); - auto windowCb = std::move(initParams.windowCb); - auto gl = std::move(initOutput.apiConnection); - auto surface = std::move(initOutput.surface); - auto gpuPhysicalDevice = std::move(initOutput.physicalDevice); - auto device = std::move(initOutput.logicalDevice); - auto queues = std::move(initOutput.queues); - auto graphicsQueue = queues[CommonAPI::InitOutput::EQT_GRAPHICS]; - auto transferUpQueue = queues[CommonAPI::InitOutput::EQT_TRANSFER_UP]; - auto computeQueue = queues[CommonAPI::InitOutput::EQT_COMPUTE]; - auto renderpass = std::move(initOutput.renderToSwapchainRenderpass); - auto assetManager = std::move(initOutput.assetManager); - auto cpu2gpuParams = std::move(initOutput.cpu2gpuParams); - auto logger = std::move(initOutput.logger); - auto inputSystem = std::move(initOutput.inputSystem); - auto utilities = std::move(initOutput.utilities); - auto graphicsCommandPools = std::move(initOutput.commandPools[CommonAPI::InitOutput::EQT_GRAPHICS]); - auto computeCommandPools = std::move(initOutput.commandPools[CommonAPI::InitOutput::EQT_COMPUTE]); - auto swapchainCreationParams = std::move(initOutput.swapchainCreationParams); - - core::smart_refctd_ptr swapchain = nullptr; - CommonAPI::createSwapchain(std::move(device), swapchainCreationParams, WIN_W, WIN_H, swapchain); - assert(swapchain); - auto fbo = CommonAPI::createFBOWithSwapchainImages( - swapchain->getImageCount(), WIN_W, WIN_H, - device, swapchain, renderpass, - asset::EF_D32_SFLOAT - ); - - auto graphicsCmdPoolQueueFamIdx = graphicsQueue->getFamilyIndex(); - - nbl::video::IGPUObjectFromAssetConverter CPU2GPU; - - core::smart_refctd_ptr cmdbuf[FRAMES_IN_FLIGHT]; - for (uint32_t i = 0u; i < FRAMES_IN_FLIGHT; i++) - device->createCommandBuffers(graphicsCommandPools[i].get(), video::IGPUCommandBuffer::EL_PRIMARY, 1, cmdbuf+i); - - constexpr uint32_t maxDescriptorCount = 256u; - constexpr uint32_t PoolSizesCount = 5u; - - nbl::video::IDescriptorPool::SCreateInfo createInfo; - createInfo.maxDescriptorCount[static_cast(nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER)] = maxDescriptorCount * 1; - createInfo.maxDescriptorCount[static_cast(nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE)] = maxDescriptorCount * 8; - createInfo.maxDescriptorCount[static_cast(nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER)] = maxDescriptorCount * 2; - createInfo.maxDescriptorCount[static_cast(nbl::asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER)] = maxDescriptorCount * 1; - createInfo.maxDescriptorCount[static_cast(nbl::asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER)] = maxDescriptorCount * 1; - createInfo.maxSets = maxDescriptorCount; - - auto descriptorPool = device->createDescriptorPool(std::move(createInfo)); - - const auto timestampQueryPool = device->createQueryPool({ - .queryType = video::IQueryPool::EQT_TIMESTAMP, - .queryCount = 2u - }); - - // Camera - core::vectorSIMDf cameraPosition(0, 5, -10); - matrix4SIMD proj = 
matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(60.0f), video::ISurface::getTransformedAspectRatio(swapchain->getPreTransform(), WIN_W, WIN_H), 0.01f, 500.0f); - Camera cam = Camera(cameraPosition, core::vectorSIMDf(0, 0, 0), proj); - - IGPUDescriptorSetLayout::SBinding descriptorSet0Bindings[] = { - { 0u, nbl::asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, IShader::ESS_COMPUTE, 1u, nullptr }, - }; - IGPUDescriptorSetLayout::SBinding uboBinding - { 0u, nbl::asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER, IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, IShader::ESS_COMPUTE, 1u, nullptr }; - IGPUDescriptorSetLayout::SBinding descriptorSet3Bindings[] = { - { 0u, nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, IShader::ESS_COMPUTE, 1u, nullptr }, - { 1u, nbl::asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER, IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, IShader::ESS_COMPUTE, 1u, nullptr }, - { 2u, nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, IShader::ESS_COMPUTE, 1u, nullptr }, - }; - - auto gpuDescriptorSetLayout0 = device->createDescriptorSetLayout(descriptorSet0Bindings, descriptorSet0Bindings + 1u); - auto gpuDescriptorSetLayout1 = device->createDescriptorSetLayout(&uboBinding, &uboBinding + 1u); - auto gpuDescriptorSetLayout2 = device->createDescriptorSetLayout(descriptorSet3Bindings, descriptorSet3Bindings+3u); - - auto createGpuResources = [&](std::string pathToShader) -> core::smart_refctd_ptr - { - asset::IAssetLoader::SAssetLoadParams params{}; - params.logger = logger.get(); - //params.relativeDir = tmp.c_str(); - auto spec = assetManager->getAsset(pathToShader,params).getContents(); - - if (spec.empty()) - assert(false); - - auto cpuComputeSpecializedShader = core::smart_refctd_ptr_static_cast(*spec.begin()); - - ISpecializedShader::SInfo info = cpuComputeSpecializedShader->getSpecializationInfo(); - info.m_backingBuffer = ICPUBuffer::create({ sizeof(ShaderParameters) }); - memcpy(info.m_backingBuffer->getPointer(),&kShaderParameters,sizeof(ShaderParameters)); - info.m_entries = core::make_refctd_dynamic_array>(2u); - for (uint32_t i=0; i<2; i++) - info.m_entries->operator[](i) = {i,(uint32_t)(i*sizeof(uint32_t)),sizeof(uint32_t)}; - - - cpuComputeSpecializedShader->setSpecializationInfo(std::move(info)); - - auto gpuComputeSpecializedShader = CPU2GPU.getGPUObjectsFromAssets(&cpuComputeSpecializedShader, &cpuComputeSpecializedShader + 1, cpu2gpuParams)->front(); - - auto gpuPipelineLayout = device->createPipelineLayout(nullptr, nullptr, core::smart_refctd_ptr(gpuDescriptorSetLayout0), core::smart_refctd_ptr(gpuDescriptorSetLayout1), core::smart_refctd_ptr(gpuDescriptorSetLayout2), nullptr); - - auto gpuPipeline = device->createComputePipeline(nullptr, std::move(gpuPipelineLayout), std::move(gpuComputeSpecializedShader)); - - return gpuPipeline; - }; - - E_LIGHT_GEOMETRY lightGeom = ELG_SPHERE; - constexpr const char* shaderPaths[] = {"../litBySphere.comp","../litByTriangle.comp","../litByRectangle.comp"}; - auto gpuComputePipeline = createGpuResources(shaderPaths[lightGeom]); - - DispatchInfo_t dispatchInfo = getDispatchInfo(WIN_W, WIN_H); - - auto createImageView = [&](std::string pathToOpenEXRHDRIImage) - { -#ifndef _NBL_COMPILE_WITH_OPENEXR_LOADER_ - assert(false); -#endif - - auto pathToTexture = pathToOpenEXRHDRIImage; - 
IAssetLoader::SAssetLoadParams lp(0ull, nullptr, IAssetLoader::ECF_DONT_CACHE_REFERENCES); - auto cpuTexture = assetManager->getAsset(pathToTexture, lp); - auto cpuTextureContents = cpuTexture.getContents(); - assert(!cpuTextureContents.empty()); - auto cpuImage = core::smart_refctd_ptr_static_cast(*cpuTextureContents.begin()); - cpuImage->setImageUsageFlags(IImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT); - - ICPUImageView::SCreationParams viewParams; - viewParams.flags = static_cast(0u); - viewParams.image = cpuImage; - viewParams.format = viewParams.image->getCreationParameters().format; - viewParams.viewType = IImageView::ET_2D; - viewParams.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; - viewParams.subresourceRange.baseArrayLayer = 0u; - viewParams.subresourceRange.layerCount = 1u; - viewParams.subresourceRange.baseMipLevel = 0u; - viewParams.subresourceRange.levelCount = 1u; - - auto cpuImageView = ICPUImageView::create(std::move(viewParams)); - - cpu2gpuParams.beginCommandBuffers(); - auto gpuImageView = CPU2GPU.getGPUObjectsFromAssets(&cpuImageView, &cpuImageView + 1u, cpu2gpuParams)->front(); - cpu2gpuParams.waitForCreationToComplete(false); - - return gpuImageView; - }; - - auto gpuEnvmapImageView = createImageView("../../media/envmap/envmap_0.exr"); - - smart_refctd_ptr gpuSequenceBufferView; - { - const uint32_t MaxDimensions = 3u<(sampleSequence->getPointer()); - for (auto dim=0u; dimcreateFilledDeviceLocalBufferOnDedMem(graphicsQueue, sampleSequence->getSize(), sampleSequence->getPointer()); - core::smart_refctd_ptr gpuSequenceBuffer; - { - IGPUBuffer::SCreationParams params = {}; - const size_t size = sampleSequence->getSize(); - params.usage = core::bitflag(asset::IBuffer::EUF_TRANSFER_DST_BIT) | asset::IBuffer::EUF_UNIFORM_TEXEL_BUFFER_BIT; - params.size = size; - gpuSequenceBuffer = device->createBuffer(std::move(params)); - auto gpuSequenceBufferMemReqs = gpuSequenceBuffer->getMemoryReqs(); - gpuSequenceBufferMemReqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - device->allocate(gpuSequenceBufferMemReqs, gpuSequenceBuffer.get()); - utilities->updateBufferRangeViaStagingBufferAutoSubmit(asset::SBufferRange{0u,size,gpuSequenceBuffer},sampleSequence->getPointer(), graphicsQueue); - } - gpuSequenceBufferView = device->createBufferView(gpuSequenceBuffer.get(), asset::EF_R32G32B32_UINT); - } - - smart_refctd_ptr gpuScrambleImageView; - { - IGPUImage::SCreationParams imgParams; - imgParams.flags = static_cast(0u); - imgParams.type = IImage::ET_2D; - imgParams.format = EF_R32G32_UINT; - imgParams.extent = {WIN_W, WIN_H,1u}; - imgParams.mipLevels = 1u; - imgParams.arrayLayers = 1u; - imgParams.samples = IImage::ESCF_1_BIT; - imgParams.usage = core::bitflag(IImage::EUF_SAMPLED_BIT) | IImage::EUF_TRANSFER_DST_BIT; - imgParams.initialLayout = asset::IImage::EL_UNDEFINED; - - IGPUImage::SBufferCopy region = {}; - region.bufferOffset = 0u; - region.bufferRowLength = 0u; - region.bufferImageHeight = 0u; - region.imageExtent = imgParams.extent; - region.imageOffset = {0u,0u,0u}; - region.imageSubresource.layerCount = 1u; - region.imageSubresource.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; - - constexpr auto ScrambleStateChannels = 2u; - const auto renderPixelCount = imgParams.extent.width*imgParams.extent.height; - core::vector random(renderPixelCount*ScrambleStateChannels); - { - core::RandomSampler rng(0xbadc0ffeu); - for (auto& pixel : random) - pixel = rng.nextSample(); - } - - // TODO: Temp Fix because 
createFilledDeviceLocalBufferOnDedMem doesn't take in params - // auto buffer = utilities->createFilledDeviceLocalBufferOnDedMem(graphicsQueue, random.size()*sizeof(uint32_t), random.data()); - core::smart_refctd_ptr buffer; - { - IGPUBuffer::SCreationParams params = {}; - const size_t size = random.size() * sizeof(uint32_t); - params.usage = core::bitflag(asset::IBuffer::EUF_TRANSFER_DST_BIT) | asset::IBuffer::EUF_TRANSFER_SRC_BIT; - params.size = size; - buffer = device->createBuffer(std::move(params)); - auto bufferMemReqs = buffer->getMemoryReqs(); - bufferMemReqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - device->allocate(bufferMemReqs, buffer.get()); - utilities->updateBufferRangeViaStagingBufferAutoSubmit(asset::SBufferRange{0u,size,buffer},random.data(),graphicsQueue); - } - - IGPUImageView::SCreationParams viewParams; - viewParams.flags = static_cast(0u); - // TODO: Replace this IGPUBuffer -> IGPUImage to using image upload utility - viewParams.image = utilities->createFilledDeviceLocalImageOnDedMem(std::move(imgParams), buffer.get(), 1u, ®ion, graphicsQueue); - viewParams.viewType = IGPUImageView::ET_2D; - viewParams.format = EF_R32G32_UINT; - viewParams.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; - viewParams.subresourceRange.levelCount = 1u; - viewParams.subresourceRange.layerCount = 1u; - gpuScrambleImageView = device->createImageView(std::move(viewParams)); - } - - // Create Out Image TODO - constexpr uint32_t MAX_FBO_COUNT = 4u; - smart_refctd_ptr outHDRImageViews[MAX_FBO_COUNT] = {}; - assert(MAX_FBO_COUNT >= swapchain->getImageCount()); - for(uint32_t i = 0; i < swapchain->getImageCount(); ++i) { - outHDRImageViews[i] = createHDRImageView(device, asset::EF_R16G16B16A16_SFLOAT, WIN_W, WIN_H); - } - - core::smart_refctd_ptr descriptorSets0[FBO_COUNT] = {}; - for(uint32_t i = 0; i < FBO_COUNT; ++i) - { - auto & descSet = descriptorSets0[i]; - descSet = descriptorPool->createDescriptorSet(core::smart_refctd_ptr(gpuDescriptorSetLayout0)); - video::IGPUDescriptorSet::SWriteDescriptorSet writeDescriptorSet; - writeDescriptorSet.dstSet = descSet.get(); - writeDescriptorSet.binding = 0; - writeDescriptorSet.count = 1u; - writeDescriptorSet.arrayElement = 0u; - writeDescriptorSet.descriptorType = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE; - video::IGPUDescriptorSet::SDescriptorInfo info; - { - info.desc = outHDRImageViews[i]; - info.info.image.sampler = nullptr; - info.info.image.imageLayout = asset::IImage::EL_GENERAL; - } - writeDescriptorSet.info = &info; - device->updateDescriptorSets(1u, &writeDescriptorSet, 0u, nullptr); - } - - struct SBasicViewParametersAligned - { - SBasicViewParameters uboData; - }; - - IGPUBuffer::SCreationParams gpuuboParams = {}; - gpuuboParams.usage = core::bitflag(IGPUBuffer::EUF_UNIFORM_BUFFER_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT; - gpuuboParams.size = sizeof(SBasicViewParametersAligned); - auto gpuubo = device->createBuffer(std::move(gpuuboParams)); - auto gpuuboMemReqs = gpuubo->getMemoryReqs(); - gpuuboMemReqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - device->allocate(gpuuboMemReqs, gpuubo.get()); - - auto uboDescriptorSet1 = descriptorPool->createDescriptorSet(core::smart_refctd_ptr(gpuDescriptorSetLayout1)); - { - video::IGPUDescriptorSet::SWriteDescriptorSet uboWriteDescriptorSet; - uboWriteDescriptorSet.dstSet = uboDescriptorSet1.get(); - uboWriteDescriptorSet.binding = 0; - uboWriteDescriptorSet.count = 1u; - 
uboWriteDescriptorSet.arrayElement = 0u; - uboWriteDescriptorSet.descriptorType = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER; - video::IGPUDescriptorSet::SDescriptorInfo info; - { - info.desc = gpuubo; - info.info.buffer.offset = 0ull; - info.info.buffer.size = sizeof(SBasicViewParametersAligned); - } - uboWriteDescriptorSet.info = &info; - device->updateDescriptorSets(1u, &uboWriteDescriptorSet, 0u, nullptr); - } - - ISampler::SParams samplerParams0 = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; - auto sampler0 = device->createSampler(samplerParams0); - ISampler::SParams samplerParams1 = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_INT_OPAQUE_BLACK, ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ESMM_NEAREST, 0u, false, ECO_ALWAYS }; - auto sampler1 = device->createSampler(samplerParams1); - - auto descriptorSet2 = descriptorPool->createDescriptorSet(core::smart_refctd_ptr(gpuDescriptorSetLayout2)); - { - constexpr auto kDescriptorCount = 3; - IGPUDescriptorSet::SWriteDescriptorSet samplerWriteDescriptorSet[kDescriptorCount]; - IGPUDescriptorSet::SDescriptorInfo samplerDescriptorInfo[kDescriptorCount]; - for (auto i=0; iupdateDescriptorSets(kDescriptorCount, samplerWriteDescriptorSet, 0u, nullptr); - } - - constexpr uint32_t FRAME_COUNT = 500000u; - - core::smart_refctd_ptr frameComplete[FRAMES_IN_FLIGHT] = { nullptr }; - core::smart_refctd_ptr imageAcquire[FRAMES_IN_FLIGHT] = { nullptr }; - core::smart_refctd_ptr renderFinished[FRAMES_IN_FLIGHT] = { nullptr }; - for (uint32_t i=0u; icreateSemaphore(); - renderFinished[i] = device->createSemaphore(); - } - - CDumbPresentationOracle oracle; - oracle.reportBeginFrameRecord(); - constexpr uint64_t MAX_TIMEOUT = 99999999999999ull; - - // polling for events! 
- CommonAPI::InputSystem::ChannelReader mouse; - CommonAPI::InputSystem::ChannelReader keyboard; - - uint32_t resourceIx = 0; - while(windowCb->isWindowOpen()) - { - resourceIx++; - if(resourceIx >= FRAMES_IN_FLIGHT) { - resourceIx = 0; - } - - oracle.reportEndFrameRecord(); - double dt = oracle.getDeltaTimeInMicroSeconds() / 1000.0; - auto nextPresentationTimeStamp = oracle.getNextPresentationTimeStamp(); - oracle.reportBeginFrameRecord(); - - // Input - inputSystem->getDefaultMouse(&mouse); - inputSystem->getDefaultKeyboard(&keyboard); - - cam.beginInputProcessing(nextPresentationTimeStamp); - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { cam.mouseProcess(events); }, logger.get()); - keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { cam.keyboardProcess(events); }, logger.get()); - cam.endInputProcessing(nextPresentationTimeStamp); - - auto& cb = cmdbuf[resourceIx]; - auto& fence = frameComplete[resourceIx]; - if (fence) - while (device->waitForFences(1u,&fence.get(),false,MAX_TIMEOUT)==video::IGPUFence::ES_TIMEOUT) - { - } - else - fence = device->createFence(static_cast(0)); - - const auto viewMatrix = cam.getViewMatrix(); - const auto viewProjectionMatrix = matrix4SIMD::concatenateBFollowedByAPrecisely( - video::ISurface::getSurfaceTransformationMatrix(swapchain->getPreTransform()), - cam.getConcatenatedMatrix() - ); - - // safe to proceed - cb->begin(IGPUCommandBuffer::EU_NONE); - cb->resetQueryPool(timestampQueryPool.get(), 0u, 2u); - - // renderpass - uint32_t imgnum = 0u; - swapchain->acquireNextImage(MAX_TIMEOUT,imageAcquire[resourceIx].get(),nullptr,&imgnum); - { - auto mv = viewMatrix; - auto mvp = viewProjectionMatrix; - core::matrix3x4SIMD normalMat; - mv.getSub3x3InverseTranspose(normalMat); - - SBasicViewParametersAligned viewParams; - memcpy(viewParams.uboData.MV, mv.pointer(), sizeof(mv)); - memcpy(viewParams.uboData.MVP, mvp.pointer(), sizeof(mvp)); - memcpy(viewParams.uboData.NormalMat, normalMat.pointer(), sizeof(normalMat)); - - asset::SBufferRange range; - range.buffer = gpuubo; - range.offset = 0ull; - range.size = sizeof(viewParams); - utilities->updateBufferRangeViaStagingBufferAutoSubmit(range, &viewParams, graphicsQueue); - } - - // TRANSITION outHDRImageViews[imgnum] to EIL_GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) - { - IGPUCommandBuffer::SImageMemoryBarrier imageBarriers[3u] = {}; - imageBarriers[0].barrier.srcAccessMask = asset::EAF_NONE; - imageBarriers[0].barrier.dstAccessMask = static_cast(asset::EAF_SHADER_WRITE_BIT); - imageBarriers[0].oldLayout = asset::IImage::EL_UNDEFINED; - imageBarriers[0].newLayout = asset::IImage::EL_GENERAL; - imageBarriers[0].srcQueueFamilyIndex = graphicsCmdPoolQueueFamIdx; - imageBarriers[0].dstQueueFamilyIndex = graphicsCmdPoolQueueFamIdx; - imageBarriers[0].image = outHDRImageViews[imgnum]->getCreationParameters().image; - imageBarriers[0].subresourceRange.aspectMask = asset::IImage::EAF_COLOR_BIT; - imageBarriers[0].subresourceRange.baseMipLevel = 0u; - imageBarriers[0].subresourceRange.levelCount = 1; - imageBarriers[0].subresourceRange.baseArrayLayer = 0u; - imageBarriers[0].subresourceRange.layerCount = 1; - - imageBarriers[1].barrier.srcAccessMask = asset::EAF_NONE; - imageBarriers[1].barrier.dstAccessMask = static_cast(asset::EAF_SHADER_READ_BIT); - imageBarriers[1].oldLayout = asset::IImage::EL_UNDEFINED; - imageBarriers[1].newLayout = asset::IImage::EL_SHADER_READ_ONLY_OPTIMAL; - imageBarriers[1].srcQueueFamilyIndex = 
graphicsCmdPoolQueueFamIdx; - imageBarriers[1].dstQueueFamilyIndex = graphicsCmdPoolQueueFamIdx; - imageBarriers[1].image = gpuScrambleImageView->getCreationParameters().image; - imageBarriers[1].subresourceRange.aspectMask = asset::IImage::EAF_COLOR_BIT; - imageBarriers[1].subresourceRange.baseMipLevel = 0u; - imageBarriers[1].subresourceRange.levelCount = 1; - imageBarriers[1].subresourceRange.baseArrayLayer = 0u; - imageBarriers[1].subresourceRange.layerCount = 1; - - imageBarriers[2].barrier.srcAccessMask = asset::EAF_NONE; - imageBarriers[2].barrier.dstAccessMask = static_cast(asset::EAF_SHADER_READ_BIT); - imageBarriers[2].oldLayout = asset::IImage::EL_UNDEFINED; - imageBarriers[2].newLayout = asset::IImage::EL_SHADER_READ_ONLY_OPTIMAL; - imageBarriers[2].srcQueueFamilyIndex = graphicsCmdPoolQueueFamIdx; - imageBarriers[2].dstQueueFamilyIndex = graphicsCmdPoolQueueFamIdx; - imageBarriers[2].image = gpuEnvmapImageView->getCreationParameters().image; - imageBarriers[2].subresourceRange.aspectMask = asset::IImage::EAF_COLOR_BIT; - imageBarriers[2].subresourceRange.baseMipLevel = 0u; - imageBarriers[2].subresourceRange.levelCount = gpuEnvmapImageView->getCreationParameters().subresourceRange.levelCount; - imageBarriers[2].subresourceRange.baseArrayLayer = 0u; - imageBarriers[2].subresourceRange.layerCount = gpuEnvmapImageView->getCreationParameters().subresourceRange.layerCount; - - cb->pipelineBarrier(asset::EPSF_TOP_OF_PIPE_BIT, asset::EPSF_COMPUTE_SHADER_BIT, asset::EDF_NONE, 0u, nullptr, 0u, nullptr, 3u, imageBarriers); - } - - // cube envmap handle - { - cb->writeTimestamp(asset::E_PIPELINE_STAGE_FLAGS::EPSF_TOP_OF_PIPE_BIT, timestampQueryPool.get(), 0u); - cb->bindComputePipeline(gpuComputePipeline.get()); - cb->bindDescriptorSets(EPBP_COMPUTE, gpuComputePipeline->getLayout(), 0u, 1u, &descriptorSets0[imgnum].get()); - cb->bindDescriptorSets(EPBP_COMPUTE, gpuComputePipeline->getLayout(), 1u, 1u, &uboDescriptorSet1.get()); - cb->bindDescriptorSets(EPBP_COMPUTE, gpuComputePipeline->getLayout(), 2u, 1u, &descriptorSet2.get()); - cb->dispatch(dispatchInfo.workGroupCount[0], dispatchInfo.workGroupCount[1], dispatchInfo.workGroupCount[2]); - cb->writeTimestamp(asset::E_PIPELINE_STAGE_FLAGS::EPSF_BOTTOM_OF_PIPE_BIT, timestampQueryPool.get(), 1u); - } - // TODO: tone mapping and stuff - - // Copy HDR Image to SwapChain - auto srcImgViewCreationParams = outHDRImageViews[imgnum]->getCreationParameters(); - auto dstImgViewCreationParams = fbo->begin()[imgnum]->getCreationParameters().attachments[0]->getCreationParameters(); - - // Getting Ready for Blit - // TRANSITION outHDRImageViews[imgnum] to EIL_TRANSFER_SRC_OPTIMAL - // TRANSITION `fbo[imgnum]->getCreationParameters().attachments[0]` to EIL_TRANSFER_DST_OPTIMAL - { - IGPUCommandBuffer::SImageMemoryBarrier imageBarriers[2u] = {}; - imageBarriers[0].barrier.srcAccessMask = asset::EAF_NONE; - imageBarriers[0].barrier.dstAccessMask = asset::EAF_TRANSFER_WRITE_BIT; - imageBarriers[0].oldLayout = asset::IImage::EL_UNDEFINED; - imageBarriers[0].newLayout = asset::IImage::EL_TRANSFER_SRC_OPTIMAL; - imageBarriers[0].srcQueueFamilyIndex = graphicsCmdPoolQueueFamIdx; - imageBarriers[0].dstQueueFamilyIndex = graphicsCmdPoolQueueFamIdx; - imageBarriers[0].image = srcImgViewCreationParams.image; - imageBarriers[0].subresourceRange.aspectMask = asset::IImage::EAF_COLOR_BIT; - imageBarriers[0].subresourceRange.baseMipLevel = 0u; - imageBarriers[0].subresourceRange.levelCount = 1; - imageBarriers[0].subresourceRange.baseArrayLayer = 0u; - 
imageBarriers[0].subresourceRange.layerCount = 1; - - imageBarriers[1].barrier.srcAccessMask = asset::EAF_NONE; - imageBarriers[1].barrier.dstAccessMask = asset::EAF_TRANSFER_WRITE_BIT; - imageBarriers[1].oldLayout = asset::IImage::EL_UNDEFINED; - imageBarriers[1].newLayout = asset::IImage::EL_TRANSFER_DST_OPTIMAL; - imageBarriers[1].srcQueueFamilyIndex = graphicsCmdPoolQueueFamIdx; - imageBarriers[1].dstQueueFamilyIndex = graphicsCmdPoolQueueFamIdx; - imageBarriers[1].image = dstImgViewCreationParams.image; - imageBarriers[1].subresourceRange.aspectMask = asset::IImage::EAF_COLOR_BIT; - imageBarriers[1].subresourceRange.baseMipLevel = 0u; - imageBarriers[1].subresourceRange.levelCount = 1; - imageBarriers[1].subresourceRange.baseArrayLayer = 0u; - imageBarriers[1].subresourceRange.layerCount = 1; - cb->pipelineBarrier(asset::EPSF_TRANSFER_BIT, asset::EPSF_TRANSFER_BIT, asset::EDF_NONE, 0u, nullptr, 0u, nullptr, 2u, imageBarriers); - } - - // Blit Image - { - SImageBlit blit = {}; - blit.srcOffsets[0] = {0, 0, 0}; - blit.srcOffsets[1] = {WIN_W, WIN_H, 1}; - - blit.srcSubresource.aspectMask = srcImgViewCreationParams.subresourceRange.aspectMask; - blit.srcSubresource.mipLevel = srcImgViewCreationParams.subresourceRange.baseMipLevel; - blit.srcSubresource.baseArrayLayer = srcImgViewCreationParams.subresourceRange.baseArrayLayer; - blit.srcSubresource.layerCount = srcImgViewCreationParams.subresourceRange.layerCount; - blit.dstOffsets[0] = {0, 0, 0}; - blit.dstOffsets[1] = {WIN_W, WIN_H, 1}; - blit.dstSubresource.aspectMask = dstImgViewCreationParams.subresourceRange.aspectMask; - blit.dstSubresource.mipLevel = dstImgViewCreationParams.subresourceRange.baseMipLevel; - blit.dstSubresource.baseArrayLayer = dstImgViewCreationParams.subresourceRange.baseArrayLayer; - blit.dstSubresource.layerCount = dstImgViewCreationParams.subresourceRange.layerCount; - - auto srcImg = srcImgViewCreationParams.image; - auto dstImg = dstImgViewCreationParams.image; - - cb->blitImage(srcImg.get(), asset::IImage::EL_TRANSFER_SRC_OPTIMAL, dstImg.get(), asset::IImage::EL_TRANSFER_DST_OPTIMAL, 1u, &blit , ISampler::ETF_NEAREST); - } - - // TRANSITION `fbo[imgnum]->getCreationParameters().attachments[0]` to EIL_PRESENT - { - IGPUCommandBuffer::SImageMemoryBarrier imageBarriers[1u] = {}; - imageBarriers[0].barrier.srcAccessMask = asset::EAF_TRANSFER_WRITE_BIT; - imageBarriers[0].barrier.dstAccessMask = asset::EAF_NONE; - imageBarriers[0].oldLayout = asset::IImage::EL_TRANSFER_DST_OPTIMAL; - imageBarriers[0].newLayout = asset::IImage::EL_PRESENT_SRC; - imageBarriers[0].srcQueueFamilyIndex = graphicsCmdPoolQueueFamIdx; - imageBarriers[0].dstQueueFamilyIndex = graphicsCmdPoolQueueFamIdx; - imageBarriers[0].image = dstImgViewCreationParams.image; - imageBarriers[0].subresourceRange.aspectMask = asset::IImage::EAF_COLOR_BIT; - imageBarriers[0].subresourceRange.baseMipLevel = 0u; - imageBarriers[0].subresourceRange.levelCount = 1; - imageBarriers[0].subresourceRange.baseArrayLayer = 0u; - imageBarriers[0].subresourceRange.layerCount = 1; - cb->pipelineBarrier(asset::EPSF_TRANSFER_BIT, asset::EPSF_TOP_OF_PIPE_BIT, asset::EDF_NONE, 0u, nullptr, 0u, nullptr, 1u, imageBarriers); - } - - cb->end(); - device->resetFences(1, &fence.get()); - CommonAPI::Submit(device.get(), cb.get(), graphicsQueue, imageAcquire[resourceIx].get(), renderFinished[resourceIx].get(), fence.get()); - CommonAPI::Present(device.get(), swapchain.get(), graphicsQueue, renderFinished[resourceIx].get(), imgnum); - - if (LOG_TIMESTAMP) - { - std::array 
timestamps{}; - auto queryResultFlags = core::bitflag(video::IQueryPool::EQRF_WAIT_BIT) | video::IQueryPool::EQRF_WITH_AVAILABILITY_BIT | video::IQueryPool::EQRF_64_BIT; - device->getQueryPoolResults(timestampQueryPool.get(), 0u, 2u, sizeof(timestamps), timestamps.data(), sizeof(uint64_t) * 2ull, queryResultFlags); - const float timePassed = (timestamps[2] - timestamps[0]) * device->getPhysicalDevice()->getLimits().timestampPeriodInNanoSeconds; - logger->log("Time Passed (Seconds) = %f", system::ILogger::ELL_INFO, (timePassed * 1e-9)); - logger->log("Timestamps availablity: %d, %d", system::ILogger::ELL_INFO, timestamps[1], timestamps[3]); - } - } - - const auto& fboCreationParams = fbo->begin()[0]->getCreationParameters(); - auto gpuSourceImageView = fboCreationParams.attachments[0]; - - device->waitIdle(); - - // bool status = ext::ScreenShot::createScreenShot(device.get(), queues[decltype(initOutput)::EQT_TRANSFER_UP], renderFinished[0].get(), gpuSourceImageView.get(), assetManager.get(), "ScreenShot.png"); - // assert(status); - - return 0; -} diff --git a/42_FragmentShaderPathTracer/pipeline.groovy b/42_FragmentShaderPathTracer/pipeline.groovy deleted file mode 100644 index 9e3a71cf3..000000000 --- a/42_FragmentShaderPathTracer/pipeline.groovy +++ /dev/null @@ -1,50 +0,0 @@ -import org.DevshGraphicsProgramming.Agent -import org.DevshGraphicsProgramming.BuilderInfo -import org.DevshGraphicsProgramming.IBuilder - -class CFragmentShaderPathTracerBuilder extends IBuilder -{ - public CFragmentShaderPathTracerBuilder(Agent _agent, _info) - { - super(_agent, _info) - } - - @Override - public boolean prepare(Map axisMapping) - { - return true - } - - @Override - public boolean build(Map axisMapping) - { - IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") - IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") - - def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) - def nameOfConfig = getNameOfConfig(config) - - agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") - - return true - } - - @Override - public boolean test(Map axisMapping) - { - return true - } - - @Override - public boolean install(Map axisMapping) - { - return true - } -} - -def create(Agent _agent, _info) -{ - return new CFragmentShaderPathTracerBuilder(_agent, _info) -} - -return this \ No newline at end of file diff --git a/53_ComputeShaders/CMakeLists.txt b/53_ComputeShaders/CMakeLists.txt deleted file mode 100644 index 2f9218f93..000000000 --- a/53_ComputeShaders/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -include(common RESULT_VARIABLE RES) -if(NOT RES) - message(FATAL_ERROR "common.cmake not found. 
Should be in {repo_root}/cmake directory") -endif() - -nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") \ No newline at end of file diff --git a/53_ComputeShaders/computeShader.comp b/53_ComputeShaders/computeShader.comp deleted file mode 100644 index 033a6aabb..000000000 --- a/53_ComputeShaders/computeShader.comp +++ /dev/null @@ -1,95 +0,0 @@ -#version 450 core -#extension GL_EXT_shader_16bit_storage : require - -#include "shaderCommon.glsl" - -layout(set = 0, binding = 0, std430) buffer Position -{ - vec4 positions[]; -}; - -layout(set = 0, binding = 1, std430) buffer Velocity -{ - vec4 velocities[]; -}; - -layout(set = 0, binding = 2, std430) buffer Color -{ - vec4 colors[]; -}; - -layout(set = 0, binding = 3, std430) buffer ColorRisingFlag -{ - bvec4 colorsRisingFlag[]; -}; - -layout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in; - -void manageColorAxieState(float colorAxie, inout bool colorIntensityRisingAxieFlag) -{ - if(colorAxie <= 0) - colorIntensityRisingAxieFlag = true; - else if(colorAxie >= 1) - colorIntensityRisingAxieFlag = false; -} - -void manageColorState(vec3 color) -{ - uint globalInvocationID = gl_GlobalInvocationID.x; // the .y and .z are both 1 in this case - bvec4 isColorIntensityRising = colorsRisingFlag[globalInvocationID]; - - manageColorAxieState(color.x, isColorIntensityRising.x); - manageColorAxieState(color.y, isColorIntensityRising.y); - manageColorAxieState(color.z, isColorIntensityRising.z); - - colorsRisingFlag[globalInvocationID] = isColorIntensityRising; -} - -float getNewAxieColor(float colorAxie, bool colorIntensityRisingAxieFlag) -{ - const float colorDelta = 0.04; - - if(colorIntensityRisingAxieFlag) - colorAxie += colorDelta; - else - colorAxie -= colorDelta; - - return colorAxie; -} - -vec3 getNewColor(vec3 color) -{ - uint globalInvocationID = gl_GlobalInvocationID.x; // the .y and .z are both 1 in this case - bvec4 isColorIntensityRising = colorsRisingFlag[globalInvocationID]; - - return vec3(getNewAxieColor(color.x, isColorIntensityRising.x), getNewAxieColor(color.y, isColorIntensityRising.y), getNewAxieColor(color.z, isColorIntensityRising.z)); -} - -void main() -{ - const float deltaTime = 0.004; - - uint globalInvocationID = gl_GlobalInvocationID.x; // the .y and .z are both 1 in this case - - vec3 position = positions[globalInvocationID].xyz; - vec3 velocity = velocities[globalInvocationID].xyz; - vec3 color = colors[globalInvocationID].xyz; - - if(!pushConstants.isXPressed) - { - /* - if(pushConstants.isZPressed) - { - // TODO gravity to force a particle's velocity towards the user - } - */ - position += velocity * deltaTime; - } - - vec3 newComputedColor = getNewColor(color); - manageColorState(newComputedColor); - - positions[globalInvocationID].xyz = position; - velocities[globalInvocationID].xyz = velocity; - colors[globalInvocationID].xyz = newComputedColor; -} \ No newline at end of file diff --git a/53_ComputeShaders/config.json.template b/53_ComputeShaders/config.json.template deleted file mode 100644 index f961745c1..000000000 --- a/53_ComputeShaders/config.json.template +++ /dev/null @@ -1,28 +0,0 @@ -{ - "enableParallelBuild": true, - "threadsPerBuildProcess" : 2, - "isExecuted": false, - "scriptPath": "", - "cmake": { - "configurations": [ "Release", "Debug", "RelWithDebInfo" ], - "buildModes": [], - "requiredOptions": [] - }, - "profiles": [ - { - "backend": "vulkan", - "platform": "windows", - "buildModes": [], - "runConfiguration": "Release", - "gpuArchitectures": [] - 
} - ], - "dependencies": [], - "data": [ - { - "dependencies": [], - "command": [""], - "outputs": [] - } - ] -} \ No newline at end of file diff --git a/53_ComputeShaders/fragmentShader.frag b/53_ComputeShaders/fragmentShader.frag deleted file mode 100644 index 9fe445b2b..000000000 --- a/53_ComputeShaders/fragmentShader.frag +++ /dev/null @@ -1,12 +0,0 @@ -#version 430 core - -layout(location = 0) in vec4 inFFullyProjectedVelocity; -layout(location = 1) in vec4 inFColor; - -layout(location = 0) out vec4 outColor; - -void main() -{ - outColor = inFColor; -} - \ No newline at end of file diff --git a/53_ComputeShaders/geometryShader.geom b/53_ComputeShaders/geometryShader.geom deleted file mode 100644 index 4a8bf36f0..000000000 --- a/53_ComputeShaders/geometryShader.geom +++ /dev/null @@ -1,27 +0,0 @@ -#version 450 core - -#include "shaderCommon.glsl" - -layout(location = 0) in vec4 gFullyProjectedVelocity[]; -layout(location = 1) in vec4 gColor[]; - -layout(location = 0) out vec4 outFVelocity; -layout(location = 1) out vec4 outFColor; - -layout (points) in; -layout (line_strip, max_vertices = 2) out; - -void main() -{ - if(pushConstants.isCPressed) - { - outFColor = vec4(0.0, 1.0, 0.0, 0.0); - gl_Position = gl_in[0].gl_Position; - EmitVertex(); - gl_Position = gl_in[0].gl_Position + gFullyProjectedVelocity[0]; - EmitVertex(); - - EndPrimitive(); - } -} - \ No newline at end of file diff --git a/53_ComputeShaders/main.cpp b/53_ComputeShaders/main.cpp deleted file mode 100644 index b8fb14017..000000000 --- a/53_ComputeShaders/main.cpp +++ /dev/null @@ -1,694 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#define _NBL_STATIC_LIB_ -#include -#include -#include - -#include "CCamera.hpp" -#include "../common/CommonAPI.h" -#include "nbl/ext/ScreenShot/ScreenShot.h" - -using namespace nbl; -using namespace asset; -using namespace core; - -/* - Uncomment for more detailed logging -*/ - -// #define NBL_MORE_LOGS - -class CEventReceiver -{ -public: - CEventReceiver() : particlesVectorChangeFlag(false), forceChangeVelocityFlag(false), visualizeVelocityVectorsFlag(false) {} - - void process(const ui::IKeyboardEventChannel::range_t& events) - { - particlesVectorChangeFlag = false; - forceChangeVelocityFlag = false; - visualizeVelocityVectorsFlag = false; - - for (auto eventIterator = events.begin(); eventIterator != events.end(); eventIterator++) - { - auto event = *eventIterator; - - if (event.keyCode == nbl::ui::EKC_X) - particlesVectorChangeFlag = true; - - if (event.keyCode == nbl::ui::EKC_Z) - forceChangeVelocityFlag = true; - - if (event.keyCode == nbl::ui::EKC_C) - visualizeVelocityVectorsFlag = true; - - if (event.keyCode == nbl::ui::EKC_V) - visualizeVelocityVectorsFlag = false; - } - } - - inline bool isXPressed() const { return particlesVectorChangeFlag; } - inline bool isZPressed() const { return forceChangeVelocityFlag; } - inline bool isCPressed() const { return visualizeVelocityVectorsFlag; } - -private: - bool particlesVectorChangeFlag; - bool forceChangeVelocityFlag; - bool visualizeVelocityVectorsFlag; -}; - -_NBL_STATIC_INLINE_CONSTEXPR size_t NUMBER_OF_PARTICLES = 1024 * 1024; // total number of particles to move -_NBL_STATIC_INLINE_CONSTEXPR size_t WORK_GROUP_SIZE = 128; // work-items per work-group - -enum E_ENTRIES -{ - EE_POSITIONS, - EE_VELOCITIES, - EE_COLORS, - EE_COLORS_RISING_FLAG, - EE_COUNT -}; - -#include "nbl/nblpack.h" 
-struct alignas(16) SShaderStorageBufferObject -{ - core::vector4df_SIMD positions[NUMBER_OF_PARTICLES]; - core::vector4df_SIMD velocities[NUMBER_OF_PARTICLES]; - core::vector4df_SIMD colors[NUMBER_OF_PARTICLES]; - bool isColorIntensityRising[NUMBER_OF_PARTICLES][4]; -} PACK_STRUCT; -#include "nbl/nblunpack.h" - -static_assert(sizeof(SShaderStorageBufferObject) == sizeof(SShaderStorageBufferObject::positions) + sizeof(SShaderStorageBufferObject::velocities) + sizeof(SShaderStorageBufferObject::colors) + sizeof(SShaderStorageBufferObject::isColorIntensityRising), "There will be inproper alignment!"); - -#include "nbl/nblpack.h" -struct alignas(32) SPushConstants -{ - uint32_t isXPressed = false; - uint32_t isZPressed = false; - uint32_t isCPressed = false; - core::vector3df currentUserAbsolutePosition; -} PACK_STRUCT; -#include "nbl/nblunpack.h" - -void triggerRandomSetup(SShaderStorageBufferObject* ssbo) -{ - _NBL_STATIC_INLINE_CONSTEXPR float POSITION_EACH_AXIE_MIN = -10.f; - _NBL_STATIC_INLINE_CONSTEXPR float POSITION_EACH_AXIE_MAX = 10.f; - - _NBL_STATIC_INLINE_CONSTEXPR float VELOCITY_EACH_AXIE_MIN = 0.f; - _NBL_STATIC_INLINE_CONSTEXPR float VELOCITY_EACH_AXIE_MAX = 0.001f; - - _NBL_STATIC_INLINE_CONSTEXPR float COLOR_EACH_AXIE_MIN = 0.f; - _NBL_STATIC_INLINE_CONSTEXPR float COLOR_EACH_AXIE_MAX = 1.f; - - auto get_random = [&](const float& min, const float& max) - { - static std::default_random_engine engine; - static std::uniform_real_distribution<> distribution(min, max); - return distribution(engine); - }; - - for (size_t i = 0; i < NUMBER_OF_PARTICLES; ++i) - { - ssbo->positions[i] = core::vector4df_SIMD(get_random(POSITION_EACH_AXIE_MIN, POSITION_EACH_AXIE_MAX), get_random(POSITION_EACH_AXIE_MIN, POSITION_EACH_AXIE_MAX), get_random(POSITION_EACH_AXIE_MIN, POSITION_EACH_AXIE_MAX), get_random(POSITION_EACH_AXIE_MIN, POSITION_EACH_AXIE_MAX)); - ssbo->velocities[i] = core::vector4df_SIMD(get_random(VELOCITY_EACH_AXIE_MIN, VELOCITY_EACH_AXIE_MAX), get_random(VELOCITY_EACH_AXIE_MIN, VELOCITY_EACH_AXIE_MAX), get_random(VELOCITY_EACH_AXIE_MIN, VELOCITY_EACH_AXIE_MAX), get_random(VELOCITY_EACH_AXIE_MIN, VELOCITY_EACH_AXIE_MAX)); - ssbo->colors[i] = core::vector4df_SIMD(get_random(COLOR_EACH_AXIE_MIN, COLOR_EACH_AXIE_MAX), get_random(COLOR_EACH_AXIE_MIN, COLOR_EACH_AXIE_MAX), get_random(COLOR_EACH_AXIE_MIN, COLOR_EACH_AXIE_MAX), get_random(COLOR_EACH_AXIE_MIN, COLOR_EACH_AXIE_MAX)); - - for (uint8_t b = 0; b < 4; ++b) - ssbo->isColorIntensityRising[i][b] = true; - } -} - -class MeshLoadersApp : public ApplicationBase -{ - static constexpr uint32_t WIN_W = 1280; - static constexpr uint32_t WIN_H = 720; - static constexpr uint32_t FBO_COUNT = 2u; - static constexpr uint32_t FRAMES_IN_FLIGHT = 1u; - static constexpr size_t NBL_FRAMES_TO_AVERAGE = 100ull; - -public: - nbl::core::smart_refctd_ptr windowManager; - nbl::core::smart_refctd_ptr window; - nbl::core::smart_refctd_ptr windowCallback; - nbl::core::smart_refctd_ptr gl; - nbl::core::smart_refctd_ptr surface; - nbl::core::smart_refctd_ptr utilities; - nbl::core::smart_refctd_ptr logicalDevice; - nbl::video::IPhysicalDevice* gpuPhysicalDevice; - std::array queues = { nullptr, nullptr, nullptr, nullptr }; - nbl::core::smart_refctd_ptr swapchain; - nbl::core::smart_refctd_ptr renderpass; - nbl::core::smart_refctd_dynamic_array> fbo; - std::array, CommonAPI::InitOutput::MaxFramesInFlight>, CommonAPI::InitOutput::MaxQueuesCount> commandPools; - nbl::core::smart_refctd_ptr system; - nbl::core::smart_refctd_ptr assetManager; - 
nbl::video::IGPUObjectFromAssetConverter::SParams cpu2gpuParams; - nbl::core::smart_refctd_ptr logger; - nbl::core::smart_refctd_ptr inputSystem; - - nbl::core::smart_refctd_ptr gpuTransferFence; - nbl::core::smart_refctd_ptr gpuComputeFence; - nbl::video::IGPUObjectFromAssetConverter cpu2gpu; - - core::smart_refctd_ptr commandBuffers[1]; - - CEventReceiver eventReceiver; - CommonAPI::InputSystem::ChannelReader mouse; - CommonAPI::InputSystem::ChannelReader keyboard; - - Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); - std::chrono::system_clock::time_point lastTime; - size_t frame_count = 0ull; - double time_sum = 0; - double dtList[NBL_FRAMES_TO_AVERAGE] = {}; - - SPushConstants pushConstants; - nbl::core::smart_refctd_ptr gpuComputePipeline; - nbl::core::smart_refctd_ptr gpuCDescriptorSet; - nbl::core::smart_refctd_ptr gpuUBO; - nbl::core::smart_refctd_ptr gpuGraphicsPipeline; - nbl::core::smart_refctd_ptr gpuGraphicsPipeline2; - nbl::core::smart_refctd_ptr gpuMeshBuffer; - nbl::core::smart_refctd_ptr gpuMeshBuffer2; - core::smart_refctd_ptr gpuGDescriptorSet1; - nbl::core::smart_refctd_ptr render_finished_sem; - nbl::video::ISwapchain::SCreationParams m_swapchainCreationParams; - - void setWindow(core::smart_refctd_ptr&& wnd) override - { - window = std::move(wnd); - } - void setSystem(core::smart_refctd_ptr&& s) override - { - system = std::move(s); - } - nbl::ui::IWindow* getWindow() override - { - return window.get(); - } - video::IAPIConnection* getAPIConnection() override - { - return gl.get(); - } - video::ILogicalDevice* getLogicalDevice() override - { - return logicalDevice.get(); - } - video::IGPURenderpass* getRenderpass() override - { - return renderpass.get(); - } - void setSurface(core::smart_refctd_ptr&& s) override - { - surface = std::move(s); - } - void setFBOs(std::vector>& f) override - { - for (int i = 0; i < f.size(); i++) - { - fbo->begin()[i] = core::smart_refctd_ptr(f[i]); - } - } - void setSwapchain(core::smart_refctd_ptr&& s) override - { - swapchain = std::move(s); - } - uint32_t getSwapchainImageCount() override - { - return swapchain->getImageCount(); - } - virtual nbl::asset::E_FORMAT getDepthFormat() override - { - return nbl::asset::EF_D32_SFLOAT; - } - -APP_CONSTRUCTOR(MeshLoadersApp) - - void onAppInitialized_impl() override - { - const auto swapchainImageUsage = static_cast(asset::IImage::EUF_COLOR_ATTACHMENT_BIT); - CommonAPI::InitParams initParams; - initParams.window = core::smart_refctd_ptr(window); - initParams.apiType = video::EAT_VULKAN; - initParams.appName = { _NBL_APP_NAME_ }; - initParams.framesInFlight = FRAMES_IN_FLIGHT; - initParams.windowWidth = WIN_W; - initParams.windowHeight = WIN_H; - initParams.swapchainImageCount = FBO_COUNT; - initParams.swapchainImageUsage = swapchainImageUsage; - initParams.depthFormat = nbl::asset::EF_D32_SFLOAT; - auto initOutput = CommonAPI::InitWithDefaultExt(std::move(initParams)); - - window = std::move(initParams.window); - gl = std::move(initOutput.apiConnection); - surface = std::move(initOutput.surface); - gpuPhysicalDevice = std::move(initOutput.physicalDevice); - logicalDevice = std::move(initOutput.logicalDevice); - queues = std::move(initOutput.queues); - renderpass = std::move(initOutput.renderToSwapchainRenderpass); - commandPools = std::move(initOutput.commandPools); - assetManager = std::move(initOutput.assetManager); - logger = std::move(initOutput.logger); - inputSystem = std::move(initOutput.inputSystem); - windowCallback = 
std::move(initParams.windowCb); - cpu2gpuParams = std::move(initOutput.cpu2gpuParams); - m_swapchainCreationParams = std::move(initOutput.swapchainCreationParams); - auto defaultGraphicsCommandPool = commandPools[CommonAPI::InitOutput::EQT_GRAPHICS][0]; - - CommonAPI::createSwapchain(std::move(logicalDevice), m_swapchainCreationParams, WIN_W, WIN_H, swapchain); - assert(swapchain); - fbo = CommonAPI::createFBOWithSwapchainImages( - swapchain->getImageCount(), WIN_W, WIN_H, - logicalDevice, swapchain, renderpass, - nbl::asset::EF_D32_SFLOAT - ); - - logicalDevice->createCommandBuffers(defaultGraphicsCommandPool.get(), nbl::video::IGPUCommandBuffer::EL_PRIMARY, 1, commandBuffers); - auto commandBuffer = commandBuffers[0]; - - auto createDescriptorPool = [&](const uint32_t itemCount, E_DESCRIPTOR_TYPE descriptorType) - { - constexpr uint32_t maxItemCount = 256u; - { - nbl::video::IDescriptorPool::SDescriptorPoolSize poolSize; - poolSize.count = itemCount; - poolSize.type = descriptorType; - return logicalDevice->createDescriptorPool(static_cast(0), maxItemCount, 1u, &poolSize); - } - }; - - /* - Compute pipeline - */ - - auto computeShaderBundle = assetManager->getAsset("../computeShader.comp", {}); - { - bool status = !computeShaderBundle.getContents().empty(); - assert(status); - } - - auto cpuComputeShader = core::smart_refctd_ptr_static_cast(computeShaderBundle.getContents().begin()[0]); - smart_refctd_ptr gpuComputeShader; - { - auto gpu_array = cpu2gpu.getGPUObjectsFromAssets(&cpuComputeShader, &cpuComputeShader + 1, cpu2gpuParams); - if (!gpu_array || gpu_array->size() < 1u || !(*gpu_array)[0]) - assert(false); - - gpuComputeShader = (*gpu_array)[0]; - } - - auto cpuSSBOBuffer = ICPUBuffer::create({ sizeof(SShaderStorageBufferObject) }); - cpuSSBOBuffer->addUsageFlags(asset::IBuffer::EUF_STORAGE_BUFFER_BIT); - triggerRandomSetup(reinterpret_cast(cpuSSBOBuffer->getPointer())); - core::smart_refctd_ptr gpuSSBOBuffer; - { - cpu2gpuParams.beginCommandBuffers(); - - auto gpu_array = cpu2gpu.getGPUObjectsFromAssets(&cpuSSBOBuffer, &cpuSSBOBuffer + 1, cpu2gpuParams); - if (!gpu_array || gpu_array->size() < 1u || !(*gpu_array)[0]) - assert(false); - - cpu2gpuParams.waitForCreationToComplete(false); - - auto gpuSSBOOffsetBufferPair = (*gpu_array)[0]; - gpuSSBOBuffer = core::smart_refctd_ptr(gpuSSBOOffsetBufferPair->getBuffer()); - } - - video::IGPUDescriptorSetLayout::SBinding gpuBindingsLayout[EE_COUNT] = - { - {EE_POSITIONS, EDT_STORAGE_BUFFER, 1u, video::IGPUShader::ESS_COMPUTE, nullptr}, - {EE_VELOCITIES, EDT_STORAGE_BUFFER, 1u, video::IGPUShader::ESS_COMPUTE, nullptr}, - {EE_COLORS, EDT_STORAGE_BUFFER, 1u, video::IGPUShader::ESS_COMPUTE, nullptr}, - {EE_COLORS_RISING_FLAG, EDT_STORAGE_BUFFER, 1u, video::IGPUShader::ESS_COMPUTE, nullptr} - }; - - auto gpuCDescriptorPool = createDescriptorPool(EE_COUNT, EDT_STORAGE_BUFFER); - auto gpuCDescriptorSetLayout = logicalDevice->createDescriptorSetLayout(gpuBindingsLayout, gpuBindingsLayout + EE_COUNT); - gpuCDescriptorSet = logicalDevice->createDescriptorSet(gpuCDescriptorPool.get(), core::smart_refctd_ptr(gpuCDescriptorSetLayout)); - { - video::IGPUDescriptorSet::SDescriptorInfo gpuDescriptorSetInfos[EE_COUNT]; - - gpuDescriptorSetInfos[EE_POSITIONS].desc = gpuSSBOBuffer; - gpuDescriptorSetInfos[EE_POSITIONS].buffer.size = sizeof(SShaderStorageBufferObject::positions); - gpuDescriptorSetInfos[EE_POSITIONS].buffer.offset = 0; - - gpuDescriptorSetInfos[EE_VELOCITIES].desc = gpuSSBOBuffer; - gpuDescriptorSetInfos[EE_VELOCITIES].buffer.size = 
sizeof(SShaderStorageBufferObject::velocities); - gpuDescriptorSetInfos[EE_VELOCITIES].buffer.offset = sizeof(SShaderStorageBufferObject::positions); - - gpuDescriptorSetInfos[EE_COLORS].desc = gpuSSBOBuffer; - gpuDescriptorSetInfos[EE_COLORS].buffer.size = sizeof(SShaderStorageBufferObject::colors); - gpuDescriptorSetInfos[EE_COLORS].buffer.offset = gpuDescriptorSetInfos[EE_VELOCITIES].buffer.offset + sizeof(SShaderStorageBufferObject::velocities); - - gpuDescriptorSetInfos[EE_COLORS_RISING_FLAG].desc = gpuSSBOBuffer; - gpuDescriptorSetInfos[EE_COLORS_RISING_FLAG].buffer.size = sizeof(SShaderStorageBufferObject::isColorIntensityRising); - gpuDescriptorSetInfos[EE_COLORS_RISING_FLAG].buffer.offset = gpuDescriptorSetInfos[EE_COLORS].buffer.offset + sizeof(SShaderStorageBufferObject::colors); - - video::IGPUDescriptorSet::SWriteDescriptorSet gpuWrites[EE_COUNT]; - { - for (uint32_t binding = 0u; binding < EE_COUNT; binding++) - gpuWrites[binding] = { gpuCDescriptorSet.get(), binding, 0u, 1u, EDT_STORAGE_BUFFER, gpuDescriptorSetInfos + binding }; - logicalDevice->updateDescriptorSets(EE_COUNT, gpuWrites, 0u, nullptr); - } - } - - asset::SPushConstantRange pushConstantRange; - { - pushConstantRange.stageFlags = (asset::IShader::E_SHADER_STAGE)(asset::IShader::ESS_COMPUTE | asset::IShader::ESS_GEOMETRY); - pushConstantRange.offset = 0; - pushConstantRange.size = sizeof(SPushConstants); - } - - auto gpuCPipelineLayout = logicalDevice->createPipelineLayout(&pushConstantRange, &pushConstantRange + 1, std::move(gpuCDescriptorSetLayout), nullptr, nullptr, nullptr); - gpuComputePipeline = logicalDevice->createComputePipeline(nullptr, std::move(gpuCPipelineLayout), std::move(gpuComputeShader)); - - /* - Graphics Pipeline - */ - - asset::SVertexInputParams inputVertexParams; - inputVertexParams.enabledAttribFlags = core::createBitmask({ EE_POSITIONS, EE_VELOCITIES, EE_COLORS, EE_COLORS_RISING_FLAG }); - inputVertexParams.enabledBindingFlags = core::createBitmask({ EE_POSITIONS, EE_VELOCITIES, EE_COLORS, EE_COLORS_RISING_FLAG }); - - for (uint8_t i = 0; i < EE_COUNT; ++i) - { - inputVertexParams.bindings[i].stride = (i == EE_COLORS_RISING_FLAG ? getTexelOrBlockBytesize(EF_R8G8B8A8_UINT) : getTexelOrBlockBytesize(EF_R32G32B32A32_SFLOAT)); - inputVertexParams.bindings[i].inputRate = asset::EVIR_PER_VERTEX; - - inputVertexParams.attributes[i].binding = i; - inputVertexParams.attributes[i].format = (i == EE_COLORS_RISING_FLAG ? 
EF_R8G8B8A8_UINT : asset::EF_R32G32B32A32_SFLOAT); - inputVertexParams.attributes[i].relativeOffset = 0; - } - - asset::SBlendParams blendParams; - asset::SPrimitiveAssemblyParams primitiveAssemblyParams; - primitiveAssemblyParams.primitiveType = EPT_POINT_LIST; - asset::SRasterizationParams rasterizationParams; - - video::IGPUDescriptorSetLayout::SBinding gpuUboBinding = {}; - gpuUboBinding.count = 1u; - gpuUboBinding.binding = 0; - gpuUboBinding.stageFlags = static_cast(asset::ICPUShader::ESS_VERTEX | asset::ICPUShader::ESS_FRAGMENT); - gpuUboBinding.type = asset::EDT_UNIFORM_BUFFER; - - auto gpuGDescriptorPool = createDescriptorPool(1, EDT_UNIFORM_BUFFER); - auto gpuGDs1Layout = logicalDevice->createDescriptorSetLayout(&gpuUboBinding, &gpuUboBinding + 1); - - video::IGPUBuffer::SCreationParams gpuUBOCreationParams; - //gpuUBOCreationParams.size = sizeof(SBasicViewParameters); - gpuUBOCreationParams.usage = asset::IBuffer::E_USAGE_FLAGS(asset::IBuffer::EUF_UNIFORM_BUFFER_BIT | asset::IBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF); - gpuUBOCreationParams.queueFamilyIndexCount = 0u; - gpuUBOCreationParams.queueFamilyIndices = nullptr; - gpuUBOCreationParams.size = sizeof(SBasicViewParameters); - - gpuUBO = logicalDevice->createBuffer(std::move(gpuUBOCreationParams)); - auto gpuUBOmemreqs = gpuUBO->getMemoryReqs(); - gpuUBOmemreqs.memoryTypeBits &= gpuPhysicalDevice->getDeviceLocalMemoryTypeBits(); - logicalDevice->allocate(gpuUBOmemreqs, gpuUBO.get()); - - gpuGDescriptorSet1 = logicalDevice->createDescriptorSet(gpuGDescriptorPool.get(), gpuGDs1Layout); - { - video::IGPUDescriptorSet::SWriteDescriptorSet write; - write.dstSet = gpuGDescriptorSet1.get(); - write.binding = 0; - write.count = 1u; - write.arrayElement = 0u; - write.descriptorType = asset::EDT_UNIFORM_BUFFER; - video::IGPUDescriptorSet::SDescriptorInfo info; - { - info.desc = gpuUBO; - info.buffer.offset = 0ull; - info.buffer.size = sizeof(SBasicViewParameters); - } - write.info = &info; - logicalDevice->updateDescriptorSets(1u, &write, 0u, nullptr); - } - - auto vertexShaderBundle = assetManager->getAsset("../vertexShader.vert", {}); - { - bool status = !vertexShaderBundle.getContents().empty(); - assert(status); - } - - auto cpuVertexShader = core::smart_refctd_ptr_static_cast(vertexShaderBundle.getContents().begin()[0]); - smart_refctd_ptr gpuVertexShader; - { - auto gpu_array = cpu2gpu.getGPUObjectsFromAssets(&cpuVertexShader, &cpuVertexShader + 1, cpu2gpuParams); - if (!gpu_array || gpu_array->size() < 1u || !(*gpu_array)[0]) - assert(false); - - gpuVertexShader = (*gpu_array)[0]; - } - - auto fragmentShaderBundle = assetManager->getAsset("../fragmentShader.frag", {}); - { - bool status = !fragmentShaderBundle.getContents().empty(); - assert(status); - } - - auto cpuFragmentShader = core::smart_refctd_ptr_static_cast(fragmentShaderBundle.getContents().begin()[0]); - smart_refctd_ptr gpuFragmentShader; - { - auto gpu_array = cpu2gpu.getGPUObjectsFromAssets(&cpuFragmentShader, &cpuFragmentShader + 1, cpu2gpuParams); - if (!gpu_array || gpu_array->size() < 1u || !(*gpu_array)[0]) - assert(false); - - gpuFragmentShader = (*gpu_array)[0]; - } - - auto geometryShaderBundle = assetManager->getAsset("../geometryShader.geom", {}); - { - bool status = !geometryShaderBundle.getContents().empty(); - assert(status); - } - - auto cpuGeometryShader = core::smart_refctd_ptr_static_cast(geometryShaderBundle.getContents().begin()[0]); - smart_refctd_ptr gpuGeometryShader; - { - auto gpu_array = cpu2gpu.getGPUObjectsFromAssets(&cpuGeometryShader, 
&cpuGeometryShader + 1, cpu2gpuParams); - if (!gpu_array || gpu_array->size() < 1u || !(*gpu_array)[0]) - assert(false); - - gpuGeometryShader = (*gpu_array)[0]; - } - - core::smart_refctd_ptr gpuGShaders[] = { gpuVertexShader, gpuFragmentShader, gpuGeometryShader }; - auto gpuGShadersPointer = reinterpret_cast(gpuGShaders); - - auto gpuGPipelineLayout = logicalDevice->createPipelineLayout(&pushConstantRange, &pushConstantRange + 1, nullptr, std::move(gpuGDs1Layout), nullptr, nullptr); - auto gpuRenderpassIndependentPipeline = logicalDevice->createRenderpassIndependentPipeline(nullptr, core::smart_refctd_ptr(gpuGPipelineLayout), gpuGShadersPointer, gpuGShadersPointer + 2 /* discard geometry shader*/, inputVertexParams, blendParams, primitiveAssemblyParams, rasterizationParams); - auto gpuRenderpassIndependentPipeline2 = logicalDevice->createRenderpassIndependentPipeline(nullptr, core::smart_refctd_ptr(gpuGPipelineLayout), gpuGShadersPointer, gpuGShadersPointer + 3, inputVertexParams, blendParams, primitiveAssemblyParams, rasterizationParams); - - asset::SBufferBinding gpuGbindings[video::IGPUMeshBuffer::MAX_ATTR_BUF_BINDING_COUNT]; - - gpuGbindings[EE_POSITIONS].buffer = gpuSSBOBuffer; - gpuGbindings[EE_POSITIONS].offset = 0; - - gpuGbindings[EE_VELOCITIES].buffer = gpuSSBOBuffer; - gpuGbindings[EE_VELOCITIES].offset = sizeof(SShaderStorageBufferObject::positions); - - gpuGbindings[EE_COLORS].buffer = gpuSSBOBuffer; - gpuGbindings[EE_COLORS].offset = gpuGbindings[EE_VELOCITIES].offset + sizeof(SShaderStorageBufferObject::velocities); - - gpuGbindings[EE_COLORS_RISING_FLAG].buffer = gpuSSBOBuffer; - gpuGbindings[EE_COLORS_RISING_FLAG].offset = gpuGbindings[EE_COLORS].offset + sizeof(SShaderStorageBufferObject::colors); - - gpuMeshBuffer = core::make_smart_refctd_ptr(std::move(gpuRenderpassIndependentPipeline), nullptr, gpuGbindings, asset::SBufferBinding()); - { - gpuMeshBuffer->setIndexType(asset::EIT_UNKNOWN); - gpuMeshBuffer->setIndexCount(NUMBER_OF_PARTICLES); - } - - { - nbl::video::IGPUGraphicsPipeline::SCreationParams graphicsPipelineParams; - graphicsPipelineParams.renderpassIndependent = core::smart_refctd_ptr(const_cast(gpuMeshBuffer->getPipeline())); - graphicsPipelineParams.renderpass = core::smart_refctd_ptr(renderpass); - gpuGraphicsPipeline = logicalDevice->createGraphicsPipeline(nullptr, std::move(graphicsPipelineParams)); - } - - gpuMeshBuffer2 = core::make_smart_refctd_ptr(std::move(gpuRenderpassIndependentPipeline2), nullptr, gpuGbindings, asset::SBufferBinding()); - { - gpuMeshBuffer2->setIndexType(asset::EIT_UNKNOWN); - gpuMeshBuffer2->setIndexCount(NUMBER_OF_PARTICLES); - } - - { - nbl::video::IGPUGraphicsPipeline::SCreationParams graphicsPipelineParams; - graphicsPipelineParams.renderpassIndependent = core::smart_refctd_ptr(const_cast(gpuMeshBuffer2->getPipeline())); - graphicsPipelineParams.renderpass = core::smart_refctd_ptr(renderpass); - gpuGraphicsPipeline2 = logicalDevice->createGraphicsPipeline(nullptr, std::move(graphicsPipelineParams)); - } - - const std::string captionData = "[Nabla Engine] Compute Shaders"; - window->setCaption(captionData); - - core::vectorSIMDf cameraPosition(0, 0, 0); - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), video::ISurface::getTransformedAspectRatio(swapchain->getPreTransform(), WIN_W, WIN_H), 0.001, 1000); - camera = Camera(cameraPosition, core::vectorSIMDf(0, 0, -1), projectionMatrix, 10.f, 1.f); - lastTime = std::chrono::system_clock::now(); - for (size_t i = 0ull; i < 
NBL_FRAMES_TO_AVERAGE; ++i) - dtList[i] = 0.0; - } - - void onAppTerminated_impl() override - { - const auto& fboCreationParams = fbo->begin()[0]->getCreationParameters(); - auto gpuSourceImageView = fboCreationParams.attachments[0]; - - bool status = ext::ScreenShot::createScreenShot(logicalDevice.get(), - queues[CommonAPI::InitOutput::EQT_TRANSFER_UP], - render_finished_sem.get(), - gpuSourceImageView.get(), - assetManager.get(), - "ScreenShot.png", - asset::IImage::EL_PRESENT_SRC, - asset::EAF_NONE); - - assert(status); - } - - void workLoopBody() override - { - auto renderStart = std::chrono::system_clock::now(); - const auto renderDt = std::chrono::duration_cast(renderStart - lastTime).count(); - lastTime = renderStart; - { // Calculate Simple Moving Average for FrameTime - time_sum -= dtList[frame_count]; - time_sum += renderDt; - dtList[frame_count] = renderDt; - frame_count++; - if (frame_count >= NBL_FRAMES_TO_AVERAGE) - frame_count = 0; - } - const double averageFrameTime = time_sum / (double)NBL_FRAMES_TO_AVERAGE; - -#ifdef NBL_MORE_LOGS - logger->log("renderDt = %f ------ averageFrameTime = %f", system::ILogger::ELL_INFO, renderDt, averageFrameTime); -#endif // NBL_MORE_LOGS - - auto averageFrameTimeDuration = std::chrono::duration(averageFrameTime); - auto nextPresentationTime = renderStart + averageFrameTimeDuration; - auto nextPresentationTimeStamp = std::chrono::duration_cast(nextPresentationTime.time_since_epoch()); - - inputSystem->getDefaultMouse(&mouse); - inputSystem->getDefaultKeyboard(&keyboard); - - camera.beginInputProcessing(nextPresentationTimeStamp); - mouse.consumeEvents([&](const ui::IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); }, logger.get()); - keyboard.consumeEvents([&](const ui::IKeyboardEventChannel::range_t& events) -> void { camera.keyboardProcess(events); eventReceiver.process(events); }, logger.get()); - camera.endInputProcessing(nextPresentationTimeStamp); - - const auto& viewMatrix = camera.getViewMatrix(); - const auto& viewProjectionMatrix = matrix4SIMD::concatenateBFollowedByAPrecisely( - video::ISurface::getSurfaceTransformationMatrix(swapchain->getPreTransform()), - camera.getConcatenatedMatrix() - ); - - auto& commandBuffer = commandBuffers[0]; - commandBuffer->reset(nbl::video::IGPUCommandBuffer::ERF_RELEASE_RESOURCES_BIT); - commandBuffer->begin(video::IGPUCommandBuffer::EU_ONE_TIME_SUBMIT_BIT); // TODO: Reset Frame's CommandPool - - asset::SViewport viewport; - viewport.minDepth = 1.f; - viewport.maxDepth = 0.f; - viewport.x = 0u; - viewport.y = 0u; - viewport.width = WIN_W; - viewport.height = WIN_H; - commandBuffer->setViewport(0u, 1u, &viewport); - - nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo beginInfo; - VkRect2D area; - area.offset = { 0,0 }; - area.extent = { WIN_W, WIN_H }; - nbl::asset::SClearValue clear[2]; - clear[0].color.float32[0] = 0.f; - clear[0].color.float32[1] = 0.f; - clear[0].color.float32[2] = 0.f; - clear[0].color.float32[3] = 0.f; - clear[1].depthStencil.depth = 0.f; - - beginInfo.clearValueCount = 2u; - beginInfo.framebuffer = fbo->begin()[0]; - beginInfo.renderpass = renderpass; - beginInfo.renderArea = area; - beginInfo.clearValues = clear; - - commandBuffer->beginRenderPass(&beginInfo, nbl::asset::ESC_INLINE); - - pushConstants.isXPressed = eventReceiver.isXPressed(); - pushConstants.isZPressed = eventReceiver.isZPressed(); - pushConstants.isCPressed = eventReceiver.isCPressed(); - pushConstants.currentUserAbsolutePosition = camera.getPosition().getAsVector3df(); - - /* - 
Calculation of particle postitions takes place here - */ - - commandBuffer->bindComputePipeline(gpuComputePipeline.get()); - commandBuffer->pushConstants(gpuComputePipeline->getLayout(), asset::IShader::ESS_COMPUTE, 0, sizeof(SPushConstants), &pushConstants); - commandBuffer->bindDescriptorSets(EPBP_COMPUTE, gpuComputePipeline->getLayout(), 0, 1, &gpuCDescriptorSet.get(), 0u); - - static_assert(NUMBER_OF_PARTICLES % WORK_GROUP_SIZE == 0, "Inccorect amount!"); - _NBL_STATIC_INLINE_CONSTEXPR size_t groupCountX = NUMBER_OF_PARTICLES / WORK_GROUP_SIZE; - - commandBuffer->dispatch(groupCountX, 1, 1); - - /* - After calculation of positions each particle gets displayed - */ - - core::matrix3x4SIMD modelMatrix; - modelMatrix.setTranslation(nbl::core::vectorSIMDf(0, 0, 0, 0)); - - core::matrix4SIMD mvp = core::concatenateBFollowedByA(viewProjectionMatrix, modelMatrix); - - SBasicViewParameters uboData; - memcpy(uboData.MV, viewMatrix.pointer(), sizeof(uboData.MV)); - memcpy(uboData.MVP, mvp.pointer(), sizeof(uboData.MVP)); - memcpy(uboData.NormalMat, viewMatrix.pointer(), sizeof(uboData.NormalMat)); - commandBuffer->updateBuffer(gpuUBO.get(), 0ull, sizeof(uboData), &uboData); - - /* - Draw particles - */ - - commandBuffer->bindGraphicsPipeline(gpuGraphicsPipeline.get()); - commandBuffer->bindDescriptorSets(asset::EPBP_GRAPHICS, gpuMeshBuffer->getPipeline()->getLayout(), 1u, 1u, &gpuGDescriptorSet1.get(), 0u); - commandBuffer->drawMeshBuffer(gpuMeshBuffer.get()); - - /* - Draw extras with geometry usage under key c and v conditions - */ - - commandBuffer->bindGraphicsPipeline(gpuGraphicsPipeline2.get()); - commandBuffer->pushConstants(gpuMeshBuffer2->getPipeline()->getLayout(), asset::IShader::ESS_GEOMETRY, 0, sizeof(SPushConstants), &pushConstants); - commandBuffer->bindDescriptorSets(asset::EPBP_GRAPHICS, gpuMeshBuffer2->getPipeline()->getLayout(), 1u, 1u, &gpuGDescriptorSet1.get(), 0u); - commandBuffer->drawMeshBuffer(gpuMeshBuffer2.get()); - - commandBuffer->endRenderPass(); - commandBuffer->end(); - - auto img_acq_sem = logicalDevice->createSemaphore(); - render_finished_sem = logicalDevice->createSemaphore(); - - uint32_t imgnum = 0u; - constexpr uint64_t MAX_TIMEOUT = 99999999999999ull; // ns - swapchain->acquireNextImage(MAX_TIMEOUT, img_acq_sem.get(), nullptr, &imgnum); - - CommonAPI::Submit(logicalDevice.get(), commandBuffer.get(), queues[CommonAPI::InitOutput::EQT_GRAPHICS], img_acq_sem.get(), render_finished_sem.get()); - CommonAPI::Present(logicalDevice.get(), swapchain.get(), queues[CommonAPI::InitOutput::EQT_GRAPHICS], render_finished_sem.get(), imgnum); - } - - bool keepRunning() override - { - return windowCallback->isWindowOpen(); - } -}; - -NBL_COMMON_API_MAIN(MeshLoadersApp, MeshLoadersApp::Nabla) diff --git a/53_ComputeShaders/pipeline.groovy b/53_ComputeShaders/pipeline.groovy deleted file mode 100644 index e8eb74b5b..000000000 --- a/53_ComputeShaders/pipeline.groovy +++ /dev/null @@ -1,50 +0,0 @@ -import org.DevshGraphicsProgramming.Agent -import org.DevshGraphicsProgramming.BuilderInfo -import org.DevshGraphicsProgramming.IBuilder - -class CComputeShadersBuilder extends IBuilder -{ - public CComputeShadersBuilder(Agent _agent, _info) - { - super(_agent, _info) - } - - @Override - public boolean prepare(Map axisMapping) - { - return true - } - - @Override - public boolean build(Map axisMapping) - { - IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") - IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") - - def nameOfBuildDirectory = 
getNameOfBuildDirectory(buildType) - def nameOfConfig = getNameOfConfig(config) - - agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") - - return true - } - - @Override - public boolean test(Map axisMapping) - { - return true - } - - @Override - public boolean install(Map axisMapping) - { - return true - } -} - -def create(Agent _agent, _info) -{ - return new CComputeShadersBuilder(_agent, _info) -} - -return this \ No newline at end of file diff --git a/53_ComputeShaders/shaderCommon.glsl b/53_ComputeShaders/shaderCommon.glsl deleted file mode 100644 index 972a8789a..000000000 --- a/53_ComputeShaders/shaderCommon.glsl +++ /dev/null @@ -1,6 +0,0 @@ -layout(push_constant, row_major) uniform Block{ - bool isXPressed; - bool isZPressed; - bool isCPressed; - vec3 currentUserAbsolutePostion; -} pushConstants; \ No newline at end of file diff --git a/53_ComputeShaders/vertexShader.vert b/53_ComputeShaders/vertexShader.vert deleted file mode 100644 index 6b14d97c8..000000000 --- a/53_ComputeShaders/vertexShader.vert +++ /dev/null @@ -1,23 +0,0 @@ -#version 430 core - -layout(location = 0) in vec4 vPosition; -layout(location = 1) in vec4 vVelocity; -layout(location = 2) in vec4 vColor; - -#include -#include - -layout (set = 1, binding = 0, row_major, std140) uniform UBO -{ - nbl_glsl_SBasicViewParameters params; -} cameraData; - -layout(location = 0) flat out vec4 outGOrFFullyProjectedVelocity; -layout(location = 1) flat out vec4 outGorFColor; - -void main() -{ - gl_Position = (cameraData.params.MVP) * vPosition; - outGOrFFullyProjectedVelocity = (cameraData.params.MVP) * vVelocity * 0.0001; - outGorFColor = vColor; -} \ No newline at end of file diff --git a/56_RayQuery/CMakeLists.txt b/56_RayQuery/CMakeLists.txt deleted file mode 100644 index a476b6203..000000000 --- a/56_RayQuery/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ - -include(common RESULT_VARIABLE RES) -if(NOT RES) - message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") -endif() - -nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") \ No newline at end of file diff --git a/56_RayQuery/common.glsl b/56_RayQuery/common.glsl deleted file mode 100644 index ad88789f8..000000000 --- a/56_RayQuery/common.glsl +++ /dev/null @@ -1,793 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
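// A minimal, self-contained sketch of the particle SSBO sub-allocation used by the deleted
// 53_ComputeShaders example above: each descriptor binding and vertex-buffer binding views the
// same buffer at a cumulative offset, and the compute dispatch covers one invocation per particle.
// The struct layout and the two constants are assumed stand-ins, not the exact definitions
// removed by this patch.
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <cstdio>

constexpr uint32_t NUMBER_OF_PARTICLES = 1024u; // assumed value, must be a multiple of WORK_GROUP_SIZE
constexpr uint32_t WORK_GROUP_SIZE = 256u;      // assumed value

struct SShaderStorageBufferObject
{
    float   positions[NUMBER_OF_PARTICLES][4];
    float   velocities[NUMBER_OF_PARTICLES][4];
    float   colors[NUMBER_OF_PARTICLES][4];
    uint8_t isColorIntensityRising[NUMBER_OF_PARTICLES][4];
};

int main()
{
    // cumulative offsets, in the same order the descriptor infos and SBufferBinding entries are filled above
    const size_t positionsOffset  = 0u;
    const size_t velocitiesOffset = positionsOffset  + sizeof(SShaderStorageBufferObject::positions);
    const size_t colorsOffset     = velocitiesOffset + sizeof(SShaderStorageBufferObject::velocities);
    const size_t risingFlagOffset = colorsOffset     + sizeof(SShaderStorageBufferObject::colors);
    assert(risingFlagOffset + sizeof(SShaderStorageBufferObject::isColorIntensityRising) <= sizeof(SShaderStorageBufferObject));

    // one invocation per particle; the deleted code static_asserts that the division is exact
    static_assert(NUMBER_OF_PARTICLES % WORK_GROUP_SIZE == 0u, "Incorrect amount!");
    const uint32_t groupCountX = NUMBER_OF_PARTICLES / WORK_GROUP_SIZE;

    std::printf("offsets: %zu %zu %zu %zu, dispatch: %u x 1 x 1\n",
        positionsOffset, velocitiesOffset, colorsOffset, risingFlagOffset, groupCountX);
    return 0;
}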
-// For conditions of distribution and use, see copyright notice in nabla.h - -// basic settings -#define MAX_DEPTH 15 -#define SAMPLES 32 - -// firefly and variance reduction techniques -//#define KILL_DIFFUSE_SPECULAR_PATHS -//#define VISUALIZE_HIGH_VARIANCE - -#define INVALID_ID_16BIT 0xffffu -struct Sphere -{ - vec3 position; - float radius2; - uint bsdfLightIDs; -}; - -layout(set=0, binding=0, rgba16f) uniform image2D outImage; - -layout(set = 2, binding = 0) uniform sampler2D envMap; -layout(set = 2, binding = 1) uniform usamplerBuffer sampleSequence; -layout(set = 2, binding = 2) uniform usampler2D scramblebuf; -layout(set = 2, binding = 3) uniform accelerationStructureEXT topLevelAS; -layout(set = 2, binding = 4) readonly restrict buffer InputBuffer -{ - Sphere spheres[]; -}; - -#ifndef _NBL_GLSL_WORKGROUP_SIZE_ -#define _NBL_GLSL_WORKGROUP_SIZE_ 16 -layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=_NBL_GLSL_WORKGROUP_SIZE_, local_size_z=1) in; -#endif - -ivec2 getCoordinates() { - return ivec2(gl_GlobalInvocationID.xy); -} - -vec2 getTexCoords() { - ivec2 imageSize = imageSize(outImage); - ivec2 iCoords = getCoordinates(); - return vec2(float(iCoords.x) / imageSize.x, 1.0 - float(iCoords.y) / imageSize.y); -} - - -#include -#include -#include - -#include - -layout(set = 1, binding = 0, row_major, std140) uniform UBO -{ - nbl_glsl_SBasicViewParameters params; -} cameraData; - -Sphere Sphere_Sphere(in vec3 position, in float radius, in uint bsdfID, in uint lightID) -{ - Sphere sphere; - sphere.position = position; - sphere.radius2 = radius*radius; - sphere.bsdfLightIDs = bitfieldInsert(bsdfID,lightID,16,16); - return sphere; -} - -// return intersection distance if found, FLT_NAN otherwise -float Sphere_intersect(in Sphere sphere, in vec3 origin, in vec3 direction) -{ - vec3 relOrigin = origin-sphere.position; - float relOriginLen2 = dot(relOrigin,relOrigin); - const float radius2 = sphere.radius2; - - float dirDotRelOrigin = dot(direction,relOrigin); - float det = radius2-relOriginLen2+dirDotRelOrigin*dirDotRelOrigin; - - // do some speculative math here - float detsqrt = sqrt(det); - return -dirDotRelOrigin+(relOriginLen2>radius2 ? 
(-detsqrt):detsqrt); -} - -vec3 Sphere_getNormal(in Sphere sphere, in vec3 position) -{ - const float radiusRcp = inversesqrt(sphere.radius2); - return (position-sphere.position)*radiusRcp; -} - -float Sphere_getSolidAngle_impl(in float cosThetaMax) -{ - return 2.0*nbl_glsl_PI*(1.0-cosThetaMax); -} -float Sphere_getSolidAngle(in Sphere sphere, in vec3 origin) -{ - float cosThetaMax = sqrt(1.0-sphere.radius2/nbl_glsl_lengthSq(sphere.position-origin)); - return Sphere_getSolidAngle_impl(cosThetaMax); -} - -struct Triangle -{ - vec3 vertex0; - uint bsdfLightIDs; - vec3 vertex1; - uint padding0; - vec3 vertex2; - uint padding1; -}; - -Triangle Triangle_Triangle(in mat3 vertices, in uint bsdfID, in uint lightID) -{ - Triangle tri; - tri.vertex0 = vertices[0]; - tri.vertex1 = vertices[1]; - tri.vertex2 = vertices[2]; - // - tri.bsdfLightIDs = bitfieldInsert(bsdfID, lightID, 16, 16); - return tri; -} - -// return intersection distance if found, FLT_NAN otherwise -float Triangle_intersect(in Triangle tri, in vec3 origin, in vec3 direction) -{ - const vec3 edges[2] = vec3[2](tri.vertex1-tri.vertex0,tri.vertex2-tri.vertex0); - - const vec3 h = cross(direction,edges[1]); - const float a = dot(edges[0],h); - - const vec3 relOrigin = origin-tri.vertex0; - - const float u = dot(relOrigin,h)/a; - - const vec3 q = cross(relOrigin,edges[0]); - const float v = dot(direction,q)/a; - - const float t = dot(edges[1],q)/a; - - return t>0.f&&u>=0.f&&v>=0.f&&(u+v)<=1.f ? t:nbl_glsl_FLT_NAN; -} - -vec3 Triangle_getNormalTimesArea_impl(in mat2x3 edges) -{ - return cross(edges[0],edges[1])*0.5; -} -vec3 Triangle_getNormalTimesArea(in Triangle tri) -{ - return Triangle_getNormalTimesArea_impl(mat2x3(tri.vertex1-tri.vertex0,tri.vertex2-tri.vertex0)); -} - - - -struct Rectangle -{ - vec3 offset; - uint bsdfLightIDs; - vec3 edge0; - uint padding0; - vec3 edge1; - uint padding1; -}; - -Rectangle Rectangle_Rectangle(in vec3 offset, in vec3 edge0, in vec3 edge1, in uint bsdfID, in uint lightID) -{ - Rectangle rect; - rect.offset = offset; - rect.edge0 = edge0; - rect.edge1 = edge1; - // - rect.bsdfLightIDs = bitfieldInsert(bsdfID, lightID, 16, 16); - return rect; -} - -// return intersection distance if found, FLT_NAN otherwise -float Rectangle_intersect(in Rectangle rect, in vec3 origin, in vec3 direction) -{ - const vec3 h = cross(direction,rect.edge1); - const float a = dot(rect.edge0,h); - - const vec3 relOrigin = origin-rect.offset; - - const float u = dot(relOrigin,h)/a; - - const vec3 q = cross(relOrigin,rect.edge0); - const float v = dot(direction,q)/a; - - const float t = dot(rect.edge1,q)/a; - - const bool intersection = t>0.f&&u>=0.f&&v>=0.f&&u<=1.f&&v<=1.f; - return intersection ? 
t:nbl_glsl_FLT_NAN; -} - -vec3 Rectangle_getNormalTimesArea(in Rectangle rect) -{ - return cross(rect.edge0,rect.edge1); -} - - - -#define DIFFUSE_OP 0u -#define CONDUCTOR_OP 1u -#define DIELECTRIC_OP 2u -#define OP_BITS_OFFSET 0 -#define OP_BITS_SIZE 2 -struct BSDFNode -{ - uvec4 data[2]; -}; - -uint BSDFNode_getType(in BSDFNode node) -{ - return bitfieldExtract(node.data[0].w,OP_BITS_OFFSET,OP_BITS_SIZE); -} -bool BSDFNode_isBSDF(in BSDFNode node) -{ - return BSDFNode_getType(node)==DIELECTRIC_OP; -} -bool BSDFNode_isNotDiffuse(in BSDFNode node) -{ - return BSDFNode_getType(node)!=DIFFUSE_OP; -} -float BSDFNode_getRoughness(in BSDFNode node) -{ - return uintBitsToFloat(node.data[1].w); -} -vec3 BSDFNode_getRealEta(in BSDFNode node) -{ - return uintBitsToFloat(node.data[0].rgb); -} -vec3 BSDFNode_getImaginaryEta(in BSDFNode node) -{ - return uintBitsToFloat(node.data[1].rgb); -} -mat2x3 BSDFNode_getEta(in BSDFNode node) -{ - return mat2x3(BSDFNode_getRealEta(node),BSDFNode_getImaginaryEta(node)); -} -#include -vec3 BSDFNode_getReflectance(in BSDFNode node, in float VdotH) -{ - const vec3 albedoOrRealIoR = uintBitsToFloat(node.data[0].rgb); - if (BSDFNode_isNotDiffuse(node)) - return nbl_glsl_fresnel_conductor(albedoOrRealIoR, BSDFNode_getImaginaryEta(node), VdotH); - else - return albedoOrRealIoR; -} - -float BSDFNode_getNEEProb(in BSDFNode bsdf) -{ - const float alpha = BSDFNode_isNotDiffuse(bsdf) ? BSDFNode_getRoughness(bsdf):1.0; - return min(8.0*alpha,1.0); -} - -#include -#include -float getLuma(in vec3 col) -{ - return dot(transpose(nbl_glsl_scRGBtoXYZ)[1],col); -} - -#define BSDF_COUNT 7 -BSDFNode bsdfs[BSDF_COUNT] = { - {{uvec4(floatBitsToUint(vec3(0.8,0.8,0.8)),DIFFUSE_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0))}}, - {{uvec4(floatBitsToUint(vec3(0.8,0.4,0.4)),DIFFUSE_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0))}}, - {{uvec4(floatBitsToUint(vec3(0.4,0.8,0.4)),DIFFUSE_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0))}}, - {{uvec4(floatBitsToUint(vec3(1.02,1.02,1.3)),CONDUCTOR_OP),floatBitsToUint(vec4(1.0,1.0,2.0,0.0))}}, - {{uvec4(floatBitsToUint(vec3(1.02,1.3,1.02)),CONDUCTOR_OP),floatBitsToUint(vec4(1.0,2.0,1.0,0.0))}}, - {{uvec4(floatBitsToUint(vec3(1.02,1.3,1.02)),CONDUCTOR_OP),floatBitsToUint(vec4(1.0,2.0,1.0,0.15))}}, - {{uvec4(floatBitsToUint(vec3(1.4,1.45,1.5)),DIELECTRIC_OP),floatBitsToUint(vec4(0.0,0.0,0.0,0.0625))}} -}; - - -struct Light -{ - vec3 radiance; - uint objectID; -}; - -vec3 Light_getRadiance(in Light light) -{ - return light.radiance; -} -uint Light_getObjectID(in Light light) -{ - return light.objectID; -} - - -#define LIGHT_COUNT 1 -float scene_getLightChoicePdf(in Light light) -{ - return 1.0/float(LIGHT_COUNT); -} - - -#define LIGHT_COUNT 1 -Light lights[LIGHT_COUNT] = -{ - { - vec3(30.0,25.0,15.0), -#ifdef POLYGON_METHOD - 0u -#else - 8u -#endif - } -}; - - - -#define ANY_HIT_FLAG (-2147483648) -#define DEPTH_BITS_COUNT 8 -#define DEPTH_BITS_OFFSET (31-DEPTH_BITS_COUNT) -struct ImmutableRay_t -{ - vec3 origin; - vec3 direction; -#if POLYGON_METHOD==2 - vec3 normalAtOrigin; - bool wasBSDFAtOrigin; -#endif -}; -struct MutableRay_t -{ - float intersectionT; - uint objectID; - /* irrelevant here - uint triangleID; - vec2 barycentrics; - */ -}; -struct Payload_t -{ - vec3 accumulation; - float otherTechniqueHeuristic; - vec3 throughput; - #ifdef KILL_DIFFUSE_SPECULAR_PATHS - bool hasDiffuse; - #endif -}; - -struct Ray_t -{ - ImmutableRay_t _immutable; - MutableRay_t _mutable; - Payload_t _payload; -}; - - -#define INTERSECTION_ERROR_BOUND_LOG2 (-8.0) -float 
getTolerance_common(in uint depth) -{ - float depthRcp = 1.0/float(depth); - return INTERSECTION_ERROR_BOUND_LOG2;// *depthRcp*depthRcp; -} -float getStartTolerance(in uint depth) -{ - return exp2(getTolerance_common(depth)); -} -float getEndTolerance(in uint depth) -{ - return 1.0-exp2(getTolerance_common(depth)+1.0); -} - - -vec2 SampleSphericalMap(vec3 v) -{ - vec2 uv = vec2(atan(v.z, v.x), asin(v.y)); - uv *= nbl_glsl_RECIPROCAL_PI*0.5; - uv += 0.5; - return uv; -} - -void missProgram(in ImmutableRay_t _immutable, inout Payload_t _payload) -{ - vec3 finalContribution = _payload.throughput; - // #define USE_ENVMAP -#ifdef USE_ENVMAP - vec2 uv = SampleSphericalMap(_immutable.direction); - finalContribution *= textureLod(envMap, uv, 0.0).rgb; -#else - const vec3 kConstantEnvLightRadiance = vec3(0.15, 0.21, 0.3); - finalContribution *= kConstantEnvLightRadiance; -#endif - _payload.accumulation += finalContribution; -} - -#include -#include -#include -#include -#include -#include -#include -nbl_glsl_LightSample nbl_glsl_bsdf_cos_generate(in nbl_glsl_AnisotropicViewSurfaceInteraction interaction, in vec3 u, in BSDFNode bsdf, in float monochromeEta, out nbl_glsl_AnisotropicMicrofacetCache _cache) -{ - const float a = BSDFNode_getRoughness(bsdf); - const mat2x3 ior = BSDFNode_getEta(bsdf); - - // fresnel stuff for dielectrics - float orientedEta, rcpOrientedEta; - const bool viewerInsideMedium = nbl_glsl_getOrientedEtas(orientedEta,rcpOrientedEta,interaction.isotropic.NdotV,monochromeEta); - - nbl_glsl_LightSample smpl; - nbl_glsl_AnisotropicMicrofacetCache dummy; - switch (BSDFNode_getType(bsdf)) - { - case DIFFUSE_OP: - smpl = nbl_glsl_oren_nayar_cos_generate(interaction,u.xy,a*a); - break; - case CONDUCTOR_OP: - smpl = nbl_glsl_ggx_cos_generate(interaction,u.xy,a,a,_cache); - break; - default: - smpl = nbl_glsl_ggx_dielectric_cos_generate(interaction,u,a,a,monochromeEta,_cache); - break; - } - return smpl; -} - -vec3 nbl_glsl_bsdf_cos_remainder_and_pdf(out float pdf, in nbl_glsl_LightSample _sample, in nbl_glsl_AnisotropicViewSurfaceInteraction interaction, in BSDFNode bsdf, in float monochromeEta, in nbl_glsl_AnisotropicMicrofacetCache _cache) -{ - // are V and L on opposite sides of the surface? - const bool transmitted = nbl_glsl_isTransmissionPath(interaction.isotropic.NdotV,_sample.NdotL); - - // is the BSDF or BRDF, if it is then we make the dot products `abs` before `max(,0.0)` - const bool transmissive = BSDFNode_isBSDF(bsdf); - const float clampedNdotL = nbl_glsl_conditionalAbsOrMax(transmissive,_sample.NdotL,0.0); - const float clampedNdotV = nbl_glsl_conditionalAbsOrMax(transmissive,interaction.isotropic.NdotV,0.0); - - vec3 remainder; - - const float minimumProjVectorLen = 0.00000001; - if (clampedNdotV>minimumProjVectorLen && clampedNdotL>minimumProjVectorLen) - { - // fresnel stuff for conductors (but reflectance also doubles as albedo) - const mat2x3 ior = BSDFNode_getEta(bsdf); - const vec3 reflectance = BSDFNode_getReflectance(bsdf,_cache.isotropic.VdotH); - - // fresnel stuff for dielectrics - float orientedEta, rcpOrientedEta; - const bool viewerInsideMedium = nbl_glsl_getOrientedEtas(orientedEta,rcpOrientedEta,interaction.isotropic.NdotV,monochromeEta); - - // - const float VdotL = dot(interaction.isotropic.V.dir,_sample.L); - - // - const float a = max(BSDFNode_getRoughness(bsdf),0.0001); // TODO: @Crisspl 0-roughness still doesn't work! Also Beckmann has a weird dark rim instead as fresnel!? 
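// The bsdfLightIDs fields of Sphere, Triangle and Rectangle above pack a 16-bit BSDF index and a
// 16-bit light index into a single uint via bitfieldInsert, and the accessors recover them with
// bitfieldExtract. A plain shift-and-mask sketch of the same packing, e.g. for mirroring the layout
// on the host side (the deleted C++ host code uses core::bitfieldInsert for this); 0xffffu marks an
// invalid index, matching INVALID_ID_16BIT.
#include <cstdint>

inline uint32_t packBsdfLightIDs(const uint32_t bsdfID, const uint32_t lightID)
{
    // low 16 bits: BSDF index, high 16 bits: light index
    return (bsdfID & 0xffffu) | (lightID << 16u);
}
inline uint32_t unpackBsdfID(const uint32_t bsdfLightIDs)  { return bsdfLightIDs & 0xffffu; }
inline uint32_t unpackLightID(const uint32_t bsdfLightIDs) { return bsdfLightIDs >> 16u; }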
- const float a2 = a*a; - - // TODO: refactor into Material Compiler-esque thing - switch (BSDFNode_getType(bsdf)) - { - case DIFFUSE_OP: - remainder = reflectance*nbl_glsl_oren_nayar_cos_remainder_and_pdf_wo_clamps(pdf,a*a,VdotL,clampedNdotL,clampedNdotV); - break; - case CONDUCTOR_OP: - remainder = nbl_glsl_ggx_cos_remainder_and_pdf_wo_clamps(pdf,nbl_glsl_ggx_trowbridge_reitz(a2,_cache.isotropic.NdotH2),clampedNdotL,_sample.NdotL2,clampedNdotV,interaction.isotropic.NdotV_squared,reflectance,a2); - break; - default: - remainder = vec3(nbl_glsl_ggx_dielectric_cos_remainder_and_pdf(pdf, _sample, interaction.isotropic, _cache.isotropic, monochromeEta, a*a)); - break; - } - } - else - remainder = vec3(0.0); - return remainder; -} - -layout (constant_id = 0) const int MAX_DEPTH_LOG2 = 4; -layout (constant_id = 1) const int MAX_SAMPLES_LOG2 = 10; - - -#include - -mat2x3 rand3d(in uint protoDimension, in uint _sample, inout nbl_glsl_xoroshiro64star_state_t scramble_state) -{ - mat2x3 retval; - uint address = bitfieldInsert(protoDimension,_sample,MAX_DEPTH_LOG2,MAX_SAMPLES_LOG2); - for (int i=0; i<2u; i++) - { - uvec3 seqVal = texelFetch(sampleSequence,int(address)+i).xyz; - seqVal ^= uvec3(nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state),nbl_glsl_xoroshiro64star(scramble_state)); - retval[i] = vec3(seqVal)*uintBitsToFloat(0x2f800004u); - } - return retval; -} - - -void traceRay_extraShape(inout int objectID, inout float intersectionT, in vec3 origin, in vec3 direction); -int traceRay(inout float intersectionT, in vec3 origin, in vec3 direction) -{ - int objectID = -1; - -#define USE_RAY_QUERY -#ifdef USE_RAY_QUERY - rayQueryEXT rayQuery; - rayQueryInitializeEXT(rayQuery, topLevelAS, gl_RayFlagsNoneEXT, 0xFF, origin, 0.0, direction, 1000.0); - - // Start traversal: return false if traversal is complete - while(rayQueryProceedEXT(rayQuery)) - { - if(rayQueryGetIntersectionTypeEXT(rayQuery, false) == gl_RayQueryCandidateIntersectionAABBEXT) - { - int id = rayQueryGetIntersectionPrimitiveIndexEXT(rayQuery, false); - float t = Sphere_intersect(spheres[id],origin,direction); - bool reportIntersection = (t != nbl_glsl_FLT_NAN && t > 0 && t < intersectionT); - if(reportIntersection) - { - intersectionT = t; - objectID = id; - rayQueryGenerateIntersectionEXT(rayQuery, t); - } - } - } -#else - for (int i=0; i0.0 && t0.0; - // but if we allowed non-watertight transmitters (single water surface), it would make sense just to apply this line by itself - nbl_glsl_AnisotropicMicrofacetCache _cache; - validPath = validPath && nbl_glsl_calcAnisotropicMicrofacetCache(_cache, interaction, nee_sample, monochromeEta); - if (validPath) - { - float bsdfPdf; - neeContrib *= nbl_glsl_bsdf_cos_remainder_and_pdf(bsdfPdf,nee_sample,interaction,bsdf,monochromeEta,_cache)*throughput; - const float oc = bsdfPdf*rcpChoiceProb; - neeContrib /= 1.0/oc+oc/(lightPdf*lightPdf); // MIS weight - if (bsdfPdflumaContributionThreshold && traceRay(t,intersection+nee_sample.L*t*getStartTolerance(depth),nee_sample.L)==-1) - ray._payload.accumulation += neeContrib; - } - } - - // sample BSDF - float bsdfPdf; vec3 bsdfSampleL; - { - nbl_glsl_AnisotropicMicrofacetCache _cache; - nbl_glsl_LightSample bsdf_sample = nbl_glsl_bsdf_cos_generate(interaction,epsilon[1],bsdf,monochromeEta,_cache); - // the value of the bsdf divided by the probability of the sample being generated - throughput *= nbl_glsl_bsdf_cos_remainder_and_pdf(bsdfPdf,bsdf_sample,interaction,bsdf,monochromeEta,_cache); - // - bsdfSampleL = 
bsdf_sample.L; - } - - // additional threshold - const float lumaThroughputThreshold = lumaContributionThreshold; - if (bsdfPdf>bsdfPdfThreshold && getLuma(throughput)>lumaThroughputThreshold) - { - ray._payload.throughput = throughput; - ray._payload.otherTechniqueHeuristic = neeProbability/bsdfPdf; // numerically stable, don't touch - ray._payload.otherTechniqueHeuristic *= ray._payload.otherTechniqueHeuristic; - - // trace new ray - ray._immutable.origin = intersection+bsdfSampleL*(1.0/*kSceneSize*/)*getStartTolerance(depth); - ray._immutable.direction = bsdfSampleL; - #if POLYGON_METHOD==2 - ray._immutable.normalAtOrigin = interaction.isotropic.N; - ray._immutable.wasBSDFAtOrigin = isBSDF; - #endif - return true; - } - } - return false; -} - -void main() -{ - const ivec2 coords = getCoordinates(); - const vec2 texCoord = getTexCoords(); - - if (false == (all(lessThanEqual(ivec2(0),coords)) && all(greaterThan(imageSize(outImage),coords)))) { - return; - } - - if (((MAX_DEPTH-1)>>MAX_DEPTH_LOG2)>0 || ((SAMPLES-1)>>MAX_SAMPLES_LOG2)>0) - { - vec4 pixelCol = vec4(1.0,0.0,0.0,1.0); - imageStore(outImage, coords, pixelCol); - return; - } - - nbl_glsl_xoroshiro64star_state_t scramble_start_state = texelFetch(scramblebuf,coords,0).rg; - const vec2 pixOffsetParam = vec2(1.0)/vec2(textureSize(scramblebuf,0)); - - - const mat4 invMVP = inverse(cameraData.params.MVP); - - vec4 NDC = vec4(texCoord*vec2(2.0,-2.0)+vec2(-1.0,1.0),0.0,1.0); - vec3 camPos; - { - vec4 tmp = invMVP*NDC; - camPos = tmp.xyz/tmp.w; - NDC.z = 1.0; - } - - vec3 color = vec3(0.0); - float meanLumaSquared = 0.0; - // TODO: if we collapse the nested for loop, then all GPUs will get `MAX_DEPTH` factor speedup, not just NV with separate PC - for (int i=0; i5.0) - color = vec3(1.0,0.0,0.0); - #endif - - vec4 pixelCol = vec4(color, 1.0); - imageStore(outImage, coords, pixelCol); -} -/** TODO: Improving Rendering - -Now: -- Always MIS (path correlated reuse) -- Test MIS alpha (roughness) scheme - -Many Lights: -- Path Guiding -- Light Importance Lists/Classification -- Spatio-Temporal Reservoir Sampling - -Indirect Light: -- Bidirectional Path Tracing -- Uniform Path Sampling / Vertex Connection and Merging / Path Space Regularization - -Animations: -- A-SVGF / BMFR -**/ \ No newline at end of file diff --git a/56_RayQuery/config.json.template b/56_RayQuery/config.json.template deleted file mode 100644 index f961745c1..000000000 --- a/56_RayQuery/config.json.template +++ /dev/null @@ -1,28 +0,0 @@ -{ - "enableParallelBuild": true, - "threadsPerBuildProcess" : 2, - "isExecuted": false, - "scriptPath": "", - "cmake": { - "configurations": [ "Release", "Debug", "RelWithDebInfo" ], - "buildModes": [], - "requiredOptions": [] - }, - "profiles": [ - { - "backend": "vulkan", - "platform": "windows", - "buildModes": [], - "runConfiguration": "Release", - "gpuArchitectures": [] - } - ], - "dependencies": [], - "data": [ - { - "dependencies": [], - "command": [""], - "outputs": [] - } - ] -} \ No newline at end of file diff --git a/56_RayQuery/litByRectangle.comp b/56_RayQuery/litByRectangle.comp deleted file mode 100644 index 829d03398..000000000 --- a/56_RayQuery/litByRectangle.comp +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
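// The "MIS weight" line in the closest-hit logic of common.glsl above folds the multiple importance
// sampling weight together with the reciprocal pdf factors into a single, numerically stable
// reciprocal. For reference, the two standard heuristics in their textbook form; these illustrate
// the underlying math and are not a drop-in for the folded expression in the shader.
inline float misBalanceHeuristic(const float pdfA, const float pdfB)
{
    // weight assigned to the technique that generated the sample with density pdfA
    return pdfA/(pdfA+pdfB);
}
inline float misPowerHeuristic(const float pdfA, const float pdfB)
{
    // beta = 2 power heuristic, typically lower variance than the balance heuristic
    const float a2 = pdfA*pdfA;
    const float b2 = pdfB*pdfB;
    return a2/(a2+b2);
}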
-// For conditions of distribution and use, see copyright notice in nabla.h - -#version 460 core -#extension GL_GOOGLE_include_directive : require -#extension GL_EXT_ray_query : enable - -#define SPHERE_COUNT 8 -#define POLYGON_METHOD 0 // 0 area sampling, 1 solid angle sampling, 2 approximate projected solid angle sampling -#include "common.glsl" - - -#define RECTANGLE_COUNT 1 -const vec3 edge0 = normalize(vec3(2,0,-1)); -const vec3 edge1 = normalize(vec3(2,-5,4)); -Rectangle rectangles[RECTANGLE_COUNT] = { - Rectangle_Rectangle(vec3(-3.8,0.35,1.3),edge0*7.0,edge1*0.1,INVALID_ID_16BIT,0u) -}; - - -void traceRay_extraShape(inout int objectID, inout float intersectionT, in vec3 origin, in vec3 direction) -{ - for (int i=0; i0.0 && t -float nbl_glsl_light_deferred_pdf(in Light light, in Ray_t ray) -{ - const Rectangle rect = rectangles[Light_getObjectID(light)]; - - const vec3 L = ray._immutable.direction; -#if POLYGON_METHOD==0 - const float dist = ray._mutable.intersectionT; - return dist*dist/abs(dot(Rectangle_getNormalTimesArea(rect),L)); -#else - const ImmutableRay_t _immutable = ray._immutable; - const mat3 sphericalVertices = nbl_glsl_shapes_getSphericalTriangle(mat3(tri.vertex0,tri.vertex1,tri.vertex2),_immutable.origin); - #if POLYGON_METHOD==1 - const float rcpProb = nbl_glsl_shapes_SolidAngleOfTriangle(sphericalVertices); - // if `rcpProb` is NAN then the triangle's solid angle was close to 0.0 - return rcpProb>FLT_MIN ? (1.0/rcpProb):nbl_glsl_FLT_MAX; - #elif POLYGON_METHOD==2 - const float pdf = nbl_glsl_sampling_probProjectedSphericalTriangleSample(sphericalVertices,_immutable.normalAtOrigin,_immutable.wasBSDFAtOrigin,L); - // if `pdf` is NAN then the triangle's projected solid angle was close to 0.0, if its close to INF then the triangle was very small - return pdfFLT_MIN ? (1.0/rcpPdf):0.0; - - const vec3 N = Triangle_getNormalTimesArea(tri); - newRayMaxT = dot(N,tri.vertex0-origin)/dot(N,L); - return L; -#endif -} - - -uint getBSDFLightIDAndDetermineNormal(out vec3 normal, in uint objectID, in vec3 intersection) -{ - if (objectID0.0) - { - const float rcpDistance = inversesqrt(distanceSQ); - Z *= rcpDistance; - - const float cosThetaMax = sqrt(cosThetaMax2); - const float cosTheta = mix(1.0,cosThetaMax,xi.x); - - vec3 L = Z*cosTheta; - - const float cosTheta2 = cosTheta*cosTheta; - const float sinTheta = sqrt(1.0-cosTheta2); - float sinPhi,cosPhi; - nbl_glsl_sincos(2.0*nbl_glsl_PI*xi.y-nbl_glsl_PI,sinPhi,cosPhi); - mat2x3 XY = nbl_glsl_frisvad(Z); - - L += (XY[0]*cosPhi+XY[1]*sinPhi)*sinTheta; - - newRayMaxT = (cosTheta-sqrt(cosTheta2-cosThetaMax2))/rcpDistance; - pdf = 1.0/Sphere_getSolidAngle_impl(cosThetaMax); - return L; - } - pdf = 0.0; - return vec3(0.0,0.0,0.0); -} - -uint getBSDFLightIDAndDetermineNormal(out vec3 normal, in uint objectID, in vec3 intersection) -{ - Sphere sphere = spheres[objectID]; - normal = Sphere_getNormal(sphere,intersection); - return sphere.bsdfLightIDs; -} \ No newline at end of file diff --git a/56_RayQuery/litByTriangle.comp b/56_RayQuery/litByTriangle.comp deleted file mode 100644 index 1cd1d3ee3..000000000 --- a/56_RayQuery/litByTriangle.comp +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
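// Both litByRectangle.comp (above) and litByTriangle.comp (below) evaluate the deferred light pdf
// for area sampling as dist*dist/abs(dot(normalTimesArea, L)). That is the usual change of measure
// from area to solid angle: pdf_solidAngle = pdf_area * dist^2 / |cos(theta)|, with pdf_area = 1/area
// for uniform area sampling and |dot(normalTimesArea, L)| = area * |cos(theta)|. A standalone
// restatement of that conversion:
inline float areaPdfToSolidAnglePdf(const float area, const float distance, const float absCosTheta)
{
    return distance*distance/(area*absCosTheta);
}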
-// For conditions of distribution and use, see copyright notice in nabla.h - -#version 460 core -#extension GL_GOOGLE_include_directive : require -#extension GL_EXT_ray_query : enable - -#define SPHERE_COUNT 8 -#define POLYGON_METHOD 0 // 0 area sampling, 1 solid angle sampling, 2 approximate projected solid angle sampling -#include "common.glsl" - -#define TRIANGLE_COUNT 1 -Triangle triangles[TRIANGLE_COUNT] = { - Triangle_Triangle(mat3(vec3(-1.8,0.35,0.3),vec3(-1.2,0.35,0.0),vec3(-1.5,0.8,-0.3)),INVALID_ID_16BIT,0u) -}; - -void traceRay_extraShape(inout int objectID, inout float intersectionT, in vec3 origin, in vec3 direction) -{ - for (int i=0; i0.0 && t -float nbl_glsl_light_deferred_pdf(in Light light, in Ray_t ray) -{ - const Triangle tri = triangles[Light_getObjectID(light)]; - - const vec3 L = ray._immutable.direction; -#if POLYGON_METHOD==0 - const float dist = ray._mutable.intersectionT; - return dist*dist/abs(dot(Triangle_getNormalTimesArea(tri),L)); -#else - const ImmutableRay_t _immutable = ray._immutable; - const mat3 sphericalVertices = nbl_glsl_shapes_getSphericalTriangle(mat3(tri.vertex0,tri.vertex1,tri.vertex2),_immutable.origin); - #if POLYGON_METHOD==1 - const float rcpProb = nbl_glsl_shapes_SolidAngleOfTriangle(sphericalVertices); - // if `rcpProb` is NAN then the triangle's solid angle was close to 0.0 - return rcpProb>FLT_MIN ? (1.0/rcpProb):nbl_glsl_FLT_MAX; - #elif POLYGON_METHOD==2 - const float pdf = nbl_glsl_sampling_probProjectedSphericalTriangleSample(sphericalVertices,_immutable.normalAtOrigin,_immutable.wasBSDFAtOrigin,L); - // if `pdf` is NAN then the triangle's projected solid angle was close to 0.0, if its close to INF then the triangle was very small - return pdfFLT_MIN ? (1.0/rcpPdf):0.0; - - const vec3 N = Triangle_getNormalTimesArea(tri); - newRayMaxT = dot(N,tri.vertex0-origin)/dot(N,L); - return L; -#endif -} - - -uint getBSDFLightIDAndDetermineNormal(out vec3 normal, in uint objectID, in vec3 intersection) -{ - if (objectID - -#include "../common/CommonAPI.h" -#include "CCamera.hpp" -#include "nbl/ext/ScreenShot/ScreenShot.h" -#include "nbl/video/utilities/CDumbPresentationOracle.h" - -using namespace nbl; -using namespace core; -using namespace ui; - - -using namespace nbl; -using namespace core; -using namespace asset; -using namespace video; - -smart_refctd_ptr createHDRImageView(nbl::core::smart_refctd_ptr device, asset::E_FORMAT colorFormat, uint32_t width, uint32_t height) -{ - smart_refctd_ptr gpuImageViewColorBuffer; - { - IGPUImage::SCreationParams imgInfo; - imgInfo.format = colorFormat; - imgInfo.type = IGPUImage::ET_2D; - imgInfo.extent.width = width; - imgInfo.extent.height = height; - imgInfo.extent.depth = 1u; - imgInfo.mipLevels = 1u; - imgInfo.arrayLayers = 1u; - imgInfo.samples = asset::ICPUImage::ESCF_1_BIT; - imgInfo.flags = static_cast(0u); - imgInfo.usage = core::bitflag(asset::IImage::EUF_STORAGE_BIT) | asset::IImage::EUF_TRANSFER_SRC_BIT; - - // (Erfan -> Cyprian) - // auto image = device->createGPUImageOnDedMem(std::move(imgInfo),device->getDeviceLocalGPUMemoryReqs()); - auto image = device->createImage(std::move(imgInfo)); - auto imageMemoryReqs = image->getMemoryReqs(); - imageMemoryReqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); // getDeviceLocalMemoryTypeBits because of previous code getDeviceLocalGPUMemoryReqs - auto imageMem = device->allocate(imageMemoryReqs, image.get()); - - IGPUImageView::SCreationParams imgViewInfo; - imgViewInfo.image = std::move(image); - imgViewInfo.format 
= colorFormat; - imgViewInfo.viewType = IGPUImageView::ET_2D; - imgViewInfo.flags = static_cast(0u); - imgViewInfo.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; - imgViewInfo.subresourceRange.baseArrayLayer = 0u; - imgViewInfo.subresourceRange.baseMipLevel = 0u; - imgViewInfo.subresourceRange.layerCount = 1u; - imgViewInfo.subresourceRange.levelCount = 1u; - - gpuImageViewColorBuffer = device->createImageView(std::move(imgViewInfo)); - } - - return gpuImageViewColorBuffer; -} - -struct ShaderParameters -{ - const uint32_t MaxDepthLog2 = 4; //5 - const uint32_t MaxSamplesLog2 = 10; //18 -} kShaderParameters; - -enum E_LIGHT_GEOMETRY -{ - ELG_SPHERE, - ELG_TRIANGLE, - ELG_RECTANGLE -}; - -struct DispatchInfo_t -{ - uint32_t workGroupCount[3]; -}; - -_NBL_STATIC_INLINE_CONSTEXPR uint32_t DEFAULT_WORK_GROUP_SIZE = 16u; - -DispatchInfo_t getDispatchInfo(uint32_t imgWidth, uint32_t imgHeight) { - DispatchInfo_t ret = {}; - ret.workGroupCount[0] = (uint32_t)core::ceil((float)imgWidth / (float)DEFAULT_WORK_GROUP_SIZE); - ret.workGroupCount[1] = (uint32_t)core::ceil((float)imgHeight / (float)DEFAULT_WORK_GROUP_SIZE); - ret.workGroupCount[2] = 1; - return ret; -} - -class RayQuerySampleApp : public ApplicationBase -{ - constexpr static uint32_t WIN_W = 1280u; - constexpr static uint32_t WIN_H = 720u; - constexpr static uint32_t FRAMES_IN_FLIGHT = 5u; - static constexpr uint64_t MAX_TIMEOUT = 99999999999999ull; - - core::smart_refctd_ptr windowManager; - core::smart_refctd_ptr window; - core::smart_refctd_ptr windowCb; - core::smart_refctd_ptr apiConnection; - core::smart_refctd_ptr surface; - core::smart_refctd_ptr utilities; - core::smart_refctd_ptr logicalDevice; - video::IPhysicalDevice* physicalDevice; - std::array queues; - core::smart_refctd_ptr swapchain; - core::smart_refctd_ptr renderpass; - core::smart_refctd_dynamic_array> fbos; - std::array, CommonAPI::InitOutput::MaxFramesInFlight>, CommonAPI::InitOutput::MaxQueuesCount> commandPools; - core::smart_refctd_ptr system; - core::smart_refctd_ptr assetManager; - video::IGPUObjectFromAssetConverter::SParams cpu2gpuParams; - core::smart_refctd_ptr logger; - core::smart_refctd_ptr inputSystem; - video::IGPUObjectFromAssetConverter cpu2gpu; - - int32_t m_resourceIx = -1; - uint32_t m_acquiredNextFBO = {}; - - CDumbPresentationOracle oracle; - - // polling for events! 
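// getDispatchInfo above rounds the render target size up to whole 16x16 workgroups through a float
// ceil(). The same result can be obtained in pure integer arithmetic, which sidesteps any float
// rounding concerns for large dimensions; a minimal sketch using the example's own 1280x720 target
// (kWorkGroupSize mirrors DEFAULT_WORK_GROUP_SIZE):
#include <cstdint>
#include <cstdio>

constexpr uint32_t kWorkGroupSize = 16u;

constexpr uint32_t ceilDiv(const uint32_t dividend, const uint32_t divisor)
{
    return (dividend + divisor - 1u) / divisor;
}

int main()
{
    const uint32_t width = 1280u, height = 720u; // WIN_W x WIN_H
    std::printf("dispatch %u x %u x 1 workgroups\n", ceilDiv(width, kWorkGroupSize), ceilDiv(height, kWorkGroupSize));
    return 0;
}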
- CommonAPI::InputSystem::ChannelReader mouse; - CommonAPI::InputSystem::ChannelReader keyboard; - - core::smart_refctd_ptr frameUploadDataCompleteFence[FRAMES_IN_FLIGHT] = { nullptr }; - core::smart_refctd_ptr frameComplete[FRAMES_IN_FLIGHT] = { nullptr }; - core::smart_refctd_ptr imageAcquire[FRAMES_IN_FLIGHT] = { nullptr }; - core::smart_refctd_ptr renderFinished[FRAMES_IN_FLIGHT] = { nullptr }; - core::smart_refctd_ptr frameUploadDataCompleteSemaphore[FRAMES_IN_FLIGHT] = { nullptr }; - - core::smart_refctd_ptr cmdbuf[FRAMES_IN_FLIGHT]; // from graphics - - Camera cam; - - core::smart_refctd_ptr gpuubo = nullptr; - core::smart_refctd_ptr gpuEnvmapImageView = nullptr; - core::smart_refctd_ptr gpuScrambleImageView; - - core::smart_refctd_ptr gpuComputePipeline = nullptr; - DispatchInfo_t dispatchInfo = {}; - - core::smart_refctd_ptr outHDRImageViews[CommonAPI::InitOutput::MaxSwapChainImageCount] = {}; - - core::smart_refctd_ptr descriptorSets0[CommonAPI::InitOutput::MaxSwapChainImageCount] = {}; - core::smart_refctd_ptr descriptorSet2 = nullptr; - core::smart_refctd_ptr uboDescriptorSet1 = nullptr; - - core::smart_refctd_ptr aabbsBuffer = nullptr; - core::smart_refctd_ptr gpuBlas = nullptr; - core::smart_refctd_ptr gpuBlas2 = nullptr; // Built via CPUObject To GPUObject operations and utility - core::smart_refctd_ptr gpuTlas = nullptr; - core::smart_refctd_ptr instancesBuffer = nullptr; - - core::smart_refctd_ptr gpuSequenceBufferView = nullptr; - - core::smart_refctd_ptr sampler0 = nullptr; - core::smart_refctd_ptr sampler1 = nullptr; - - core::smart_refctd_ptr gpuSequenceBuffer = nullptr; - - core::smart_refctd_ptr spheresBuffer = nullptr; - - struct SBasicViewParametersAligned - { - SBasicViewParameters uboData; - }; - -public: - void setWindow(core::smart_refctd_ptr&& wnd) override - { - window = std::move(wnd); - } - nbl::ui::IWindow* getWindow() override - { - return window.get(); - } - void setSystem(core::smart_refctd_ptr&& system) override - { - system = std::move(system); - } - - APP_CONSTRUCTOR(RayQuerySampleApp); - - void onAppInitialized_impl() override - { - const auto swapchainImageUsage = static_cast(asset::IImage::EUF_COLOR_ATTACHMENT_BIT | asset::IImage::EUF_TRANSFER_DST_BIT | asset::IImage::EUF_TRANSFER_SRC_BIT); - - CommonAPI::InitParams initParams; - initParams.window = core::smart_refctd_ptr(window); - initParams.apiType = video::EAT_VULKAN; - initParams.appName = { _NBL_APP_NAME_ }; - initParams.framesInFlight = FRAMES_IN_FLIGHT; - initParams.windowWidth = WIN_W; - initParams.windowHeight = WIN_H; - initParams.swapchainImageCount = 2u; - initParams.swapchainImageUsage = swapchainImageUsage; - initParams.depthFormat = asset::EF_D32_SFLOAT; - auto initOutput = CommonAPI::InitWithRaytracingExt(std::move(initParams)); - - system = std::move(initOutput.system); - window = std::move(initParams.window); - windowCb = std::move(initParams.windowCb); - apiConnection = std::move(initOutput.apiConnection); - surface = std::move(initOutput.surface); - physicalDevice = std::move(initOutput.physicalDevice); - logicalDevice = std::move(initOutput.logicalDevice); - utilities = std::move(initOutput.utilities); - queues = std::move(initOutput.queues); - renderpass = std::move(initOutput.renderToSwapchainRenderpass); - commandPools = std::move(initOutput.commandPools); - assetManager = std::move(initOutput.assetManager); - cpu2gpuParams = std::move(initOutput.cpu2gpuParams); - logger = std::move(initOutput.logger); - inputSystem = std::move(initOutput.inputSystem); - - 
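// The deleted example repeats one allocation idiom for every device-local resource (the HDR image
// in createHDRImageView above, and the sphere/AABB/instance/scratch buffers below): create the
// resource, query its memory requirements, mask the allowed memory types down to device-local
// heaps, then allocate. A condensed restatement of that pattern using the same pre-rewrite API
// calls that appear in this patch; it is a sketch of the idiom, not code expected to build against
// the current Nabla headers.
core::smart_refctd_ptr<video::IGPUBuffer> createDeviceLocalBuffer(
    video::ILogicalDevice* device, video::IGPUBuffer::SCreationParams&& params)
{
    auto buffer = device->createBuffer(std::move(params));
    auto reqs = buffer->getMemoryReqs();
    // restrict the candidate memory types to device-local heaps, as the deleted code does
    reqs.memoryTypeBits &= device->getPhysicalDevice()->getDeviceLocalMemoryTypeBits();
    // buffers created with EUF_SHADER_DEVICE_ADDRESS_BIT additionally pass EMAF_DEVICE_ADDRESS_BIT here
    device->allocate(reqs, buffer.get());
    return buffer;
}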
CommonAPI::createSwapchain(std::move(logicalDevice), initOutput.swapchainCreationParams, WIN_W, WIN_H, swapchain); - assert(swapchain); - fbos = CommonAPI::createFBOWithSwapchainImages( - swapchain->getImageCount(), WIN_W, WIN_H, - logicalDevice, swapchain, renderpass, - asset::EF_D32_SFLOAT - ); - auto graphicsQueue = queues[CommonAPI::InitOutput::EQT_GRAPHICS]; - auto computeQueue = queues[CommonAPI::InitOutput::EQT_GRAPHICS]; - auto graphicsCommandPools = commandPools[CommonAPI::InitOutput::EQT_GRAPHICS]; - auto computeCommandPools = commandPools[CommonAPI::InitOutput::EQT_COMPUTE]; - - video::IGPUObjectFromAssetConverter cpu2gpu; - for (uint32_t i = 0u; i < FRAMES_IN_FLIGHT; i++) - logicalDevice->createCommandBuffers(graphicsCommandPools[i].get(), video::IGPUCommandBuffer::EL_PRIMARY, 1, cmdbuf+i); - - core::smart_refctd_ptr descriptorPool = nullptr; - { - video::IDescriptorPool::SCreateInfo createInfo = {}; - createInfo.maxSets = CommonAPI::InitOutput::MaxSwapChainImageCount+2; - createInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER)] = 1; - createInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE)] = CommonAPI::InitOutput::MaxSwapChainImageCount; - createInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER)] = 2; - createInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER)] = 1; - createInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER)] = 1; - createInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE)] = 1; - - descriptorPool = logicalDevice->createDescriptorPool(std::move(createInfo)); - } - - // Initialize Spheres - constexpr uint32_t SphereCount = 9u; - constexpr uint32_t INVALID_ID_16BIT = 0xffffu; - - struct alignas(16) Sphere - { - Sphere() - : position(0.0f, 0.0f, 0.0f) - , radius2(0.0f) - { - bsdfLightIDs = core::bitfieldInsert(0u,INVALID_ID_16BIT,16,16); - } - - Sphere(core::vector3df _position, float _radius, uint32_t _bsdfID, uint32_t _lightID) - { - position = _position; - radius2 = _radius*_radius; - bsdfLightIDs = core::bitfieldInsert(_bsdfID,_lightID,16,16); - } - - IGPUAccelerationStructure::AABB_Position getAABB() const - { - float radius = core::sqrt(radius2); - return IGPUAccelerationStructure::AABB_Position(position-core::vector3df(radius, radius, radius), position+core::vector3df(radius, radius, radius)); - } - - core::vector3df position; - float radius2; - uint32_t bsdfLightIDs; - }; - - Sphere spheres[SphereCount] = {}; - spheres[0] = Sphere(core::vector3df(0.0,-100.5,-1.0), 100.0, 0u, INVALID_ID_16BIT); - spheres[1] = Sphere(core::vector3df(3.0,0.0,-1.0), 0.5, 1u, INVALID_ID_16BIT); - spheres[2] = Sphere(core::vector3df(0.0,0.0,-1.0), 0.5, 2u, INVALID_ID_16BIT); - spheres[3] = Sphere(core::vector3df(-3.0,0.0,-1.0), 0.5, 3u, INVALID_ID_16BIT); - spheres[4] = Sphere(core::vector3df(3.0,0.0,1.0), 0.5, 4u, INVALID_ID_16BIT); - spheres[5] = Sphere(core::vector3df(0.0,0.0,1.0), 0.5, 4u, INVALID_ID_16BIT); - spheres[6] = Sphere(core::vector3df(-3.0,0.0,1.0), 0.5, 5u, INVALID_ID_16BIT); - spheres[7] = Sphere(core::vector3df(0.5,1.0,0.5), 0.5, 6u, INVALID_ID_16BIT); - spheres[8] = Sphere(core::vector3df(-1.5,1.5,0.0), 0.3, INVALID_ID_16BIT, 0u); - - // Create Spheres Buffer - uint32_t spheresBufferSize = sizeof(Sphere) * SphereCount; - - { - IGPUBuffer::SCreationParams params = {}; - params.size = spheresBufferSize; // (Erfan->Cyprian) See How I moved 
"createDeviceLocalGPUBufferOnDedMem" second parameter to params.size? IGPUBuffer::SCreationParams::size is very important to be filled unlike before - params.usage = core::bitflag(asset::IBuffer::EUF_STORAGE_BUFFER_BIT) | asset::IBuffer::EUF_TRANSFER_DST_BIT; - spheresBuffer = logicalDevice->createBuffer(std::move(params)); - auto bufferReqs = spheresBuffer->getMemoryReqs(); - bufferReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); // (Erfan->Cyprian) I used `getDeviceLocalMemoryTypeBits` because of previous createDeviceLocalGPUBufferOnDedMem (Focus on DeviceLocal Part) - auto spheresBufferMem = logicalDevice->allocate(bufferReqs, spheresBuffer.get()); - utilities->updateBufferRangeViaStagingBufferAutoSubmit(asset::SBufferRange{0u,spheresBufferSize,spheresBuffer}, spheres, graphicsQueue); - } - -#define TEST_CPU_2_GPU_BLAS -#ifdef TEST_CPU_2_GPU_BLAS - // Acceleration Structure Test - // Create + Build BLAS (CPU2GPU Version) - { - struct AABB { - IGPUAccelerationStructure::AABB_Position aabb; - }; - const uint32_t aabbsCount = SphereCount / 2u; - uint32_t aabbsBufferSize = sizeof(AABB) * aabbsCount; - - AABB aabbs[aabbsCount] = {}; - for(uint32_t i = 0; i < aabbsCount; ++i) - { - aabbs[i].aabb = spheres[i].getAABB(); - } - - // auto raytracingFlags = core::bitflag(asset::IBuffer::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT) | asset::IBuffer::EUF_STORAGE_BUFFER_BIT; - // | asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT - core::smart_refctd_ptr aabbsBuffer = ICPUBuffer::create({ aabbsBufferSize }); - memcpy(aabbsBuffer->getPointer(), aabbs, aabbsBufferSize); - - ICPUAccelerationStructure::SCreationParams asCreateParams; - asCreateParams.type = ICPUAccelerationStructure::ET_BOTTOM_LEVEL; - asCreateParams.flags = ICPUAccelerationStructure::ECF_NONE; - core::smart_refctd_ptr cpuBlas = ICPUAccelerationStructure::create(std::move(asCreateParams)); - - using HostGeom = ICPUAccelerationStructure::HostBuildGeometryInfo::Geom; - core::smart_refctd_dynamic_array geometries = core::make_refctd_dynamic_array>(1u); - - HostGeom & simpleGeom = geometries->operator[](0u); - simpleGeom.type = IAccelerationStructure::EGT_AABBS; - simpleGeom.flags = IAccelerationStructure::EGF_OPAQUE_BIT; - simpleGeom.data.aabbs.data.offset = 0u; - simpleGeom.data.aabbs.data.buffer = aabbsBuffer; - simpleGeom.data.aabbs.stride = sizeof(AABB); - - ICPUAccelerationStructure::HostBuildGeometryInfo buildInfo; - buildInfo.type = asCreateParams.type; - buildInfo.buildFlags = ICPUAccelerationStructure::EBF_PREFER_FAST_TRACE_BIT; - buildInfo.buildMode = ICPUAccelerationStructure::EBM_BUILD; - buildInfo.geometries = geometries; - - core::smart_refctd_dynamic_array buildRangeInfos = core::make_refctd_dynamic_array>(1u); - ICPUAccelerationStructure::BuildRangeInfo & firstBuildRangeInfo = buildRangeInfos->operator[](0u); - firstBuildRangeInfo.primitiveCount = aabbsCount; - firstBuildRangeInfo.primitiveOffset = 0u; - firstBuildRangeInfo.firstVertex = 0u; - firstBuildRangeInfo.transformOffset = 0u; - - cpuBlas->setBuildInfoAndRanges(std::move(buildInfo), buildRangeInfos); - - // Build BLAS - { - cpu2gpuParams.beginCommandBuffers(); - gpuBlas2 = cpu2gpu.getGPUObjectsFromAssets(&cpuBlas, &cpuBlas + 1u, cpu2gpuParams)->front(); - cpu2gpuParams.waitForCreationToComplete(); - } - } -#endif - - // Create + Build BLAS - { - // Build BLAS with AABBS - const uint32_t aabbsCount = SphereCount; - - struct AABB { - IGPUAccelerationStructure::AABB_Position aabb; - 
}; - - AABB aabbs[aabbsCount] = {}; - for(uint32_t i = 0; i < aabbsCount; ++i) - { - aabbs[i].aabb = spheres[i].getAABB(); - } - auto raytracingFlags = core::bitflag(asset::IBuffer::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT) | asset::IBuffer::EUF_STORAGE_BUFFER_BIT; - uint32_t aabbsBufferSize = sizeof(AABB) * aabbsCount; - - { - IGPUBuffer::SCreationParams params = {}; - params.size = aabbsBufferSize; - params.usage = raytracingFlags | asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - aabbsBuffer = logicalDevice->createBuffer(std::move(params)); - auto bufferReqs = aabbsBuffer->getMemoryReqs(); - bufferReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto aabbBufferMem = logicalDevice->allocate(bufferReqs, aabbsBuffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - // (Erfan->Cyprian) -> I passed `IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT` as a third parameter to the allocate function because the buffer needs the usage `EUF_SHADER_DEVICE_ADDRESS_BIT` - // You don't have to worry about it, it's only used in this example - utilities->updateBufferRangeViaStagingBufferAutoSubmit(asset::SBufferRange{0u,aabbsBufferSize,aabbsBuffer}, aabbs, graphicsQueue); - } - - using DeviceGeom = IGPUAccelerationStructure::DeviceBuildGeometryInfo::Geometry; - - DeviceGeom simpleGeom = {}; - simpleGeom.type = IAccelerationStructure::EGT_AABBS; - simpleGeom.flags = IAccelerationStructure::EGF_OPAQUE_BIT; - simpleGeom.data.aabbs.data.offset = 0u; - simpleGeom.data.aabbs.data.buffer = aabbsBuffer; - simpleGeom.data.aabbs.stride = sizeof(AABB); - - IGPUAccelerationStructure::DeviceBuildGeometryInfo blasBuildInfo = {}; - blasBuildInfo.type = IGPUAccelerationStructure::ET_BOTTOM_LEVEL; - blasBuildInfo.buildFlags = IGPUAccelerationStructure::EBF_PREFER_FAST_TRACE_BIT; - blasBuildInfo.buildMode = IGPUAccelerationStructure::EBM_BUILD; - blasBuildInfo.srcAS = nullptr; - blasBuildInfo.dstAS = nullptr; - blasBuildInfo.geometries = core::SRange(&simpleGeom, &simpleGeom + 1u); - blasBuildInfo.scratchAddr = {}; - - // Get BuildSizes - IGPUAccelerationStructure::BuildSizes buildSizes = {}; - { - std::vector maxPrimCount(1u); - maxPrimCount[0] = aabbsCount; - buildSizes = logicalDevice->getAccelerationStructureBuildSizes(blasBuildInfo, maxPrimCount.data()); - } - - { - core::smart_refctd_ptr asBuffer; - IGPUBuffer::SCreationParams params = {}; - params.size = buildSizes.accelerationStructureSize; - params.usage = core::bitflag(asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | asset::IBuffer::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT; - asBuffer = logicalDevice->createBuffer(std::move(params)); - auto bufferReqs = asBuffer->getMemoryReqs(); - bufferReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto asBufferMem = logicalDevice->allocate(bufferReqs, asBuffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - - IGPUAccelerationStructure::SCreationParams blasParams = {}; - blasParams.type = IGPUAccelerationStructure::ET_BOTTOM_LEVEL; - blasParams.flags = IGPUAccelerationStructure::ECF_NONE; - blasParams.bufferRange.buffer = asBuffer; - blasParams.bufferRange.offset = 0u; - blasParams.bufferRange.size = buildSizes.accelerationStructureSize; - gpuBlas = logicalDevice->createAccelerationStructure(std::move(blasParams)); - } - - // Allocate ScratchBuffer - core::smart_refctd_ptr scratchBuffer; - { - IGPUBuffer::SCreationParams params = {}; - params.size = 
buildSizes.buildScratchSize; - params.usage = core::bitflag(asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | asset::IBuffer::EUF_STORAGE_BUFFER_BIT; - scratchBuffer = logicalDevice->createBuffer(std::move(params)); - auto bufferReqs = scratchBuffer->getMemoryReqs(); - bufferReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto scratchBufferMem = logicalDevice->allocate(bufferReqs, scratchBuffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - } - - // Complete BLAS Build Info - { - blasBuildInfo.dstAS = gpuBlas.get(); - blasBuildInfo.scratchAddr.buffer = scratchBuffer; - blasBuildInfo.scratchAddr.offset = 0u; - } - - IGPUAccelerationStructure::BuildRangeInfo firstBuildRangeInfos[1u]; - firstBuildRangeInfos[0].primitiveCount = aabbsCount; - firstBuildRangeInfos[0].primitiveOffset = 0u; - firstBuildRangeInfos[0].firstVertex = 0u; - firstBuildRangeInfos[0].transformOffset = 0u; - IGPUAccelerationStructure::BuildRangeInfo* pRangeInfos[1u]; - pRangeInfos[0] = firstBuildRangeInfos; - // pRangeInfos[1] = &secondBuildRangeInfos; - - // Build BLAS - { - utilities->buildAccelerationStructures(computeQueue, core::SRange(&blasBuildInfo, &blasBuildInfo + 1u), pRangeInfos); - } - } - - // Create + Build TLAS - { - struct Instance { - IGPUAccelerationStructure::Instance instance; - }; - - const uint32_t instancesCount = 1u; - Instance instances[instancesCount] = {}; - core::matrix3x4SIMD identity; - instances[0].instance.mat = identity; - instances[0].instance.instanceCustomIndex = 0u; - instances[0].instance.mask = 0xFF; - instances[0].instance.instanceShaderBindingTableRecordOffset = 0u; - instances[0].instance.flags = IAccelerationStructure::EIF_TRIANGLE_FACING_CULL_DISABLE_BIT; -#ifdef TEST_CPU_2_GPU_BLAS - instances[0].instance.accelerationStructureReference = gpuBlas2->getReferenceForDeviceOperations(); -#else - instances[0].instance.accelerationStructureReference = gpuBlas->getReferenceForDeviceOperations(); -#endif - auto raytracingFlags = core::bitflag(asset::IBuffer::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT) | asset::IBuffer::EUF_STORAGE_BUFFER_BIT; - - uint32_t instancesBufferSize = sizeof(Instance); - { - IGPUBuffer::SCreationParams params = {}; - params.size = instancesBufferSize; - params.usage = raytracingFlags | asset::IBuffer::EUF_TRANSFER_DST_BIT | asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - instancesBuffer = logicalDevice->createBuffer(std::move(params)); - auto bufferReqs = instancesBuffer->getMemoryReqs(); - bufferReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto instancesBufferMem = logicalDevice->allocate(bufferReqs, instancesBuffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - utilities->updateBufferRangeViaStagingBufferAutoSubmit(asset::SBufferRange{0u,instancesBufferSize,instancesBuffer}, instances, graphicsQueue); - } - - using DeviceGeom = IGPUAccelerationStructure::DeviceBuildGeometryInfo::Geometry; - - DeviceGeom blasInstancesGeom = {}; - blasInstancesGeom.type = IAccelerationStructure::EGT_INSTANCES; - blasInstancesGeom.flags = IAccelerationStructure::EGF_NONE; - blasInstancesGeom.data.instances.data.offset = 0u; - blasInstancesGeom.data.instances.data.buffer = instancesBuffer; - - IGPUAccelerationStructure::DeviceBuildGeometryInfo tlasBuildInfo = {}; - tlasBuildInfo.type = IGPUAccelerationStructure::ET_TOP_LEVEL; - tlasBuildInfo.buildFlags = IGPUAccelerationStructure::EBF_PREFER_FAST_TRACE_BIT; - tlasBuildInfo.buildMode = 
IGPUAccelerationStructure::EBM_BUILD; - tlasBuildInfo.srcAS = nullptr; - tlasBuildInfo.dstAS = nullptr; - tlasBuildInfo.geometries = core::SRange(&blasInstancesGeom, &blasInstancesGeom + 1u); - tlasBuildInfo.scratchAddr = {}; - - // Get BuildSizes - IGPUAccelerationStructure::BuildSizes buildSizes = {}; - { - std::vector maxPrimCount(1u); - maxPrimCount[0] = instancesCount; - buildSizes = logicalDevice->getAccelerationStructureBuildSizes(tlasBuildInfo, maxPrimCount.data()); - } - - { - core::smart_refctd_ptr asBuffer; - IGPUBuffer::SCreationParams params = {}; - params.size = buildSizes.accelerationStructureSize; - params.usage = core::bitflag(asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | asset::IBuffer::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT; - asBuffer = logicalDevice->createBuffer(std::move(params)); - auto bufferReqs = asBuffer->getMemoryReqs(); - bufferReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto asBufferMem = logicalDevice->allocate(bufferReqs, asBuffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - - IGPUAccelerationStructure::SCreationParams tlasParams = {}; - tlasParams.type = IGPUAccelerationStructure::ET_TOP_LEVEL; - tlasParams.flags = IGPUAccelerationStructure::ECF_NONE; - tlasParams.bufferRange.buffer = asBuffer; - tlasParams.bufferRange.offset = 0u; - tlasParams.bufferRange.size = buildSizes.accelerationStructureSize; - gpuTlas = logicalDevice->createAccelerationStructure(std::move(tlasParams)); - } - - // Allocate ScratchBuffer - core::smart_refctd_ptr scratchBuffer; - { - IGPUBuffer::SCreationParams params = {}; - params.size = buildSizes.buildScratchSize; - params.usage = core::bitflag(asset::IBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | asset::IBuffer::EUF_STORAGE_BUFFER_BIT; - scratchBuffer = logicalDevice->createBuffer(std::move(params)); - auto bufferReqs = scratchBuffer->getMemoryReqs(); - bufferReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto scratchBufferMem = logicalDevice->allocate(bufferReqs, scratchBuffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - } - - // Complete BLAS Build Info - { - tlasBuildInfo.dstAS = gpuTlas.get(); - tlasBuildInfo.scratchAddr.buffer = scratchBuffer; - tlasBuildInfo.scratchAddr.offset = 0u; - } - - IGPUAccelerationStructure::BuildRangeInfo firstBuildRangeInfos[1u]; - firstBuildRangeInfos[0].primitiveCount = instancesCount; - firstBuildRangeInfos[0].primitiveOffset = 0u; - firstBuildRangeInfos[0].firstVertex = 0u; - firstBuildRangeInfos[0].transformOffset = 0u; - IGPUAccelerationStructure::BuildRangeInfo* pRangeInfos[1u]; - pRangeInfos[0] = firstBuildRangeInfos; - - // Build TLAS - { - utilities->buildAccelerationStructures(computeQueue, core::SRange(&tlasBuildInfo, &tlasBuildInfo + 1u), pRangeInfos); - } - } - - - // Camera - core::vectorSIMDf cameraPosition(0, 5, -10); - matrix4SIMD proj = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(60.0f), video::ISurface::getTransformedAspectRatio(swapchain->getPreTransform(), WIN_W, WIN_H), 0.01f, 500.0f); - cam = Camera(cameraPosition, core::vectorSIMDf(0, 0, 0), proj); - - IGPUDescriptorSetLayout::SBinding descriptorSet0Bindings[] = - { - { 0u, asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, IShader::ESS_COMPUTE, 1u, nullptr }, - }; - IGPUDescriptorSetLayout::SBinding uboBinding {0, asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER, 
video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, IShader::ESS_COMPUTE, 1u, nullptr}; - IGPUDescriptorSetLayout::SBinding descriptorSet3Bindings[] = { - { 0u, asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, IShader::ESS_COMPUTE, 1u, nullptr }, - { 1u, asset::IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER, video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, IShader::ESS_COMPUTE, 1u, nullptr }, - { 2u, asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, IShader::ESS_COMPUTE, 1u, nullptr }, - { 3u, asset::IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE, video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, IShader::ESS_COMPUTE, 1u, nullptr }, - { 4u, asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, video::IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, IShader::ESS_COMPUTE, 1u, nullptr } - }; - - auto gpuDescriptorSetLayout0 = logicalDevice->createDescriptorSetLayout(descriptorSet0Bindings, descriptorSet0Bindings + 1u); - auto gpuDescriptorSetLayout1 = logicalDevice->createDescriptorSetLayout(&uboBinding, &uboBinding + 1u); - auto gpuDescriptorSetLayout2 = logicalDevice->createDescriptorSetLayout(descriptorSet3Bindings, descriptorSet3Bindings+5u); - - auto createGpuResources = [&](std::string pathToShader) -> core::smart_refctd_ptr - { - asset::IAssetLoader::SAssetLoadParams params{}; - params.logger = logger.get(); - //params.relativeDir = tmp.c_str(); - auto spec = assetManager->getAsset(pathToShader,params).getContents(); - - if (spec.empty()) - assert(false); - - auto cpuComputeSpecializedShader = core::smart_refctd_ptr_static_cast(*spec.begin()); - - ISpecializedShader::SInfo info = cpuComputeSpecializedShader->getSpecializationInfo(); - info.m_backingBuffer = ICPUBuffer::create({ sizeof(ShaderParameters) }); - memcpy(info.m_backingBuffer->getPointer(),&kShaderParameters,sizeof(ShaderParameters)); - info.m_entries = core::make_refctd_dynamic_array>(2u); - for (uint32_t i=0; i<2; i++) - info.m_entries->operator[](i) = {i,i*sizeof(uint32_t),sizeof(uint32_t)}; - - - cpuComputeSpecializedShader->setSpecializationInfo(std::move(info)); - - auto gpuComputeSpecializedShader = cpu2gpu.getGPUObjectsFromAssets(&cpuComputeSpecializedShader, &cpuComputeSpecializedShader + 1, cpu2gpuParams)->front(); - - auto gpuPipelineLayout = logicalDevice->createPipelineLayout(nullptr, nullptr, core::smart_refctd_ptr(gpuDescriptorSetLayout0), core::smart_refctd_ptr(gpuDescriptorSetLayout1), core::smart_refctd_ptr(gpuDescriptorSetLayout2), nullptr); - - auto gpuPipeline = logicalDevice->createComputePipeline(nullptr, std::move(gpuPipelineLayout), std::move(gpuComputeSpecializedShader)); - - return gpuPipeline; - }; - - E_LIGHT_GEOMETRY lightGeom = ELG_SPHERE; - constexpr const char* shaderPaths[] = {"../litBySphere.comp","../litByTriangle.comp","../litByRectangle.comp"}; - gpuComputePipeline = createGpuResources(shaderPaths[lightGeom]); - - dispatchInfo = getDispatchInfo(WIN_W, WIN_H); - - auto createImageView = [&](std::string pathToOpenEXRHDRIImage) - { - auto pathToTexture = pathToOpenEXRHDRIImage; - IAssetLoader::SAssetLoadParams lp(0ull, nullptr, IAssetLoader::ECF_DONT_CACHE_REFERENCES); - auto cpuTexture = assetManager->getAsset(pathToTexture, lp); - auto cpuTextureContents = cpuTexture.getContents(); - assert(!cpuTextureContents.empty()); - auto cpuImage = 
core::smart_refctd_ptr_static_cast(*cpuTextureContents.begin()); - cpuImage->setImageUsageFlags(IImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT); - - ICPUImageView::SCreationParams viewParams; - viewParams.flags = static_cast(0u); - viewParams.image = cpuImage; - viewParams.format = viewParams.image->getCreationParameters().format; - viewParams.viewType = IImageView::ET_2D; - viewParams.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; - viewParams.subresourceRange.baseArrayLayer = 0u; - viewParams.subresourceRange.layerCount = 1u; - viewParams.subresourceRange.baseMipLevel = 0u; - viewParams.subresourceRange.levelCount = 1u; - - auto cpuImageView = ICPUImageView::create(std::move(viewParams)); - - cpu2gpuParams.beginCommandBuffers(); - auto gpuImageView = cpu2gpu.getGPUObjectsFromAssets(&cpuImageView, &cpuImageView + 1u, cpu2gpuParams)->front(); - cpu2gpuParams.waitForCreationToComplete(); - - return gpuImageView; - }; - - gpuEnvmapImageView = createImageView("../../media/envmap/envmap_0.exr"); - - { - const uint32_t MaxDimensions = 3u<(sampleSequence->getPointer()); - for (auto dim=0u; dimgetSize(); - IGPUBuffer::SCreationParams params = {}; - params.size = bufferSize; - params.usage = core::bitflag(asset::IBuffer::EUF_TRANSFER_DST_BIT) | asset::IBuffer::EUF_UNIFORM_TEXEL_BUFFER_BIT; - gpuSequenceBuffer = logicalDevice->createBuffer(std::move(params)); - auto bufferReqs = gpuSequenceBuffer->getMemoryReqs(); - bufferReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto gpuSequenceBufferMem = logicalDevice->allocate(bufferReqs, gpuSequenceBuffer.get()); - utilities->updateBufferRangeViaStagingBufferAutoSubmit(asset::SBufferRange{0u,bufferSize,gpuSequenceBuffer},sampleSequence->getPointer(), graphicsQueue); - } - gpuSequenceBufferView = logicalDevice->createBufferView(gpuSequenceBuffer.get(), asset::EF_R32G32B32_UINT); - } - - { - IGPUImage::SCreationParams imgParams; - imgParams.flags = static_cast(0u); - imgParams.type = IImage::ET_2D; - imgParams.format = EF_R32G32_UINT; - imgParams.extent = {WIN_W, WIN_H,1u}; - imgParams.mipLevels = 1u; - imgParams.arrayLayers = 1u; - imgParams.samples = IImage::ESCF_1_BIT; - imgParams.usage = core::bitflag(IImage::EUF_SAMPLED_BIT) | IImage::EUF_TRANSFER_DST_BIT; - imgParams.initialLayout = asset::IImage::EL_UNDEFINED; - - IGPUImage::SBufferCopy region = {}; - region.bufferOffset = 0u; - region.bufferRowLength = 0u; - region.bufferImageHeight = 0u; - region.imageExtent = imgParams.extent; - region.imageOffset = {0u,0u,0u}; - region.imageSubresource.layerCount = 1u; - region.imageSubresource.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; - - constexpr auto ScrambleStateChannels = 2u; - const auto renderPixelCount = imgParams.extent.width*imgParams.extent.height; - core::vector random(renderPixelCount*ScrambleStateChannels); - { - core::RandomSampler rng(0xbadc0ffeu); - for (auto& pixel : random) - pixel = rng.nextSample(); - } - - core::smart_refctd_ptr scrambleImageBuffer; - { - const auto bufferSize = random.size() * sizeof(uint32_t); - IGPUBuffer::SCreationParams params = {}; - params.size = bufferSize; - params.usage = core::bitflag(asset::IBuffer::EUF_TRANSFER_DST_BIT) | asset::IBuffer::EUF_TRANSFER_SRC_BIT; - scrambleImageBuffer = logicalDevice->createBuffer(std::move(params)); - auto bufferReqs = scrambleImageBuffer->getMemoryReqs(); - bufferReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto bufferMem = logicalDevice->allocate(bufferReqs, 
scrambleImageBuffer.get()); - utilities->updateBufferRangeViaStagingBufferAutoSubmit(asset::SBufferRange{0u,bufferSize,scrambleImageBuffer},random.data(),graphicsQueue); - } - - IGPUImageView::SCreationParams viewParams; - viewParams.flags = static_cast(0u); - // TODO: Replace this IGPUBuffer -> IGPUImage to using image upload utility - viewParams.image = utilities->createFilledDeviceLocalImageOnDedMem(std::move(imgParams), scrambleImageBuffer.get(), 1u, ®ion, graphicsQueue); - viewParams.viewType = IGPUImageView::ET_2D; - viewParams.format = EF_R32G32_UINT; - viewParams.subresourceRange.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; - viewParams.subresourceRange.levelCount = 1u; - viewParams.subresourceRange.layerCount = 1u; - gpuScrambleImageView = logicalDevice->createImageView(std::move(viewParams)); - } - - // Create Out Image - for(uint32_t i = 0; i < swapchain->getImageCount(); ++i) { - outHDRImageViews[i] = createHDRImageView(logicalDevice, asset::EF_R16G16B16A16_SFLOAT, WIN_W, WIN_H); - } - - for(uint32_t i = 0; i < swapchain->getImageCount(); ++i) - { - auto & descSet = descriptorSets0[i]; - descSet = descriptorPool->createDescriptorSet(core::smart_refctd_ptr(gpuDescriptorSetLayout0)); - video::IGPUDescriptorSet::SWriteDescriptorSet writeDescriptorSet; - writeDescriptorSet.dstSet = descSet.get(); - writeDescriptorSet.binding = 0; - writeDescriptorSet.count = 1u; - writeDescriptorSet.arrayElement = 0u; - writeDescriptorSet.descriptorType = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE; - video::IGPUDescriptorSet::SDescriptorInfo info; - { - info.desc = outHDRImageViews[i]; - info.info.image.sampler = nullptr; - info.info.image.imageLayout = asset::IImage::EL_GENERAL; - } - writeDescriptorSet.info = &info; - logicalDevice->updateDescriptorSets(1u, &writeDescriptorSet, 0u, nullptr); - } - - IGPUBuffer::SCreationParams gpuuboParams = {}; - gpuuboParams.size = sizeof(SBasicViewParametersAligned); - gpuuboParams.usage = core::bitflag(IGPUBuffer::EUF_UNIFORM_BUFFER_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT; - gpuubo = logicalDevice->createBuffer(std::move(gpuuboParams)); - auto gpuUboMemReqs = gpuubo->getMemoryReqs(); - gpuUboMemReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto gpuUboMem = logicalDevice->allocate(gpuUboMemReqs, gpuubo.get()); - - uboDescriptorSet1 = descriptorPool->createDescriptorSet(core::smart_refctd_ptr(gpuDescriptorSetLayout1)); - { - video::IGPUDescriptorSet::SWriteDescriptorSet uboWriteDescriptorSet; - uboWriteDescriptorSet.dstSet = uboDescriptorSet1.get(); - uboWriteDescriptorSet.binding = 0; - uboWriteDescriptorSet.count = 1u; - uboWriteDescriptorSet.arrayElement = 0u; - uboWriteDescriptorSet.descriptorType = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER; - video::IGPUDescriptorSet::SDescriptorInfo info; - { - info.desc = gpuubo; - info.info.buffer.offset = 0ull; - info.info.buffer.size = sizeof(SBasicViewParametersAligned); - } - uboWriteDescriptorSet.info = &info; - logicalDevice->updateDescriptorSets(1u, &uboWriteDescriptorSet, 0u, nullptr); - } - - ISampler::SParams samplerParams0 = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_FLOAT_OPAQUE_BLACK, ISampler::ETF_LINEAR, ISampler::ETF_LINEAR, ISampler::ESMM_LINEAR, 0u, false, ECO_ALWAYS }; - sampler0 = logicalDevice->createSampler(samplerParams0); - ISampler::SParams samplerParams1 = { ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETC_CLAMP_TO_EDGE, ISampler::ETBC_INT_OPAQUE_BLACK, 
ISampler::ETF_NEAREST, ISampler::ETF_NEAREST, ISampler::ESMM_NEAREST, 0u, false, ECO_ALWAYS }; - sampler1 = logicalDevice->createSampler(samplerParams1); - - descriptorSet2 = descriptorPool->createDescriptorSet(core::smart_refctd_ptr(gpuDescriptorSetLayout2)); - { - constexpr auto kDescriptorCount = 5; - IGPUDescriptorSet::SWriteDescriptorSet writeDescriptorSet2[kDescriptorCount]; - IGPUDescriptorSet::SDescriptorInfo writeDescriptorInfo[kDescriptorCount]; - for (auto i=0; iupdateDescriptorSets(kDescriptorCount, writeDescriptorSet2, 0u, nullptr); - } - - constexpr uint32_t FRAME_COUNT = 500000u; - - for (uint32_t i=0u; icreateSemaphore(); - renderFinished[i] = logicalDevice->createSemaphore(); - frameComplete[i] = logicalDevice->createFence(video::IGPUFence::ECF_SIGNALED_BIT); - frameUploadDataCompleteSemaphore[i] = logicalDevice->createSemaphore(); - frameUploadDataCompleteFence[i] = logicalDevice->createFence(video::IGPUFence::ECF_UNSIGNALED); - } - - oracle.reportBeginFrameRecord(); - } - - void onAppTerminated_impl() override - { - const auto& fboCreationParams = fbos->begin()[m_acquiredNextFBO]->getCreationParameters(); - auto gpuSourceImageView = fboCreationParams.attachments[0]; - logicalDevice->waitIdle(); - - bool status = ext::ScreenShot::createScreenShot( - logicalDevice.get(), - queues[CommonAPI::InitOutput::EQT_TRANSFER_UP], - renderFinished[m_resourceIx].get(), - gpuSourceImageView.get(), - assetManager.get(), - "ScreenShot.png", - asset::IImage::EL_PRESENT_SRC, - asset::EAF_NONE); - - assert(status); - } - - void workLoopBody() override - { - auto& graphicsQueue = queues[CommonAPI::InitOutput::EQT_GRAPHICS]; - - m_resourceIx++; - if(m_resourceIx >= FRAMES_IN_FLIGHT) { - m_resourceIx = 0; - } - - oracle.reportEndFrameRecord(); - double dt = oracle.getDeltaTimeInMicroSeconds() / 1000.0; - auto nextPresentationTimeStamp = oracle.getNextPresentationTimeStamp(); - oracle.reportBeginFrameRecord(); - - // Input - inputSystem->getDefaultMouse(&mouse); - inputSystem->getDefaultKeyboard(&keyboard); - - cam.beginInputProcessing(nextPresentationTimeStamp); - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { cam.mouseProcess(events); }, logger.get()); - keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { cam.keyboardProcess(events); }, logger.get()); - cam.endInputProcessing(nextPresentationTimeStamp); - - auto& cb = cmdbuf[m_resourceIx]; - auto& fence = frameComplete[m_resourceIx]; - while (logicalDevice->waitForFences(1u,&fence.get(),false,MAX_TIMEOUT)==video::IGPUFence::ES_TIMEOUT) - { - } - - const auto viewMatrix = cam.getViewMatrix(); - const auto viewProjectionMatrix = matrix4SIMD::concatenateBFollowedByAPrecisely( - video::ISurface::getSurfaceTransformationMatrix(swapchain->getPreTransform()), - cam.getConcatenatedMatrix() - ); - - // safe to proceed - cb->begin(IGPUCommandBuffer::EU_NONE); - - // renderpass - swapchain->acquireNextImage(MAX_TIMEOUT,imageAcquire[m_resourceIx].get(),nullptr,&m_acquiredNextFBO); - { - auto mv = viewMatrix; - auto mvp = viewProjectionMatrix; - core::matrix3x4SIMD normalMat; - mv.getSub3x3InverseTranspose(normalMat); - - SBasicViewParametersAligned viewParams; - memcpy(viewParams.uboData.MV, mv.pointer(), sizeof(mv)); - memcpy(viewParams.uboData.MVP, mvp.pointer(), sizeof(mvp)); - memcpy(viewParams.uboData.NormalMat, normalMat.pointer(), sizeof(normalMat)); - - asset::SBufferRange range; - range.buffer = gpuubo; - range.offset = 0ull; - range.size = sizeof(viewParams); - - 
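// Editor's sketch, condensed from the removed code surrounding this point (no API beyond what is
// already quoted above/below is assumed): the old example keeps FRAMES_IN_FLIGHT resource slots
// and chains their synchronization objects like so.
//
//   m_resourceIx = (m_resourceIx + 1) % FRAMES_IN_FLIGHT;                // pick this frame's slot
//   while (logicalDevice->waitForFences(1u, &frameComplete[m_resourceIx].get(), false, MAX_TIMEOUT)
//          == video::IGPUFence::ES_TIMEOUT) {}                           // slot's previous render is done
//   // at this point frameUploadDataCompleteFence[m_resourceIx] must also already be signalled,
//   // so it is safe to cull_frees() the staging buffer, reset that fence and re-record cmdbuf[m_resourceIx]
//
// Note the CPU never waits on the upload fence directly: the staging submit below signals
// frameUploadDataCompleteSemaphore[m_resourceIx], the render submit waits on that semaphore
// (together with imageAcquire[m_resourceIx]) and signals frameComplete[m_resourceIx], closing the loop.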
video::IGPUQueue::SSubmitInfo uploadImageSubmit; - uploadImageSubmit.pSignalSemaphores = &frameUploadDataCompleteSemaphore[m_resourceIx].get(); - uploadImageSubmit.signalSemaphoreCount = 1u; - - // We know the fence is already signal because of how we structured our execution -> frameUploadDataCompleteSemaphore -> signals to Render Frame -> wait for frameComplete fence to finish -> then we know frameUploadCompleteFence is signalled - utilities->getDefaultUpStreamingBuffer()->cull_frees(); // need to cull_frees after fence signalled and before fence is reset again - logicalDevice->resetFences(1, &frameUploadDataCompleteFence[m_resourceIx].get()); - - utilities->updateBufferRangeViaStagingBufferAutoSubmit(range, &viewParams, graphicsQueue, frameUploadDataCompleteFence[m_resourceIx].get(), uploadImageSubmit); - // No need to wait for frameUploadDataCompleteFence in CPU, we'll use semaphores to singal the next stage the upload is complete. - } - - auto graphicsCmdQueueFamIdx = queues[CommonAPI::InitOutput::EQT_GRAPHICS]->getFamilyIndex(); - // TRANSITION outHDRImageViews[m_acquiredNextFBO] to EIL_GENERAL (because of descriptorSets0 -> ComputeShader Writes into the image) - { - IGPUCommandBuffer::SImageMemoryBarrier imageBarriers[3u] = {}; - imageBarriers[0].barrier.srcAccessMask = asset::EAF_NONE; - imageBarriers[0].barrier.dstAccessMask = static_cast(asset::EAF_SHADER_WRITE_BIT); - imageBarriers[0].oldLayout = asset::IImage::EL_UNDEFINED; - imageBarriers[0].newLayout = asset::IImage::EL_GENERAL; - imageBarriers[0].srcQueueFamilyIndex = graphicsCmdQueueFamIdx; - imageBarriers[0].dstQueueFamilyIndex = graphicsCmdQueueFamIdx; - imageBarriers[0].image = outHDRImageViews[m_acquiredNextFBO]->getCreationParameters().image; - imageBarriers[0].subresourceRange.aspectMask = asset::IImage::EAF_COLOR_BIT; - imageBarriers[0].subresourceRange.baseMipLevel = 0u; - imageBarriers[0].subresourceRange.levelCount = 1; - imageBarriers[0].subresourceRange.baseArrayLayer = 0u; - imageBarriers[0].subresourceRange.layerCount = 1; - - imageBarriers[1].barrier.srcAccessMask = asset::EAF_NONE; - imageBarriers[1].barrier.dstAccessMask = static_cast(asset::EAF_SHADER_READ_BIT); - imageBarriers[1].oldLayout = asset::IImage::EL_UNDEFINED; - imageBarriers[1].newLayout = asset::IImage::EL_SHADER_READ_ONLY_OPTIMAL; - imageBarriers[1].srcQueueFamilyIndex = graphicsCmdQueueFamIdx; - imageBarriers[1].dstQueueFamilyIndex = graphicsCmdQueueFamIdx; - imageBarriers[1].image = gpuScrambleImageView->getCreationParameters().image; - imageBarriers[1].subresourceRange.aspectMask = asset::IImage::EAF_COLOR_BIT; - imageBarriers[1].subresourceRange.baseMipLevel = 0u; - imageBarriers[1].subresourceRange.levelCount = 1; - imageBarriers[1].subresourceRange.baseArrayLayer = 0u; - imageBarriers[1].subresourceRange.layerCount = 1; - - imageBarriers[2].barrier.srcAccessMask = asset::EAF_NONE; - imageBarriers[2].barrier.dstAccessMask = static_cast(asset::EAF_SHADER_READ_BIT); - imageBarriers[2].oldLayout = asset::IImage::EL_UNDEFINED; - imageBarriers[2].newLayout = asset::IImage::EL_SHADER_READ_ONLY_OPTIMAL; - imageBarriers[2].srcQueueFamilyIndex = graphicsCmdQueueFamIdx; - imageBarriers[2].dstQueueFamilyIndex = graphicsCmdQueueFamIdx; - imageBarriers[2].image = gpuEnvmapImageView->getCreationParameters().image; - imageBarriers[2].subresourceRange.aspectMask = asset::IImage::EAF_COLOR_BIT; - imageBarriers[2].subresourceRange.baseMipLevel = 0u; - imageBarriers[2].subresourceRange.levelCount = 
gpuEnvmapImageView->getCreationParameters().subresourceRange.levelCount; - imageBarriers[2].subresourceRange.baseArrayLayer = 0u; - imageBarriers[2].subresourceRange.layerCount = gpuEnvmapImageView->getCreationParameters().subresourceRange.layerCount; - - cb->pipelineBarrier(asset::EPSF_TOP_OF_PIPE_BIT, asset::EPSF_COMPUTE_SHADER_BIT, asset::EDF_NONE, 0u, nullptr, 0u, nullptr, 3u, imageBarriers); - } - - // cube envmap handle - { - cb->bindComputePipeline(gpuComputePipeline.get()); - cb->bindDescriptorSets(EPBP_COMPUTE, gpuComputePipeline->getLayout(), 0u, 1u, &descriptorSets0[m_acquiredNextFBO].get()); - cb->bindDescriptorSets(EPBP_COMPUTE, gpuComputePipeline->getLayout(), 1u, 1u, &uboDescriptorSet1.get()); - cb->bindDescriptorSets(EPBP_COMPUTE, gpuComputePipeline->getLayout(), 2u, 1u, &descriptorSet2.get()); - cb->dispatch(dispatchInfo.workGroupCount[0], dispatchInfo.workGroupCount[1], dispatchInfo.workGroupCount[2]); - } - // TODO: tone mapping and stuff - - // Copy HDR Image to SwapChain - auto srcImgViewCreationParams = outHDRImageViews[m_acquiredNextFBO]->getCreationParameters(); - auto dstImgViewCreationParams = fbos->begin()[m_acquiredNextFBO]->getCreationParameters().attachments[0]->getCreationParameters(); - - // Getting Ready for Blit - // TRANSITION outHDRImageViews[m_acquiredNextFBO] to EIL_TRANSFER_SRC_OPTIMAL - // TRANSITION `fbos[m_acquiredNextFBO]->getCreationParameters().attachments[0]` to EIL_TRANSFER_DST_OPTIMAL - { - IGPUCommandBuffer::SImageMemoryBarrier imageBarriers[2u] = {}; - imageBarriers[0].barrier.srcAccessMask = asset::EAF_NONE; - imageBarriers[0].barrier.dstAccessMask = asset::EAF_TRANSFER_WRITE_BIT; - imageBarriers[0].oldLayout = asset::IImage::EL_UNDEFINED; - imageBarriers[0].newLayout = asset::IImage::EL_TRANSFER_SRC_OPTIMAL; - imageBarriers[0].srcQueueFamilyIndex = graphicsCmdQueueFamIdx; - imageBarriers[0].dstQueueFamilyIndex = graphicsCmdQueueFamIdx; - imageBarriers[0].image = srcImgViewCreationParams.image; - imageBarriers[0].subresourceRange.aspectMask = asset::IImage::EAF_COLOR_BIT; - imageBarriers[0].subresourceRange.baseMipLevel = 0u; - imageBarriers[0].subresourceRange.levelCount = 1; - imageBarriers[0].subresourceRange.baseArrayLayer = 0u; - imageBarriers[0].subresourceRange.layerCount = 1; - - imageBarriers[1].barrier.srcAccessMask = asset::EAF_NONE; - imageBarriers[1].barrier.dstAccessMask = asset::EAF_TRANSFER_WRITE_BIT; - imageBarriers[1].oldLayout = asset::IImage::EL_UNDEFINED; - imageBarriers[1].newLayout = asset::IImage::EL_TRANSFER_DST_OPTIMAL; - imageBarriers[1].srcQueueFamilyIndex = graphicsCmdQueueFamIdx; - imageBarriers[1].dstQueueFamilyIndex = graphicsCmdQueueFamIdx; - imageBarriers[1].image = dstImgViewCreationParams.image; - imageBarriers[1].subresourceRange.aspectMask = asset::IImage::EAF_COLOR_BIT; - imageBarriers[1].subresourceRange.baseMipLevel = 0u; - imageBarriers[1].subresourceRange.levelCount = 1; - imageBarriers[1].subresourceRange.baseArrayLayer = 0u; - imageBarriers[1].subresourceRange.layerCount = 1; - cb->pipelineBarrier(asset::EPSF_TRANSFER_BIT, asset::EPSF_TRANSFER_BIT, asset::EDF_NONE, 0u, nullptr, 0u, nullptr, 2u, imageBarriers); - } - - // Blit Image - { - SImageBlit blit = {}; - blit.srcOffsets[0] = {0, 0, 0}; - blit.srcOffsets[1] = {WIN_W, WIN_H, 1}; - - blit.srcSubresource.aspectMask = srcImgViewCreationParams.subresourceRange.aspectMask; - blit.srcSubresource.mipLevel = srcImgViewCreationParams.subresourceRange.baseMipLevel; - blit.srcSubresource.baseArrayLayer = 
srcImgViewCreationParams.subresourceRange.baseArrayLayer; - blit.srcSubresource.layerCount = srcImgViewCreationParams.subresourceRange.layerCount; - blit.dstOffsets[0] = {0, 0, 0}; - blit.dstOffsets[1] = {WIN_W, WIN_H, 1}; - blit.dstSubresource.aspectMask = dstImgViewCreationParams.subresourceRange.aspectMask; - blit.dstSubresource.mipLevel = dstImgViewCreationParams.subresourceRange.baseMipLevel; - blit.dstSubresource.baseArrayLayer = dstImgViewCreationParams.subresourceRange.baseArrayLayer; - blit.dstSubresource.layerCount = dstImgViewCreationParams.subresourceRange.layerCount; - - auto srcImg = srcImgViewCreationParams.image; - auto dstImg = dstImgViewCreationParams.image; - - cb->blitImage(srcImg.get(), asset::IImage::EL_TRANSFER_SRC_OPTIMAL, dstImg.get(), asset::IImage::EL_TRANSFER_DST_OPTIMAL, 1u, &blit , ISampler::ETF_NEAREST); - } - - // TRANSITION `fbos[m_acquiredNextFBO]->getCreationParameters().attachments[0]` to EIL_PRESENT - { - IGPUCommandBuffer::SImageMemoryBarrier imageBarriers[1u] = {}; - imageBarriers[0].barrier.srcAccessMask = asset::EAF_TRANSFER_WRITE_BIT; - imageBarriers[0].barrier.dstAccessMask = asset::EAF_NONE; - imageBarriers[0].oldLayout = asset::IImage::EL_TRANSFER_DST_OPTIMAL; - imageBarriers[0].newLayout = asset::IImage::EL_PRESENT_SRC; - imageBarriers[0].srcQueueFamilyIndex = graphicsCmdQueueFamIdx; - imageBarriers[0].dstQueueFamilyIndex = graphicsCmdQueueFamIdx; - imageBarriers[0].image = dstImgViewCreationParams.image; - imageBarriers[0].subresourceRange.aspectMask = asset::IImage::EAF_COLOR_BIT; - imageBarriers[0].subresourceRange.baseMipLevel = 0u; - imageBarriers[0].subresourceRange.levelCount = 1; - imageBarriers[0].subresourceRange.baseArrayLayer = 0u; - imageBarriers[0].subresourceRange.layerCount = 1; - cb->pipelineBarrier(asset::EPSF_TRANSFER_BIT, asset::EPSF_TOP_OF_PIPE_BIT, asset::EDF_NONE, 0u, nullptr, 0u, nullptr, 1u, imageBarriers); - } - - cb->end(); - logicalDevice->resetFences(1, &fence.get()); - - nbl::video::IGPUQueue::SSubmitInfo submit; - submit.commandBufferCount = 1u; - submit.commandBuffers = &cb.get(); - submit.signalSemaphoreCount = 1u; - submit.pSignalSemaphores = &renderFinished[m_resourceIx].get(); - nbl::video::IGPUSemaphore* waitSemaphores[2u] = { imageAcquire[m_resourceIx].get(), frameUploadDataCompleteSemaphore[m_resourceIx].get() }; - asset::E_PIPELINE_STAGE_FLAGS waitStages[2u] = { nbl::asset::EPSF_COLOR_ATTACHMENT_OUTPUT_BIT, nbl::asset::EPSF_RAY_TRACING_SHADER_BIT_KHR} ; - submit.waitSemaphoreCount = 2u; - submit.pWaitSemaphores = waitSemaphores; - submit.pWaitDstStageMask = waitStages; - - graphicsQueue->submit(1u,&submit,fence.get()); - - CommonAPI::Present(logicalDevice.get(), swapchain.get(), queues[CommonAPI::InitOutput::EQT_GRAPHICS], renderFinished[m_resourceIx].get(), m_acquiredNextFBO); - } - - bool keepRunning() override - { - return windowCb->isWindowOpen(); - } - - video::IAPIConnection* getAPIConnection() override - { - return apiConnection.get(); - } - video::ILogicalDevice* getLogicalDevice() override - { - return logicalDevice.get(); - } - video::IGPURenderpass* getRenderpass() override - { - return renderpass.get(); - } - void setSurface(core::smart_refctd_ptr&& s) override - { - surface = std::move(s); - } - void setFBOs(std::vector>& f) override - { - for (int i = 0; i < f.size(); i++) - { - fbos->begin()[i] = core::smart_refctd_ptr(f[i]); - } - } - void setSwapchain(core::smart_refctd_ptr&& s) override - { - swapchain = std::move(s); - } - uint32_t getSwapchainImageCount() override - { - return 
swapchain->getImageCount(); - } - virtual nbl::asset::E_FORMAT getDepthFormat() override - { - return nbl::asset::EF_D32_SFLOAT; - } -}; - -NBL_COMMON_API_MAIN(RayQuerySampleApp) diff --git a/61_UI/CMakeLists.txt b/61_UI/CMakeLists.txt index a34e46ce6..5d0021f61 100644 --- a/61_UI/CMakeLists.txt +++ b/61_UI/CMakeLists.txt @@ -12,7 +12,9 @@ if(NBL_BUILD_IMGUI) imguizmo "${NBL_EXT_IMGUI_UI_LIB}" ) - - nbl_create_executable_project("${NBL_EXTRA_SOURCES}" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") - LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} geometryCreatorSpirvBRD) + + # TODO; Arek I removed `NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET` from the last parameter here, doesn't this macro have 4 arguments anyway !? + nbl_create_executable_project("${NBL_EXTRA_SOURCES}" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}") + # TODO: Arek temporarily disabled cause I haven't figured out how to make this target yet + # LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} nblExamplesGeometrySpirvBRD) endif() \ No newline at end of file diff --git a/61_UI/include/common.hpp b/61_UI/include/common.hpp index a5def7551..fe7d086dd 100644 --- a/61_UI/include/common.hpp +++ b/61_UI/include/common.hpp @@ -1,25 +1,19 @@ -#ifndef __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ -#define __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ +#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ -#include -// common api -#include "CCamera.hpp" -#include "SimpleWindowedApplication.hpp" -#include "CEventCallback.hpp" +#include "nbl/examples/examples.hpp" // the example's headers #include "transform.hpp" -#include "CGeomtryCreatorScene.hpp" using namespace nbl; -using namespace core; -using namespace hlsl; -using namespace system; -using namespace asset; -using namespace ui; -using namespace video; -using namespace scene; -using namespace geometrycreator; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; -#endif // __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ \ No newline at end of file +#endif // _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ \ No newline at end of file diff --git a/61_UI/include/transform.hpp b/61_UI/include/transform.hpp index 88a78f751..fb1672c2f 100644 --- a/61_UI/include/transform.hpp +++ b/61_UI/include/transform.hpp @@ -1,20 +1,23 @@ -#ifndef __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__ -#define __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__ +#ifndef _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED_ + #include "nbl/ui/ICursorControl.h" + #include "nbl/ext/ImGui/ImGui.h" + #include "imgui/imgui_internal.h" #include "imguizmo/ImGuizmo.h" -static constexpr inline auto OfflineSceneTextureIx = 1u; struct TransformRequestParams { - bool useWindow = true, editTransformDecomposition = false, enableViewManipulate = false; float camDistance = 8.f; + uint8_t sceneTexDescIx = ~0; + bool useWindow = true, editTransformDecomposition = false, enableViewManipulate = false; }; -void EditTransform(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) +nbl::hlsl::uint16_t2 EditTransform(float* cameraView, const float* cameraProjection, float* matrix, const TransformRequestParams& params) { static ImGuizmo::OPERATION mCurrentGizmoOperation(ImGuizmo::TRANSLATE); static ImGuizmo::MODE 
mCurrentGizmoMode(ImGuizmo::LOCAL); @@ -99,11 +102,12 @@ void EditTransform(float* cameraView, const float* cameraProjection, float* matr rendered is aligned to our texture scene using imgui "cursor" screen positions */ - +// TODO: this shouldn't be handled here I think SImResourceInfo info; - info.textureID = OfflineSceneTextureIx; + info.textureID = params.sceneTexDescIx; info.samplerIx = (uint16_t)nbl::ext::imgui::UI::DefaultSamplerIx::USER; + nbl::hlsl::uint16_t2 retval; if (params.useWindow) { ImGui::SetNextWindowSize(ImVec2(800, 400), ImGuiCond_Appearing); @@ -118,6 +122,7 @@ void EditTransform(float* cameraView, const float* cameraProjection, float* matr ImGui::Image(info, contentRegionSize); ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + retval = {contentRegionSize.x,contentRegionSize.y}; viewManipulateRight = cursorPos.x + contentRegionSize.x; viewManipulateTop = cursorPos.y; @@ -137,6 +142,7 @@ void EditTransform(float* cameraView, const float* cameraProjection, float* matr ImGui::Image(info, contentRegionSize); ImGuizmo::SetRect(cursorPos.x, cursorPos.y, contentRegionSize.x, contentRegionSize.y); + retval = {contentRegionSize.x,contentRegionSize.y}; viewManipulateRight = cursorPos.x + contentRegionSize.x; viewManipulateTop = cursorPos.y; @@ -149,6 +155,8 @@ void EditTransform(float* cameraView, const float* cameraProjection, float* matr ImGui::End(); ImGui::PopStyleColor(); + + return retval; } #endif // __NBL_THIS_EXAMPLE_TRANSFORM_H_INCLUDED__ \ No newline at end of file diff --git a/61_UI/main.cpp b/61_UI/main.cpp index 470d5e723..643cab079 100644 --- a/61_UI/main.cpp +++ b/61_UI/main.cpp @@ -5,794 +5,882 @@ #include "common.hpp" /* - Renders scene texture to an offline - framebuffer which color attachment - is then sampled into a imgui window. +Renders scene texture to an offscreen framebuffer whose color attachment is then sampled into a imgui window. - Written with Nabla, it's UI extension - and got integrated with ImGuizmo to - handle scene's object translations. +Written with Nabla's UI extension and got integrated with ImGuizmo to handle scene's object translations. 
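In short, the per-frame flow of the class below is roughly:
  1. render the currently selected geometry-creator object into an offscreen framebuffer whose
     color attachment leaves its renderpass in READ_ONLY_OPTIMAL so ImGui may sample it,
  2. begin the swapchain renderpass and let nbl::ext::imgui::UI record the interface, binding the
     suballocated descriptor set whose font-atlas slot is reserved permanently,
  3. submit once on the graphics queue, waiting on the swapchain acquire semaphore and signalling
     m_semaphore with ++m_realFrameIx.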
*/ - -class UISampleApp final : public examples::SimpleWindowedApplication +class UISampleApp final : public MonoWindowApplication, public BuiltinResourcesApplication { - using device_base_t = examples::SimpleWindowedApplication; - using clock_t = std::chrono::steady_clock; - - _NBL_STATIC_INLINE_CONSTEXPR uint32_t WIN_W = 1280, WIN_H = 720; - - constexpr static inline clock_t::duration DisplayImageDuration = std::chrono::milliseconds(900); + using device_base_t = MonoWindowApplication; + using asset_base_t = BuiltinResourcesApplication; public: inline UISampleApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) - : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} - - inline core::vector getSurfaces() const override - { - if (!m_surface) - { - { - auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); - IWindow::SCreationParams params = {}; - params.callback = core::make_smart_refctd_ptr(); - params.width = WIN_W; - params.height = WIN_H; - params.x = 32; - params.y = 32; - params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; - params.windowCaption = "UISampleApp"; - params.callback = windowCallback; - const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); - } - - auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); - const_cast&>(m_surface) = nbl::video::CSimpleResizeSurface::create(std::move(surface)); - } - - if (m_surface) - return { {m_surface->getSurface()/*,EQF_NONE*/} }; - - return {}; - } + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD), + device_base_t({1280,720}, EF_UNKNOWN, _localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} inline bool onAppInitialized(smart_refctd_ptr&& system) override { - m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); - + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) return false; - m_assetManager = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); - auto* geometry = m_assetManager->getGeometryCreator(); - m_semaphore = m_device->createSemaphore(m_realFrameIx); if (!m_semaphore) return logFail("Failed to Create a Semaphore!"); - ISwapchain::SCreationParams swapchainParams = { .surface = m_surface->getSurface() }; - if (!swapchainParams.deduceFormat(m_physicalDevice)) - return logFail("Could not choose a Surface Format for the Swapchain!"); - - const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = + auto pool = m_device->createCommandPool(getGraphicsQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + for (auto i = 0u; icreateCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{m_cmdBufs.data()+i,1})) + return logFail("Couldn't create Command Buffer!"); + } + + const uint32_t addtionalBufferOwnershipFamilies[] = {getGraphicsQueue()->getFamilyIndex()}; + m_scene = CGeometryCreatorScene::create( { - .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .dstSubpass = 0, - .memoryBarrier = - { - .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, - .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - .dstAccessMask = 
asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - }, - { - .srcSubpass = 0, - .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, - .memoryBarrier = - { - .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } + .transferQueue = getTransferUpQueue(), + .utilities = m_utils.get(), + .logger = m_logger.get(), + .addtionalBufferOwnershipFamilies = addtionalBufferOwnershipFamilies }, - IGPURenderpass::SCreationParams::DependenciesEnd - }; - - auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); - auto* renderpass = scResources->getRenderpass(); + CSimpleDebugRenderer::DefaultPolygonGeometryPatch + ); - if (!renderpass) - return logFail("Failed to create Renderpass!"); - - auto gQueue = getGraphicsQueue(); - if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) - return logFail("Could not create Window & Surface or initialize the Surface!"); - - m_cmdPool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - - for (auto i = 0u; i < MaxFramesInFlight; i++) + // for the scene drawing pass { - if (!m_cmdPool) - return logFail("Couldn't create Command Pool!"); - if (!m_cmdPool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i, 1 })) - return logFail("Couldn't create Command Buffer!"); + IGPURenderpass::SCreationParams params = {}; + const IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = { + {{ + { + .format = sceneRenderDepthFormat, + .samples = IGPUImage::ESCF_1_BIT, + .mayAlias = false + }, + /*.loadOp = */{IGPURenderpass::LOAD_OP::CLEAR}, + /*.storeOp = */{IGPURenderpass::STORE_OP::STORE}, + /*.initialLayout = */{IGPUImage::LAYOUT::UNDEFINED}, + /*.finalLayout = */{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL} + }}, + IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd + }; + params.depthStencilAttachments = depthAttachments; + const IGPURenderpass::SCreationParams::SColorAttachmentDescription colorAttachments[] = { + {{ + { + .format = finalSceneRenderFormat, + .samples = IGPUImage::E_SAMPLE_COUNT_FLAGS::ESCF_1_BIT, + .mayAlias = false + }, + /*.loadOp = */IGPURenderpass::LOAD_OP::CLEAR, + /*.storeOp = */IGPURenderpass::STORE_OP::STORE, + /*.initialLayout = */IGPUImage::LAYOUT::UNDEFINED, + /*.finalLayout = */ IGPUImage::LAYOUT::READ_ONLY_OPTIMAL // ImGUI shall read + }}, + IGPURenderpass::SCreationParams::ColorAttachmentsEnd + }; + params.colorAttachments = colorAttachments; + IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = { + {}, + IGPURenderpass::SCreationParams::SubpassesEnd + }; + subpasses[0].depthStencilAttachment = {{.render={.attachmentIndex=0,.layout=IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}}; + subpasses[0].colorAttachments[0] = {.render={.attachmentIndex=0,.layout=IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL}}; + params.subpasses = subpasses; + + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // wipe-transition of Color to ATTACHMENT_OPTIMAL and depth + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + // last place where the depth can get modified in previous frame, `COLOR_ATTACHMENT_OUTPUT_BIT` is implicitly later + // while color is sampled by ImGUI + .srcStageMask = 
PIPELINE_STAGE_FLAGS::LATE_FRAGMENT_TESTS_BIT|PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, + // don't want any writes to be available, as we are clearing both attachments + .srcAccessMask = ACCESS_FLAGS::NONE, + // destination needs to wait as early as possible + // TODO: `COLOR_ATTACHMENT_OUTPUT_BIT` shouldn't be needed, because its a logically later stage, see TODO in `ECommonEnums.h` + .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT|PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // because depth and color get cleared first no read mask + .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT|ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + // last place where the color can get modified, depth is implicitly earlier + .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available, also won't be using depth so don't care about it being visible to anyone else + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT, + // the ImGUI will sample the color, then next frame we overwrite both attachments + .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT|PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT, + // but we only care about the availability-visibility chain between renderpass and imgui + .dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + params.dependencies = {}; + m_renderpass = m_device->createRenderpass(std::move(params)); + if (!m_renderpass) + return logFail("Failed to create Scene Renderpass!"); } - - //pass.scene = CScene::create(smart_refctd_ptr(m_utils), smart_refctd_ptr(m_logger), gQueue, geometry); - pass.scene = CScene::create(smart_refctd_ptr(m_utils), smart_refctd_ptr(m_logger), gQueue, geometry); - - nbl::ext::imgui::UI::SCreationParameters params; - - params.resources.texturesInfo = { .setIx = 0u, .bindingIx = 0u }; - params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; - params.assetManager = m_assetManager; - params.pipelineCache = nullptr; - params.pipelineLayout = nbl::ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, TexturesAmount); - params.renderpass = smart_refctd_ptr(renderpass); - params.streamingBuffer = nullptr; - params.subpassIx = 0u; - params.transfer = getTransferUpQueue(); - params.utilities = m_utils; + const auto& geometries = m_scene->getInitParams().geometries; + m_renderer = CSimpleDebugRenderer::create(m_assetMgr.get(),m_renderpass.get(),0,{&geometries.front().get(),geometries.size()}); + // special case { - pass.ui.manager = nbl::ext::imgui::UI::create(std::move(params)); - - if (!pass.ui.manager) - return false; - - // note that we use default layout provided by our extension, but you are free to create your own by filling nbl::ext::imgui::UI::S_CREATION_PARAMETERS::resources - const auto* descriptorSetLayout = pass.ui.manager->getPipeline()->getLayout()->getDescriptorSetLayout(0u); - const auto& params = pass.ui.manager->getCreationParameters(); - - IDescriptorPool::SCreateInfo descriptorPoolInfo = {}; - descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLER)] = (uint32_t)nbl::ext::imgui::UI::DefaultSamplerIx::COUNT; - 
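// Editor's note (refers only to code already present in this hunk): the removed lines around here
// size a dedicated IDescriptorPool by hand for the UI extension's sampled images and samplers.
// The replacement further down instead wraps a single set in a SubAllocatedDescriptorSet created
// via createDescriptorPoolForDSLayouts(), permanently reserves slot ext::imgui::UI::FontAtlasTexId
// for the font atlas with multi_allocate(), and releases it again in onAppTerminated() with
// multi_deallocate(), which lets descriptor slots for dynamic textures be acquired and released at
// runtime rather than being fixed up-front.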
descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE)] = TexturesAmount; - descriptorPoolInfo.maxSets = 1u; - descriptorPoolInfo.flags = IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT; - - m_descriptorSetPool = m_device->createDescriptorPool(std::move(descriptorPoolInfo)); - assert(m_descriptorSetPool); + const auto& pipelines = m_renderer->getInitParams().pipelines; + auto ix = 0u; + for (const auto& name : m_scene->getInitParams().geometryNames) + { + if (name=="Cone") + m_renderer->getGeometry(ix).pipeline = pipelines[CSimpleDebugRenderer::SInitParams::PipelineType::Cone]; + ix++; + } + } + // we'll only display one thing at a time + m_renderer->m_instances.resize(1); - m_descriptorSetPool->createDescriptorSets(1u, &descriptorSetLayout, &pass.ui.descriptorSet); - assert(pass.ui.descriptorSet); + // Create ImGUI + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::imgui::UI::SCreationParameters params = {}; + params.resources.texturesInfo = {.setIx=0u,.bindingIx=TexturesImGUIBindingIndex}; + params.resources.samplersInfo = {.setIx=0u,.bindingIx=1u}; + params.utilities = m_utils; + params.transfer = getTransferUpQueue(); + params.pipelineLayout = ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(),params.resources.texturesInfo,params.resources.samplersInfo,MaxImGUITextures); + params.assetManager = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); + params.renderpass = smart_refctd_ptr(scRes->getRenderpass()); + params.subpassIx = 0u; + params.pipelineCache = nullptr; + interface.imGUI = ext::imgui::UI::create(std::move(params)); + if (!interface.imGUI) + return logFail("Failed to create `nbl::ext::imgui::UI` class"); } - pass.ui.manager->registerListener([this]() -> void - { - ImGuiIO& io = ImGui::GetIO(); - camera.setProjectionMatrix([&]() + // create rest of User Interface + { + auto* imgui = interface.imGUI.get(); + // create the suballocated descriptor set + { + // note that we use default layout provided by our extension, but you are free to create your own by filling ext::imgui::UI::S_CREATION_PARAMETERS::resources + const auto* layout = imgui->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + auto pool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT,{&layout,1}); + auto ds = pool->createDescriptorSet(smart_refctd_ptr(layout)); + interface.subAllocDS = make_smart_refctd_ptr(std::move(ds)); + if (!interface.subAllocDS) + return logFail("Failed to create the descriptor set"); + // make sure Texture Atlas slot is taken for eternity { - static matrix4SIMD projection; - - if (isPerspective) - if(isLH) - projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); - else - projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); - else - { - float viewHeight = viewWidth * io.DisplaySize.y / io.DisplaySize.x; - - if(isLH) - projection = matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, viewHeight, zNear, zFar); - else - projection = matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, viewHeight, zNear, zFar); - } - - return projection; - }()); - - ImGuizmo::SetOrthographic(false); - ImGuizmo::BeginFrame(); - - ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); - ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); - - // create a window and insert the 
inspector - ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); - ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); - ImGui::Begin("Editor"); - - if (ImGui::RadioButton("Full view", !transformParams.useWindow)) - transformParams.useWindow = false; - - ImGui::SameLine(); - - if (ImGui::RadioButton("Window", transformParams.useWindow)) - transformParams.useWindow = true; - - ImGui::Text("Camera"); - bool viewDirty = false; - - if (ImGui::RadioButton("LH", isLH)) - isLH = true; - - ImGui::SameLine(); - - if (ImGui::RadioButton("RH", !isLH)) - isLH = false; - - if (ImGui::RadioButton("Perspective", isPerspective)) - isPerspective = true; - - ImGui::SameLine(); + auto dummy = SubAllocatedDescriptorSet::invalid_value; + interface.subAllocDS->multi_allocate(0,1,&dummy); + assert(dummy==ext::imgui::UI::FontAtlasTexId); + } + // write constant descriptors, note we don't create info & write pair for the samplers because UI extension's are immutable and baked into DS layout + IGPUDescriptorSet::SDescriptorInfo info = {}; + info.desc = smart_refctd_ptr(interface.imGUI->getFontAtlasView()); + info.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + const IGPUDescriptorSet::SWriteDescriptorSet write = { + .dstSet = interface.subAllocDS->getDescriptorSet(), + .binding = TexturesImGUIBindingIndex, + .arrayElement = ext::imgui::UI::FontAtlasTexId, + .count = 1, + .info = &info + }; + if (!m_device->updateDescriptorSets({&write,1},{})) + return logFail("Failed to write the descriptor set"); + } + imgui->registerListener([this](){interface();}); + } - if (ImGui::RadioButton("Orthographic", !isPerspective)) - isPerspective = false; + interface.camera.mapKeysToArrows(); - ImGui::Checkbox("Enable \"view manipulate\"", &transformParams.enableViewManipulate); - ImGui::Checkbox("Enable camera movement", &move); - ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f); - ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f); + onAppInitializedFinish(); + return true; + } - // ImGui::Checkbox("Flip Gizmo's Y axis", &flipGizmoY); // let's not expose it to be changed in UI but keep the logic in case + // + virtual inline bool onAppTerminated() + { + SubAllocatedDescriptorSet::value_type fontAtlasDescIx = ext::imgui::UI::FontAtlasTexId; + IGPUDescriptorSet::SDropDescriptorSet dummy[1]; + interface.subAllocDS->multi_deallocate(dummy,TexturesImGUIBindingIndex,1,&fontAtlasDescIx); + return device_base_t::onAppTerminated(); + } - if (isPerspective) - ImGui::SliderFloat("Fov", &fov, 20.f, 150.f); - else - ImGui::SliderFloat("Ortho width", &viewWidth, 1, 20); + inline IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) override + { + // CPU events + update(nextPresentationTimestamp); - ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f); - ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f); + const auto& virtualWindowRes = interface.sceneResolution; + if (!m_framebuffer || m_framebuffer->getCreationParameters().width!=virtualWindowRes[0] || m_framebuffer->getCreationParameters().height!=virtualWindowRes[1]) + recreateFramebuffer(virtualWindowRes); - viewDirty |= ImGui::SliderFloat("Distance", &transformParams.camDistance, 1.f, 69.f); + // + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; - if (viewDirty || firstFrame) + auto* const cb = m_cmdBufs.data()[resourceIx].get(); + cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // clear to black for 
both things + const IGPUCommandBuffer::SClearColorValue clearValue = { .float32 = {0.f,0.f,0.f,1.f} }; + if (m_framebuffer) + { + cb->beginDebugMarker("UISampleApp Scene Frame"); + { + const IGPUCommandBuffer::SClearDepthStencilValue farValue = { .depth=0.f }; + const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = { - core::vectorSIMDf cameraPosition(cosf(camYAngle)* cosf(camXAngle)* transformParams.camDistance, sinf(camXAngle)* transformParams.camDistance, sinf(camYAngle)* cosf(camXAngle)* transformParams.camDistance); - core::vectorSIMDf cameraTarget(0.f, 0.f, 0.f); - const static core::vectorSIMDf up(0.f, 1.f, 0.f); - - camera.setPosition(cameraPosition); - camera.setTarget(cameraTarget); - camera.setBackupUpVector(up); - - camera.recomputeViewMatrix(); - - firstFrame = false; + .framebuffer = m_framebuffer.get(), + .colorClearValues = &clearValue, + .depthStencilClearValues = &farValue, + .renderArea = { + .offset = {0,0}, + .extent = {virtualWindowRes[0],virtualWindowRes[1]} + } + }; + beginRenderpass(cb,renderpassInfo); + } + // draw scene + { + float32_t3x4 viewMatrix; + float32_t4x4 viewProjMatrix; + // TODO: get rid of legacy matrices + { + const auto& camera = interface.camera; + memcpy(&viewMatrix,camera.getViewMatrix().pointer(),sizeof(viewMatrix)); + memcpy(&viewProjMatrix,camera.getConcatenatedMatrix().pointer(),sizeof(viewProjMatrix)); } + const auto viewParams = CSimpleDebugRenderer::SViewParams(viewMatrix,viewProjMatrix); - ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); - if (ImGuizmo::IsUsing()) + // tear down scene every frame + auto& instance = m_renderer->m_instances[0]; + memcpy(&instance.world,&interface.model,sizeof(instance.world)); + instance.packedGeo = m_renderer->getGeometries().data() + interface.gcIndex; + m_renderer->render(cb,viewParams); + } + cb->endRenderPass(); + cb->endDebugMarker(); + } + { + cb->beginDebugMarker("UISampleApp IMGUI Frame"); + { + auto scRes = static_cast(m_surface->getSwapchainResources()); + const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = { - ImGui::Text("Using gizmo"); - } - else + .framebuffer = scRes->getFramebuffer(device_base_t::getCurrentAcquire().imageIndex), + .colorClearValues = &clearValue, + .depthStencilClearValues = nullptr, + .renderArea = { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + } + }; + beginRenderpass(cb,renderpassInfo); + } + // draw ImGUI + { + auto* imgui = interface.imGUI.get(); + auto* pipeline = imgui->getPipeline(); + cb->bindGraphicsPipeline(pipeline); + // note that we use default UI pipeline layout where uiParams.resources.textures.setIx == uiParams.resources.samplers.setIx + const auto* ds = interface.subAllocDS->getDescriptorSet(); + cb->bindDescriptorSets(EPBP_GRAPHICS,pipeline->getLayout(),imgui->getCreationParameters().resources.texturesInfo.setIx,1u,&ds); + // a timepoint in the future to release streaming resources for geometry + const ISemaphore::SWaitInfo drawFinished = {.semaphore=m_semaphore.get(),.value=m_realFrameIx+1u}; + if (!imgui->render(cb,drawFinished)) { - ImGui::Text(ImGuizmo::IsOver() ? "Over gizmo" : ""); - ImGui::SameLine(); - ImGui::Text(ImGuizmo::IsOver(ImGuizmo::TRANSLATE) ? "Over translate gizmo" : ""); - ImGui::SameLine(); - ImGui::Text(ImGuizmo::IsOver(ImGuizmo::ROTATE) ? "Over rotate gizmo" : ""); - ImGui::SameLine(); - ImGui::Text(ImGuizmo::IsOver(ImGuizmo::SCALE) ? 
"Over scale gizmo" : ""); + m_logger->log("TODO: need to present acquired image before bailing because its already acquired.",ILogger::ELL_ERROR); + return {}; } - ImGui::Separator(); - - /* - * ImGuizmo expects view & perspective matrix to be column major both with 4x4 layout - * and Nabla uses row major matricies - 3x4 matrix for view & 4x4 for projection - - - VIEW: - - ImGuizmo - - | X[0] Y[0] Z[0] 0.0f | - | X[1] Y[1] Z[1] 0.0f | - | X[2] Y[2] Z[2] 0.0f | - | -Dot(X, eye) -Dot(Y, eye) -Dot(Z, eye) 1.0f | - - Nabla - - | X[0] X[1] X[2] -Dot(X, eye) | - | Y[0] Y[1] Y[2] -Dot(Y, eye) | - | Z[0] Z[1] Z[2] -Dot(Z, eye) | - - = transpose(nbl::core::matrix4SIMD()) - - - PERSPECTIVE [PROJECTION CASE]: - - ImGuizmo + } + cb->endRenderPass(); + cb->endDebugMarker(); + } + cb->end(); - | (temp / temp2) (0.0) (0.0) (0.0) | - | (0.0) (temp / temp3) (0.0) (0.0) | - | ((right + left) / temp2) ((top + bottom) / temp3) ((-zfar - znear) / temp4) (-1.0f) | - | (0.0) (0.0) ((-temp * zfar) / temp4) (0.0) | + //updateGUIDescriptorSet(); - Nabla + IQueue::SSubmitInfo::SSemaphoreInfo retval = + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_GRAPHICS_BITS + }; + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cb } + }; + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { + { + .semaphore = device_base_t::getCurrentAcquire().semaphore, + .value = device_base_t::getCurrentAcquire().acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = {&retval,1} + } + }; + + if (getGraphicsQueue()->submit(infos) != IQueue::RESULT::SUCCESS) + { + retval.semaphore = nullptr; // so that we don't wait on semaphore that will never signal + m_realFrameIx--; + } - | w (0.0) (0.0) (0.0) | - | (0.0) -h (0.0) (0.0) | - | (0.0) (0.0) (-zFar/(zFar-zNear)) (-zNear*zFar/(zFar-zNear)) | - | (0.0) (0.0) (-1.0) (0.0) | - = transpose() + m_window->setCaption("[Nabla Engine] UI App Test Demo"); + return retval; + } - * - * the ViewManipulate final call (inside EditTransform) returns world space column major matrix for an object, - * note it also modifies input view matrix but projection matrix is immutable - */ + protected: + const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const override + { + // Subsequent submits don't wait for each other, but they wait for acquire and get waited on by present + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = { + // don't want any writes to be available, we'll clear, only thing to worry about is the layout transition + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // should sync against the semaphore wait anyway + .srcAccessMask = ACCESS_FLAGS::NONE, + // layout transition needs to finish before the color write + .dstStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + // leave view offsets and flags default + }, + // want layout transition to begin after all color output is done + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = { + // last place where the color can get modified, depth is implicitly earlier + .srcStageMask = 
PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + // only write ops, reads can't be made available + .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + // spec says nothing is needed when presentation is the destination + } + // leave view offsets and flags default + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + return dependencies; + } - static struct - { - core::matrix4SIMD view, projection, model; - } imguizmoM16InOut; + private: + inline void update(const std::chrono::microseconds nextPresentationTimestamp) + { + auto& camera = interface.camera; + camera.setMoveSpeed(interface.moveSpeed); + camera.setRotateSpeed(interface.rotateSpeed); - ImGuizmo::SetID(0u); - imguizmoM16InOut.view = core::transpose(matrix4SIMD(camera.getViewMatrix())); - imguizmoM16InOut.projection = core::transpose(camera.getProjectionMatrix()); - imguizmoM16InOut.model = core::transpose(core::matrix4SIMD(pass.scene->object.model)); - { - if (flipGizmoY) // note we allow to flip gizmo just to match our coordinates - imguizmoM16InOut.projection[1][1] *= -1.f; // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ + m_inputSystem->getDefaultMouse(&mouse); + m_inputSystem->getDefaultKeyboard(&keyboard); - transformParams.editTransformDecomposition = true; - EditTransform(imguizmoM16InOut.view.pointer(), imguizmoM16InOut.projection.pointer(), imguizmoM16InOut.model.pointer(), transformParams); - } + struct + { + std::vector mouse{}; + std::vector keyboard{}; + } uiEvents; - // to Nabla + update camera & model matrices - const auto& view = camera.getViewMatrix(); - const auto& projection = camera.getProjectionMatrix(); + // TODO: should be a member really + static std::chrono::microseconds previousEventTimestamp{}; - // TODO: make it more nicely - const_cast(view) = core::transpose(imguizmoM16InOut.view).extractSub3x4(); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok) - camera.setProjectionMatrix(projection); // update concatanated matrix + // I think begin/end should always be called on camera, just events shouldn't be fed, why? 
+ // If you stop begin/end, whatever keys were up/down get their up/down values frozen leading to + // `perActionDt` becoming obnoxiously large the first time the even processing resumes due to + // `timeDiff` being computed since `lastVirtualUpTimeStamp` + camera.beginInputProcessing(nextPresentationTimestamp); + { + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { - static nbl::core::matrix3x4SIMD modelView, normal; - static nbl::core::matrix4SIMD modelViewProjection; + if (interface.move) + camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl - auto& hook = pass.scene->object; - hook.model = core::transpose(imguizmoM16InOut.model).extractSub3x4(); + for (const auto& e : events) // here capture { - const auto& references = pass.scene->getResources().objects; - const auto type = static_cast(gcIndex); - - const auto& [gpu, meta] = references[type]; - hook.meta.type = type; - hook.meta.name = meta.name; - } - - auto& ubo = hook.viewParameters; + if (e.timeStamp < previousEventTimestamp) + continue; - modelView = nbl::core::concatenateBFollowedByA(view, hook.model); - modelView.getSub3x3InverseTranspose(normal); - modelViewProjection = nbl::core::concatenateBFollowedByA(camera.getConcatenatedMatrix(), hook.model); + previousEventTimestamp = e.timeStamp; + uiEvents.mouse.emplace_back(e); - memcpy(ubo.MVP, modelViewProjection.pointer(), sizeof(ubo.MVP)); - memcpy(ubo.MV, modelView.pointer(), sizeof(ubo.MV)); - memcpy(ubo.NormalMat, normal.pointer(), sizeof(ubo.NormalMat)); - - // object meta display - { - ImGui::Begin("Object"); - ImGui::Text("type: \"%s\"", hook.meta.name.data()); - ImGui::End(); + if (e.type==nbl::ui::SMouseEvent::EET_SCROLL && m_renderer) + { + interface.gcIndex += int16_t(core::sign(e.scrollEvent.verticalScroll)); + interface.gcIndex = core::clamp(interface.gcIndex,0ull,m_renderer->getGeometries().size()-1); + } } - } - - // view matrices editor + }, + m_logger.get() + ); + keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { - ImGui::Begin("Matrices"); + if (interface.move) + camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl - auto addMatrixTable = [&](const char* topText, const char* tableName, const int rows, const int columns, const float* pointer, const bool withSeparator = true) + for (const auto& e : events) // here capture { - ImGui::Text(topText); - if (ImGui::BeginTable(tableName, columns)) - { - for (int y = 0; y < rows; ++y) - { - ImGui::TableNextRow(); - for (int x = 0; x < columns; ++x) - { - ImGui::TableSetColumnIndex(x); - ImGui::Text("%.3f", *(pointer + (y * columns) + x)); - } - } - ImGui::EndTable(); - } + if (e.timeStamp < previousEventTimestamp) + continue; - if (withSeparator) - ImGui::Separator(); - }; + previousEventTimestamp = e.timeStamp; + uiEvents.keyboard.emplace_back(e); + } + }, + m_logger.get() + ); + } + camera.endInputProcessing(nextPresentationTimestamp); - addMatrixTable("Model Matrix", "ModelMatrixTable", 3, 4, pass.scene->object.model.pointer()); - addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, view.pointer()); - addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, projection.pointer(), false); + const auto cursorPosition = m_window->getCursorControl()->getPosition(); - ImGui::End(); - } + ext::imgui::UI::SUpdateParameters params = + { + .mousePosition = float32_t2(cursorPosition.x,cursorPosition.y) - 
float32_t2(m_window->getX(),m_window->getY()), + .displaySize = {m_window->getWidth(),m_window->getHeight()}, + .mouseEvents = uiEvents.mouse, + .keyboardEvents = uiEvents.keyboard + }; - // Nabla Imgui backend MDI buffer info - // To be 100% accurate and not overly conservative we'd have to explicitly `cull_frees` and defragment each time, - // so unless you do that, don't use this basic info to optimize the size of your IMGUI buffer. - { - auto* streaminingBuffer = pass.ui.manager->getStreamingBuffer(); + interface.objectName = m_scene->getInitParams().geometryNames[interface.gcIndex]; + interface.imGUI->update(params); + } - const size_t total = streaminingBuffer->get_total_size(); // total memory range size for which allocation can be requested - const size_t freeSize = streaminingBuffer->getAddressAllocator().get_free_size(); // max total free bloock memory size we can still allocate from total memory available - const size_t consumedMemory = total - freeSize; // memory currently consumed by streaming buffer + void recreateFramebuffer(const uint16_t2 resolution) + { + auto createImageAndView = [&](E_FORMAT format)->smart_refctd_ptr + { + auto image = m_device->createImage({{ + .type = IGPUImage::ET_2D, + .samples = IGPUImage::ESCF_1_BIT, + .format = format, + .extent = {resolution.x,resolution.y,1}, + .mipLevels = 1, + .arrayLayers = 1, + .usage = IGPUImage::EUF_RENDER_ATTACHMENT_BIT|IGPUImage::EUF_SAMPLED_BIT + }}); + if (!m_device->allocate(image->getMemoryReqs(),image.get()).isValid()) + return nullptr; + IGPUImageView::SCreationParams params = { + .image = std::move(image), + .viewType = IGPUImageView::ET_2D, + .format = format + }; + params.subresourceRange.aspectMask = isDepthOrStencilFormat(format) ? IGPUImage::EAF_DEPTH_BIT:IGPUImage::EAF_COLOR_BIT; + return m_device->createImageView(std::move(params)); + }; + + smart_refctd_ptr colorView; + // detect window minimization + if (resolution.x<0x4000 && resolution.y<0x4000) + { + colorView = createImageAndView(finalSceneRenderFormat); + auto depthView = createImageAndView(sceneRenderDepthFormat); + m_framebuffer = m_device->createFramebuffer({ { + .renderpass = m_renderpass, + .depthStencilAttachments = &depthView.get(), + .colorAttachments = &colorView.get(), + .width = resolution.x, + .height = resolution.y + }}); + } + else + m_framebuffer = nullptr; - float freePercentage = 100.0f * (float)(freeSize) / (float)total; - float allocatedPercentage = (float)(consumedMemory) / (float)total; + // release previous slot and its image + interface.subAllocDS->multi_deallocate(0,1,&interface.renderColorViewDescIndex,{.semaphore=m_semaphore.get(),.value=m_realFrameIx}); + // + if (colorView) + { + interface.subAllocDS->multi_allocate(0,1,&interface.renderColorViewDescIndex); + // update descriptor set + IGPUDescriptorSet::SDescriptorInfo info = {}; + info.desc = colorView; + info.info.image.imageLayout = IGPUImage::LAYOUT::READ_ONLY_OPTIMAL; + const IGPUDescriptorSet::SWriteDescriptorSet write = { + .dstSet = interface.subAllocDS->getDescriptorSet(), + .binding = TexturesImGUIBindingIndex, + .arrayElement = interface.renderColorViewDescIndex, + .count = 1, + .info = &info + }; + m_device->updateDescriptorSets({&write,1},{}); + } + interface.transformParams.sceneTexDescIx = interface.renderColorViewDescIndex; + } - ImVec2 barSize = ImVec2(400, 30); - float windowPadding = 10.0f; - float verticalPadding = ImGui::GetStyle().FramePadding.y; + inline void beginRenderpass(IGPUCommandBuffer* cb, const IGPUCommandBuffer::SRenderpassBeginInfo& 
info) + { + cb->beginRenderPass(info,IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + cb->setScissor(0,1,&info.renderArea); + const SViewport viewport = { + .x = 0, + .y = 0, + .width = static_cast(info.renderArea.extent.width), + .height = static_cast(info.renderArea.extent.height) + }; + cb->setViewport(0u,1u,&viewport); + } - ImGui::SetNextWindowSize(ImVec2(barSize.x + 2 * windowPadding, 110 + verticalPadding), ImGuiCond_Always); - ImGui::Begin("Nabla Imgui MDI Buffer Info", nullptr, ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoScrollbar); + // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers + constexpr static inline uint32_t MaxFramesInFlight = 3u; + constexpr static inline auto sceneRenderDepthFormat = EF_D32_SFLOAT; + constexpr static inline auto finalSceneRenderFormat = EF_R8G8B8A8_SRGB; + constexpr static inline auto TexturesImGUIBindingIndex = 0u; + // we create the Descriptor Set with a few slots extra to spare, so we don't have to `waitIdle` the device whenever ImGUI virtual window resizes + constexpr static inline auto MaxImGUITextures = 2u+MaxFramesInFlight; + + // + smart_refctd_ptr m_scene; + smart_refctd_ptr m_renderpass; + smart_refctd_ptr m_renderer; + smart_refctd_ptr m_framebuffer; + // + smart_refctd_ptr m_semaphore; + uint64_t m_realFrameIx = 0; + std::array,MaxFramesInFlight> m_cmdBufs; + // + InputSystem::ChannelReader mouse; + InputSystem::ChannelReader keyboard; + // UI stuff + struct CInterface + { + void operator()() + { + ImGuiIO& io = ImGui::GetIO(); - ImGui::Text("Total Allocated Size: %zu bytes", total); - ImGui::Text("In use: %zu bytes", consumedMemory); - ImGui::Text("Buffer Usage:"); + // TODO: why is this a lambda and not just an assignment in a scope ? 
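Aside on the TODO just above: the immediately-invoked lambda passed to `setProjectionMatrix` below only exists so the projection can be produced from branching logic as a single expression and assigned once; a scoped assignment or a small helper function would behave the same. A minimal, self-contained sketch of the pattern (plain illustrative C++, not Nabla's camera/matrix API; the `Projection` struct and scale math are assumptions):

#include <cmath>

// Sketch of the "immediately-invoked lambda" initialization pattern, under assumed conventions.
struct Projection { float scaleX, scaleY, zNear, zFar; };

Projection computeProjection(bool isPerspective, float fovRadians, float aspect, float viewWidth, float zNear, float zFar)
{
    const Projection projection = [&]() -> Projection
    {
        if (isPerspective)
        {
            const float f = 1.f / std::tan(fovRadians * 0.5f); // cot(fov/2), assumed convention
            return { f / aspect, f, zNear, zFar };
        }
        const float viewHeight = viewWidth / aspect; // derive ortho height from width and aspect, as above
        return { 2.f / viewWidth, 2.f / viewHeight, zNear, zFar };
    }(); // invoked immediately, so `projection` can stay const
    return projection;
}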
+ camera.setProjectionMatrix([&]() + { + matrix4SIMD projection; - ImGui::SetCursorPosX(windowPadding); + if (isPerspective) + if(isLH) + projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + else + projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH(core::radians(fov), io.DisplaySize.x / io.DisplaySize.y, zNear, zFar); + else + { + float viewHeight = viewWidth * io.DisplaySize.y / io.DisplaySize.x; - if (freePercentage > 70.0f) - ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(0.0f, 1.0f, 0.0f, 0.4f)); // Green - else if (freePercentage > 30.0f) - ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 1.0f, 0.0f, 0.4f)); // Yellow + if(isLH) + projection = matrix4SIMD::buildProjectionMatrixOrthoLH(viewWidth, viewHeight, zNear, zFar); else - ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 0.0f, 0.0f, 0.4f)); // Red + projection = matrix4SIMD::buildProjectionMatrixOrthoRH(viewWidth, viewHeight, zNear, zFar); + } - ImGui::ProgressBar(allocatedPercentage, barSize, ""); + return projection; + }()); - ImGui::PopStyleColor(); + ImGuizmo::SetOrthographic(false); + ImGuizmo::BeginFrame(); - ImDrawList* drawList = ImGui::GetWindowDrawList(); + ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); - ImVec2 progressBarPos = ImGui::GetItemRectMin(); - ImVec2 progressBarSize = ImGui::GetItemRectSize(); + // create a window and insert the inspector + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); + ImGui::Begin("Editor"); - const char* text = "%.2f%% free"; - char textBuffer[64]; - snprintf(textBuffer, sizeof(textBuffer), text, freePercentage); + if (ImGui::RadioButton("Full view", !transformParams.useWindow)) + transformParams.useWindow = false; - ImVec2 textSize = ImGui::CalcTextSize(textBuffer); - ImVec2 textPos = ImVec2 - ( - progressBarPos.x + (progressBarSize.x - textSize.x) * 0.5f, - progressBarPos.y + (progressBarSize.y - textSize.y) * 0.5f - ); + ImGui::SameLine(); - ImVec4 bgColor = ImGui::GetStyleColorVec4(ImGuiCol_WindowBg); - drawList->AddRectFilled - ( - ImVec2(textPos.x - 5, textPos.y - 2), - ImVec2(textPos.x + textSize.x + 5, textPos.y + textSize.y + 2), - ImGui::GetColorU32(bgColor) - ); + if (ImGui::RadioButton("Window", transformParams.useWindow)) + transformParams.useWindow = true; - ImGui::SetCursorScreenPos(textPos); - ImGui::Text("%s", textBuffer); + ImGui::Text("Camera"); + bool viewDirty = false; - ImGui::Dummy(ImVec2(0.0f, verticalPadding)); + if (ImGui::RadioButton("LH", isLH)) + isLH = true; - ImGui::End(); - } + ImGui::SameLine(); - ImGui::End(); - } - ); + if (ImGui::RadioButton("RH", !isLH)) + isLH = false; - m_winMgr->setWindowSize(m_window.get(), WIN_W, WIN_H); - m_surface->recreateSwapchain(); - m_winMgr->show(m_window.get()); - oracle.reportBeginFrameRecord(); - camera.mapKeysToArrows(); + if (ImGui::RadioButton("Perspective", isPerspective)) + isPerspective = true; - return true; - } + ImGui::SameLine(); - bool updateGUIDescriptorSet() - { - // texture atlas + our scene texture, note we don't create info & write pair for the font sampler because UI extension's is immutable and baked into DS layout - static std::array descriptorInfo; - static IGPUDescriptorSet::SWriteDescriptorSet writes[TexturesAmount]; + if (ImGui::RadioButton("Orthographic", !isPerspective)) + isPerspective = false; - 
descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].desc = core::smart_refctd_ptr(pass.ui.manager->getFontAtlasView()); + ImGui::Checkbox("Enable \"view manipulate\"", &transformParams.enableViewManipulate); + ImGui::Checkbox("Enable camera movement", &move); + ImGui::SliderFloat("Move speed", &moveSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Rotate speed", &rotateSpeed, 0.1f, 10.f); - descriptorInfo[OfflineSceneTextureIx].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - descriptorInfo[OfflineSceneTextureIx].desc = pass.scene->getResources().attachments.color; + // ImGui::Checkbox("Flip Gizmo's Y axis", &flipGizmoY); // let's not expose it to be changed in UI but keep the logic in case - for (uint32_t i = 0; i < descriptorInfo.size(); ++i) - { - writes[i].dstSet = pass.ui.descriptorSet.get(); - writes[i].binding = 0u; - writes[i].arrayElement = i; - writes[i].count = 1u; - } - writes[nbl::ext::imgui::UI::FontAtlasTexId].info = descriptorInfo.data() + nbl::ext::imgui::UI::FontAtlasTexId; - writes[OfflineSceneTextureIx].info = descriptorInfo.data() + OfflineSceneTextureIx; + if (isPerspective) + ImGui::SliderFloat("Fov", &fov, 20.f, 150.f); + else + ImGui::SliderFloat("Ortho width", &viewWidth, 1, 20); - return m_device->updateDescriptorSets(writes, {}); - } + ImGui::SliderFloat("zNear", &zNear, 0.1f, 100.f); + ImGui::SliderFloat("zFar", &zFar, 110.f, 10000.f); - inline void workLoopBody() override - { - // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. - const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); - // We block for semaphores for 2 reasons here: - // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight] - // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. [MaxAcquiresInFlight] - if (m_realFrameIx >= framesInFlight) - { - const ISemaphore::SWaitInfo cbDonePending[] = + viewDirty |= ImGui::SliderFloat("Distance", &transformParams.camDistance, 1.f, 69.f); + + if (viewDirty || firstFrame) { - { - .semaphore = m_semaphore.get(), - .value = m_realFrameIx + 1 - framesInFlight - } - }; - if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) - return; - } + core::vectorSIMDf cameraPosition(cosf(camYAngle)* cosf(camXAngle)* transformParams.camDistance, sinf(camXAngle)* transformParams.camDistance, sinf(camYAngle)* cosf(camXAngle)* transformParams.camDistance); + core::vectorSIMDf cameraTarget(0.f, 0.f, 0.f); + const static core::vectorSIMDf up(0.f, 1.f, 0.f); - const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + camera.setPosition(cameraPosition); + camera.setTarget(cameraTarget); + camera.setBackupUpVector(up); - // CPU events - update(); + camera.recomputeViewMatrix(); + } + firstFrame = false; - // render whole scene to offline frame buffer & submit - pass.scene->begin(); - { - pass.scene->update(); - pass.scene->record(); - pass.scene->end(); - } - pass.scene->submit(); + ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + if (ImGuizmo::IsUsing()) + { + ImGui::Text("Using gizmo"); + } + else + { + ImGui::Text(ImGuizmo::IsOver() ? 
"Over gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::TRANSLATE) ? "Over translate gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::ROTATE) ? "Over rotate gizmo" : ""); + ImGui::SameLine(); + ImGui::Text(ImGuizmo::IsOver(ImGuizmo::SCALE) ? "Over scale gizmo" : ""); + } + ImGui::Separator(); - auto* const cb = m_cmdBufs.data()[resourceIx].get(); - cb->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); - cb->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - cb->beginDebugMarker("UISampleApp IMGUI Frame"); + /* + * ImGuizmo expects view & perspective matrix to be column major both with 4x4 layout + * and Nabla uses row major matricies - 3x4 matrix for view & 4x4 for projection - auto* queue = getGraphicsQueue(); + - VIEW: - asset::SViewport viewport; - { - viewport.minDepth = 1.f; - viewport.maxDepth = 0.f; - viewport.x = 0u; - viewport.y = 0u; - viewport.width = WIN_W; - viewport.height = WIN_H; - } - cb->setViewport(0u, 1u, &viewport); + ImGuizmo - const VkRect2D currentRenderArea = - { - .offset = {0,0}, - .extent = {m_window->getWidth(),m_window->getHeight()} - }; + | X[0] Y[0] Z[0] 0.0f | + | X[1] Y[1] Z[1] 0.0f | + | X[2] Y[2] Z[2] 0.0f | + | -Dot(X, eye) -Dot(Y, eye) -Dot(Z, eye) 1.0f | - IQueue::SSubmitInfo::SCommandBufferInfo commandBuffersInfo[] = {{.cmdbuf = cb }}; + Nabla - // UI render pass - { - auto scRes = static_cast(m_surface->getSwapchainResources()); - const IGPUCommandBuffer::SRenderpassBeginInfo renderpassInfo = - { - .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), - .colorClearValues = &clear.color, - .depthStencilClearValues = nullptr, - .renderArea = currentRenderArea - }; - nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; + | X[0] X[1] X[2] -Dot(X, eye) | + | Y[0] Y[1] Y[2] -Dot(Y, eye) | + | Z[0] Z[1] Z[2] -Dot(Z, eye) | - cb->beginRenderPass(renderpassInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - const auto uiParams = pass.ui.manager->getCreationParameters(); - auto* pipeline = pass.ui.manager->getPipeline(); - cb->bindGraphicsPipeline(pipeline); - cb->bindDescriptorSets(EPBP_GRAPHICS, pipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, &pass.ui.descriptorSet.get()); // note that we use default UI pipeline layout where uiParams.resources.textures.setIx == uiParams.resources.samplers.setIx - - if (!keepRunning()) - return; - - if (!pass.ui.manager->render(cb,waitInfo)) - { - // TODO: need to present acquired image before bailing because its already acquired - return; - } - cb->endRenderPass(); - } - cb->end(); - { - const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = - { - { - .semaphore = m_semaphore.get(), - .value = ++m_realFrameIx, - .stageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT - } - }; + = transpose(nbl::core::matrix4SIMD()) - { - { - const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = - { - { - .semaphore = m_currentImageAcquire.semaphore, - .value = m_currentImageAcquire.acquireCount, - .stageMask = PIPELINE_STAGE_FLAGS::NONE - } - }; - - const IQueue::SSubmitInfo infos[] = - { - { - .waitSemaphores = acquired, - .commandBuffers = commandBuffersInfo, - .signalSemaphores = rendered - } - }; - - const nbl::video::ISemaphore::SWaitInfo waitInfos[] = - { { - .semaphore = pass.scene->semaphore.progress.get(), - .value = pass.scene->semaphore.finishedValue - } }; - - m_device->blockForSemaphores(waitInfos); - - updateGUIDescriptorSet(); - - if (queue->submit(infos) 
!= IQueue::RESULT::SUCCESS) - m_realFrameIx--; - } - } + - PERSPECTIVE [PROJECTION CASE]: - m_window->setCaption("[Nabla Engine] UI App Test Demo"); - m_surface->present(m_currentImageAcquire.imageIndex, rendered); - } - } + ImGuizmo - inline bool keepRunning() override - { - if (m_surface->irrecoverable()) - return false; + | (temp / temp2) (0.0) (0.0) (0.0) | + | (0.0) (temp / temp3) (0.0) (0.0) | + | ((right + left) / temp2) ((top + bottom) / temp3) ((-zfar - znear) / temp4) (-1.0f) | + | (0.0) (0.0) ((-temp * zfar) / temp4) (0.0) | - return true; - } + Nabla - inline bool onAppTerminated() override - { - return device_base_t::onAppTerminated(); - } + | w (0.0) (0.0) (0.0) | + | (0.0) -h (0.0) (0.0) | + | (0.0) (0.0) (-zFar/(zFar-zNear)) (-zNear*zFar/(zFar-zNear)) | + | (0.0) (0.0) (-1.0) (0.0) | - inline void update() - { - camera.setMoveSpeed(moveSpeed); - camera.setRotateSpeed(rotateSpeed); + = transpose() - static std::chrono::microseconds previousEventTimestamp{}; + * + * the ViewManipulate final call (inside EditTransform) returns world space column major matrix for an object, + * note it also modifies input view matrix but projection matrix is immutable + */ - m_inputSystem->getDefaultMouse(&mouse); - m_inputSystem->getDefaultKeyboard(&keyboard); - - auto updatePresentationTimestamp = [&]() - { - m_currentImageAcquire = m_surface->acquireNextImage(); - - oracle.reportEndFrameRecord(); - const auto timestamp = oracle.getNextPresentationTimeStamp(); - oracle.reportBeginFrameRecord(); +// TODO: do all computation using `hlsl::matrix` and its `hlsl::float32_tNxM` aliases + static struct + { + core::matrix4SIMD view, projection, model; + } imguizmoM16InOut; - return timestamp; - }; + ImGuizmo::SetID(0u); - const auto nextPresentationTimestamp = updatePresentationTimestamp(); + imguizmoM16InOut.view = core::transpose(matrix4SIMD(camera.getViewMatrix())); + imguizmoM16InOut.projection = core::transpose(camera.getProjectionMatrix()); + imguizmoM16InOut.model = core::transpose(matrix4SIMD(model)); + { + if (flipGizmoY) // note we allow to flip gizmo just to match our coordinates + imguizmoM16InOut.projection[1][1] *= -1.f; // https://johannesugb.github.io/gpu-programming/why-do-opengl-proj-matrices-fail-in-vulkan/ - struct - { - std::vector mouse{}; - std::vector keyboard{}; - } capturedEvents; + transformParams.editTransformDecomposition = true; + sceneResolution = EditTransform(imguizmoM16InOut.view.pointer(), imguizmoM16InOut.projection.pointer(), imguizmoM16InOut.model.pointer(), transformParams); + } - if (move) camera.beginInputProcessing(nextPresentationTimestamp); - { - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void + model = core::transpose(imguizmoM16InOut.model).extractSub3x4(); + // to Nabla + update camera & model matrices +// TODO: make it more nicely, extract: +// - Position by computing inverse of the view matrix and grabbing its translation +// - Target from 3rd row without W component of view matrix multiplied by some arbitrary distance value (can be the length of position from origin) and adding the position +// But then set the view matrix this way anyway, because up-vector may not be compatible + const auto& view = camera.getViewMatrix(); + const_cast(view) = core::transpose(imguizmoM16InOut.view).extractSub3x4(); // a hack, correct way would be to use inverse matrix and get position + target because now it will bring you back to last position & target when switching from gizmo move to manual move (but from manual to gizmo is ok) + // 
update concatanated matrix + const auto& projection = camera.getProjectionMatrix(); + camera.setProjectionMatrix(projection); + + // object meta display { - if (move) - camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl + ImGui::Begin("Object"); + ImGui::Text("type: \"%s\"", objectName.data()); + ImGui::End(); + } + + // view matrices editor + { + ImGui::Begin("Matrices"); - for (const auto& e : events) // here capture + auto addMatrixTable = [&](const char* topText, const char* tableName, const int rows, const int columns, const float* pointer, const bool withSeparator = true) { - if (e.timeStamp < previousEventTimestamp) - continue; + ImGui::Text(topText); + if (ImGui::BeginTable(tableName, columns)) + { + for (int y = 0; y < rows; ++y) + { + ImGui::TableNextRow(); + for (int x = 0; x < columns; ++x) + { + ImGui::TableSetColumnIndex(x); + ImGui::Text("%.3f", *(pointer + (y * columns) + x)); + } + } + ImGui::EndTable(); + } - previousEventTimestamp = e.timeStamp; - capturedEvents.mouse.emplace_back(e); + if (withSeparator) + ImGui::Separator(); + }; - if (e.type == nbl::ui::SMouseEvent::EET_SCROLL) - gcIndex = std::clamp(int16_t(gcIndex) + int16_t(core::sign(e.scrollEvent.verticalScroll)), int64_t(0), int64_t(OT_COUNT - (uint8_t)1u)); - } - }, m_logger.get()); + addMatrixTable("Model Matrix", "ModelMatrixTable", 3, 4, model.pointer()); + addMatrixTable("Camera View Matrix", "ViewMatrixTable", 3, 4, view.pointer()); + addMatrixTable("Camera View Projection Matrix", "ViewProjectionMatrixTable", 4, 4, projection.pointer(), false); - keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + ImGui::End(); + } + + // Nabla Imgui backend MDI buffer info + // To be 100% accurate and not overly conservative we'd have to explicitly `cull_frees` and defragment each time, + // so unless you do that, don't use this basic info to optimize the size of your IMGUI buffer. 
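To make the caveat above concrete: a suballocated streaming buffer can report a large aggregate free size even when no single free block is big enough for the next request, which is why the usage numbers computed in the block below are only a rough indicator. A standalone illustration with made-up numbers (plain C++, not the Nabla allocator API):

#include <cstdio>
#include <cstddef>

int main()
{
    // hypothetical example values, not queried from any real allocator
    const size_t total = 1u << 20;             // 1 MiB total streaming buffer
    const size_t freeSize = 512u << 10;        // 512 KiB free in total...
    const size_t largestFreeBlock = 64u << 10; // ...but fragmented into small blocks

    const size_t consumed = total - freeSize;
    const float freePercentage = 100.0f * float(freeSize) / float(total);
    // the aggregate numbers look healthy, yet a 128 KiB request would still fail
    std::printf("in use: %zu bytes, free: %.2f%%, largest single free block: %zu bytes\n",
                consumed, freePercentage, largestFreeBlock);
    return 0;
}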
{ - if (move) - camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl + auto* streaminingBuffer = imGUI->getStreamingBuffer(); - for (const auto& e : events) // here capture - { - if (e.timeStamp < previousEventTimestamp) - continue; + const size_t total = streaminingBuffer->get_total_size(); // total memory range size for which allocation can be requested + const size_t freeSize = streaminingBuffer->getAddressAllocator().get_free_size(); // max total free bloock memory size we can still allocate from total memory available + const size_t consumedMemory = total - freeSize; // memory currently consumed by streaming buffer - previousEventTimestamp = e.timeStamp; - capturedEvents.keyboard.emplace_back(e); - } - }, m_logger.get()); - } - if (move) camera.endInputProcessing(nextPresentationTimestamp); + float freePercentage = 100.0f * (float)(freeSize) / (float)total; + float allocatedPercentage = (float)(consumedMemory) / (float)total; - const auto cursorPosition = m_window->getCursorControl()->getPosition(); + ImVec2 barSize = ImVec2(400, 30); + float windowPadding = 10.0f; + float verticalPadding = ImGui::GetStyle().FramePadding.y; - nbl::ext::imgui::UI::SUpdateParameters params = - { - .mousePosition = nbl::hlsl::float32_t2(cursorPosition.x, cursorPosition.y) - nbl::hlsl::float32_t2(m_window->getX(), m_window->getY()), - .displaySize = { m_window->getWidth(), m_window->getHeight() }, - .mouseEvents = { capturedEvents.mouse.data(), capturedEvents.mouse.size() }, - .keyboardEvents = { capturedEvents.keyboard.data(), capturedEvents.keyboard.size() } - }; + ImGui::SetNextWindowSize(ImVec2(barSize.x + 2 * windowPadding, 110 + verticalPadding), ImGuiCond_Always); + ImGui::Begin("Nabla Imgui MDI Buffer Info", nullptr, ImGuiWindowFlags_NoResize | ImGuiWindowFlags_NoScrollbar); - pass.ui.manager->update(params); - } + ImGui::Text("Total Allocated Size: %zu bytes", total); + ImGui::Text("In use: %zu bytes", consumedMemory); + ImGui::Text("Buffer Usage:"); - private: - // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers - constexpr static inline uint32_t MaxFramesInFlight = 3u; + ImGui::SetCursorPosX(windowPadding); - smart_refctd_ptr m_window; - smart_refctd_ptr> m_surface; - smart_refctd_ptr m_pipeline; - smart_refctd_ptr m_semaphore; - smart_refctd_ptr m_cmdPool; - uint64_t m_realFrameIx = 0; - std::array, MaxFramesInFlight> m_cmdBufs; - ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + if (freePercentage > 70.0f) + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(0.0f, 1.0f, 0.0f, 0.4f)); // Green + else if (freePercentage > 30.0f) + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 1.0f, 0.0f, 0.4f)); // Yellow + else + ImGui::PushStyleColor(ImGuiCol_PlotHistogram, ImVec4(1.0f, 0.0f, 0.0f, 0.4f)); // Red - smart_refctd_ptr m_assetManager; - core::smart_refctd_ptr m_inputSystem; - InputSystem::ChannelReader mouse; - InputSystem::ChannelReader keyboard; + ImGui::ProgressBar(allocatedPercentage, barSize, ""); - constexpr static inline auto TexturesAmount = 2u; + ImGui::PopStyleColor(); - core::smart_refctd_ptr m_descriptorSetPool; + ImDrawList* drawList = ImGui::GetWindowDrawList(); - struct C_UI - { - nbl::core::smart_refctd_ptr manager; + ImVec2 progressBarPos = ImGui::GetItemRectMin(); + ImVec2 progressBarSize = ImGui::GetItemRectSize(); - struct - { - core::smart_refctd_ptr gui, scene; - } samplers; + const char* text = "%.2f%% free"; + char 
textBuffer[64]; + snprintf(textBuffer, sizeof(textBuffer), text, freePercentage); - core::smart_refctd_ptr descriptorSet; - }; + ImVec2 textSize = ImGui::CalcTextSize(textBuffer); + ImVec2 textPos = ImVec2 + ( + progressBarPos.x + (progressBarSize.x - textSize.x) * 0.5f, + progressBarPos.y + (progressBarSize.y - textSize.y) * 0.5f + ); - struct E_APP_PASS - { - nbl::core::smart_refctd_ptr scene; - C_UI ui; - } pass; + ImVec4 bgColor = ImGui::GetStyleColorVec4(ImGuiCol_WindowBg); + drawList->AddRectFilled + ( + ImVec2(textPos.x - 5, textPos.y - 2), + ImVec2(textPos.x + textSize.x + 5, textPos.y + textSize.y + 2), + ImGui::GetColorU32(bgColor) + ); - Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); - video::CDumbPresentationOracle oracle; + ImGui::SetCursorScreenPos(textPos); + ImGui::Text("%s", textBuffer); - uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed + ImGui::Dummy(ImVec2(0.0f, verticalPadding)); - TransformRequestParams transformParams; - bool isPerspective = true, isLH = true, flipGizmoY = true, move = false; - float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; - float viewWidth = 10.f; - float camYAngle = 165.f / 180.f * 3.14159f; - float camXAngle = 32.f / 180.f * 3.14159f; + ImGui::End(); + } + + ImGui::End(); + } - bool firstFrame = true; + smart_refctd_ptr imGUI; + // descriptor set + smart_refctd_ptr subAllocDS; + SubAllocatedDescriptorSet::value_type renderColorViewDescIndex = SubAllocatedDescriptorSet::invalid_value; + // + Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + // mutables + core::matrix3x4SIMD model; + std::string_view objectName; + TransformRequestParams transformParams; + uint16_t2 sceneResolution = {1280,720}; + float fov = 60.f, zNear = 0.1f, zFar = 10000.f, moveSpeed = 1.f, rotateSpeed = 1.f; + float viewWidth = 10.f; + float camYAngle = 165.f / 180.f * 3.14159f; + float camXAngle = 32.f / 180.f * 3.14159f; + uint16_t gcIndex = {}; // note: this is dirty however since I assume only single object in scene I can leave it now, when this example is upgraded to support multiple objects this needs to be changed + bool isPerspective = true, isLH = true, flipGizmoY = true, move = false; + bool firstFrame = true; + } interface; }; NBL_MAIN_FUNC(UISampleApp) \ No newline at end of file diff --git a/62_CAD/CTriangleMesh.cpp b/62_CAD/CTriangleMesh.cpp new file mode 100644 index 000000000..5564c0a51 --- /dev/null +++ b/62_CAD/CTriangleMesh.cpp @@ -0,0 +1 @@ +#include "CTriangleMesh.h" \ No newline at end of file diff --git a/62_CAD/CTriangleMesh.h b/62_CAD/CTriangleMesh.h new file mode 100644 index 000000000..78f7dd99f --- /dev/null +++ b/62_CAD/CTriangleMesh.h @@ -0,0 +1,137 @@ +#pragma once + +#include +#include +#include "shaders/globals.hlsl" + +using namespace nbl; + +struct DTMHeightShadingSettingsInfo +{ + // Height Shading Mode + E_HEIGHT_SHADING_MODE heightShadingMode; + + // Used as fixed interval length for "DISCRETE_FIXED_LENGTH_INTERVALS" shading mode + float intervalLength; + + // Converts an interval index to its corresponding height value + // For example, if this value is 10.0, then an interval index of 2 corresponds to a height of 20.0. + // This computed height is later used to determine the interpolated color for shading. 
+ // It makes sense for this variable to be always equal to `intervalLength` but sometimes it's a different scaling so that last index corresponds to largestHeight + float intervalIndexToHeightMultiplier; + + // Used for "DISCRETE_FIXED_LENGTH_INTERVALS" shading mode + // If `isCenteredShading` is true, the intervals are centered around `minHeight`, meaning the + // first interval spans [minHeight - intervalLength / 2.0, minHeight + intervalLength / 2.0]. + // Otherwise, intervals are aligned from `minHeight` upward, so the first interval spans + // [minHeight, minHeight + intervalLength]. + bool isCenteredShading; + + void addHeightColorMapEntry(float height, float32_t4 color) + { + heightColorSet.emplace(height, color); + } + + bool fillShaderDTMSettingsHeightColorMap(DTMSettings& dtmSettings) const + { + const uint32_t mapSize = heightColorSet.size(); + if (mapSize > DTMHeightShadingSettings::HeightColorMapMaxEntries) + return false; + dtmSettings.heightShadingSettings.heightColorEntryCount = mapSize; + + int index = 0; + for (auto it = heightColorSet.begin(); it != heightColorSet.end(); ++it) + { + dtmSettings.heightShadingSettings.heightColorMapHeights[index] = it->height; + dtmSettings.heightShadingSettings.heightColorMapColors[index] = it->color; + ++index; + } + + return true; + } + +private: + struct HeightColor + { + float height; + float32_t4 color; + + bool operator<(const HeightColor& other) const + { + return height < other.height; + } + }; + + std::set heightColorSet; +}; + +struct DTMContourSettingsInfo +{ + LineStyleInfo lineStyleInfo; + + float startHeight; + float endHeight; + float heightInterval; +}; + +struct DTMSettingsInfo +{ + static constexpr uint32_t MaxContourSettings = DTMSettings::MaxContourSettings; + + uint32_t mode = 0u; // related to E_DTM_MODE + + // outline + LineStyleInfo outlineStyleInfo; + // contours + uint32_t contourSettingsCount = 0u; + DTMContourSettingsInfo contourSettings[MaxContourSettings]; + // height shading + DTMHeightShadingSettingsInfo heightShadingInfo; +}; + +class CTriangleMesh final +{ +public: + using index_t = uint32_t; + using vertex_t = TriangleMeshVertex; + + inline void setVertices(core::vector&& vertices) + { + m_vertices = std::move(vertices); + } + inline void setIndices(core::vector&& indices) + { + m_indices = std::move(indices); + } + + inline const core::vector& getVertices() const + { + return m_vertices; + } + inline const core::vector& getIndices() const + { + return m_indices; + } + + inline size_t getVertexBuffByteSize() const + { + return sizeof(vertex_t) * m_vertices.size(); + } + inline size_t getIndexBuffByteSize() const + { + return sizeof(index_t) * m_indices.size(); + } + inline size_t getIndexCount() const + { + return m_indices.size(); + } + + inline void clear() + { + m_vertices.clear(); + m_indices.clear(); + } + + core::vector m_vertices; + core::vector m_indices; +}; \ No newline at end of file diff --git a/62_CAD/DrawResourcesFiller.cpp b/62_CAD/DrawResourcesFiller.cpp index 7cf96d693..ec5058232 100644 --- a/62_CAD/DrawResourcesFiller.cpp +++ b/62_CAD/DrawResourcesFiller.cpp @@ -3,10 +3,13 @@ DrawResourcesFiller::DrawResourcesFiller() {} -DrawResourcesFiller::DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue) : - m_utilities(utils), - m_copyQueue(copyQueue) -{} +DrawResourcesFiller::DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue, core::smart_refctd_ptr&& logger) : + m_utilities(std::move(utils)), + m_copyQueue(copyQueue), + m_logger(std::move(logger)) +{ + imagesCache = 
std::unique_ptr(new ImagesCache(ImagesBindingArraySize)); +} // function is called when buffer is filled and we should submit draws and clear the buffers and continue filling @@ -15,116 +18,148 @@ void DrawResourcesFiller::setSubmitDrawsFunction(const SubmitFunc& func) submitDraws = func; } -void DrawResourcesFiller::allocateIndexBuffer(ILogicalDevice* logicalDevice, uint32_t maxIndices) +// DrawResourcesFiller needs to access these in order to allocate GPUImages and write them to their correct descriptor set binding +void DrawResourcesFiller::setTexturesDescriptorSetAndBinding(core::smart_refctd_ptr&& descriptorSet, uint32_t binding) +{ + imagesArrayBinding = binding; + suballocatedDescriptorSet = core::make_smart_refctd_ptr(std::move(descriptorSet)); +} + +bool DrawResourcesFiller::allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize) { - maxIndexCount = maxIndices; - const size_t indexBufferSize = maxIndices * sizeof(index_buffer_type); - auto indexBuffer = ICPUBuffer::create({ indexBufferSize }); + // single memory allocation sectioned into images+buffers (images start at offset=0) + const size_t adjustedImagesMemorySize = core::alignUp(requiredImageMemorySize, GPUStructsMaxNaturalAlignment); + const size_t adjustedBuffersMemorySize = core::max(requiredBufferMemorySize, getMinimumRequiredResourcesBufferSize()); + const size_t totalResourcesSize = adjustedImagesMemorySize + adjustedBuffersMemorySize; + + IGPUBuffer::SCreationParams resourcesBufferCreationParams = {}; + resourcesBufferCreationParams.size = adjustedBuffersMemorySize; + resourcesBufferCreationParams.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDEX_BUFFER_BIT; + resourcesGPUBuffer = logicalDevice->createBuffer(std::move(resourcesBufferCreationParams)); + resourcesGPUBuffer->setObjectDebugName("drawResourcesBuffer"); + + IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = resourcesGPUBuffer->getMemoryReqs(); + + nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements gpuBufferMemoryReqs = resourcesGPUBuffer->getMemoryReqs(); + const bool memoryRequirementsMatch = + (logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits() & gpuBufferMemoryReqs.memoryTypeBits) != 0 && // should have device local memory compatible + (gpuBufferMemoryReqs.requiresDedicatedAllocation == false); // should not require dedicated allocation - index_buffer_type* indices = reinterpret_cast(indexBuffer->getPointer()); - for (uint32_t i = 0u; i < maxIndices / 6u; ++i) + if (!memoryRequirementsMatch) { - index_buffer_type objIndex = i; - indices[i * 6] = objIndex * 4u + 1u; - indices[i * 6 + 1u] = objIndex * 4u + 0u; - indices[i * 6 + 2u] = objIndex * 4u + 2u; - - indices[i * 6 + 3u] = objIndex * 4u + 1u; - indices[i * 6 + 4u] = objIndex * 4u + 2u; - indices[i * 6 + 5u] = objIndex * 4u + 3u; + m_logger.log("Shouldn't happen: Buffer Memory Requires Dedicated Allocation or can't bind to device local memory.", nbl::system::ILogger::ELL_ERROR); + return false; } + + const auto& memoryProperties = logicalDevice->getPhysicalDevice()->getMemoryProperties(); - IGPUBuffer::SCreationParams indexBufferCreationParams = {}; - indexBufferCreationParams.size = indexBufferSize; - indexBufferCreationParams.usage = IGPUBuffer::EUF_INDEX_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; + uint32_t memoryTypeIdx = ~0u; - m_utilities->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{.queue=m_copyQueue},
std::move(indexBufferCreationParams), indices).move_into(gpuDrawBuffers.indexBuffer); - gpuDrawBuffers.indexBuffer->setObjectDebugName("indexBuffer"); -} + video::IDeviceMemoryAllocator::SAllocation allocation = {}; + for (uint32_t i = 0u; i < memoryProperties.memoryTypeCount; ++i) + { + if (memoryProperties.memoryTypes[i].propertyFlags.hasFlags(IDeviceMemoryAllocation::EMPF_DEVICE_LOCAL_BIT)) + { + memoryTypeIdx = i; -void DrawResourcesFiller::allocateMainObjectsBuffer(ILogicalDevice* logicalDevice, uint32_t mainObjects) -{ - maxMainObjects = mainObjects; - size_t mainObjectsBufferSize = maxMainObjects * sizeof(MainObject); + IDeviceMemoryAllocator::SAllocateInfo allocationInfo = + { + .size = totalResourcesSize, + .flags = IDeviceMemoryAllocation::E_MEMORY_ALLOCATE_FLAGS::EMAF_DEVICE_ADDRESS_BIT, // for the buffers + .memoryTypeIndex = memoryTypeIdx, + .dedication = nullptr, + }; + + allocation = logicalDevice->allocate(allocationInfo); + + if (allocation.isValid()) + break; + } + } - IGPUBuffer::SCreationParams mainObjectsCreationParams = {}; - mainObjectsCreationParams.size = mainObjectsBufferSize; - mainObjectsCreationParams.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; - gpuDrawBuffers.mainObjectsBuffer = logicalDevice->createBuffer(std::move(mainObjectsCreationParams)); - gpuDrawBuffers.mainObjectsBuffer->setObjectDebugName("mainObjectsBuffer"); + if (memoryTypeIdx == ~0u) + { + m_logger.log("allocateResourcesBuffer: no device local memory type found!", nbl::system::ILogger::ELL_ERROR); + return false; + } - IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = gpuDrawBuffers.mainObjectsBuffer->getMemoryReqs(); - memReq.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto mainObjectsBufferMem = logicalDevice->allocate(memReq, gpuDrawBuffers.mainObjectsBuffer.get()); + if (!allocation.isValid()) + return false; - cpuDrawBuffers.mainObjectsBuffer = ICPUBuffer::create({ mainObjectsBufferSize }); -} + imagesMemoryArena = { + .memory = allocation.memory, + .offset = allocation.offset, + }; -void DrawResourcesFiller::allocateDrawObjectsBuffer(ILogicalDevice* logicalDevice, uint32_t drawObjects) -{ - maxDrawObjects = drawObjects; - size_t drawObjectsBufferSize = maxDrawObjects * sizeof(DrawObject); + buffersMemoryArena = { + .memory = allocation.memory, + .offset = core::alignUp(allocation.offset + adjustedImagesMemorySize, GPUStructsMaxNaturalAlignment), // first natural alignment after images section of the memory allocation + }; + + imagesMemorySubAllocator = core::make_smart_refctd_ptr(adjustedImagesMemorySize); - IGPUBuffer::SCreationParams drawObjectsCreationParams = {}; - drawObjectsCreationParams.size = drawObjectsBufferSize; - drawObjectsCreationParams.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; - gpuDrawBuffers.drawObjectsBuffer = logicalDevice->createBuffer(std::move(drawObjectsCreationParams)); - gpuDrawBuffers.drawObjectsBuffer->setObjectDebugName("drawObjectsBuffer"); + video::ILogicalDevice::SBindBufferMemoryInfo bindBufferMemory = { + .buffer = resourcesGPUBuffer.get(), + .binding = { + .memory = buffersMemoryArena.memory.get(), + .offset = buffersMemoryArena.offset, + } + }; - IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = gpuDrawBuffers.drawObjectsBuffer->getMemoryReqs(); - memReq.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto drawObjectsBufferMem = logicalDevice->allocate(memReq, 
gpuDrawBuffers.drawObjectsBuffer.get()); + if (!logicalDevice->bindBufferMemory(1, &bindBufferMemory)) + { + m_logger.log("DrawResourcesFiller::allocateDrawResources, bindBufferMemory failed.", nbl::system::ILogger::ELL_ERROR); + return false; + } - cpuDrawBuffers.drawObjectsBuffer = ICPUBuffer::create({ drawObjectsBufferSize }); + return true; } -void DrawResourcesFiller::allocateGeometryBuffer(ILogicalDevice* logicalDevice, size_t size) +bool DrawResourcesFiller::allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, uint32_t reductionPercent, uint32_t maxTries) { - maxGeometryBufferSize = size; - - IGPUBuffer::SCreationParams geometryCreationParams = {}; - geometryCreationParams.size = size; - geometryCreationParams.usage = bitflag(IGPUBuffer::EUF_STORAGE_BUFFER_BIT) | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; - gpuDrawBuffers.geometryBuffer = logicalDevice->createBuffer(std::move(geometryCreationParams)); - gpuDrawBuffers.geometryBuffer->setObjectDebugName("geometryBuffer"); + const size_t minimumAcceptableSize = core::max(MinimumDrawResourcesMemorySize, getMinimumRequiredResourcesBufferSize()); - IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = gpuDrawBuffers.geometryBuffer->getMemoryReqs(); - memReq.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto geometryBufferMem = logicalDevice->allocate(memReq, gpuDrawBuffers.geometryBuffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - geometryBufferAddress = gpuDrawBuffers.geometryBuffer->getDeviceAddress(); + size_t currentBufferSize = maxBufferMemorySize; + size_t currentImageSize = maxImageMemorySize; + const size_t totalInitialSize = currentBufferSize + currentImageSize; - cpuDrawBuffers.geometryBuffer = ICPUBuffer::create({ size }); -} - -void DrawResourcesFiller::allocateStylesBuffer(ILogicalDevice* logicalDevice, uint32_t lineStylesCount) -{ + // If initial size is less than minimum acceptable then increase the buffer and image size to sum up to minimumAcceptableSize with image:buffer ratios preserved + if (totalInitialSize < minimumAcceptableSize) { - maxLineStyles = lineStylesCount; - size_t lineStylesBufferSize = lineStylesCount * sizeof(LineStyle); - - IGPUBuffer::SCreationParams lineStylesCreationParams = {}; - lineStylesCreationParams.size = lineStylesBufferSize; - lineStylesCreationParams.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT; - gpuDrawBuffers.lineStylesBuffer = logicalDevice->createBuffer(std::move(lineStylesCreationParams)); - gpuDrawBuffers.lineStylesBuffer->setObjectDebugName("lineStylesBuffer"); + // Preserve ratio: R = buffer / (buffer + image) + // scaleFactor = minimumAcceptableSize / totalInitialSize; + const double scaleFactor = static_cast(minimumAcceptableSize) / totalInitialSize; + currentBufferSize = static_cast(currentBufferSize * scaleFactor); + currentImageSize = minimumAcceptableSize - currentBufferSize; // ensures exact sum + } - IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = gpuDrawBuffers.lineStylesBuffer->getMemoryReqs(); - memReq.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - auto stylesBufferMem = logicalDevice->allocate(memReq, gpuDrawBuffers.lineStylesBuffer.get()); + uint32_t numTries = 0u; + while ((currentBufferSize + currentImageSize) >= minimumAcceptableSize && numTries < maxTries) + { + if (allocateDrawResources(logicalDevice, 
currentImageSize, currentBufferSize)) + return true; - cpuDrawBuffers.lineStylesBuffer = ICPUBuffer::create({ lineStylesBufferSize }); + currentBufferSize = (currentBufferSize * (100 - reductionPercent)) / 100; + currentImageSize = (currentImageSize * (100 - reductionPercent)) / 100; + numTries++; + m_logger.log("Allocation of memory for images(%zu) and buffers(%zu) failed; Reducing allocation size by %u%% and retrying...", system::ILogger::ELL_WARNING, currentImageSize, currentBufferSize, reductionPercent); } + + m_logger.log("All attempts to allocate memory for images(%zu) and buffers(%zu) failed.", system::ILogger::ELL_ERROR, currentImageSize, currentBufferSize); + return false; } -void DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent) +bool DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent) { - msdfLRUCache = std::unique_ptr(new MSDFsLRUCache(maxMSDFs)); - msdfTextureArrayIndexAllocator = core::make_smart_refctd_ptr(core::smart_refctd_ptr(logicalDevice), maxMSDFs); - + // TODO: Make this function failable and report insufficient memory asset::E_FORMAT msdfFormat = MSDFTextureFormat; asset::VkExtent3D MSDFsExtent = { msdfsExtent.x, msdfsExtent.y, 1u }; - assert(maxMSDFs <= logicalDevice->getPhysicalDevice()->getLimits().maxImageArrayLayers); - + if (maxMSDFs > logicalDevice->getPhysicalDevice()->getLimits().maxImageArrayLayers) + { + m_logger.log("requested maxMSDFs is greater than maxImageArrayLayers. lowering the limit...", nbl::system::ILogger::ELL_WARNING); + maxMSDFs = logicalDevice->getPhysicalDevice()->getLimits().maxImageArrayLayers; + } + IPhysicalDevice::SImageFormatPromotionRequest promotionRequest = {}; promotionRequest.originalFormat = msdfFormat; promotionRequest.usages = {}; @@ -146,7 +181,10 @@ void DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, ui auto image = logicalDevice->createImage(std::move(imgInfo)); auto imageMemReqs = image->getMemoryReqs(); imageMemReqs.memoryTypeBits &= logicalDevice->getPhysicalDevice()->getDeviceLocalMemoryTypeBits(); - logicalDevice->allocate(imageMemReqs, image.get()); + const auto allocation = logicalDevice->allocate(imageMemReqs, image.get()); + + if (!allocation.isValid()) + return false; image->setObjectDebugName("MSDFs Texture Array"); @@ -163,6 +201,14 @@ void DrawResourcesFiller::allocateMSDFTextures(ILogicalDevice* logicalDevice, ui msdfTextureArray = logicalDevice->createImageView(std::move(imgViewInfo)); } + + if (!msdfTextureArray) + return false; + + msdfLRUCache = std::unique_ptr(new MSDFsLRUCache(maxMSDFs)); + msdfTextureArrayIndexAllocator = core::make_smart_refctd_ptr(core::smart_refctd_ptr(logicalDevice), maxMSDFs); + msdfImagesState.resize(maxMSDFs); + return true; } void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, SIntendedSubmitInfo& intendedNextSubmit) @@ -170,18 +216,33 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, const Line if (!lineStyleInfo.isVisible()) return; - uint32_t styleIdx = addLineStyle_SubmitIfNeeded(lineStyleInfo, intendedNextSubmit); + setActiveLineStyle(lineStyleInfo); + + beginMainObject(MainObjectType::POLYLINE, TransformationType::TT_NORMAL); + drawPolyline(polyline, intendedNextSubmit); + endMainObject(); +} - uint32_t mainObjIdx = addMainObject_SubmitIfNeeded(styleIdx, intendedNextSubmit); +void DrawResourcesFiller::drawFixedGeometryPolyline(const
CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, const float64_t3x3& transformation, TransformationType transformationType, SIntendedSubmitInfo& intendedNextSubmit) +{ + if (!lineStyleInfo.isVisible()) + return; - drawPolyline(polyline, mainObjIdx, intendedNextSubmit); + setActiveLineStyle(lineStyleInfo); + + pushCustomProjection(getFixedGeometryFinalTransformationMatrix(transformation, transformationType)); + beginMainObject(MainObjectType::POLYLINE, transformationType); + drawPolyline(polyline, intendedNextSubmit); + endMainObject(); + popCustomProjection(); } -void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, uint32_t polylineMainObjIdx, SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, SIntendedSubmitInfo& intendedNextSubmit) { - if (polylineMainObjIdx == InvalidMainObjectIdx) + uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjectIdx == InvalidMainObjectIdx) { - // TODO: assert or log error here + m_logger.log("drawPolyline: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); assert(false); return; } @@ -194,7 +255,7 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, uint32_t p while (currentSectionIdx < sectionsCount) { const auto& currentSection = polyline.getSectionInfoAt(currentSectionIdx); - addPolylineObjects_Internal(polyline, currentSection, currentObjectInSection, polylineMainObjIdx); + addPolylineObjects_Internal(polyline, currentSection, currentObjectInSection, mainObjectIdx); if (currentObjectInSection >= currentSection.count) { @@ -202,7 +263,7 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, uint32_t p currentObjectInSection = 0u; } else - submitCurrentDrawObjectsAndReset(intendedNextSubmit, polylineMainObjIdx); + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjectIdx); } if (!polyline.getConnectors().empty()) @@ -210,14 +271,92 @@ void DrawResourcesFiller::drawPolyline(const CPolylineBase& polyline, uint32_t p uint32_t currentConnectorPolylineObject = 0u; while (currentConnectorPolylineObject < polyline.getConnectors().size()) { - addPolylineConnectors_Internal(polyline, currentConnectorPolylineObject, polylineMainObjIdx); + addPolylineConnectors_Internal(polyline, currentConnectorPolylineObject, mainObjectIdx); if (currentConnectorPolylineObject < polyline.getConnectors().size()) - submitCurrentDrawObjectsAndReset(intendedNextSubmit, polylineMainObjIdx); + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjectIdx); } } } +void DrawResourcesFiller::drawTriangleMesh( + const CTriangleMesh& mesh, + const DTMSettingsInfo& dtmSettingsInfo, + SIntendedSubmitInfo& intendedNextSubmit) +{ + flushDrawObjects(); // flushes draw call construction of any possible draw objects before dtm, because currently we're sepaerating dtm draw calls from drawObj draw calls + + setActiveDTMSettings(dtmSettingsInfo); + beginMainObject(MainObjectType::DTM); + + uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjectIdx == InvalidMainObjectIdx) + { + m_logger.log("drawTriangleMesh: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } + + DrawCallData drawCallData = {}; + drawCallData.isDTMRendering = true; + + ICPUBuffer::SCreationParams geometryBuffParams; + + // concatenate the index and vertex buffer into the geometry buffer + const 
auto& indexBuffer = mesh.getIndices(); + const auto& vertexBuffer = mesh.getVertices(); + assert(indexBuffer.size() == vertexBuffer.size()); // We don't have any vertex re-use due to other limitations at the moemnt. + + + const uint32_t numTriangles = indexBuffer.size() / 3u; + uint32_t trianglesUploaded = 0; + while (trianglesUploaded < numTriangles) + { + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const uint32_t maxUploadableVertices = remainingResourcesSize / (sizeof(CTriangleMesh::vertex_t) + sizeof(CTriangleMesh::index_t)); + const uint32_t maxUploadableTriangles = maxUploadableVertices / 3u; + const uint32_t remainingTrianglesToUpload = numTriangles - trianglesUploaded; + const uint32_t trianglesToUpload = core::min(remainingTrianglesToUpload, maxUploadableTriangles); + const size_t vtxBuffByteSize = trianglesToUpload * 3u * sizeof(CTriangleMesh::vertex_t); + const size_t indexBuffByteSize = trianglesToUpload * 3u * sizeof(CTriangleMesh::index_t); + const size_t trianglesToUploadByteSize = vtxBuffByteSize + indexBuffByteSize; + + // Copy VertexBuffer + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(trianglesToUploadByteSize, alignof(CTriangleMesh::vertex_t)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + // the actual bda address will be determined only after all copies are finalized, later we will do += `baseBDAAddress + geometryInfo.bufferOffset` + // the - is a small hack because index buffer grows but vertex buffer needs to start from 0, remove that once we either get rid of the index buffer or implement an algorithm that can have vertex reuse + drawCallData.dtm.triangleMeshVerticesBaseAddress = geometryBufferOffset - (sizeof(CTriangleMesh::vertex_t) * trianglesUploaded * 3); + memcpy(dst, &vertexBuffer[trianglesUploaded * 3u], vtxBuffByteSize); + geometryBufferOffset += vtxBuffByteSize; + + // Copy IndexBuffer + dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + drawCallData.dtm.indexBufferOffset = geometryBufferOffset; + memcpy(dst, &indexBuffer[trianglesUploaded * 3u], indexBuffByteSize); + geometryBufferOffset += indexBuffByteSize; + + trianglesUploaded += trianglesToUpload; + + drawCallData.dtm.triangleMeshMainObjectIndex = mainObjectIdx; + drawCallData.dtm.indexCount = trianglesToUpload * 3u; + drawCalls.push_back(drawCallData); + + //if (trianglesUploaded == 0u) + //{ + // m_logger.log("drawTriangleMesh: not enough vram allocation for a single triangle!", nbl::system::ILogger::ELL_ERROR); + // assert(false); + // break; + //} + + // Requires Auto-Submit If All Triangles of the Mesh couldn't fit into Memory + if (trianglesUploaded < numTriangles) + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjectIdx); + } + + endMainObject(); +} + // TODO[Erfan]: Makes more sense if parameters are: solidColor + fillPattern + patternColor void DrawResourcesFiller::drawHatch( const Hatch& hatch, @@ -226,10 +365,8 @@ void DrawResourcesFiller::drawHatch( const HatchFillPattern fillPattern, SIntendedSubmitInfo& intendedNextSubmit) { - // TODO[Optimization Idea]: don't draw hatch twice if both colors are visible: instead do the msdf inside the alpha resolve by detecting mainObj being a hatch - // https://discord.com/channels/593902898015109131/856835291712716820/1228337893366300743 - // TODO: Come back to this idea when doing color resolve for ecws (they don't have mainObj/style Index, instead they have uv into a texture - + // TODO[Optimization Idea]: 
don't draw hatch twice, we now have color storage buffer and we can treat rendering hatches like a procedural texture (requires 2 colors so no more abusing of linestyle for hatches) + // if backgroundColor is visible drawHatch(hatch, backgroundColor, intendedNextSubmit); // if foregroundColor is visible @@ -241,38 +378,92 @@ void DrawResourcesFiller::drawHatch( const float32_t4& color, const HatchFillPattern fillPattern, SIntendedSubmitInfo& intendedNextSubmit) +{ + drawHatch_impl(hatch, color, fillPattern, intendedNextSubmit); +} + +void DrawResourcesFiller::drawHatch(const Hatch& hatch, const float32_t4& color, SIntendedSubmitInfo& intendedNextSubmit) +{ + drawHatch(hatch, color, HatchFillPattern::SOLID_FILL, intendedNextSubmit); +} + +void DrawResourcesFiller::drawFixedGeometryHatch( + const Hatch& hatch, + const float32_t4& foregroundColor, + const float32_t4& backgroundColor, + const HatchFillPattern fillPattern, + const float64_t3x3& transformation, + TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit) +{ + // TODO[Optimization Idea]: don't draw hatch twice, we now have color storage buffer and we can treat rendering hatches like a procedural texture (requires 2 colors so no more abusing of linestyle for hatches) + + // if backgroundColor is visible + drawFixedGeometryHatch(hatch, backgroundColor, transformation, transformationType, intendedNextSubmit); + // if foregroundColor is visible + drawFixedGeometryHatch(hatch, foregroundColor, fillPattern, transformation, transformationType, intendedNextSubmit); +} + +void DrawResourcesFiller::drawFixedGeometryHatch( + const Hatch& hatch, + const float32_t4& color, + const HatchFillPattern fillPattern, + const float64_t3x3& transformation, + TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit) +{ + pushCustomProjection(getFixedGeometryFinalTransformationMatrix(transformation, transformationType)); + drawHatch_impl(hatch, color, fillPattern, intendedNextSubmit, transformationType); + popCustomProjection(); +} + +void DrawResourcesFiller::drawFixedGeometryHatch( + const Hatch& hatch, + const float32_t4& color, + const float64_t3x3& transformation, + TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit) +{ + drawFixedGeometryHatch(hatch, color, HatchFillPattern::SOLID_FILL, transformation, transformationType, intendedNextSubmit); +} + +void DrawResourcesFiller::drawHatch_impl( + const Hatch& hatch, + const float32_t4& color, + const HatchFillPattern fillPattern, + SIntendedSubmitInfo& intendedNextSubmit, + TransformationType transformationType) { if (color.a == 0.0f) // not visible return; - uint32_t textureIdx = InvalidTextureIdx; + uint32_t textureIdx = InvalidTextureIndex; if (fillPattern != HatchFillPattern::SOLID_FILL) { MSDFInputInfo msdfInfo = MSDFInputInfo(fillPattern); textureIdx = getMSDFIndexFromInputInfo(msdfInfo, intendedNextSubmit); - if (textureIdx == InvalidTextureIdx) - textureIdx = addMSDFTexture(msdfInfo, getHatchFillPatternMSDF(fillPattern), InvalidMainObjectIdx, intendedNextSubmit); - _NBL_DEBUG_BREAK_IF(textureIdx == InvalidTextureIdx); // probably getHatchFillPatternMSDF returned nullptr + if (textureIdx == InvalidTextureIndex) + textureIdx = addMSDFTexture(msdfInfo, getHatchFillPatternMSDF(fillPattern), intendedNextSubmit); + _NBL_DEBUG_BREAK_IF(textureIdx == InvalidTextureIndex); // probably getHatchFillPatternMSDF returned nullptr } LineStyleInfo lineStyle = {}; lineStyle.color = color; lineStyle.screenSpaceLineWidth = 
nbl::hlsl::bit_cast(textureIdx); - const uint32_t styleIdx = addLineStyle_SubmitIfNeeded(lineStyle, intendedNextSubmit); - uint32_t mainObjIdx = addMainObject_SubmitIfNeeded(styleIdx, intendedNextSubmit); - uint32_t currentObjectInSection = 0u; // Object here refers to DrawObject used in vertex shader. You can think of it as a Cage. + setActiveLineStyle(lineStyle); + beginMainObject(MainObjectType::HATCH, transformationType); + + uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + uint32_t currentObjectInSection = 0u; // Object here refers to DrawObject. You can think of it as a Cage. while (currentObjectInSection < hatch.getHatchBoxCount()) { - addHatch_Internal(hatch, currentObjectInSection, mainObjIdx); + addHatch_Internal(hatch, currentObjectInSection, mainObjectIdx); if (currentObjectInSection < hatch.getHatchBoxCount()) - submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjectIdx); } -} -void DrawResourcesFiller::drawHatch(const Hatch& hatch, const float32_t4& color, SIntendedSubmitInfo& intendedNextSubmit) -{ - drawHatch(hatch, color, HatchFillPattern::SOLID_FILL, intendedNextSubmit); + endMainObject(); } void DrawResourcesFiller::drawFontGlyph( @@ -282,193 +473,752 @@ void DrawResourcesFiller::drawFontGlyph( float32_t2 dirU, float32_t aspectRatio, float32_t2 minUV, - uint32_t mainObjIdx, SIntendedSubmitInfo& intendedNextSubmit) { - uint32_t textureIdx = InvalidTextureIdx; + uint32_t textureIdx = InvalidTextureIndex; const MSDFInputInfo msdfInput = MSDFInputInfo(fontFace->getHash(), glyphIdx); textureIdx = getMSDFIndexFromInputInfo(msdfInput, intendedNextSubmit); - if (textureIdx == InvalidTextureIdx) - textureIdx = addMSDFTexture(msdfInput, getGlyphMSDF(fontFace, glyphIdx), mainObjIdx, intendedNextSubmit); + if (textureIdx == InvalidTextureIndex) + textureIdx = addMSDFTexture(msdfInput, getGlyphMSDF(fontFace, glyphIdx), intendedNextSubmit); - if (textureIdx != InvalidTextureIdx) + uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjIdx == InvalidMainObjectIdx) + { + m_logger.log("drawFontGlyph: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } + + if (textureIdx != InvalidTextureIndex) { GlyphInfo glyphInfo = GlyphInfo(topLeft, dirU, aspectRatio, textureIdx, minUV); if (!addFontGlyph_Internal(glyphInfo, mainObjIdx)) { // single font glyph couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); - bool success = addFontGlyph_Internal(glyphInfo, mainObjIdx); - assert(success); // this should always be true, otherwise it's either bug in code or not enough memory allocated to hold a single GlyphInfo + const bool success = addFontGlyph_Internal(glyphInfo, mainObjIdx); + if (!success) + { + m_logger.log("addFontGlyph_Internal failed, even after overflow-submission, this is irrecoverable.", nbl::system::ILogger::ELL_ERROR); + assert(false); + } } } else { - // TODO: Log, probably getGlyphMSDF(face,glyphIdx) returned nullptr ICPUImage ptr + m_logger.log("drawFontGlyph: textureIdx is invalid.", nbl::system::ILogger::ELL_ERROR); _NBL_DEBUG_BREAK_IF(true); } } -bool DrawResourcesFiller::finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) +bool DrawResourcesFiller::ensureStaticImageAvailability(const StaticImageInfo& 
staticImage, SIntendedSubmitInfo& intendedNextSubmit) { - bool success = true; - success &= finalizeMainObjectCopiesToGPU(intendedNextSubmit); - success &= finalizeGeometryCopiesToGPU(intendedNextSubmit); - success &= finalizeLineStyleCopiesToGPU(intendedNextSubmit); - success &= finalizeTextureCopies(intendedNextSubmit); - return success; -} + // Try inserting or updating the image usage in the cache. + // If the image is already present, updates its semaphore value. + auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; + CachedImageRecord* cachedImageRecord = imagesCache->insert(staticImage.imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN -uint32_t DrawResourcesFiller::addLineStyle_SubmitIfNeeded(const LineStyleInfo& lineStyle, SIntendedSubmitInfo& intendedNextSubmit) -{ - uint32_t outLineStyleIdx = addLineStyle_Internal(lineStyle); - if (outLineStyleIdx == InvalidStyleIdx) + if (cachedImageRecord->arrayIndex != InvalidTextureIndex && staticImage.forceUpdate) { - finalizeAllCopiesToGPU(intendedNextSubmit); - submitDraws(intendedNextSubmit); - resetGeometryCounters(); - resetMainObjectCounters(); - resetLineStyleCounters(); - outLineStyleIdx = addLineStyle_Internal(lineStyle); - assert(outLineStyleIdx != InvalidStyleIdx); + // found in cache, and we want to force new data into the image + if (cachedImageRecord->staticCPUImage) + { + const auto cachedImageParams = cachedImageRecord->staticCPUImage->getCreationParameters(); + const auto newImageParams = staticImage.cpuImage->getCreationParameters(); + const bool needsRecreation = newImageParams != cachedImageParams; + if (needsRecreation) + { + // call the eviction callback so the currently cached imageID gets eventually deallocated from memory arena along with it's allocated array slot from the suballocated descriptor set + evictCallback(staticImage.imageID, *cachedImageRecord); + + // Instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image + // imagesCache->erase(imageID); + // cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + *cachedImageRecord = CachedImageRecord(currentFrameIndex); + } + else + { + // Doesn't need image recreation, we'll use the same array index in descriptor set + the same bound memory. + // reset it's state + update the cpu image used for copying. 
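// Illustrative caller-side sketch (hypothetical variables, not part of the patch): `forceUpdate`
// is what a caller sets when the texel contents of an already-registered image have changed:
//
//   StaticImageInfo info = {};
//   info.imageID = myImageID;            // same ID as a previous frame
//   info.cpuImage = regeneratedCPUImage; // new texels, possibly identical creation params
//   info.forceUpdate = true;
//   drawResourcesFiller.ensureStaticImageAvailability(info, intendedNextSubmit);
//
// When the creation params still match, only the re-staging branch below runs (same memory,
// same descriptor array index); a params mismatch takes the eviction + recreation path above.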
+ cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; + cachedImageRecord->staticCPUImage = staticImage.cpuImage; + } + } + else + { + m_logger.log("found static image has empty cpu image, shouldn't happen", nbl::system::ILogger::ELL_ERROR); + } } - return outLineStyleIdx; -} -uint32_t DrawResourcesFiller::addMainObject_SubmitIfNeeded(uint32_t styleIdx, SIntendedSubmitInfo& intendedNextSubmit) -{ - MainObject mainObject = {}; - mainObject.styleIdx = styleIdx; - mainObject.clipProjectionAddress = acquireCurrentClipProjectionAddress(intendedNextSubmit); - uint32_t outMainObjectIdx = addMainObject_Internal(mainObject); - if (outMainObjectIdx == InvalidMainObjectIdx) + // if cachedImageRecord->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema + // in which case we don't queue anything for upload, and return the idx + if (cachedImageRecord->arrayIndex == InvalidTextureIndex) { - finalizeAllCopiesToGPU(intendedNextSubmit); - submitDraws(intendedNextSubmit); + // This is a new image (cache miss). Allocate a descriptor index for it. + cachedImageRecord->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; + // Blocking allocation attempt; if the descriptor pool is exhausted, this may stall. + suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint - // geometries needs to be reset because they reference draw objects and draw objects reference main objects that are now unavailable and reset - resetGeometryCounters(); - // mainObjects needs to be reset because we submitted every previous main object - resetMainObjectCounters(); - // we shouldn't reset linestyles and clip projections here because it was possibly requested to push to mem before addMainObjects - // but clip projections are reset due to geometry/bda buffer being reset so we need to push again - - // acquireCurrentClipProjectionAddress again here because clip projection should exist in the geometry buffer, and reseting geometry counters will invalidate the current clip proj and requires repush - mainObject.clipProjectionAddress = acquireCurrentClipProjectionAddress(intendedNextSubmit); - outMainObjectIdx = addMainObject_Internal(mainObject); - assert(outMainObjectIdx != InvalidMainObjectIdx); + if (cachedImageRecord->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) + { + auto* device = m_utilities->getLogicalDevice(); + auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + + IGPUImage::SCreationParams imageParams = {}; + imageParams = staticImage.cpuImage->getCreationParameters(); + imageParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; + // promote format because RGB8 and friends don't actually exist in HW + { + const IPhysicalDevice::SImageFormatPromotionRequest request = { + .originalFormat = imageParams.format, + .usages = IPhysicalDevice::SFormatImageUsages::SUsage(imageParams.usage) + }; + imageParams.format = physDev->promoteImageFormat(request,imageParams.tiling); + } + + // Attempt to create a GPU image and image view for this texture. 
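// Note on the promotion above (typical behaviour, not guaranteed on every device): a CPU image in
// asset::EF_R8G8B8_SRGB would usually end up as EF_R8G8B8A8_SRGB here, since 3-component 8-bit
// formats are generally not usable as sampled images on desktop hardware. The call below also
// forwards `staticImage.imageViewFormatOverride`, which presumably lets the created view differ
// from the promoted image format when an override is provided.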
+ ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageParams, staticImage.imageViewFormatOverride, intendedNextSubmit, std::to_string(staticImage.imageID)); + + if (allocResults.isValid()) + { + cachedImageRecord->type = ImageType::STATIC; + cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN + cachedImageRecord->allocationOffset = allocResults.allocationOffset; + cachedImageRecord->allocationSize = allocResults.allocationSize; + cachedImageRecord->gpuImageView = allocResults.gpuImageView; + cachedImageRecord->staticCPUImage = staticImage.cpuImage; + } + else + { + // All attempts to try create the GPU image and its corresponding view have failed. + // Most likely cause: insufficient GPU memory or unsupported image parameters. + m_logger.log("ensureStaticImageAvailability failed, likely due to low VRAM.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + + if (cachedImageRecord->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) + { + // We previously successfully create and allocated memory for the Image + // but failed to bind and create image view + // It's crucial to deallocate the offset+size form our images memory suballocator + imagesMemorySubAllocator->deallocate(cachedImageRecord->allocationOffset, cachedImageRecord->allocationSize); + } + + if (cachedImageRecord->arrayIndex != InvalidTextureIndex) + { + // We previously allocated a descriptor index, but failed to create a usable GPU image. + // It's crucial to deallocate this index to avoid leaks and preserve descriptor pool space. + // No semaphore wait needed here, as the GPU never got to use this slot. + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex, {}); + cachedImageRecord->arrayIndex = InvalidTextureIndex; + } + + // erase the entry we failed to allocate an image for, no need for `evictImage_SubmitIfNeeded`, because it didn't get to be used in any submit to defer it's memory and index deallocation + imagesCache->erase(staticImage.imageID); + } + } + else + { + m_logger.log("ensureStaticImageAvailability failed index allocation. 
shouldn't have happened.", nbl::system::ILogger::ELL_ERROR); + cachedImageRecord->arrayIndex = InvalidTextureIndex; + } } - return outMainObjectIdx; -} + + // cached or just inserted, we update the lastUsedFrameIndex + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; -void DrawResourcesFiller::pushClipProjectionData(const ClipProjectionData& clipProjectionData) -{ - clipProjections.push_back(clipProjectionData); - clipProjectionAddresses.push_back(InvalidClipProjectionAddress); + assert(cachedImageRecord->arrayIndex != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed + return cachedImageRecord->arrayIndex != InvalidTextureIndex; } -void DrawResourcesFiller::popClipProjectionData() +bool DrawResourcesFiller::ensureMultipleStaticImagesAvailability(std::span staticImages, SIntendedSubmitInfo& intendedNextSubmit) { - if (clipProjections.empty()) - return; + if (staticImages.size() > ImagesBindingArraySize) + return false; - clipProjections.pop_back(); - clipProjectionAddresses.pop_back(); + for (auto& staticImage : staticImages) + { + if (!ensureStaticImageAvailability(staticImage, intendedNextSubmit)) + return false; // failed ensuring a single staticImage is available, shouldn't happen unless the image is larger than the memory arena allocated for images. + } + for (auto& staticImage : staticImages) + { + if (imagesCache->peek(staticImage.imageID) == nullptr) + return false; // this means one of the images evicted another, most likely due to VRAM limitations not all images can be resident all at once. + } + return true; } -bool DrawResourcesFiller::finalizeMainObjectCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) +bool DrawResourcesFiller::ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit) { - bool success = true; - // Copy MainObjects - uint32_t remainingMainObjects = currentMainObjectCount - inMemMainObjectCount; - SBufferRange mainObjectsRange = { sizeof(MainObject) * inMemMainObjectCount, sizeof(MainObject) * remainingMainObjects, gpuDrawBuffers.mainObjectsBuffer }; - if (mainObjectsRange.size > 0u) - { - const MainObject* srcMainObjData = reinterpret_cast(cpuDrawBuffers.mainObjectsBuffer->getPointer()) + inMemMainObjectCount; - if (m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, mainObjectsRange, srcMainObjData)) - inMemMainObjectCount = currentMainObjectCount; - else + auto* device = m_utilities->getLogicalDevice(); + auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + + // Try inserting or updating the image usage in the cache. + // If the image is already present, updates its semaphore value. 
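// Illustrative caller-side flow for streamed/georeferenced imagery (hypothetical names, not part
// of the patch): residency is ensured first, tile copies are queued, then the draw is recorded;
// the queued copies are only flushed later by pushAllUploads -> pushStreamedImagesUploads:
//
//   drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(myImageID, geoParams, intendedNextSubmit);
//   for (const auto& tileCopy : dirtyTileCopies) // one StreamedImageCopy per dirty region
//       drawResourcesFiller.queueGeoreferencedImageCopy_Internal(myImageID, tileCopy);
//   drawResourcesFiller.addGeoreferencedImage(myImageID, geoParams, intendedNextSubmit);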
+ auto evictCallback = [&](image_id imageID, const CachedImageRecord& evicted) { evictImage_SubmitIfNeeded(imageID, evicted, intendedNextSubmit); }; + CachedImageRecord* cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + + // TODO: Function call that gets you image creaation params based on georeferencedImageParams (extents and mips and whatever), it will also get you the GEOREFERENED TYPE + IGPUImage::SCreationParams imageCreationParams = {}; + ImageType georeferenceImageType; + determineGeoreferencedImageCreationParams(imageCreationParams, georeferenceImageType, params); + + // imageParams = cpuImage->getCreationParameters(); + imageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; + // promote format because RGB8 and friends don't actually exist in HW + { + const IPhysicalDevice::SImageFormatPromotionRequest request = { + .originalFormat = imageCreationParams.format, + .usages = IPhysicalDevice::SFormatImageUsages::SUsage(imageCreationParams.usage) + }; + imageCreationParams.format = physDev->promoteImageFormat(request,imageCreationParams.tiling); + } + + // if cachedImageRecord->index was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema + // But we need to check if the cached image needs resizing/recreation. + if (cachedImageRecord->arrayIndex != InvalidTextureIndex) + { + // found in cache, but does it require resize? recreation? + if (cachedImageRecord->gpuImageView) { - // TODO: Log - success = false; + auto imgViewParams = cachedImageRecord->gpuImageView->getCreationParameters(); + if (imgViewParams.image) + { + const auto cachedParams = static_cast(imgViewParams.image->getCreationParameters()); + const auto cachedImageType = cachedImageRecord->type; + // image type and creation params (most importantly extent and format) should match, otherwise we evict, recreate and re-pus + const auto currentParams = static_cast(imageCreationParams); + const bool needsRecreation = cachedImageType != georeferenceImageType || cachedParams != currentParams; + if (needsRecreation) + { + // call the eviction callback so the currently cached imageID gets eventually deallocated from memory arena. 
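// (Deallocation is deferred rather than immediate because a previous submit may still be sampling
// the old image; evictImage_SubmitIfNeeded presumably defers the memory-arena and descriptor-slot
// release until the relevant semaphore value is reached, auto-submitting first if the evicted image
// is referenced by the current, not-yet-submitted draws.)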
+ evictCallback(imageID, *cachedImageRecord); + + // instead of erasing and inserting the imageID into the cache, we just reset it, so the next block of code goes into array index allocation + creating our new image + *cachedImageRecord = CachedImageRecord(currentFrameIndex); + // imagesCache->erase(imageID); + // cachedImageRecord = imagesCache->insert(imageID, intendedNextSubmit.getFutureScratchSemaphore().value, evictCallback); + } + } + else + { + m_logger.log("Cached georeferenced image has invalid gpu image.", nbl::system::ILogger::ELL_ERROR); + } } - } - return success; -} - -bool DrawResourcesFiller::finalizeGeometryCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) -{ - bool success = true; - // Copy DrawObjects - uint32_t remainingDrawObjects = currentDrawObjectCount - inMemDrawObjectCount; - SBufferRange drawObjectsRange = { sizeof(DrawObject) * inMemDrawObjectCount, sizeof(DrawObject) * remainingDrawObjects, gpuDrawBuffers.drawObjectsBuffer }; - if (drawObjectsRange.size > 0u) - { - const DrawObject* srcDrawObjData = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + inMemDrawObjectCount; - if (m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, drawObjectsRange, srcDrawObjData)) - inMemDrawObjectCount = currentDrawObjectCount; else { - // TODO: Log - success = false; + m_logger.log("Cached georeferenced image has invalid gpu image view.", nbl::system::ILogger::ELL_ERROR); } } - // Copy GeometryBuffer - uint64_t remainingGeometrySize = currentGeometryBufferSize - inMemGeometryBufferSize; - SBufferRange geomRange = { inMemGeometryBufferSize, remainingGeometrySize, gpuDrawBuffers.geometryBuffer }; - if (geomRange.size > 0u) + // in which case we don't queue anything for upload, and return the idx + if (cachedImageRecord->arrayIndex == InvalidTextureIndex) { - const uint8_t* srcGeomData = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + inMemGeometryBufferSize; - if (m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, geomRange, srcGeomData)) - inMemGeometryBufferSize = currentGeometryBufferSize; + // This is a new image (cache miss). Allocate a descriptor index for it. + cachedImageRecord->arrayIndex = video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address; + // Blocking allocation attempt; if the descriptor pool is exhausted, this may stall. + suballocatedDescriptorSet->multi_allocate(std::chrono::time_point::max(), imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex); // if the prev submit causes DEVICE_LOST then we'll get a deadlock here since we're using max timepoint + + if (cachedImageRecord->arrayIndex != video::SubAllocatedDescriptorSet::AddressAllocator::invalid_address) + { + // Attempt to create a GPU image and image view for this texture. 
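// Note: the asset::E_FORMAT::EF_COUNT passed as the view-format override below appears to mean
// "no override, create the view with the (promoted) image format"; this differs from the static
// image path, which forwards StaticImageInfo::imageViewFormatOverride instead.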
+ ImageAllocateResults allocResults = tryCreateAndAllocateImage_SubmitIfNeeded(imageCreationParams, asset::E_FORMAT::EF_COUNT, intendedNextSubmit, std::to_string(imageID)); + + if (allocResults.isValid()) + { + cachedImageRecord->type = georeferenceImageType; + cachedImageRecord->state = ImageState::CREATED_AND_MEMORY_BOUND; + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; // there was an eviction + auto-submit, we need to update AGAIN + cachedImageRecord->allocationOffset = allocResults.allocationOffset; + cachedImageRecord->allocationSize = allocResults.allocationSize; + cachedImageRecord->gpuImageView = allocResults.gpuImageView; + cachedImageRecord->staticCPUImage = nullptr; + } + else + { + // All attempts to try create the GPU image and its corresponding view have failed. + // Most likely cause: insufficient GPU memory or unsupported image parameters. + + m_logger.log("ensureGeoreferencedImageAvailability_AllocateIfNeeded failed, likely due to low VRAM.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + + if (cachedImageRecord->allocationOffset != ImagesMemorySubAllocator::InvalidAddress) + { + // We previously successfully create and allocated memory for the Image + // but failed to bind and create image view + // It's crucial to deallocate the offset+size form our images memory suballocator + imagesMemorySubAllocator->deallocate(cachedImageRecord->allocationOffset, cachedImageRecord->allocationSize); + } + + if (cachedImageRecord->arrayIndex != InvalidTextureIndex) + { + // We previously allocated a descriptor index, but failed to create a usable GPU image. + // It's crucial to deallocate this index to avoid leaks and preserve descriptor pool space. + // No semaphore wait needed here, as the GPU never got to use this slot. + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &cachedImageRecord->arrayIndex, {}); + cachedImageRecord->arrayIndex = InvalidTextureIndex; + } + + // erase the entry we failed to fill, no need for `evictImage_SubmitIfNeeded`, because it didn't get to be used in any submit to defer it's memory and index deallocation + imagesCache->erase(imageID); + } + } else { - // TODO: Log - success = false; + m_logger.log("ensureGeoreferencedImageAvailability_AllocateIfNeeded failed index allocation. 
shouldn't have happened.", nbl::system::ILogger::ELL_ERROR); + cachedImageRecord->arrayIndex = InvalidTextureIndex; } } - return success; + + + // cached or just inserted, we update the lastUsedFrameIndex + cachedImageRecord->lastUsedFrameIndex = currentFrameIndex; + + assert(cachedImageRecord->arrayIndex != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed + return (cachedImageRecord->arrayIndex != InvalidTextureIndex); } -bool DrawResourcesFiller::finalizeLineStyleCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit) +bool DrawResourcesFiller::queueGeoreferencedImageCopy_Internal(image_id imageID, const StreamedImageCopy& imageCopy) { - bool success = true; - // Copy LineStyles - uint32_t remainingLineStyles = currentLineStylesCount - inMemLineStylesCount; - SBufferRange stylesRange = { sizeof(LineStyle) * inMemLineStylesCount, sizeof(LineStyle) * remainingLineStyles, gpuDrawBuffers.lineStylesBuffer }; - if (stylesRange.size > 0u) - { - const LineStyle* srcLineStylesData = reinterpret_cast(cpuDrawBuffers.lineStylesBuffer->getPointer()) + inMemLineStylesCount; - if (m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, stylesRange, srcLineStylesData)) - inMemLineStylesCount = currentLineStylesCount; - else + auto& vec = streamedImageCopies[imageID]; + vec.emplace_back(imageCopy); + return true; +} + +// TODO[Przemek]: similar to other drawXXX and drawXXX_internal functions that create mainobjects, drawObjects and push additional info in geometry buffer, input to function would be a GridDTMInfo +// We don't have an allocator or memory management for texture updates yet, see how `_test_addImageObject` is being temporarily used (Descriptor updates and pipeline barriers) to upload an image into gpu and update a descriptor slot (it will become more sophisticated but doesn't block you) +void DrawResourcesFiller::drawGridDTM( + const float64_t2& topLeft, + float64_t2 worldSpaceExtents, + float gridCellWidth, + uint64_t textureID, + const DTMSettingsInfo& dtmSettingsInfo, + SIntendedSubmitInfo& intendedNextSubmit) +{ + if (dtmSettingsInfo.mode == 0u) + return; + + GridDTMInfo gridDTMInfo; + gridDTMInfo.topLeft = topLeft; + gridDTMInfo.worldSpaceExtents = worldSpaceExtents; + gridDTMInfo.gridCellWidth = gridCellWidth; + if (textureID != InvalidTextureIndex) + gridDTMInfo.textureID = getImageIndexFromID(textureID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory + else + gridDTMInfo.textureID = InvalidTextureIndex; + + // determine the thickes line + float thickestLineThickness = 0.0f; + if (dtmSettingsInfo.mode & E_DTM_MODE::OUTLINE) + { + thickestLineThickness = dtmSettingsInfo.outlineStyleInfo.worldSpaceLineWidth + dtmSettingsInfo.outlineStyleInfo.screenSpaceLineWidth; + } + else if (dtmSettingsInfo.mode & E_DTM_MODE::CONTOUR) + { + for (int i = 0; i < dtmSettingsInfo.contourSettingsCount; ++i) { - // TODO: Log - success = false; + const auto& contourLineStyle = dtmSettingsInfo.contourSettings[i].lineStyleInfo; + const float contourLineThickness = contourLineStyle.worldSpaceLineWidth + contourLineStyle.screenSpaceLineWidth; + thickestLineThickness = std::max(thickestLineThickness, contourLineThickness); } } - return success; + gridDTMInfo.thicknessOfTheThickestLine = thickestLineThickness; + + setActiveDTMSettings(dtmSettingsInfo); + 
beginMainObject(MainObjectType::GRID_DTM); + + uint32_t mainObjectIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjectIdx == InvalidMainObjectIdx) + { + m_logger.log("drawGridDTM: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } + + if (!addGridDTM_Internal(gridDTMInfo, mainObjectIdx)) + { + // single grid DTM couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjectIdx); + const bool success = addGridDTM_Internal(gridDTMInfo, mainObjectIdx); + if (!success) + { + m_logger.log("addGridDTM_Internal failed, even after overflow-submission, this is irrecoverable.", nbl::system::ILogger::ELL_ERROR); + assert(false); + } + } + + endMainObject(); } -bool DrawResourcesFiller::finalizeTextureCopies(SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::addImageObject(image_id imageID, const OrientedBoundingBox2D& obb, SIntendedSubmitInfo& intendedNextSubmit) { - msdfTextureArrayIndicesUsed.clear(); // clear msdf textures used in the frame, because the frame finished and called this function. + beginMainObject(MainObjectType::STATIC_IMAGE); - if (!msdfTextureCopies.size() && m_hasInitializedMSDFTextureArrays) // even if the textureCopies are empty, we want to continue if not initialized yet so that the layout of all layers become READ_ONLY_OPTIMAL - return true; // yay successfully copied nothing + uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjIdx == InvalidMainObjectIdx) + { + m_logger.log("addImageObject: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } - auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); - - if (cmdBuffInfo) + ImageObjectInfo info = {}; + info.topLeft = obb.topLeft; + info.dirU = obb.dirU; + info.aspectRatio = obb.aspectRatio; + info.textureID = getImageIndexFromID(imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory + if (!addImageObject_Internal(info, mainObjIdx)) { - IGPUCommandBuffer* cmdBuff = cmdBuffInfo->cmdbuf; + // single image object couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); + const bool success = addImageObject_Internal(info, mainObjIdx); + if (!success) + { + m_logger.log("addImageObject_Internal failed, even after overflow-submission, this is irrecoverable.", nbl::system::ILogger::ELL_ERROR); + assert(false); + } + } - auto msdfImage = msdfTextureArray->getCreationParameters().image; + endMainObject(); +} - // preparing msdfs for copy - using image_barrier_t = IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t; +void DrawResourcesFiller::addGeoreferencedImage(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit) +{ + beginMainObject(MainObjectType::STREAMED_IMAGE); + + uint32_t mainObjIdx = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); + if (mainObjIdx == InvalidMainObjectIdx) + { + m_logger.log("addGeoreferencedImage: acquireActiveMainObjectIndex returned invalid index", nbl::system::ILogger::ELL_ERROR); + assert(false); + return; + } + + 
GeoreferencedImageInfo info = {}; + info.topLeft = params.worldspaceOBB.topLeft; + info.dirU = params.worldspaceOBB.dirU; + info.aspectRatio = params.worldspaceOBB.aspectRatio; + info.textureID = getImageIndexFromID(imageID, intendedNextSubmit); // for this to be valid and safe, this function needs to be called immediately after `addStaticImage` function to make sure image is in memory + if (!addGeoreferencedImageInfo_Internal(info, mainObjIdx)) + { + // single image object couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects + submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); + const bool success = addGeoreferencedImageInfo_Internal(info, mainObjIdx); + if (!success) + { + m_logger.log("addGeoreferencedImageInfo_Internal failed, even after overflow-submission, this is irrecoverable.", nbl::system::ILogger::ELL_ERROR); + assert(false); + } + } + + endMainObject(); +} + +bool DrawResourcesFiller::pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit) +{ + if (!intendedNextSubmit.valid()) + { + // It is a caching submit without command buffer, just for the purpose of accumulation of staging resources + // In that case we don't push any uploads (i.e. we don't record any imageRecord commmand in active command buffer, because there is no active command buffer) + return false; + } + + bool success = true; + if (currentReplayCache) + { + // This means we're in a replay cache scope, use the replay cache to push to GPU instead of internal accumulation + success &= pushBufferUploads(intendedNextSubmit, currentReplayCache->resourcesCollection); + success &= pushMSDFImagesUploads(intendedNextSubmit, currentReplayCache->msdfImagesState); + + // Push Static Images Uploads from replay cache, all the work below is necessary to detect whether our image to replay is already in the cache in the exact form OR we need to create new image + bind memory and set array index + auto* device = m_utilities->getLogicalDevice(); + bool replayCacheFullyCovered = true; + for (auto& [imageID, toReplayRecord] : *currentReplayCache->imagesCache) + { + if (toReplayRecord.type != ImageType::STATIC) // non-static images (Georeferenced) won't be replayed like this + continue; + + auto* cachedRecord = imagesCache->peek(imageID); + bool alreadyResident = false; + + // compare with existing state, and check whether image id is already resident. + if (cachedRecord != nullptr) + { + const bool allocationMatches = + cachedRecord->allocationOffset == toReplayRecord.allocationOffset && + cachedRecord->allocationSize == toReplayRecord.allocationSize; + + const bool arrayIndexMatches = cachedRecord->arrayIndex == toReplayRecord.arrayIndex; + + alreadyResident = allocationMatches && arrayIndexMatches && cachedRecord->state == ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA; + } + + // if already resident, just update the state to the cached state (to make sure it doesn't get issued for upload again) and move on. 
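// All three conditions matter: the replayed draw data addresses images by descriptor array index,
// and the allocation offset/size pin down where a recreated image would have to be re-bound in the
// memory arena, so a mismatch in any of them means the currently resident image cannot simply be
// reused for replay and must be recreated below.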
+ if (alreadyResident) + { + toReplayRecord.state = cachedRecord->state; // update the toReplayImageRecords's state, to completely match the currently resident state + continue; + } + + replayCacheFullyCovered = false; + + bool successCreateNewImage = false; + + // Not already resident, we need to recreate the image and bind the image memory to correct location again, and update the descriptor set and push the uploads + auto existingGPUImageViewParams = toReplayRecord.gpuImageView->getCreationParameters(); + IGPUImage::SCreationParams imageParams = {}; + imageParams = existingGPUImageViewParams.image->getCreationParameters(); + + auto newGPUImage = device->createImage(std::move(imageParams)); + if (newGPUImage) + { + nbl::video::ILogicalDevice::SBindImageMemoryInfo bindImageMemoryInfo = + { + .image = newGPUImage.get(), + .binding = {.memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + toReplayRecord.allocationOffset } + }; + + const bool boundToMemorySuccessfully = device->bindImageMemory({ &bindImageMemoryInfo, 1u }); + if (boundToMemorySuccessfully) + { + newGPUImage->setObjectDebugName((std::to_string(imageID) + " Static Image 2D").c_str()); + IGPUImageView::SCreationParams viewParams = existingGPUImageViewParams; + viewParams.image = newGPUImage; + + auto newGPUImageView = device->createImageView(std::move(viewParams)); + if (newGPUImageView) + { + successCreateNewImage = true; + toReplayRecord.gpuImageView = newGPUImageView; + toReplayRecord.state = ImageState::CREATED_AND_MEMORY_BOUND; + newGPUImageView->setObjectDebugName((std::to_string(imageID) + " Static Image View 2D").c_str()); + } + + } + } + + if (!successCreateNewImage) + { + m_logger.log("Couldn't create new gpu image in pushAllUploads: cache and replay mode.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + success = false; + } + } + + // Our actual `imageCache` (which represents GPU state) didn't cover the replayCache fully, so new images had to be created, bound to memory. and they need to be written into their respective descriptor array indices again. + // imagesCache = std::make_unique(*currentReplayCache->imagesCache); + imagesCache->clear(); + for (auto it = currentReplayCache->imagesCache->rbegin(); it != currentReplayCache->imagesCache->rend(); it++) + imagesCache->base_t::insert(it->first, it->second); + + if (!replayCacheFullyCovered) + { + // We need to block for previous submit in order to safely update the descriptor set array index next. + // + // [FUTURE_CONSIDERATION]: To avoid stalling the CPU when replaying caches that overflow GPU memory, + // we could recreate the image and image view, binding them to entirely new memory locations. + // This would require an indirection mechanism in the shader to remap references from cached geometry or objects to the new image array indices. + // Note: This isn't a problem if the replayed scene fits in memory and doesn't require overflow submissions due to image memory exhaustion. + nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + device->blockForSemaphores({ &waitInfo, 1u }); + } + + success &= bindImagesToArrayIndices(*imagesCache); + success &= pushStaticImagesUploads(intendedNextSubmit, *imagesCache); + // Streamed uploads in cache&replay?! 
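// Note: streamed (georeferenced) tile copies are only flushed in the non-replay branch below, so a
// replayed frame re-creates static images and MSDFs but does not re-issue streamed uploads.
//
// Illustrative usage of the cache & replay path (hypothetical caller code, not part of the patch):
//
//   // record a frame normally, then snapshot it:
//   auto replay = drawResourcesFiller.createReplayCache();
//   // later, to reproduce exactly the same resources and draws without re-filling:
//   drawResourcesFiller.setReplayCache(replay.get());
//   drawResourcesFiller.pushAllUploads(intendedNextSubmit); // takes this `currentReplayCache` branch
//   // record draws using getDrawCalls() / getResourcesCollection(), then:
//   drawResourcesFiller.unsetReplayCache();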
+ } + else + { + flushDrawObjects(); + success &= pushBufferUploads(intendedNextSubmit, resourcesCollection); + success &= pushMSDFImagesUploads(intendedNextSubmit, msdfImagesState); + success &= bindImagesToArrayIndices(*imagesCache); + success &= pushStaticImagesUploads(intendedNextSubmit, *imagesCache); + success &= pushStreamedImagesUploads(intendedNextSubmit); + } + return success; +} + +const DrawResourcesFiller::ResourcesCollection& DrawResourcesFiller::getResourcesCollection() const +{ + if (currentReplayCache) + return currentReplayCache->resourcesCollection; + else + return resourcesCollection; +} + +void DrawResourcesFiller::setActiveLineStyle(const LineStyleInfo& lineStyle) +{ + activeLineStyle = lineStyle; + activeLineStyleIndex = InvalidStyleIdx; +} + +void DrawResourcesFiller::setActiveDTMSettings(const DTMSettingsInfo& dtmSettingsInfo) +{ + activeDTMSettings = dtmSettingsInfo; + activeDTMSettingsIndex = InvalidDTMSettingsIdx; +} + +void DrawResourcesFiller::beginMainObject(MainObjectType type, TransformationType transformationType) +{ + activeMainObjectType = type; + activeMainObjectTransformationType = transformationType; + activeMainObjectIndex = InvalidMainObjectIdx; +} + +void DrawResourcesFiller::endMainObject() +{ + activeMainObjectType = MainObjectType::NONE; + activeMainObjectTransformationType = TransformationType::TT_NORMAL; + activeMainObjectIndex = InvalidMainObjectIdx; +} + +void DrawResourcesFiller::pushCustomProjection(const float64_t3x3& projection) +{ + activeProjections.push_back(projection); + activeProjectionIndices.push_back(InvalidCustomProjectionIndex); +} + +void DrawResourcesFiller::popCustomProjection() +{ + if (activeProjections.empty()) + return; + + activeProjections.pop_back(); + activeProjectionIndices.pop_back(); +} + +void DrawResourcesFiller::pushCustomClipRect(const WorldClipRect& clipRect) +{ + activeClipRects.push_back(clipRect); + activeClipRectIndices.push_back(InvalidCustomClipRectIndex); +} + +void DrawResourcesFiller::popCustomClipRect() +{ if (activeClipRects.empty()) + return; + + activeClipRects.pop_back(); + activeClipRectIndices.pop_back(); +} + +/// For advanced use only, (passed to shaders for them to know if we overflow-submitted in the middle if a main obj +uint32_t DrawResourcesFiller::getActiveMainObjectIndex() const +{ + if (currentReplayCache) + return currentReplayCache->activeMainObjectIndex; + else + return activeMainObjectIndex; +} + +const std::vector& DrawResourcesFiller::getDrawCalls() const +{ + if (currentReplayCache) + return currentReplayCache->drawCallsData; + else + return drawCalls; +} + +std::unique_ptr DrawResourcesFiller::createReplayCache() +{ + flushDrawObjects(); + std::unique_ptr ret = std::unique_ptr(new ReplayCache); + ret->resourcesCollection = resourcesCollection; + ret->msdfImagesState = msdfImagesState; + for (auto& stagedMSDF : ret->msdfImagesState) + stagedMSDF.uploadedToGPU = false; // to trigger upload for all msdf functions again. 
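// (The replay cache snapshots the CPU-side state by value: the resources collection, draw calls,
// MSDF staging state and, below, a deep copy of the images cache, so it stays valid after the
// filler's per-frame state is reset; for geometry-heavy frames this snapshot can be sizable.)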
+ ret->drawCallsData = drawCalls;
+ ret->activeMainObjectIndex = activeMainObjectIndex;
+ ret->imagesCache = std::unique_ptr<ImagesCache>(new ImagesCache(*imagesCache));
+ return ret;
+}
+
+void DrawResourcesFiller::setReplayCache(ReplayCache* cache)
+{
+ currentReplayCache = cache;
+}
+
+void DrawResourcesFiller::unsetReplayCache()
+{
+ currentReplayCache = nullptr;
+}
+
+bool DrawResourcesFiller::pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resources)
+{
+ copiedResourcesSize = 0ull;
+
+ if (resources.calculateTotalConsumption() > resourcesGPUBuffer->getSize())
+ {
+ m_logger.log("some bug has caused the resourcesCollection to consume more memory than available in resourcesGPUBuffer without overflow submit", nbl::system::ILogger::ELL_ERROR);
+ assert(false);
+ return false;
+ }
+
+ auto copyCPUFilledDrawBuffer = [&](auto& drawBuffer) -> bool
+ {
+ // drawBuffer must be of type CPUGeneratedResource
+ SBufferRange<IGPUBuffer> copyRange = { copiedResourcesSize, drawBuffer.getStorageSize(), resourcesGPUBuffer};
+
+ if (copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize())
+ {
+ m_logger.log("`copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()` is true in `copyCPUFilledDrawBuffer`, this shouldn't happen with correct auto-submission mechanism.", nbl::system::ILogger::ELL_ERROR);
+ assert(false);
+ return false;
+ }
+
+ drawBuffer.bufferOffset = copyRange.offset;
+ if (copyRange.size > 0ull)
+ {
+ if (!m_utilities->updateBufferRangeViaStagingBuffer(intendedNextSubmit, copyRange, drawBuffer.vector.data()))
+ return false;
+ copiedResourcesSize += drawBuffer.getAlignedStorageSize();
+ }
+ return true;
+ };
+
+ auto addComputeReservedFilledDrawBuffer = [&](auto& drawBuffer) -> bool
+ {
+ // drawBuffer must be of type ReservedComputeResource
+ SBufferRange<IGPUBuffer> copyRange = { copiedResourcesSize, drawBuffer.getStorageSize(), resourcesGPUBuffer};
+
+ if (copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize())
+ {
+ m_logger.log("`copyRange.offset + copyRange.size > resourcesGPUBuffer->getSize()` is true in `addComputeReservedFilledDrawBuffer`, this shouldn't happen with correct auto-submission mechanism.", nbl::system::ILogger::ELL_ERROR);
+ assert(false);
+ return false;
+ }
+
+ drawBuffer.bufferOffset = copyRange.offset;
+ copiedResourcesSize += drawBuffer.getAlignedStorageSize();
+ return true;
+ };
+
+ copyCPUFilledDrawBuffer(resources.lineStyles);
+ copyCPUFilledDrawBuffer(resources.dtmSettings);
+ copyCPUFilledDrawBuffer(resources.customProjections);
+ copyCPUFilledDrawBuffer(resources.customClipRects);
+ copyCPUFilledDrawBuffer(resources.mainObjects);
+ copyCPUFilledDrawBuffer(resources.drawObjects);
+ copyCPUFilledDrawBuffer(resources.indexBuffer);
+ copyCPUFilledDrawBuffer(resources.geometryInfo);
+
+ return true;
+}
+
+bool DrawResourcesFiller::pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, std::vector& stagedMSDFCPUImages)
+{
+ auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording();
+
+ if (cmdBuffInfo)
+ {
+ IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf;
+
+ auto msdfImage = msdfTextureArray->getCreationParameters().image;
+
+ // preparing msdfs for copy
+ using image_barrier_t = IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t;
image_barrier_t beforeTransferImageBarrier[] =
{
{
@@ -493,25 +1243,24 @@
.newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL,
}
};
-
cmdBuff->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeTransferImageBarrier }); + commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeTransferImageBarrier }); // Do the copies and advance the iterator. // this is the pattern we use for iterating when entries will get erased if processed successfully, but may get skipped for later. - auto oit = msdfTextureCopies.begin(); - for (auto iit = msdfTextureCopies.begin(); iit != msdfTextureCopies.end(); iit++) + for (uint32_t i = 0u; i < stagedMSDFCPUImages.size(); ++i) { - bool copySuccess = true; - if (iit->image && iit->index < msdfImage->getCreationParameters().arrayLayers) + auto& stagedMSDF = stagedMSDFCPUImages[i]; + if (stagedMSDF.image && i < msdfImage->getCreationParameters().arrayLayers) { - for (uint32_t mip = 0; mip < iit->image->getCreationParameters().mipLevels; mip++) + for (uint32_t mip = 0; mip < stagedMSDF.image->getCreationParameters().mipLevels; mip++) { - auto mipImageRegion = iit->image->getRegion(mip, core::vectorSIMDu32(0u, 0u)); + auto mipImageRegion = stagedMSDF.image->getRegion(mip, core::vectorSIMDu32(0u, 0u)); if (mipImageRegion) { asset::IImage::SBufferCopy region = {}; region.imageSubresource.aspectMask = asset::IImage::EAF_COLOR_BIT; region.imageSubresource.mipLevel = mipImageRegion->imageSubresource.mipLevel; - region.imageSubresource.baseArrayLayer = iit->index; + region.imageSubresource.baseArrayLayer = i; region.imageSubresource.layerCount = 1u; region.bufferOffset = 0u; region.bufferRowLength = mipImageRegion->getExtent().width; @@ -519,46 +1268,31 @@ bool DrawResourcesFiller::finalizeTextureCopies(SIntendedSubmitInfo& intendedNex region.imageExtent = mipImageRegion->imageExtent; region.imageOffset = { 0u, 0u, 0u }; - auto buffer = reinterpret_cast(iit->image->getBuffer()->getPointer()); + auto buffer = reinterpret_cast(stagedMSDF.image->getBuffer()->getPointer()); auto bufferOffset = mipImageRegion->bufferOffset; - if (!m_utilities->updateImageViaStagingBuffer( + stagedMSDF.uploadedToGPU = m_utilities->updateImageViaStagingBuffer( intendedNextSubmit, buffer + bufferOffset, nbl::ext::TextRendering::TextRenderer::MSDFTextureFormat, msdfImage.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - { ®ion, ®ion + 1 })) - { - // TODO: Log which mip failed - copySuccess = false; - } + { ®ion, ®ion + 1 }); } else { - // TODO: Log - copySuccess = false; + assert(false); + stagedMSDF.uploadedToGPU = false; } } } else { - assert(false); - copySuccess = false; - } - - if (!copySuccess) - { - // we move the failed copy to the oit and advance it - if (oit != iit) - *oit = *iit; - oit++; + stagedMSDF.uploadedToGPU = false; } } - // trim - const auto newSize = std::distance(msdfTextureCopies.begin(), oit); - _NBL_DEBUG_BREAK_IF(newSize != 0u); // we had failed copies - msdfTextureCopies.resize(newSize); + + commandBuffer = intendedNextSubmit.getCommandBufferForRecording()->cmdbuf; // overflow-submit in utilities calls might've cause current recording command buffer to change // preparing msdfs for use image_barrier_t afterTransferImageBarrier[] = @@ -585,8 +1319,8 @@ bool DrawResourcesFiller::finalizeTextureCopies(SIntendedSubmitInfo& intendedNex .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, } }; - cmdBuff->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterTransferImageBarrier }); - + commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterTransferImageBarrier }); + if (!m_hasInitializedMSDFTextureArrays) 
m_hasInitializedMSDFTextureArrays = true;
@@ -594,136 +1328,588 @@
}
else
{
- // TODO: Log no valid command buffer to record into
+ m_logger.log("pushMSDFImagesUploads: no valid command buffer to record MSDF image uploads into.", nbl::system::ILogger::ELL_ERROR);
return false;
}
}
-void DrawResourcesFiller::submitCurrentDrawObjectsAndReset(SIntendedSubmitInfo& intendedNextSubmit, uint32_t mainObjectIndex)
+bool DrawResourcesFiller::bindImagesToArrayIndices(ImagesCache& imagesCache)
{
- finalizeAllCopiesToGPU(intendedNextSubmit);
- submitDraws(intendedNextSubmit);
+ bool success = true;
+
+ auto* device = m_utilities->getLogicalDevice();
+ auto* descriptorSet = suballocatedDescriptorSet->getDescriptorSet();
+
+ // DescriptorSet Updates
+ std::vector<video::IGPUDescriptorSet::SDescriptorInfo> descriptorInfos;
+ std::vector<IGPUDescriptorSet::SWriteDescriptorSet> descriptorWrites;
+ descriptorInfos.resize(imagesCache.size());
+ descriptorWrites.resize(imagesCache.size());
+
+ uint32_t descriptorWriteCount = 0u;
+ for (auto& [id, record] : imagesCache)
+ {
+ if (record.state >= ImageState::BOUND_TO_DESCRIPTOR_SET || !record.gpuImageView)
+ continue;
+
+ // Bind gpu image view to descriptor set
+ video::IGPUDescriptorSet::SDescriptorInfo descriptorInfo = {};
+ descriptorInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL;
+ descriptorInfo.desc = record.gpuImageView;
+ descriptorInfos[descriptorWriteCount] = descriptorInfo;
+
+ // consider batching contiguous writes, if descriptor set updating was a hotspot
+ IGPUDescriptorSet::SWriteDescriptorSet descriptorWrite = {};
+ descriptorWrite.dstSet = descriptorSet;
+ descriptorWrite.binding = imagesArrayBinding;
+ descriptorWrite.arrayElement = record.arrayIndex;
+ descriptorWrite.count = 1u;
+ descriptorWrite.info = &descriptorInfos[descriptorWriteCount];
+ descriptorWrites[descriptorWriteCount] = descriptorWrite;
+
+ record.state = ImageState::BOUND_TO_DESCRIPTOR_SET;
+ descriptorWriteCount++;
+ }
+
+ if (descriptorWriteCount > 0u)
+ success &= device->updateDescriptorSets(descriptorWriteCount, descriptorWrites.data(), 0u, nullptr);
+ return success;
+}
+
+bool DrawResourcesFiller::pushStaticImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, ImagesCache& imagesCache)
+{
+ bool success = true;
+
+ // Push Static Images Uploads, only those that are not yet GPU resident
+ // TODO: remove this vector and check state in each for loop below?
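// One possible shape for the TODO above (sketch, not part of the patch): keep the filter as a small
// predicate and reuse it in the barrier/copy/barrier loops below instead of materialising a vector:
//
//   auto needsUpload = [](const CachedImageRecord& record) -> bool
//   {
//       return record.staticCPUImage && record.type == ImageType::STATIC && record.state < ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA;
//   };
//
// The vector is kept here because it also gives the three loops one shared, stable iteration order.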
+	std::vector nonResidentImageRecords;
+	for (auto& [id, record] : imagesCache)
+	{
+		if (record.staticCPUImage && record.type == ImageType::STATIC && record.state < ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA)
+			nonResidentImageRecords.push_back(&record);
+	}
+
+	if (nonResidentImageRecords.size() > 0ull)
+	{
+		auto* device = m_utilities->getLogicalDevice();
+		auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording();
+
+		if (cmdBuffInfo)
+		{
+			IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf;
+
+			std::vector beforeCopyImageBarriers;
+			beforeCopyImageBarriers.resize(nonResidentImageRecords.size());
+
+			// Pipeline Barriers before image copies
+			for (uint32_t i = 0u; i < nonResidentImageRecords.size(); ++i)
+			{
+				auto& imageRecord = *nonResidentImageRecords[i];
+				const auto& gpuImg = imageRecord.gpuImageView->getCreationParameters().image;
+				beforeCopyImageBarriers[i] =
+				{
+					.barrier = {
+						.dep = {
+							.srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // previous top of pipe -> top_of_pipe in first scope = none
+							.srcAccessMask = ACCESS_FLAGS::NONE,
+							.dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT,
+							.dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT,
+						}
+						// .ownershipOp. No queueFam ownership transfer
+					},
+					.image = gpuImg.get(),
+					.subresourceRange = {
+						.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT,
+						.baseMipLevel = 0u,
+						.levelCount = ICPUImageView::remaining_mip_levels,
+						.baseArrayLayer = 0u,
+						.layerCount = ICPUImageView::remaining_array_layers
+					},
+					.oldLayout = IImage::LAYOUT::UNDEFINED,
+					.newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL,
+				};
+			}
+			success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers });
+
+			for (uint32_t i = 0u; i < nonResidentImageRecords.size(); ++i)
+			{
+				auto& imageRecord = *nonResidentImageRecords[i];
+				auto& gpuImg = imageRecord.gpuImageView->getCreationParameters().image;
+				success &= m_utilities->updateImageViaStagingBuffer(
+					intendedNextSubmit,
+					imageRecord.staticCPUImage->getBuffer()->getPointer(), imageRecord.staticCPUImage->getCreationParameters().format,
+					gpuImg.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL,
+					imageRecord.staticCPUImage->getRegions());
+
+				if (success)
+					imageRecord.state = ImageState::GPU_RESIDENT_WITH_VALID_STATIC_DATA;
+				else
+				{
+					m_logger.log("Failed `updateImageViaStagingBuffer` in pushStaticImagesUploads.", nbl::system::ILogger::ELL_ERROR);
+				}
+			}
+
+			commandBuffer = intendedNextSubmit.getCommandBufferForRecording()->cmdbuf; // overflow-submit in utilities calls might've caused the current recording command buffer to change
+
+			std::vector afterCopyImageBarriers;
+			afterCopyImageBarriers.resize(nonResidentImageRecords.size());
+
+			// Pipeline Barriers after image copies
+			for (uint32_t i = 0u; i < nonResidentImageRecords.size(); ++i)
+			{
+				auto& imageRecord = *nonResidentImageRecords[i];
+				const auto& gpuImg = imageRecord.gpuImageView->getCreationParameters().image;
+				afterCopyImageBarriers[i] =
+				{
+					.barrier = {
+						.dep = {
+							.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, // wait for the transfer writes above
+							.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT,
+							.dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,
+							.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS,
+						}
+						// .ownershipOp.
No queueFam ownership transfer + }, + .image = gpuImg.get(), + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + }, + .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + }; + } + success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterCopyImageBarriers }); + } + else + { + _NBL_DEBUG_BREAK_IF(true); + success = false; + } + } - // We reset Geometry Counters (drawObj+geometryInfos) because we're done rendering previous geometry - // We don't reset counters for styles because we will be reusing them - resetGeometryCounters(); + if (!success) + { + m_logger.log("Failure in `pushStaticImagesUploads`.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + } + return success; +} + +bool DrawResourcesFiller::pushStreamedImagesUploads(SIntendedSubmitInfo& intendedNextSubmit) +{ + bool success = true; + + if (streamedImageCopies.size() > 0ull) + { + auto* device = m_utilities->getLogicalDevice(); + auto* cmdBuffInfo = intendedNextSubmit.getCommandBufferForRecording(); -#if 1 - if (mainObjectIndex < maxMainObjects) + if (cmdBuffInfo) + { + IGPUCommandBuffer* commandBuffer = cmdBuffInfo->cmdbuf; + + std::vector beforeCopyImageBarriers; + beforeCopyImageBarriers.reserve(streamedImageCopies.size()); + + // Pipeline Barriers before imageCopy + for (auto& [imageID, imageCopies] : streamedImageCopies) + { + auto* imageRecord = imagesCache->peek(imageID); + if (imageRecord == nullptr) + continue; + + const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image; + + beforeCopyImageBarriers.push_back( + { + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // previous top of pipe -> top_of_pipe in first scope = none + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + } + // .ownershipOp. 
No queueFam ownership transfer
+					},
+					.image = gpuImg.get(),
+					.subresourceRange = {
+						.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT,
+						.baseMipLevel = 0u,
+						.levelCount = ICPUImageView::remaining_mip_levels,
+						.baseArrayLayer = 0u,
+						.layerCount = ICPUImageView::remaining_array_layers
+					},
+					.oldLayout = IImage::LAYOUT::UNDEFINED,
+					.newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL,
+				});
+			}
+			success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers });
+
+			for (auto& [imageID, imageCopies] : streamedImageCopies)
+			{
+				auto* imageRecord = imagesCache->peek(imageID);
+				if (imageRecord == nullptr)
+					continue;
+
+				const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image;
+
+				for (auto& imageCopy : imageCopies)
+				{
+					success &= m_utilities->updateImageViaStagingBuffer(
+						intendedNextSubmit,
+						imageCopy.srcBuffer->getPointer(), imageCopy.srcFormat,
+						gpuImg.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL,
+						{ &imageCopy.region, 1u });
+				}
+			}
+
+			commandBuffer = intendedNextSubmit.getCommandBufferForRecording()->cmdbuf; // overflow-submit in utilities calls might've caused the current recording command buffer to change
+
+			std::vector afterCopyImageBarriers;
+			afterCopyImageBarriers.reserve(streamedImageCopies.size());
+
+			// Pipeline Barriers after image copies
+			for (auto& [imageID, imageCopies] : streamedImageCopies)
+			{
+				auto* imageRecord = imagesCache->peek(imageID);
+				if (imageRecord == nullptr)
+					continue;
+
+				const auto& gpuImg = imageRecord->gpuImageView->getCreationParameters().image;
+
+				afterCopyImageBarriers.push_back (
+				{
+					.barrier = {
+						.dep = {
+							.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, // wait for the transfer writes above
+							.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT,
+							.dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT,
+							.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS,
+						}
+						// .ownershipOp.
No queueFam ownership transfer + }, + .image = gpuImg.get(), + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + }, + .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, + .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, + }); + } + success &= commandBuffer->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterCopyImageBarriers }); + + streamedImageCopies.clear(); + } + else + { + _NBL_DEBUG_BREAK_IF(true); + success = false; + } + } + + if (!success) + { + m_logger.log("Failure in `pushStreamedImagesUploads`.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + } + return success; +} + +const size_t DrawResourcesFiller::calculateRemainingResourcesSize() const +{ + assert(resourcesGPUBuffer->getSize() >= resourcesCollection.calculateTotalConsumption()); + return resourcesGPUBuffer->getSize() - resourcesCollection.calculateTotalConsumption(); +} + +void DrawResourcesFiller::submitCurrentDrawObjectsAndReset(SIntendedSubmitInfo& intendedNextSubmit, uint32_t& mainObjectIndex) +{ + submitDraws(intendedNextSubmit); + reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded + mainObjectIndex = acquireActiveMainObjectIndex_SubmitIfNeeded(intendedNextSubmit); // it will be 0 because it's first mainObjectIndex after reset and invalidation +} + +uint32_t DrawResourcesFiller::addLineStyle_Internal(const LineStyleInfo& lineStyleInfo) +{ + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const bool enoughMem = remainingResourcesSize >= sizeof(LineStyle); // enough remaining memory for 1 more linestyle? + if (!enoughMem) + return InvalidStyleIdx; + // TODO: Maybe constraint by a max size? and return InvalidIdx if it would exceed + + LineStyle gpuLineStyle = lineStyleInfo.getAsGPUData(); + _NBL_DEBUG_BREAK_IF(gpuLineStyle.stipplePatternSize > LineStyle::StipplePatternMaxSize); // Oops, even after style normalization the style is too long to be in gpu mem :( + for (uint32_t i = 0u; i < resourcesCollection.lineStyles.vector.size(); ++i) { - // Check if user is following proper usage, mainObjectIndex should be the last mainObj added before an autosubmit, because this is the only mainObj we want to maintain. - // See comments on`addMainObject_SubmitIfNeeded` function - // TODO: consider forcing this by not expose mainObjectIndex to user and keep track of a "currentMainObj" (?) - _NBL_DEBUG_BREAK_IF(mainObjectIndex != (currentMainObjectCount - 1u)); + const LineStyle& itr = resourcesCollection.lineStyles.vector[i]; + if (itr == gpuLineStyle) + return i; + } + + return resourcesCollection.lineStyles.addAndGetOffset(gpuLineStyle); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers +} + +uint32_t DrawResourcesFiller::addDTMSettings_Internal(const DTMSettingsInfo& dtmSettingsInfo, SIntendedSubmitInfo& intendedNextSubmit) +{ + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const size_t noOfLineStylesRequired = ((dtmSettingsInfo.mode & E_DTM_MODE::OUTLINE) ? 
1u : 0u) + dtmSettingsInfo.contourSettingsCount; + const size_t maxMemRequired = sizeof(DTMSettings) + noOfLineStylesRequired * sizeof(LineStyle); + const bool enoughMem = remainingResourcesSize >= maxMemRequired; // enough remaining memory for 1 more dtm settings with 2 referenced line styles? + + if (!enoughMem) + return InvalidDTMSettingsIdx; + // TODO: Maybe constraint by a max size? and return InvalidIdx if it would exceed - // If the clip projection stack is non-empty, then it means we need to re-push the clipProjectionData (because it existed in geometry data and it was erased) - uint64_t newClipProjectionAddress = acquireCurrentClipProjectionAddress(intendedNextSubmit); - // only re-upload mainObjData if it's clipProjectionAddress was changed - if (newClipProjectionAddress != getMainObject(mainObjectIndex)->clipProjectionAddress) + DTMSettings dtmSettings; + + ////dtmSettingsInfo.mode = E_DTM_MODE::HEIGHT_SHADING | E_DTM_MODE::CONTOUR | E_DTM_MODE::OUTLINE; + + dtmSettings.mode = dtmSettingsInfo.mode; + if (dtmSettings.mode & E_DTM_MODE::HEIGHT_SHADING) + { + switch (dtmSettingsInfo.heightShadingInfo.heightShadingMode) + { + case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: + dtmSettings.heightShadingSettings.intervalLength = std::numeric_limits::infinity(); + break; + case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: + dtmSettings.heightShadingSettings.intervalLength = dtmSettingsInfo.heightShadingInfo.intervalLength; + break; + case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: + dtmSettings.heightShadingSettings.intervalLength = 0.0f; + break; + } + dtmSettings.heightShadingSettings.intervalIndexToHeightMultiplier = dtmSettingsInfo.heightShadingInfo.intervalIndexToHeightMultiplier; + dtmSettings.heightShadingSettings.isCenteredShading = static_cast(dtmSettingsInfo.heightShadingInfo.isCenteredShading); + dtmSettingsInfo.heightShadingInfo.fillShaderDTMSettingsHeightColorMap(dtmSettings); + } + if (dtmSettings.mode & E_DTM_MODE::CONTOUR) + { + dtmSettings.contourSettingsCount = dtmSettingsInfo.contourSettingsCount; + for (uint32_t i = 0u; i < dtmSettings.contourSettingsCount; ++i) { - // then modify the mainObject data - getMainObject(mainObjectIndex)->clipProjectionAddress = newClipProjectionAddress; - // we need to rewind back inMemMainObjectCount to this mainObjIndex so it re-uploads the current mainObject (because we modified it) - inMemMainObjectCount = core::min(inMemMainObjectCount, mainObjectIndex); + dtmSettings.contourSettings[i].contourLinesStartHeight = dtmSettingsInfo.contourSettings[i].startHeight; + dtmSettings.contourSettings[i].contourLinesEndHeight = dtmSettingsInfo.contourSettings[i].endHeight; + dtmSettings.contourSettings[i].contourLinesHeightInterval = dtmSettingsInfo.contourSettings[i].heightInterval; + dtmSettings.contourSettings[i].contourLineStyleIdx = addLineStyle_Internal(dtmSettingsInfo.contourSettings[i].lineStyleInfo); } } + if (dtmSettings.mode & E_DTM_MODE::OUTLINE) + { + dtmSettings.outlineLineStyleIdx = addLineStyle_Internal(dtmSettingsInfo.outlineStyleInfo); + } - // TODO: Consider resetting MainObjects here as well and addMainObject for the new data again, but account for the fact that mainObjectIndex now changed (either change through uint32_t& or keeping track of "currentMainObj" in drawResourcesFiller -#else - resetMainObjectCounters(); + for (uint32_t i = 0u; i < resourcesCollection.dtmSettings.vector.size(); ++i) + { + const DTMSettings& itr = resourcesCollection.dtmSettings.vector[i]; + if (itr == dtmSettings) + 
return i; + } - // If there is a mainObject data we need to maintain and keep it's clipProjectionAddr valid - if (mainObjectIndex < maxMainObjects) + return resourcesCollection.dtmSettings.addAndGetOffset(dtmSettings); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers +} + +float64_t3x3 DrawResourcesFiller::getFixedGeometryFinalTransformationMatrix(const float64_t3x3& transformation, TransformationType transformationType) const +{ + if (!activeProjections.empty()) { - MainObject mainObjToMaintain = *getMainObject(mainObjectIndex); + float64_t3x3 newTransformation = nbl::hlsl::mul(activeProjections.back(), transformation); - // If the clip projection stack is non-empty, then it means we need to re-push the clipProjectionData (because it exists in geometry data and it was reset) - // `acquireCurrentClipProjectionAddress` shouldn't/won't trigger auto-submit because geometry buffer counters were reset and our geometry buffer is supposed to be larger than a single clipProjectionData - mainObjToMaintain->clipProjectionAddress = acquireCurrentClipProjectionAddress(intendedNextSubmit); - - // We're calling `addMainObject_Internal` instead of safer `addMainObject_SubmitIfNeeded` because we've reset our mainObject and we're sure this won't need an autoSubmit. - addMainObject_Internal(mainObjToMaintain); + if (transformationType == TransformationType::TT_NORMAL) + { + return newTransformation; + } + else if (transformationType == TransformationType::TT_FIXED_SCREENSPACE_SIZE) + { + // Extract normalized rotation columns + float64_t2 column0 = nbl::hlsl::normalize(float64_t2(newTransformation[0][0], newTransformation[1][0])); + float64_t2 column1 = nbl::hlsl::normalize(float64_t2(newTransformation[0][1], newTransformation[1][1])); + + // Extract fixed screen-space scale from the original transformation + float64_t2 fixedScale = float64_t2( + nbl::hlsl::length(float64_t2(transformation[0][0], transformation[1][0])), + nbl::hlsl::length(float64_t2(transformation[0][1], transformation[1][1]))); + + // Apply fixed scale to normalized directions + column0 *= fixedScale.x; + column1 *= fixedScale.y; + + // Compose final matrix with adjusted columns + newTransformation[0][0] = column0[0]; + newTransformation[1][0] = column0[1]; + newTransformation[0][1] = column1[0]; + newTransformation[1][1] = column1[1]; + + return newTransformation; + } + else + { + // Fallback if transformationType is unrecognized, shouldn't happen + return newTransformation; + } + } + else + { + // Within no active projection scope, return transformation directly + return transformation; } -#endif } -uint32_t DrawResourcesFiller::addMainObject_Internal(const MainObject& mainObject) +uint32_t DrawResourcesFiller::acquireActiveLineStyleIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) +{ + if (activeLineStyleIndex == InvalidStyleIdx) + activeLineStyleIndex = addLineStyle_SubmitIfNeeded(activeLineStyle, intendedNextSubmit); + + return activeLineStyleIndex; +} + +uint32_t DrawResourcesFiller::acquireActiveDTMSettingsIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) +{ + if (activeDTMSettingsIndex == InvalidDTMSettingsIdx) + activeDTMSettingsIndex = addDTMSettings_SubmitIfNeeded(activeDTMSettings, intendedNextSubmit); + + return activeDTMSettingsIndex; +} + +uint32_t DrawResourcesFiller::acquireActiveCustomProjectionIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit) +{ + if (activeProjectionIndices.empty()) + return 
InvalidCustomProjectionIndex;
+
+	if (activeProjectionIndices.back() == InvalidCustomProjectionIndex)
+		activeProjectionIndices.back() = addCustomProjection_SubmitIfNeeded(activeProjections.back(), intendedNextSubmit);
+
+	return activeProjectionIndices.back();
+}
+
+uint32_t DrawResourcesFiller::acquireActiveCustomClipRectIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit)
 {
-	MainObject* mainObjsArray = reinterpret_cast(cpuDrawBuffers.mainObjectsBuffer->getPointer());
+	if (activeClipRectIndices.empty())
+		return InvalidCustomClipRectIndex;
+
+	if (activeClipRectIndices.back() == InvalidCustomClipRectIndex)
+		activeClipRectIndices.back() = addCustomClipRect_SubmitIfNeeded(activeClipRects.back(), intendedNextSubmit);

-	if (currentMainObjectCount >= MaxIndexableMainObjects)
-		return InvalidMainObjectIdx;
-	if (currentMainObjectCount >= maxMainObjects)
+	return activeClipRectIndices.back();
+}
+
+uint32_t DrawResourcesFiller::acquireActiveMainObjectIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit)
+{
+	if (activeMainObjectIndex != InvalidMainObjectIdx)
+		return activeMainObjectIndex;
+	if (activeMainObjectType == MainObjectType::NONE)
+	{
+		assert(false); // You're probably trying to acquire mainObjectIndex outside of startMainObject, endMainObject scope
		return InvalidMainObjectIdx;
+	}

-	void* dst = mainObjsArray + currentMainObjectCount;
-	memcpy(dst, &mainObject, sizeof(MainObject));
-	uint32_t ret = currentMainObjectCount;
-	currentMainObjectCount++;
-	return ret;
+	const bool needsLineStyle =
+		(activeMainObjectType == MainObjectType::POLYLINE) ||
+		(activeMainObjectType == MainObjectType::HATCH) ||
+		(activeMainObjectType == MainObjectType::TEXT);
+	const bool needsDTMSettings = (activeMainObjectType == MainObjectType::DTM || activeMainObjectType == MainObjectType::GRID_DTM);
+	const bool needsCustomProjection = (!activeProjectionIndices.empty());
+	const bool needsCustomClipRect = (!activeClipRectIndices.empty());
+
+	const size_t remainingResourcesSize = calculateRemainingResourcesSize();
+	// making sure MainObject and everything it references fits into remaining resources mem
+	size_t memRequired = sizeof(MainObject);
+	if (needsLineStyle) memRequired += sizeof(LineStyle);
+	if (needsDTMSettings) memRequired += sizeof(DTMSettings);
+	if (needsCustomProjection) memRequired += sizeof(float64_t3x3);
+	if (needsCustomClipRect) memRequired += sizeof(WorldClipRect);
+
+	const bool enoughMem = remainingResourcesSize >= memRequired; // enough remaining memory for the MainObject and everything it references?
+	const bool needToOverflowSubmit = (!enoughMem) || (resourcesCollection.mainObjects.vector.size() >= MaxIndexableMainObjects);
+
+	if (needToOverflowSubmit)
+	{
+		// failed to fit into remaining resources mem or exceeded max indexable mainobj
+		submitDraws(intendedNextSubmit);
+		reset(); // resets everything! be careful!
+	}
+
+	MainObject mainObject = {};
+	// The acquire calls below shouldn't need to submit because we made sure there is enough memory for all of them.
+	// if something here triggers an auto-submit it's a possible bug with calculating `memRequired` above, TODO: assert that somehow?
+	mainObject.styleIdx = (needsLineStyle) ? acquireActiveLineStyleIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidStyleIdx;
+	mainObject.dtmSettingsIdx = (needsDTMSettings) ? acquireActiveDTMSettingsIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidDTMSettingsIdx;
+	mainObject.customProjectionIndex = (needsCustomProjection) ?
acquireActiveCustomProjectionIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidCustomProjectionIndex;
+	mainObject.customClipRectIndex = (needsCustomClipRect) ? acquireActiveCustomClipRectIndex_SubmitIfNeeded(intendedNextSubmit) : InvalidCustomClipRectIndex;
+	mainObject.transformationType = (uint32_t)activeMainObjectTransformationType;
+	activeMainObjectIndex = resourcesCollection.mainObjects.addAndGetOffset(mainObject);
+	return activeMainObjectIndex;
}

-uint32_t DrawResourcesFiller::addLineStyle_Internal(const LineStyleInfo& lineStyleInfo)
+uint32_t DrawResourcesFiller::addLineStyle_SubmitIfNeeded(const LineStyleInfo& lineStyle, SIntendedSubmitInfo& intendedNextSubmit)
{
-	LineStyle gpuLineStyle = lineStyleInfo.getAsGPUData();
-	_NBL_DEBUG_BREAK_IF(gpuLineStyle.stipplePatternSize > LineStyle::StipplePatternMaxSize); // Oops, even after style normalization the style is too long to be in gpu mem :(
-	LineStyle* stylesArray = reinterpret_cast(cpuDrawBuffers.lineStylesBuffer->getPointer());
-	for (uint32_t i = 0u; i < currentLineStylesCount; ++i)
+	uint32_t outLineStyleIdx = addLineStyle_Internal(lineStyle);
+	if (outLineStyleIdx == InvalidStyleIdx)
	{
-		const LineStyle& itr = stylesArray[i];
+		// There wasn't enough resource memory remaining to fit a single LineStyle
+		submitDraws(intendedNextSubmit);
+		reset(); // resets everything! be careful!

-		if (itr == gpuLineStyle)
-			return i;
+		outLineStyleIdx = addLineStyle_Internal(lineStyle);
+		assert(outLineStyleIdx != InvalidStyleIdx);
	}

-	if (currentLineStylesCount >= maxLineStyles)
-		return InvalidStyleIdx;
-
-	void* dst = stylesArray + currentLineStylesCount;
-	memcpy(dst, &gpuLineStyle, sizeof(LineStyle));
-	return currentLineStylesCount++;
+	return outLineStyleIdx;
}

-uint64_t DrawResourcesFiller::acquireCurrentClipProjectionAddress(SIntendedSubmitInfo& intendedNextSubmit)
+uint32_t DrawResourcesFiller::addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit)
{
-	if (clipProjectionAddresses.empty())
-		return InvalidClipProjectionAddress;
+	// `addDTMSettings_Internal` checks that there is enough memory left for the DTMSettings struct and the LineStyles it references
+	uint32_t outDTMSettingIdx = addDTMSettings_Internal(dtmSettings, intendedNextSubmit);
+	if (outDTMSettingIdx == InvalidDTMSettingsIdx)
+	{
+		// There wasn't enough resource memory remaining to fit the DTMSettings struct + its referenced LineStyle structs.
+		submitDraws(intendedNextSubmit);
+		reset(); // resets everything! be careful!
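// Aside, a minimal sketch (not part of the patch): the *_SubmitIfNeeded helpers around here all share
// the same "try, overflow-submit, retry" idiom. If it keeps multiplying, it could be captured once in a
// small member template; `addWithOverflowSubmit` is a hypothetical name and would need a declaration in
// the header, everything it calls (submitDraws/reset) is the existing class API.
template<typename TryAdd>
uint32_t DrawResourcesFiller::addWithOverflowSubmit(TryAdd&& tryAdd, const uint32_t invalidIndex, SIntendedSubmitInfo& intendedNextSubmit)
{
	uint32_t idx = tryAdd();
	if (idx == invalidIndex)
	{
		// not enough room left in resourcesGPUBuffer: flush what has been recorded so far and retry on an empty collection
		submitDraws(intendedNextSubmit);
		reset(); // resets everything! be careful!
		idx = tryAdd();
		assert(idx != invalidIndex); // a single entry is always expected to fit right after a reset
	}
	return idx;
}
// usage sketch: return addWithOverflowSubmit([&]{ return addLineStyle_Internal(lineStyle); }, InvalidStyleIdx, intendedNextSubmit);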
-	if (clipProjectionAddresses.back() == InvalidClipProjectionAddress)
-		clipProjectionAddresses.back() = addClipProjectionData_SubmitIfNeeded(clipProjections.back(), intendedNextSubmit);
-
-	return clipProjectionAddresses.back();
+		outDTMSettingIdx = addDTMSettings_Internal(dtmSettings, intendedNextSubmit);
+		assert(outDTMSettingIdx != InvalidDTMSettingsIdx);
+	}
+	return outDTMSettingIdx;
}

-uint64_t DrawResourcesFiller::addClipProjectionData_SubmitIfNeeded(const ClipProjectionData& clipProjectionData, SIntendedSubmitInfo& intendedNextSubmit)
+uint32_t DrawResourcesFiller::addCustomProjection_SubmitIfNeeded(const float64_t3x3& projection, SIntendedSubmitInfo& intendedNextSubmit)
{
-	uint64_t outClipProjectionAddress = addClipProjectionData_Internal(clipProjectionData);
-	if (outClipProjectionAddress == InvalidClipProjectionAddress)
+	const size_t remainingResourcesSize = calculateRemainingResourcesSize();
+	const size_t memRequired = sizeof(float64_t3x3);
+	const bool enoughMem = remainingResourcesSize >= memRequired; // enough remaining memory for 1 more custom projection matrix?
+
+	if (!enoughMem)
	{
-		finalizeAllCopiesToGPU(intendedNextSubmit);
		submitDraws(intendedNextSubmit);
-
-		resetGeometryCounters();
-		resetMainObjectCounters();
-
-		outClipProjectionAddress = addClipProjectionData_Internal(clipProjectionData);
-		assert(outClipProjectionAddress != InvalidClipProjectionAddress);
+		reset(); // resets everything! be careful!
	}
-	return outClipProjectionAddress;
+
+	resourcesCollection.customProjections.vector.push_back(projection); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers
+	return resourcesCollection.customProjections.vector.size() - 1u;
}

-uint64_t DrawResourcesFiller::addClipProjectionData_Internal(const ClipProjectionData& clipProjectionData)
+uint32_t DrawResourcesFiller::addCustomClipRect_SubmitIfNeeded(const WorldClipRect& clipRect, SIntendedSubmitInfo& intendedNextSubmit)
{
-	const uint64_t maxGeometryBufferClipProjData = (maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(ClipProjectionData);
-	if (maxGeometryBufferClipProjData <= 0)
-		return InvalidClipProjectionAddress;
-
-	void* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize;
-	memcpy(dst, &clipProjectionData, sizeof(ClipProjectionData));
+	const size_t remainingResourcesSize = calculateRemainingResourcesSize();
+	const size_t memRequired = sizeof(WorldClipRect);
+	const bool enoughMem = remainingResourcesSize >= memRequired; // enough remaining memory for 1 more custom clip rect?

-	const uint64_t ret = currentGeometryBufferSize + geometryBufferAddress;
-	currentGeometryBufferSize += sizeof(ClipProjectionData);
-	return ret;
+	if (!enoughMem)
+	{
+		submitDraws(intendedNextSubmit);
+		reset(); // resets everything! be careful!
+ } + + resourcesCollection.customClipRects.vector.push_back(clipRect); // this will implicitly increase total resource consumption and reduce remaining size --> no need for mem size trackers + return resourcesCollection.customClipRects.vector.size() - 1u; } void DrawResourcesFiller::addPolylineObjects_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx) @@ -738,39 +1924,49 @@ void DrawResourcesFiller::addPolylineObjects_Internal(const CPolylineBase& polyl void DrawResourcesFiller::addPolylineConnectors_Internal(const CPolylineBase& polyline, uint32_t& currentPolylineConnectorObj, uint32_t mainObjIdx) { - const uint32_t maxGeometryBufferConnectors = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(PolylineConnector)); - - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferConnectors); - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(PolylineConnector) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account: our maximum indexable vertex + const uint32_t connectorCount = static_cast(polyline.getConnectors().size()); const uint32_t remainingObjects = connectorCount - currentPolylineConnectorObj; - const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); + if (objectsToUpload <= 0u) + return; + + // Add Geometry + const auto connectorsByteSize = sizeof(PolylineConnector) * objectsToUpload; + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(connectorsByteSize, alignof(PolylineConnector)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + const PolylineConnector& connector = polyline.getConnectors()[currentPolylineConnectorObj]; + memcpy(dst, &connector, connectorsByteSize); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * objectsToUpload); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + for (uint32_t i = 0u; i < objectsToUpload; ++i) + { + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; + } + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(objectsToUpload); DrawObject drawObj = {}; drawObj.mainObjIndex = mainObjIdx; drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::POLYLINE_CONNECTOR) | 0 << 16); - drawObj.geometryAddress = geometryBufferAddress + currentGeometryBufferSize; + drawObj.geometryAddress = geometryBufferOffset; for (uint32_t i = 0u; i < objectsToUpload; ++i) { - void* dst = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; - memcpy(dst, &drawObj, sizeof(DrawObject)); - currentDrawObjectCount += 1u; + drawObjectsToBeFilled[i] = drawObj; drawObj.geometryAddress += sizeof(PolylineConnector); - } - - // Add Geometry - if (objectsToUpload > 0u) - { - 
const auto connectorsByteSize = sizeof(PolylineConnector) * objectsToUpload; - void* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - auto& connector = polyline.getConnectors()[currentPolylineConnectorObj]; - memcpy(dst, &connector, connectorsByteSize); - currentGeometryBufferSize += connectorsByteSize; - } + } currentPolylineConnectorObj += objectsToUpload; } @@ -780,154 +1976,545 @@ void DrawResourcesFiller::addLines_Internal(const CPolylineBase& polyline, const assert(section.count >= 1u); assert(section.type == ObjectType::LINE); - const uint32_t maxGeometryBufferPoints = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(LinePointInfo)); - const uint32_t maxGeometryBufferLines = (maxGeometryBufferPoints <= 1u) ? 0u : maxGeometryBufferPoints - 1u; - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferLines); - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + if (remainingResourcesSize < sizeof(LinePointInfo)) + return; + + // how many lines fit into mem? --> memConsumption = sizeof(LinePointInfo) + sizeof(LinePointInfo)*lineCount + sizeof(DrawObject)*lineCount + sizeof(uint32_t) * 6u * lineCount + const uint32_t uploadableObjects = (remainingResourcesSize - sizeof(LinePointInfo)) / (sizeof(LinePointInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account: our maximum indexable vertex const uint32_t lineCount = section.count; const uint32_t remainingObjects = lineCount - currentObjectInSection; - uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); + const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); + + if (objectsToUpload <= 0u) + return; + + // Add Geometry + const auto pointsByteSize = sizeof(LinePointInfo) * (objectsToUpload + 1u); + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(pointsByteSize, alignof(LinePointInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + const LinePointInfo& linePoint = polyline.getLinePointAt(section.index + currentObjectInSection); + memcpy(dst, &linePoint, pointsByteSize); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * objectsToUpload); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + for (uint32_t i = 0u; i < objectsToUpload; ++i) + { + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; + } // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(objectsToUpload); DrawObject drawObj = {}; drawObj.mainObjIndex = mainObjIdx; drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::LINE) | 0 << 16); - drawObj.geometryAddress = geometryBufferAddress + currentGeometryBufferSize; + drawObj.geometryAddress = geometryBufferOffset; for (uint32_t i = 0u; i < objectsToUpload; ++i) { - void* dst = 
reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; - memcpy(dst, &drawObj, sizeof(DrawObject)); - currentDrawObjectCount += 1u; + drawObjectsToBeFilled[i] = drawObj; drawObj.geometryAddress += sizeof(LinePointInfo); - } - - // Add Geometry - if (objectsToUpload > 0u) - { - const auto pointsByteSize = sizeof(LinePointInfo) * (objectsToUpload + 1u); - void* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - auto& linePoint = polyline.getLinePointAt(section.index + currentObjectInSection); - memcpy(dst, &linePoint, pointsByteSize); - currentGeometryBufferSize += pointsByteSize; - } + } currentObjectInSection += objectsToUpload; } void DrawResourcesFiller::addQuadBeziers_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx) { - constexpr uint32_t CagesPerQuadBezier = getCageCountPerPolylineObject(ObjectType::QUAD_BEZIER); + constexpr uint32_t CagesPerQuadBezier = 3u; // TODO: Break into 3 beziers in compute shader. + assert(section.type == ObjectType::QUAD_BEZIER); - const uint32_t maxGeometryBufferBeziers = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(QuadraticBezierInfo)); + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + // how many quad bezier objects fit into mem? + // memConsumption = quadBezCount * (sizeof(QuadraticBezierInfo) + 3*(sizeof(DrawObject)+6u*sizeof(uint32_t)) + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(QuadraticBezierInfo) + (sizeof(DrawObject) + 6u * sizeof(uint32_t)) * CagesPerQuadBezier); + // TODO[ERFAN]: later take into account: our maximum indexable vertex - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferBeziers); - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); - uploadableObjects /= CagesPerQuadBezier; - const uint32_t beziersCount = section.count; const uint32_t remainingObjects = beziersCount - currentObjectInSection; - uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); + const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); + const uint32_t cagesCount = objectsToUpload * CagesPerQuadBezier; + + if (objectsToUpload <= 0u) + return; + + // Add Geometry + const auto beziersByteSize = sizeof(QuadraticBezierInfo) * (objectsToUpload); + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(beziersByteSize, alignof(QuadraticBezierInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + const QuadraticBezierInfo& quadBezier = polyline.getQuadBezierInfoAt(section.index + currentObjectInSection); + memcpy(dst, &quadBezier, beziersByteSize); + + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u*cagesCount); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + for (uint32_t i = 0u; i < cagesCount; ++i) + { + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; + } + // Add 
DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(cagesCount); DrawObject drawObj = {}; drawObj.mainObjIndex = mainObjIdx; - drawObj.geometryAddress = geometryBufferAddress + currentGeometryBufferSize; + drawObj.geometryAddress = geometryBufferOffset; for (uint32_t i = 0u; i < objectsToUpload; ++i) { for (uint16_t subObject = 0; subObject < CagesPerQuadBezier; subObject++) { drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::QUAD_BEZIER) | (subObject << 16)); - void* dst = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; - memcpy(dst, &drawObj, sizeof(DrawObject)); - currentDrawObjectCount += 1u; + drawObjectsToBeFilled[i * CagesPerQuadBezier + subObject] = drawObj; } drawObj.geometryAddress += sizeof(QuadraticBezierInfo); } - // Add Geometry - if (objectsToUpload > 0u) - { - const auto beziersByteSize = sizeof(QuadraticBezierInfo) * (objectsToUpload); - void* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - auto& quadBezier = polyline.getQuadBezierInfoAt(section.index + currentObjectInSection); - memcpy(dst, &quadBezier, beziersByteSize); - currentGeometryBufferSize += beziersByteSize; - } currentObjectInSection += objectsToUpload; } void DrawResourcesFiller::addHatch_Internal(const Hatch& hatch, uint32_t& currentObjectInSection, uint32_t mainObjIndex) { - const uint32_t maxGeometryBufferHatchBoxes = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(Hatch::CurveHatchBox)); - - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferHatchBoxes); + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(Hatch::CurveHatchBox) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account: our maximum indexable vertex + uint32_t remainingObjects = hatch.getHatchBoxCount() - currentObjectInSection; - uploadableObjects = core::min(uploadableObjects, remainingObjects); - - for (uint32_t i = 0; i < uploadableObjects; i++) - { - const Hatch::CurveHatchBox& hatchBox = hatch.getHatchBox(i + currentObjectInSection); + const uint32_t objectsToUpload = core::min(uploadableObjects, remainingObjects); - uint64_t hatchBoxAddress; - { - static_assert(sizeof(CurveBox) == sizeof(Hatch::CurveHatchBox)); - void* dst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - memcpy(dst, &hatchBox, sizeof(CurveBox)); - hatchBoxAddress = geometryBufferAddress + currentGeometryBufferSize; - currentGeometryBufferSize += sizeof(CurveBox); - } + if (objectsToUpload <= 0u) + return; - DrawObject drawObj = {}; - drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::CURVE_BOX) | (0 << 16)); - drawObj.mainObjIndex = mainObjIndex; - drawObj.geometryAddress = hatchBoxAddress; - void* dst = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount + i; - memcpy(dst, &drawObj, sizeof(DrawObject)); + // Add Geometry + static_assert(sizeof(CurveBox) == sizeof(Hatch::CurveHatchBox)); + const auto curveBoxesByteSize = sizeof(Hatch::CurveHatchBox) * objectsToUpload; + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(curveBoxesByteSize, 
alignof(Hatch::CurveHatchBox)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + const Hatch::CurveHatchBox& hatchBox = hatch.getHatchBox(currentObjectInSection); // WARNING: This is assuming hatch boxes are contigous in memory, TODO: maybe make that more obvious through Hatch interface + memcpy(dst, &hatchBox, curveBoxesByteSize); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * objectsToUpload); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + for (uint32_t i = 0u; i < objectsToUpload; ++i) + { + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; + } + + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(objectsToUpload); + DrawObject drawObj = {}; + drawObj.mainObjIndex = mainObjIndex; + drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::CURVE_BOX) | (0 << 16)); + drawObj.geometryAddress = geometryBufferOffset; + for (uint32_t i = 0u; i < objectsToUpload; ++i) + { + drawObjectsToBeFilled[i] = drawObj; + drawObj.geometryAddress += sizeof(Hatch::CurveHatchBox); } // Add Indices - currentDrawObjectCount += uploadableObjects; currentObjectInSection += uploadableObjects; } bool DrawResourcesFiller::addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint32_t mainObjIdx) { - const uint32_t maxGeometryBufferFontGlyphs = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(GlyphInfo)); + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(GlyphInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account: our maximum indexable vertex - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferFontGlyphs); + if (uploadableObjects <= 0u) + return false; + + // Add Geometry + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(sizeof(GlyphInfo), alignof(GlyphInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + memcpy(dst, &glyphInfo, sizeof(GlyphInfo)); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * 1u); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + uint32_t i = 0u; + indexBufferToBeFilled[i*6] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 1u] = (startObj+i)*4u + 0u; + indexBufferToBeFilled[i*6 + 2u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 3u] = (startObj+i)*4u + 1u; + indexBufferToBeFilled[i*6 + 4u] = (startObj+i)*4u + 2u; + indexBufferToBeFilled[i*6 + 5u] = (startObj+i)*4u + 3u; + + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(1u); + DrawObject drawObj = {}; + drawObj.mainObjIndex = mainObjIdx; + drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::FONT_GLYPH) | (0 << 
16)); + drawObj.geometryAddress = geometryBufferOffset; + drawObjectsToBeFilled[0u] = drawObj; + + return true; +} + +bool DrawResourcesFiller::addGridDTM_Internal(const GridDTMInfo& gridDTMInfo, uint32_t mainObjIdx) +{ + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(GridDTMInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account: our maximum indexable vertex + + if (uploadableObjects <= 0u) + return false; + + // Add Geometry + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(sizeof(GridDTMInfo), alignof(GridDTMInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + memcpy(dst, &gridDTMInfo, sizeof(GridDTMInfo)); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + uint32_t i = 0u; + indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; + indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; + + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(1u); + DrawObject drawObj = {}; + drawObj.mainObjIndex = mainObjIdx; + drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::GRID_DTM) | (0 << 16)); + drawObj.geometryAddress = geometryBufferOffset; + drawObjectsToBeFilled[0u] = drawObj; + + return true; +} - if (uploadableObjects >= 1u) +bool DrawResourcesFiller::addImageObject_Internal(const ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx) +{ + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(ImageObjectInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account: our maximum indexable vertex + + if (uploadableObjects <= 0u) + return false; + + // Add Geometry + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(sizeof(ImageObjectInfo), alignof(ImageObjectInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + memcpy(dst, &imageObjectInfo, sizeof(ImageObjectInfo)); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * 1u); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + uint32_t i = 0u; + indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; + indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; + + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(1u); + DrawObject drawObj = {}; + drawObj.mainObjIndex = mainObjIdx; + drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::STATIC_IMAGE) | (0 << 16)); // TODO: use custom pack/unpack 
function + drawObj.geometryAddress = geometryBufferOffset; + drawObjectsToBeFilled[0u] = drawObj; + + return true; +} + +bool DrawResourcesFiller::addGeoreferencedImageInfo_Internal(const GeoreferencedImageInfo& georeferencedImageInfo, uint32_t mainObjIdx) +{ + const size_t remainingResourcesSize = calculateRemainingResourcesSize(); + + const uint32_t uploadableObjects = (remainingResourcesSize) / (sizeof(GeoreferencedImageInfo) + sizeof(DrawObject) + sizeof(uint32_t) * 6u); + // TODO[ERFAN]: later take into account: our maximum indexable vertex + + if (uploadableObjects <= 0u) + return false; + + // Add Geometry + size_t geometryBufferOffset = resourcesCollection.geometryInfo.increaseSizeAndGetOffset(sizeof(GeoreferencedImageInfo), alignof(GeoreferencedImageInfo)); + void* dst = resourcesCollection.geometryInfo.data() + geometryBufferOffset; + memcpy(dst, &georeferencedImageInfo, sizeof(GeoreferencedImageInfo)); + + // Push Indices, remove later when compute fills this + uint32_t* indexBufferToBeFilled = resourcesCollection.indexBuffer.increaseCountAndGetPtr(6u * 1u); + const uint32_t startObj = resourcesCollection.drawObjects.getCount(); + uint32_t i = 0u; + indexBufferToBeFilled[i * 6] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 1u] = (startObj + i) * 4u + 0u; + indexBufferToBeFilled[i * 6 + 2u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 3u] = (startObj + i) * 4u + 1u; + indexBufferToBeFilled[i * 6 + 4u] = (startObj + i) * 4u + 2u; + indexBufferToBeFilled[i * 6 + 5u] = (startObj + i) * 4u + 3u; + + // Add DrawObjs + DrawObject* drawObjectsToBeFilled = resourcesCollection.drawObjects.increaseCountAndGetPtr(1u); + DrawObject drawObj = {}; + drawObj.mainObjIndex = mainObjIdx; + drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::STREAMED_IMAGE) | (0 << 16)); // TODO: use custom pack/unpack function + drawObj.geometryAddress = geometryBufferOffset; + drawObjectsToBeFilled[0u] = drawObj; + + return true; +} + +uint32_t DrawResourcesFiller::getImageIndexFromID(image_id imageID, const SIntendedSubmitInfo& intendedNextSubmit) +{ + uint32_t textureIdx = InvalidTextureIndex; + CachedImageRecord* imageRef = imagesCache->get(imageID); + if (imageRef) + { + textureIdx = imageRef->arrayIndex; + imageRef->lastUsedFrameIndex = currentFrameIndex; // update this because the texture will get used on the next frane + } + return textureIdx; +} + +void DrawResourcesFiller::evictImage_SubmitIfNeeded(image_id imageID, const CachedImageRecord& evicted, SIntendedSubmitInfo& intendedNextSubmit) +{ + if (evicted.arrayIndex == InvalidTextureIndex) { - void* geomDst = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - memcpy(geomDst, &glyphInfo, sizeof(GlyphInfo)); - uint64_t fontGlyphAddr = geometryBufferAddress + currentGeometryBufferSize; - currentGeometryBufferSize += sizeof(GlyphInfo); + m_logger.log("evictImage_SubmitIfNeeded: `evicted.arrayIndex == InvalidTextureIndex` is true, shouldn't happen under normal circumstances.", nbl::system::ILogger::ELL_WARNING); + _NBL_DEBUG_BREAK_IF(true); + return; + } + // Later used to release the image's memory range. 
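// Aside, an illustrative sketch (not part of the patch): every add*_Internal above writes the same six
// indices per draw object, two triangles over an implicit 4-vertex cage in the order 1,0,2 / 1,2,3.
// Until the planned compute pass generates them, a tiny helper could factor out the repetition;
// `emitQuadCageIndices` is a hypothetical name, the index pattern is copied verbatim from the code above.
static void emitQuadCageIndices(uint32_t* out, const uint32_t firstDrawObject, const uint32_t objectCount)
{
	for (uint32_t i = 0u; i < objectCount; ++i)
	{
		const uint32_t base = (firstDrawObject + i) * 4u;
		out[i * 6u + 0u] = base + 1u;
		out[i * 6u + 1u] = base + 0u;
		out[i * 6u + 2u] = base + 2u;
		out[i * 6u + 3u] = base + 1u;
		out[i * 6u + 4u] = base + 2u;
		out[i * 6u + 5u] = base + 3u;
	}
}
// usage sketch: emitQuadCageIndices(indexBufferToBeFilled, resourcesCollection.drawObjects.getCount(), objectsToUpload);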
+ core::smart_refctd_ptr cleanupObject = core::make_smart_refctd_ptr(); + cleanupObject->imagesMemorySuballocator = imagesMemorySubAllocator; + cleanupObject->addr = evicted.allocationOffset; + cleanupObject->size = evicted.allocationSize; - DrawObject drawObj = {}; - drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::FONT_GLYPH) | (0 << 16)); - drawObj.mainObjIndex = mainObjIdx; - drawObj.geometryAddress = fontGlyphAddr; - void* drawObjDst = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; - memcpy(drawObjDst, &drawObj, sizeof(DrawObject)); - currentDrawObjectCount += 1u; + const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedFrameIndex == currentFrameIndex); - return true; + // NOTE: `deallocationWaitInfo` is crucial for both paths, we need to make sure we'll write to a descriptor arrayIndex when it's 100% done with previous usages. + if (imageUsedForNextIntendedSubmit) + { + // The evicted image is scheduled for use in the upcoming submit. + // To avoid rendering artifacts, we must flush the current draw queue now. + // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. + submitDraws(intendedNextSubmit); + reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded + + // Prepare wait info to defer index deallocation until the GPU has finished using the resource. + // we wait on the signal semaphore for the submit we just did above. + ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.arrayIndex, deallocationWaitInfo, &cleanupObject.get()); } else { - return false; + // The image is not used in the current frame, so we can deallocate without submitting any draws. + // Still wait on the semaphore to ensure past GPU usage is complete. + // TODO: We don't know which semaphore value the frame with `evicted.lastUsedFrameIndex` index was submitted with, so we wait for the worst case value conservatively, which is the immediate prev submit. + ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + suballocatedDescriptorSet->multi_deallocate(imagesArrayBinding, 1u, &evicted.arrayIndex, deallocationWaitInfo, &cleanupObject.get()); + } +} + +DrawResourcesFiller::ImageAllocateResults DrawResourcesFiller::tryCreateAndAllocateImage_SubmitIfNeeded( + const nbl::asset::IImage::SCreationParams& imageParams, + const asset::E_FORMAT imageViewFormatOverride, + nbl::video::SIntendedSubmitInfo& intendedNextSubmit, + std::string imageDebugName) +{ + ImageAllocateResults ret = {}; + + auto* device = m_utilities->getLogicalDevice(); + auto* physDev = m_utilities->getLogicalDevice()->getPhysicalDevice(); + + bool alreadyBlockedForDeferredFrees = false; + + // Attempt to create a GPU image and corresponding image view for this texture. + // If creation or memory allocation fails (likely due to VRAM exhaustion), + // we'll evict another texture from the LRU cache and retry until successful, or until only the currently-cachedImageRecord image remains. 
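// Aside, a minimal sketch (not part of the patch): the retry loop below only accepts images whose memory
// requirements are compatible with the single preallocated images arena. Factored out, the check reads as
// follows; `fitsImagesArena` is a hypothetical name, the three conditions are exactly the ones tested below.
static bool fitsImagesArena(const nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements& reqs, const uint32_t deviceLocalMemoryTypeBits)
{
	const uint32_t actualAlignment = 1u << reqs.alignmentLog2;
	return (deviceLocalMemoryTypeBits & reqs.memoryTypeBits) != 0u &&                  // must be able to live in the arena's device-local memory
		(reqs.requiresDedicatedAllocation == false) &&                                 // dedicated allocations can't be suballocated from the arena
		((ImagesMemorySubAllocator::MaxMemoryAlignment % actualAlignment) == 0u);      // suballocator's max alignment must cover the image's alignment
}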
+ while (imagesCache->size() > 0u) + { + // Try creating the image and allocating memory for it: + nbl::video::IGPUImage::SCreationParams params = {}; + params = imageParams; + + if (imageViewFormatOverride != asset::E_FORMAT::EF_COUNT && imageViewFormatOverride != imageParams.format) + { + params.viewFormats.set(static_cast(imageViewFormatOverride), true); + params.flags |= asset::IImage::E_CREATE_FLAGS::ECF_MUTABLE_FORMAT_BIT; + } + auto gpuImage = device->createImage(std::move(params)); + + if (gpuImage) + { + nbl::video::IDeviceMemoryBacked::SDeviceMemoryRequirements gpuImageMemoryRequirements = gpuImage->getMemoryReqs(); + uint32_t actualAlignment = 1u << gpuImageMemoryRequirements.alignmentLog2; + const bool imageMemoryRequirementsMatch = + (physDev->getDeviceLocalMemoryTypeBits() & gpuImageMemoryRequirements.memoryTypeBits) != 0 && // should have device local memory compatible + (gpuImageMemoryRequirements.requiresDedicatedAllocation == false) && // should not require dedicated allocation + ((ImagesMemorySubAllocator::MaxMemoryAlignment % actualAlignment) == 0u); // should be consistent with our suballocator's max alignment + + if (imageMemoryRequirementsMatch) + { + ret.allocationOffset = imagesMemorySubAllocator->allocate(gpuImageMemoryRequirements.size, 1u << gpuImageMemoryRequirements.alignmentLog2); + const bool allocationFromImagesMemoryArenaSuccessfull = ret.allocationOffset != ImagesMemorySubAllocator::InvalidAddress; + if (allocationFromImagesMemoryArenaSuccessfull) + { + ret.allocationSize = gpuImageMemoryRequirements.size; + nbl::video::ILogicalDevice::SBindImageMemoryInfo bindImageMemoryInfo = + { + .image = gpuImage.get(), + .binding = { .memory = imagesMemoryArena.memory.get(), .offset = imagesMemoryArena.offset + ret.allocationOffset } + }; + const bool boundToMemorySuccessfully = device->bindImageMemory({ &bindImageMemoryInfo, 1u }); + if (boundToMemorySuccessfully) + { + gpuImage->setObjectDebugName(imageDebugName.c_str()); + IGPUImageView::SCreationParams viewParams = { + .image = gpuImage, + .viewType = IGPUImageView::ET_2D, + .format = (imageViewFormatOverride == asset::E_FORMAT::EF_COUNT) ? gpuImage->getCreationParameters().format : imageViewFormatOverride + }; + + const uint32_t channelCount = nbl::asset::getFormatChannelCount(viewParams.format); + if (channelCount == 1u) + { + // for rendering grayscale: + viewParams.components.r = nbl::asset::IImageViewBase::SComponentMapping::E_SWIZZLE::ES_R; + viewParams.components.g = nbl::asset::IImageViewBase::SComponentMapping::E_SWIZZLE::ES_R; + viewParams.components.b = nbl::asset::IImageViewBase::SComponentMapping::E_SWIZZLE::ES_R; + viewParams.components.a = nbl::asset::IImageViewBase::SComponentMapping::E_SWIZZLE::ES_ONE; + } + + ret.gpuImageView = device->createImageView(std::move(viewParams)); + if (ret.gpuImageView) + { + // SUCCESS! + ret.gpuImageView->setObjectDebugName((imageDebugName + " View").c_str()); + } + else + { + // irrecoverable error if simple image creation fails. + m_logger.log("tryCreateAndAllocateImage_SubmitIfNeeded: gpuImageView creation failed, that's rare and irrecoverable when adding a new image.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + } + + // succcessful with everything, just break and get out of this retry loop + break; + } + else + { + // irrecoverable error if simple bindImageMemory fails. 
+ m_logger.log("tryCreateAndAllocateImage_SubmitIfNeeded: bindImageMemory failed, that's irrecoverable when adding a new image.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + break; + } + } + else + { + // printf(std::format("Allocation Failed, Trying again, ImageID={} Size={} \n", imageID, gpuImageMemoryRequirements.size).c_str()); + // recoverable error when allocation fails, we don't log anything, next code will try evicting other images and retry + } + } + else + { + m_logger.log("tryCreateAndAllocateImage_SubmitIfNeeded: memory requirements of the gpu image don't match our preallocated device memory, that's irrecoverable when adding a new image.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + break; + } + } + else + { + m_logger.log("tryCreateAndAllocateImage_SubmitIfNeeded: gpuImage creation failed, that's irrecoverable when adding a new image.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + break; + } + + // Getting here means we failed creating or allocating the image, evict and retry. + + + // If the imagesCache size is 1, there is nothing else to evict, but there may still be pending evictions/frees queued up. + // `cull_frees` will make sure all pending deallocations are waited on and completed. + if (imagesCache->size() == 1u && alreadyBlockedForDeferredFrees) + { + // We give up, there's really nothing we can do: no image left to evict (imagesCache->size()==1) and no pending frees left to wait on (alreadyBlockedForDeferredFrees). + // We probably have evicted almost every other texture except the one we just allocated an index for. + // This is most likely due to the current image's memory requirement being greater than the whole memory allocated for all images + m_logger.log("tryCreateAndAllocateImage_SubmitIfNeeded: failed allocating an image, there is nothing more from the cache to evict, the current memory requirement is simply greater than the whole memory allocated for all images.", nbl::system::ILogger::ELL_ERROR); + _NBL_DEBUG_BREAK_IF(true); + break; + } + + if (imagesCache->size() > 1u) + { + const image_id evictionCandidate = imagesCache->select_eviction_candidate(); + CachedImageRecord* imageRef = imagesCache->peek(evictionCandidate); + if (imageRef) + evictImage_SubmitIfNeeded(evictionCandidate, *imageRef, intendedNextSubmit); + imagesCache->erase(evictionCandidate); + } + + while (suballocatedDescriptorSet->cull_frees()) {}; // to make sure deallocation requests in the eviction callback are waited on and completed. + alreadyBlockedForDeferredFrees = true; + + // we don't hold any references to the GPUImageView or GPUImage so descriptor binding will be the last reference + // hopefully by here the suballocated descriptor set freed some VRAM by dropping the image's last ref and its dedicated allocation. + } + + return ret; +} + +void DrawResourcesFiller::determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, ImageType& outImageType, const GeoreferencedImageParams& georeferencedImageParams) +{ + // Decide whether the image can reside fully in memory rather than be streamed.
+ // TODO: Improve logic, currently just a simple check to see if the full-resolution image has more pixels than the viewport or not + // TODO: add a criterion that the size of the full-res image shouldn't consume more than 30% of the total memory arena for images (if we allowed larger than viewport extents) + const bool betterToResideFullyInMem = georeferencedImageParams.imageExtents.x * georeferencedImageParams.imageExtents.y <= georeferencedImageParams.viewportExtents.x * georeferencedImageParams.viewportExtents.y; + + if (betterToResideFullyInMem) + outImageType = ImageType::GEOREFERENCED_FULL_RESOLUTION; + else + outImageType = ImageType::GEOREFERENCED_STREAMED; + + outImageParams.type = asset::IImage::ET_2D; + outImageParams.samples = asset::IImage::ESCF_1_BIT; + outImageParams.format = georeferencedImageParams.format; + + if (outImageType == ImageType::GEOREFERENCED_FULL_RESOLUTION) + { + outImageParams.extent = { georeferencedImageParams.imageExtents.x, georeferencedImageParams.imageExtents.y, 1u }; + } + else + { + // TODO: Better Logic, area around the view, etc... + outImageParams.extent = { georeferencedImageParams.viewportExtents.x, georeferencedImageParams.viewportExtents.y, 1u }; } + + + outImageParams.mipLevels = 1u; // TODO: Later do mipmapping + outImageParams.arrayLayers = 1u; } void DrawResourcesFiller::setGlyphMSDFTextureFunction(const GetGlyphMSDFTextureFunc& func) @@ -940,45 +2527,94 @@ void DrawResourcesFiller::setHatchFillMSDFTextureFunction(const GetHatchFillPatt getHatchFillPatternMSDF = func; } -uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, uint32_t mainObjIdx, SIntendedSubmitInfo& intendedNextSubmit) +void DrawResourcesFiller::markFrameUsageComplete(uint64_t drawSubmitWaitValue) +{ + currentFrameIndex++; + // TODO[LATER]: take into account that currentFrameIndex was submitted with drawSubmitWaitValue; Use that value when deallocating the resources marked with this frame index + // Currently, for evictions the worst case value will be waited for, as there is no way yet to know which semaphore value will signal the completion of the (to be evicted) resource's usage +} + +uint32_t DrawResourcesFiller::getMSDFIndexFromInputInfo(const MSDFInputInfo& msdfInfo, const SIntendedSubmitInfo& intendedNextSubmit) +{ + uint32_t textureIdx = InvalidTextureIndex; + MSDFReference* tRef = msdfLRUCache->get(msdfInfo); + if (tRef) + { + textureIdx = tRef->alloc_idx; + tRef->lastUsedFrameIndex = currentFrameIndex; // update this because the texture will get used on the next frame + } + return textureIdx; +} + +uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, SIntendedSubmitInfo& intendedNextSubmit) { if (!cpuImage) - return InvalidTextureIdx; // TODO: Log + { + m_logger.log("addMSDFTexture: cpuImage is nullptr.", nbl::system::ILogger::ELL_ERROR); + return InvalidTextureIndex; + } const auto cpuImageSize = cpuImage->getMipSize(0); const bool sizeMatch = cpuImageSize.x == getMSDFResolution().x && cpuImageSize.y == getMSDFResolution().y && cpuImageSize.z == 1u; if (!sizeMatch) - return InvalidTextureIdx; // TODO: Log - - // TextureReferences hold the semaValue related to the "scratch semaphore" in IntendedSubmitInfo - // Every single submit increases this value by 1 - // The reason for hiolding on to the lastUsedSema is deferred dealloc, which we call in the case of eviction, making sure we get rid of the entry inside the allocator only when the texture is done being
used - const auto nextSemaSignal = intendedNextSubmit.getFutureScratchSemaphore(); + { + m_logger.log("addMSDFTexture: cpuImage size doesn't match with msdf array image.", nbl::system::ILogger::ELL_ERROR); + return InvalidTextureIndex; + } + /* + * The `msdfTextureArrayIndexAllocator` manages indices (slots) into a texture array for MSDF images. + * When all slots are occupied, the least recently used entry is evicted via `msdfLRUCache`. + * This callback is invoked on eviction, and must: + * - Ensure safe deallocation of the slot. + * - Submit any pending draw calls if the evicted MSDF was scheduled to be used in the upcoming submission. + */ auto evictionCallback = [&](const MSDFReference& evicted) { - if (msdfTextureArrayIndicesUsed.contains(evicted.alloc_idx)) + // `deallocationWaitInfo` is used to prepare wait info to defer index deallocation until the GPU has finished using the resource. + // NOTE: `deallocationWaitInfo` is currently *not* required for correctness because: + // - Both the image upload (msdfImagesState) and usage occur within the same timeline (`intendedNextSubmit`). + // - timeline semaphores guarantee proper ordering: the next submit's msdfImagesState will wait on the prior usage. + // - Therefore, we can safely overwrite or reallocate the slot without waiting for explicit GPU completion. + // + // However, this `deallocationWaitInfo` *will* become essential if we start interacting with MSDF images + // outside the `intendedNextSubmit` timeline for example, issuing uploads via a transfer queue or using a separate command buffer and timeline. + + const bool imageUsedForNextIntendedSubmit = (evicted.lastUsedFrameIndex == currentFrameIndex); + + if (imageUsedForNextIntendedSubmit) { - // Dealloc once submission is finished - msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, nextSemaSignal); - - // If we reset main objects will cause an auto submission bug, where adding an msdf texture while constructing glyphs will have wrong main object references (See how SingleLineTexts add Glyphs with a single mainObject) - // for the same reason we don't reset line styles - // `submitCurrentObjectsAndReset` function handles the above + updating clipProjectionData and making sure the mainObjectIdx references to the correct clipProj data after reseting geometry buffer - submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); + // The evicted image is scheduled for use in the upcoming submit. + // To avoid rendering artifacts, we must flush the current draw queue now. + // After submission, we reset state so that data referencing the evicted slot can be re-uploaded. + submitDraws(intendedNextSubmit); + reset(); // resets everything, things referenced through mainObj and other shit will be pushed again through acquireXXX_SubmitIfNeeded + + // Prepare wait info to defer index deallocation until the GPU has finished using the resource. + // we wait on the signal semaphore for the submit we just did above. + ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, deallocationWaitInfo); } else { - // We didn't use it this frame, so it's safe to dealloc now, withou needing to "overflow" submit - msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx); + // The image is not used in the current frame, so we can deallocate without submitting any draws. 
+ // Still wait on the semaphore to ensure past GPU usage is complete. + // TODO: We don't know which semaphore value the frame with `evicted.lastUsedFrameIndex` index was submitted with, so we wait for the worst case value which is the immediate prev submit (scratchSemaphore.value). + ISemaphore::SWaitInfo deallocationWaitInfo = { .semaphore = intendedNextSubmit.scratchSemaphore.semaphore, .value = intendedNextSubmit.scratchSemaphore.value }; + msdfTextureArrayIndexAllocator->multi_deallocate(1u, &evicted.alloc_idx, deallocationWaitInfo); } + + // Clear CPU-side metadata associated with the evicted slot. + msdfImagesState[evicted.alloc_idx].evict(); }; // We pass nextSemaValue instead of constructing a new MSDFReference and passing it into `insert` that's because we might get a cache hit and only update the value of the nextSema - MSDFReference* inserted = msdfLRUCache->insert(msdfInput, nextSemaSignal.value, evictionCallback); + MSDFReference* inserted = msdfLRUCache->insert(msdfInput, currentFrameIndex, evictionCallback); - // if inserted->alloc_idx was not InvalidTextureIdx then it means we had a cache hit and updated the value of our sema, in which case we don't queue anything for upload, and return the idx - if (inserted->alloc_idx == InvalidTextureIdx) + inserted->lastUsedFrameIndex = currentFrameIndex; // in case there was an eviction + auto-submit, we need to update AGAIN + + // if cachedImageRecord->alloc_idx was not InvalidTextureIndex then it means we had a cache hit and updated the value of our sema, in which case we don't queue anything for upload, and return the idx + if (inserted->alloc_idx == InvalidTextureIndex) { // New insertion == cache miss happened and insertion was successfull inserted->alloc_idx = IndexAllocator::AddressAllocator::invalid_address; @@ -986,19 +2622,31 @@ uint32_t DrawResourcesFiller::addMSDFTexture(const MSDFInputInfo& msdfInput, cor if (inserted->alloc_idx != IndexAllocator::AddressAllocator::invalid_address) { - // We queue copy and finalize all on `finalizeTextureCopies` function called before draw calls to make sure it's in mem - msdfTextureCopies.push_back({ .image = std::move(cpuImage), .index = inserted->alloc_idx }); + // We stage msdfImagesState, pushMSDFImagesUploads will push it into GPU + msdfImagesState[inserted->alloc_idx].image = std::move(cpuImage); + msdfImagesState[inserted->alloc_idx].uploadedToGPU = false; } else { - // TODO: log here, assert will be called in a few lines - inserted->alloc_idx = InvalidTextureIdx; + m_logger.log("addMSDFTexture: index allocation failed.", nbl::system::ILogger::ELL_ERROR); + inserted->alloc_idx = InvalidTextureIndex; } } - assert(inserted->alloc_idx != InvalidTextureIdx); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed - if (inserted->alloc_idx != InvalidTextureIdx) - msdfTextureArrayIndicesUsed.emplace(inserted->alloc_idx); + assert(inserted->alloc_idx != InvalidTextureIndex); // shouldn't happen, because we're using LRU cache, so worst case eviction will happen + multi-deallocate and next next multi_allocate should definitely succeed return inserted->alloc_idx; +} + +void DrawResourcesFiller::flushDrawObjects() +{ + if (resourcesCollection.drawObjects.getCount() > drawObjectsFlushedToDrawCalls) + { + DrawCallData drawCall = {}; + drawCall.isDTMRendering = false; + drawCall.drawObj.drawObjectStart = drawObjectsFlushedToDrawCalls; + drawCall.drawObj.drawObjectCount = 
resourcesCollection.drawObjects.getCount() - drawObjectsFlushedToDrawCalls; + drawCalls.push_back(drawCall); + drawObjectsFlushedToDrawCalls = resourcesCollection.drawObjects.getCount(); + } } \ No newline at end of file diff --git a/62_CAD/DrawResourcesFiller.h b/62_CAD/DrawResourcesFiller.h index e20514651..547926767 100644 --- a/62_CAD/DrawResourcesFiller.h +++ b/62_CAD/DrawResourcesFiller.h @@ -1,11 +1,13 @@ #pragma once #include "Polyline.h" +#include "CTriangleMesh.h" #include "Hatch.h" #include "IndexAllocator.h" +#include "Images.h" #include #include #include - +// #include using namespace nbl; using namespace nbl::video; using namespace nbl::core; @@ -13,20 +15,8 @@ using namespace nbl::asset; using namespace nbl::ext::TextRendering; static_assert(sizeof(DrawObject) == 16u); -static_assert(sizeof(MainObject) == 16u); -static_assert(sizeof(Globals) == 128u); +static_assert(sizeof(MainObject) == 20u); static_assert(sizeof(LineStyle) == 88u); -static_assert(sizeof(ClipProjectionData) == 88u); - -template -struct DrawBuffers -{ - smart_refctd_ptr indexBuffer; // only is valid for IGPUBuffer because it's filled at allocation time and never touched again - smart_refctd_ptr mainObjectsBuffer; - smart_refctd_ptr drawObjectsBuffer; - smart_refctd_ptr geometryBuffer; - smart_refctd_ptr lineStylesBuffer; -}; // ! DrawResourcesFiller // ! This class provides important functionality to manage resources needed for a draw. @@ -37,27 +27,149 @@ struct DrawBuffers struct DrawResourcesFiller { public: + + // We pack multiple data types in a single buffer, we need to makes sure each offset starts aligned to avoid mis-aligned accesses + static constexpr size_t GPUStructsMaxNaturalAlignment = 8u; + static constexpr size_t MinimumDrawResourcesMemorySize = 512u * 1 << 20u; // 512MB - typedef uint32_t index_buffer_type; + /// @brief general parent struct for 1.ReservedCompute and 2.CPUGenerated Resources + struct ResourceBase + { + static constexpr size_t InvalidBufferOffset = ~0u; + size_t bufferOffset = InvalidBufferOffset; // set when copy to gpu buffer is issued + virtual size_t getCount() const = 0; + virtual size_t getStorageSize() const = 0; + virtual size_t getAlignedStorageSize() const { return core::alignUp(getStorageSize(), GPUStructsMaxNaturalAlignment); } + }; - DrawResourcesFiller(); + /// @brief ResourceBase reserved for compute shader stages input/output + template + struct ReservedComputeResource : ResourceBase + { + size_t count = 0ull; + size_t getCount() const override { return count; } + size_t getStorageSize() const override { return count * sizeof(T); } + }; - DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue); + /// @brief ResourceBase which is filled by CPU, packed and sent to GPU + template + struct CPUGeneratedResource : ResourceBase + { + core::vector vector; + size_t getCount() const { return vector.size(); } + size_t getStorageSize() const { return vector.size() * sizeof(T); } + + /// @return pointer to start of the data to be filled, up to additionalCount + T* increaseCountAndGetPtr(size_t additionalCount) + { + size_t offset = vector.size(); + vector.resize(offset + additionalCount); + return &vector[offset]; + } - typedef std::function SubmitFunc; - void setSubmitDrawsFunction(const SubmitFunc& func); + /// @brief increases size of general-purpose resources that hold bytes + /// @param alignment: Alignment of the pointer returned to be filled, should be PoT and <= GPUStructsMaxNaturalAlignment, only use this if storing raw bytes in vector + /// @return 
pointer to start of the data to be filled, up to additional size + size_t increaseSizeAndGetOffset(size_t additionalSize, size_t alignment) + { + assert(core::isPoT(alignment) && alignment <= GPUStructsMaxNaturalAlignment); + size_t offset = core::alignUp(vector.size(), alignment); + vector.resize(offset + additionalSize); + return offset; + } + + uint32_t addAndGetOffset(const T& val) + { + vector.push_back(val); + return vector.size() - 1u; + } - void allocateIndexBuffer(ILogicalDevice* logicalDevice, uint32_t indices); + T* data() { return vector.data(); } + }; + + /// @brief struct to hold all resources + // TODO: rename to staged resources buffers or something like that + struct ResourcesCollection + { + // auto-submission level 0 resources (settings that mainObj references) + CPUGeneratedResource lineStyles; + CPUGeneratedResource dtmSettings; + CPUGeneratedResource customProjections; + CPUGeneratedResource customClipRects; + + // auto-submission level 1 buffers (mainObj that drawObjs references, if all drawObjs+idxBuffer+geometryInfo doesn't fit into mem this will be broken down into many) + CPUGeneratedResource mainObjects; + + // auto-submission level 2 buffers + CPUGeneratedResource drawObjects; + CPUGeneratedResource indexBuffer; // TODO: this is going to change to ReservedComputeResource where index buffer gets filled by compute shaders + CPUGeneratedResource geometryInfo; // general purpose byte buffer for custom data for geometries (eg. line points, bezier definitions, aabbs) + + // Get Total memory consumption, If all ResourcesCollection get packed together with GPUStructsMaxNaturalAlignment + // used to decide the remaining memory and when to overflow + size_t calculateTotalConsumption() const + { + return + lineStyles.getAlignedStorageSize() + + dtmSettings.getAlignedStorageSize() + + customProjections.getAlignedStorageSize() + + customClipRects.getAlignedStorageSize() + + mainObjects.getAlignedStorageSize() + + drawObjects.getAlignedStorageSize() + + indexBuffer.getAlignedStorageSize() + + geometryInfo.getAlignedStorageSize(); + } + }; + + DrawResourcesFiller(); - void allocateMainObjectsBuffer(ILogicalDevice* logicalDevice, uint32_t mainObjects); + DrawResourcesFiller(smart_refctd_ptr&& utils, IQueue* copyQueue, core::smart_refctd_ptr&& logger); - void allocateDrawObjectsBuffer(ILogicalDevice* logicalDevice, uint32_t drawObjects); + typedef std::function SubmitFunc; + void setSubmitDrawsFunction(const SubmitFunc& func); + + // DrawResourcesFiller needs to access these in order to allocate GPUImages and write the to their correct descriptor set binding + void setTexturesDescriptorSetAndBinding(core::smart_refctd_ptr&& descriptorSet, uint32_t binding); - void allocateGeometryBuffer(ILogicalDevice* logicalDevice, size_t size); + /// @brief Get minimum required size for resources buffer (containing objects and geometry info and their settings) + static constexpr size_t getMinimumRequiredResourcesBufferSize() + { + // for auto-submission to work correctly, memory needs to serve at least 2 linestyle, 1 dtm settings, 1 clip proj, 1 main obj, 1 draw obj and 512 bytes of additional mem for geometries and index buffer + // this is the ABSOLUTE MINIMUM (if this value is used rendering will probably be as slow as CPU drawing :D) + return core::alignUp(sizeof(LineStyle) + sizeof(LineStyle) * DTMSettings::MaxContourSettings + sizeof(DTMSettings) + sizeof(WorldClipRect) + sizeof(float64_t3x3) + sizeof(MainObject) + sizeof(DrawObject) + 512ull, GPUStructsMaxNaturalAlignment); + } - void 
allocateStylesBuffer(ILogicalDevice* logicalDevice, uint32_t lineStylesCount); + /** + * @brief Attempts to allocate a single contiguous device-local memory block for draw resources, divided into image and buffer sections. + * + * The function allocates a single memory block and splits it into image and buffer arenas. + * + * @param logicalDevice Pointer to the logical device used for memory allocation and resource creation. + * @param requiredImageMemorySize The size in bytes of the memory required for images. + * @param requiredBufferMemorySize The size in bytes of the memory required for buffers. + * + * @return true if the memory allocation and resource setup succeeded; false otherwise. + */ + bool allocateDrawResources(ILogicalDevice* logicalDevice, size_t requiredImageMemorySize, size_t requiredBufferMemorySize); - void allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent); + /** + * @brief Attempts to allocate draw resources within a given VRAM budget, retrying with progressively smaller sizes on failure. + * + * This function preserves the initial image-to-buffer memory ratio. If the initial sizes are too small, + * it scales them up to meet a minimum required threshold. On allocation failure, it reduces the memory + * sizes by a specified percentage and retries, until it either succeeds or the number of attempts exceeds `maxTries`. + * + * @param logicalDevice Pointer to the logical device used for allocation. + * @param maxImageMemorySize Initial image memory size (in bytes) to attempt allocation with. + * @param maxBufferMemorySize Initial buffer memory size (in bytes) to attempt allocation with. + * @param reductionPercent The percentage by which to reduce the memory sizes after each failed attempt (e.g., 10 means reduce by 10%). + * @param maxTries Maximum number of attempts to try reducing and allocating memory. + * + * @return true if the allocation succeeded at any iteration; false if all attempts failed. + */ + bool allocateDrawResourcesWithinAvailableVRAM(ILogicalDevice* logicalDevice, size_t maxImageMemorySize, size_t maxBufferMemorySize, uint32_t reductionPercent = 10u, uint32_t maxTries = 32u); + + bool allocateMSDFTextures(ILogicalDevice* logicalDevice, uint32_t maxMSDFs, uint32_t2 msdfsExtent); // functions that user should set to get MSDF texture if it's not available in cache. // it's up to user to return cached or generate on the fly. @@ -66,6 +178,13 @@ struct DrawResourcesFiller void setGlyphMSDFTextureFunction(const GetGlyphMSDFTextureFunc& func); void setHatchFillMSDFTextureFunction(const GetHatchFillPatternMSDFTextureFunc& func); + // Must be called at the end of each frame. + // right before submitting the main draw that uses the currently queued geometry, images, or other objects/resources. + // Registers the semaphore/value that will signal completion of this frame's draw, + // This allows future frames to safely deallocate or evict resources used in the current frame by waiting on this signal before reuse or destruction.
+ // `drawSubmitWaitValue` should reference the wait value of the draw submission finishing this frame using the `intendedNextSubmit`; + void markFrameUsageComplete(uint64_t drawSubmitWaitValue); + // TODO[Przemek]: try to draft up a `CTriangleMesh` Class in it's own header (like CPolyline), simplest form is basically two cpu buffers (1 array of uint index buffer, 1 array of float64_t3 vertexBuffer) // TODO[Przemek]: Then have a `drawMesh` function here similar to drawXXX's below, this will fit both vertex and index buffer in the `geometryBuffer`. // take a `SIntendedSubmitInfo` like others, but don't use it as I don't want you to handle anything regarding autoSubmit @@ -74,8 +193,19 @@ struct DrawResourcesFiller //! this function fills buffers required for drawing a polyline and submits a draw through provided callback when there is not enough memory. void drawPolyline(const CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, SIntendedSubmitInfo& intendedNextSubmit); - void drawPolyline(const CPolylineBase& polyline, uint32_t polylineMainObjIdx, SIntendedSubmitInfo& intendedNextSubmit); + //! Draws a fixed-geometry polyline using a custom transformation. + //! TODO: Change `polyline` input to an ID referencing a possibly cached instance in our buffers, allowing reuse and avoiding redundant uploads. + void drawFixedGeometryPolyline(const CPolylineBase& polyline, const LineStyleInfo& lineStyleInfo, const float64_t3x3& transformation, TransformationType transformationType, SIntendedSubmitInfo& intendedNextSubmit); + + /// Use this in a begin/endMainObject scope when you want to draw different polylines that should essentially be a single main object (no self-blending between components of a single main object) + /// WARNING: make sure this function is called within begin/endMainObject scope + void drawPolyline(const CPolylineBase& polyline, SIntendedSubmitInfo& intendedNextSubmit); + void drawTriangleMesh( + const CTriangleMesh& mesh, + const DTMSettingsInfo& dtmSettingsInfo, + SIntendedSubmitInfo& intendedNextSubmit); + // ! Convinience function for Hatch with MSDF Pattern and a solid background void drawHatch( const Hatch& hatch, @@ -96,8 +226,36 @@ struct DrawResourcesFiller const Hatch& hatch, const float32_t4& color, SIntendedSubmitInfo& intendedNextSubmit); + + //! Convinience function for fixed-geometry Hatch with MSDF Pattern and a solid background + void drawFixedGeometryHatch( + const Hatch& hatch, + const float32_t4& foregroundColor, + const float32_t4& backgroundColor, + const HatchFillPattern fillPattern, + const float64_t3x3& transformation, + TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit); - // ! Draw Font Glyph, will auto submit if there is no space + // ! Fixed-geometry Hatch with MSDF Pattern + void drawFixedGeometryHatch( + const Hatch& hatch, + const float32_t4& color, + const HatchFillPattern fillPattern, + const float64_t3x3& transformation, + TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit); + + // ! 
Solid Fill Fixed-geometry Hatch + void drawFixedGeometryHatch( + const Hatch& hatch, + const float32_t4& color, + const float64_t3x3& transformation, + TransformationType transformationType, + SIntendedSubmitInfo& intendedNextSubmit); + + /// Used by SingleLineText, Issue drawing a font glyph + /// WARNING: make sure this function is called within begin/endMainObject scope void drawFontGlyph( nbl::ext::TextRendering::FontFace* fontFace, uint32_t glyphIdx, @@ -105,113 +263,139 @@ struct DrawResourcesFiller float32_t2 dirU, float32_t aspectRatio, float32_t2 minUV, - uint32_t mainObjIdx, SIntendedSubmitInfo& intendedNextSubmit); - - void _test_addImageObject( - float64_t2 topLeftPos, - float32_t2 size, - float32_t rotation, - SIntendedSubmitInfo& intendedNextSubmit) - { - auto addImageObject_Internal = [&](const ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx) -> bool - { - const uint32_t maxGeometryBufferImageObjects = static_cast((maxGeometryBufferSize - currentGeometryBufferSize) / sizeof(ImageObjectInfo)); - uint32_t uploadableObjects = (maxIndexCount / 6u) - currentDrawObjectCount; - uploadableObjects = core::min(uploadableObjects, maxDrawObjects - currentDrawObjectCount); - uploadableObjects = core::min(uploadableObjects, maxGeometryBufferImageObjects); - - if (uploadableObjects >= 1u) - { - void* dstGeom = reinterpret_cast(cpuDrawBuffers.geometryBuffer->getPointer()) + currentGeometryBufferSize; - memcpy(dstGeom, &imageObjectInfo, sizeof(ImageObjectInfo)); - uint64_t geomBufferAddr = geometryBufferAddress + currentGeometryBufferSize; - currentGeometryBufferSize += sizeof(ImageObjectInfo); - - DrawObject drawObj = {}; - drawObj.type_subsectionIdx = uint32_t(static_cast(ObjectType::IMAGE) | (0 << 16)); // TODO: use custom pack/unpack function - drawObj.mainObjIndex = mainObjIdx; - drawObj.geometryAddress = geomBufferAddr; - void* dstDrawObj = reinterpret_cast(cpuDrawBuffers.drawObjectsBuffer->getPointer()) + currentDrawObjectCount; - memcpy(dstDrawObj, &drawObj, sizeof(DrawObject)); - currentDrawObjectCount += 1u; - - return true; - } - else - return false; - }; - - uint32_t mainObjIdx = addMainObject_SubmitIfNeeded(InvalidStyleIdx, intendedNextSubmit); - - ImageObjectInfo info = {}; - info.topLeft = topLeftPos; - info.dirU = float32_t2(size.x * cos(rotation), size.x * sin(rotation)); // - info.aspectRatio = size.y / size.x; - info.textureID = 0u; - if (!addImageObject_Internal(info, mainObjIdx)) - { - // single image object couldn't fit into memory to push to gpu, so we submit rendering current objects and reset geometry buffer and draw objects - submitCurrentDrawObjectsAndReset(intendedNextSubmit, mainObjIdx); - bool success = addImageObject_Internal(info, mainObjIdx); - assert(success); // this should always be true, otherwise it's either bug in code or not enough memory allocated to hold a single image object - } - } - bool finalizeAllCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); - - inline uint32_t getLineStyleCount() const { return currentLineStylesCount; } + void drawGridDTM(const float64_t2& topLeft, + float64_t2 worldSpaceExtents, + float gridCellWidth, + uint64_t textureID, + const DTMSettingsInfo& dtmSettingsInfo, + SIntendedSubmitInfo& intendedNextSubmit); - inline uint32_t getDrawObjectCount() const { return currentDrawObjectCount; } + /** + * @brief Adds a static 2D image to the draw resource set for rendering. + * + * This function ensures that a given image is available as a GPU-resident texture for future draw submissions. 
+ * It uses an LRU cache to manage descriptor set slots and evicts old images if necessary to make room for new ones. + * + * If the image is already cached and its slot is valid, it returns true; + * Otherwise, it performs the following: + * - Allocates a new descriptor set slot. + * - Promotes the image format to be GPU-compatible. + * - Creates a GPU image and GPU image view. + * - Queues the image for uploading via staging in the next submit. + * - If memory is constrained, attempts to evict other images to free up space. + * + * @param staticImage Unique identifier for the image resource plus the CPU-side image resource to (possibly) upload. + * @param staticImage::forceUpdate If true, bypasses the existing GPU-side cache and forces an update of the image data; Useful when replacing the contents of a static image that may already be resident. + * @param intendedNextSubmit Struct representing the upcoming submission, including a semaphore for safe scheduling. + * + * @note This function ensures that the descriptor slot is not reused while the GPU may still be reading from it. + * If an eviction is required and the evicted image is scheduled to be used in the next submit, it triggers + * a flush of pending draws to preserve correctness. + * + * @note The function uses the `imagesCache` LRU cache to track usage and validity of texture slots. + * If an insertion leads to an eviction, a callback ensures proper deallocation and synchronization. + * @return true if the image was successfully cached and is ready for use; false if allocation failed most likely due to the image being larger than the memory arena allocated for all images. + */ + bool ensureStaticImageAvailability(const StaticImageInfo& staticImage, SIntendedSubmitInfo& intendedNextSubmit); + + /** + * @brief Ensures that multiple static 2D images are resident and ready for rendering. + * + * Attempts to make all provided static images GPU-resident by calling `ensureStaticImageAvailability` + * for each. Afterward, it verifies that none of the newly ensured images have been evicted, + * which could happen due to limited VRAM or memory fragmentation. + * + * This function is expected to succeed if: + * - The number of images does not exceed `ImagesBindingArraySize`. + * - Each image individually fits into the image memory arena. + * - There is enough VRAM to hold all images simultaneously. + * + * @param staticImages A span of StaticImageInfo structures describing the images to be ensured. + * @param intendedNextSubmit Struct representing the upcoming submission, including a semaphore for safe scheduling. + * + * @return true If all images were successfully made resident and none were evicted during the process. + * @return false If: + * - The number of images exceeds the descriptor binding array size. + * - Any individual image could not be made resident (e.g., larger than the allocator can support). + * - Some images were evicted due to VRAM pressure or allocator fragmentation, in which case Clearing the image cache and retrying MIGHT be a success (TODO: handle internally) + */ + bool ensureMultipleStaticImagesAvailability(std::span staticImages, SIntendedSubmitInfo& intendedNextSubmit); + + /** + * @brief Ensures a GPU-resident georeferenced image exists in the cache, allocating resources if necessary. + * + * If the specified image ID is not already present in the cache, or if the cached version is incompatible + * with the requested parameters (e.g. 
extent, format, or type), this function allocates GPU memory, + * creates the image and its view, to be bound to a descriptor binding in the future. + * + * If the image already exists and matches the requested parameters, its usage metadata is updated. + * In either case, the cache is updated to reflect usage in the current frame. + * + * This function also handles automatic eviction of old images via an LRU policy when space is limited. + * + * @param imageID Unique identifier of the image to add or reuse. + * @param params Georeferenced Image Params + * @param intendedNextSubmit Submit info object used to track resources pending GPU submission. + * + * @return true if the image was successfully cached and is ready for use; false if allocation failed. + * [TODO]: should be internal protected member function. + */ + bool ensureGeoreferencedImageAvailability_AllocateIfNeeded(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit); + + // [TODO]: should be internal protected member function. + bool queueGeoreferencedImageCopy_Internal(image_id imageID, const StreamedImageCopy& imageCopy); + + // This function must be called immediately after `addStaticImage` for the same imageID. + void addImageObject(image_id imageID, const OrientedBoundingBox2D& obb, SIntendedSubmitInfo& intendedNextSubmit); + + // This function must be called immediately after `addStaticImage` for the same imageID. + void addGeoreferencedImage(image_id imageID, const GeoreferencedImageParams& params, SIntendedSubmitInfo& intendedNextSubmit); - inline uint32_t getMainObjectCount() const { return currentMainObjectCount; } + /// @brief call this function before submitting to ensure all buffer and texture resources requested via drawing calls are copied to the GPU /// records copy commands into intendedNextSubmit's active command buffer and might possibly submit if an allocation from the staging upload memory fails.
+ bool pushAllUploads(SIntendedSubmitInfo& intendedNextSubmit); - inline size_t getCurrentMainObjectsBufferSize() const + /// @brief resets staging buffers and images + void reset() { - return sizeof(MainObject) * currentMainObjectCount; + resetDrawObjects(); + resetMainObjects(); + resetCustomProjections(); + resetCustomClipRects(); + resetLineStyles(); + resetDTMSettings(); + + drawObjectsFlushedToDrawCalls = 0ull; + drawCalls.clear(); } - inline size_t getCurrentDrawObjectsBufferSize() const - { - return sizeof(DrawObject) * currentDrawObjectCount; - } + /// @brief collection of all the resources that will eventually be reserved or copied to in the resourcesGPUBuffer, will be accessed via individual BDA pointers in shaders + const ResourcesCollection& getResourcesCollection() const; - inline size_t getCurrentGeometryBufferSize() const - { - return currentGeometryBufferSize; - } + /// @brief buffer containing all non-texture type resources + nbl::core::smart_refctd_ptr getResourcesGPUBuffer() const { return resourcesGPUBuffer; } - inline size_t getCurrentLineStylesBufferSize() const - { - return sizeof(LineStyle) * currentLineStylesCount; - } + /// @return how far resourcesGPUBuffer was copied to by `finalizeAllCopiesToGPU` in `resourcesCollection` + const size_t getCopiedResourcesSize() { return copiedResourcesSize; } - void reset() - { - resetGeometryCounters(); - resetMainObjectCounters(); - resetLineStyleCounters(); - } + // Setting Active Resources: + void setActiveLineStyle(const LineStyleInfo& lineStyle); + + void setActiveDTMSettings(const DTMSettingsInfo& dtmSettingsInfo); - DrawBuffers cpuDrawBuffers; - DrawBuffers gpuDrawBuffers; + void beginMainObject(MainObjectType type, TransformationType transformationType = TransformationType::TT_NORMAL); + void endMainObject(); - uint32_t addLineStyle_SubmitIfNeeded(const LineStyleInfo& lineStyle, SIntendedSubmitInfo& intendedNextSubmit); + void pushCustomProjection(const float64_t3x3& projection); + void popCustomProjection(); - // TODO[Przemek]: Read after reading the fragment shader comments and having a basic understanding of the relationship between "mainObject" and our programmable blending resolve: - // Use `addMainObject_SubmitIfNeeded` to push your single mainObject you'll be using for the enitre triangle mesh (this will ensure overlaps between triangles of the same mesh is resolved correctly) - // Delete comment when you understand this - - // [ADVANCED] Do not use this function unless you know what you're doing (It may cause auto submit) - // Never call this function multiple times in a row before indexing it in a drawable, because future auto-submits may invalidate mainObjects, so do them one by one, for example: - // Valid: addMainObject1 --> addXXX(mainObj1) ---> addMainObject2 ---> addXXX(mainObj2) .... - // Invalid: addMainObject1 ---> addMainObject2 ---> addXXX(mainObj1) ---> addXXX(mainObj2) .... 
- uint32_t addMainObject_SubmitIfNeeded(uint32_t styleIdx, SIntendedSubmitInfo& intendedNextSubmit); + void pushCustomClipRect(const WorldClipRect& clipRect); + void popCustomClipRect(); - // we need to store the clip projection stack to make sure the front is always available in memory - void pushClipProjectionData(const ClipProjectionData& clipProjectionData); - void popClipProjectionData(); - const std::deque& getClipProjectionStack() const { return clipProjections; } + const std::deque& getCustomProjectionStack() const { return activeProjections; } + const std::deque& getCustomClipRectsStack() const { return activeClipRects; } smart_refctd_ptr getMSDFsTextureArray() { return msdfTextureArray; } @@ -223,100 +407,317 @@ struct DrawResourcesFiller return msdfTextureArray->getCreationParameters().image->getCreationParameters().mipLevels; } -protected: - - struct MSDFTextureCopy + /// For advanced use only, (passed to shaders for them to know if we overflow-submitted in the middle if a main obj + uint32_t getActiveMainObjectIndex() const; + + struct MSDFImageState { core::smart_refctd_ptr image; - uint32_t index; + bool uploadedToGPU : 1u; + + bool isValid() const { return image.get() != nullptr; } + void evict() + { + image = nullptr; + uploadedToGPU = false; + } }; - SubmitFunc submitDraws; - - bool finalizeMainObjectCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); + // NOTE: Most probably Going to get removed soon with a single draw call in GPU-driven rendering + struct DrawCallData + { + union + { + struct Dtm + { + uint64_t indexBufferOffset; + uint64_t indexCount; + uint64_t triangleMeshVerticesBaseAddress; + uint32_t triangleMeshMainObjectIndex; + } dtm; + struct DrawObj + { + uint64_t drawObjectStart = 0ull; + uint64_t drawObjectCount = 0ull; + } drawObj; + }; + bool isDTMRendering; + }; - bool finalizeGeometryCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); + const std::vector& getDrawCalls() const; + + /// @brief Stores all CPU-side resources that were staged and prepared for a single GPU submission. + /// + /// *** This cache includes anything used or referenced from DrawResourcesFiller in the Draw Submit: + /// - Buffer data (geometry, indices, etc.) + /// - MSDF CPU images + /// - Draw call metadata + /// - Active MainObject Index --> this is another state of the submit that we need to store + /// + /// The data is fully preprocessed and ready to be pushed to the GPU with no further transformation. + /// This enables efficient replays without traversing or re-generating scene content. + struct ReplayCache + { + std::vector drawCallsData; + ResourcesCollection resourcesCollection; + std::vector msdfImagesState; + std::unique_ptr imagesCache; + uint32_t activeMainObjectIndex = InvalidMainObjectIdx; + // TODO: non msdf general CPU Images + // TODO: Get total memory consumption for logging? + }; - bool finalizeLineStyleCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); + /// @brief Creates a snapshot of all currently staged CPU-side resourcesCollection for future replay or deferred submission. + /// + /// @warning This cache corresponds to a **single intended GPU submit**. + /// If your frame submission overflows into multiple submits due to staging memory limits or batching, + /// you are responsible for creating **multiple ReplayCache instances**, one per submit. + /// + /// @return A heap-allocated ReplayCache containing a copy of all staged CPU-side resourcesCollection and draw call data. 
+ std::unique_ptr createReplayCache(); + + /// @brief Redirects all subsequent resource upload and getters to use an external ReplayCache. + /// + /// After calling this function, staging, resource getters, and upload mechanisms will pull data from the given ReplayCache + /// instead of the internal accumulation cache. + /// + /// User is responsible for management of cache and making sure it's alive in the ReplayCache scope + void setReplayCache(ReplayCache* cache); - bool finalizeCustomClipProjectionCopiesToGPU(SIntendedSubmitInfo& intendedNextSubmit); - - bool finalizeTextureCopies(SIntendedSubmitInfo& intendedNextSubmit); + /// @brief Reverts internal logic to use the default internal staging and resource accumulation cache. + /// Must be called once per corresponding `pushReplayCacheUse()`. + void unsetReplayCache(); + +protected: - // Internal Function to call whenever we overflow while filling our buffers with geometry (potential limiters: indexBuffer, drawObjectsBuffer or geometryBuffer) - // ! mainObjIdx: is the mainObject the "overflowed" drawObjects belong to. - // mainObjIdx is required to ensure that valid data, especially the `clipProjectionData`, remains linked to the main object. - // This is important because, while other data may change during overflow handling, the main object must persist to maintain consistency throughout rendering all parts of it. (for example all lines and beziers of a single polyline) - // [ADVANCED] If you have not created your mainObject yet, pass `InvalidMainObjectIdx` (See drawHatch) - void submitCurrentDrawObjectsAndReset(SIntendedSubmitInfo& intendedNextSubmit, uint32_t mainObjectIndex); + SubmitFunc submitDraws; - uint32_t addMainObject_Internal(const MainObject& mainObject); + /// @brief Records GPU copy commands for all staged buffer resourcesCollection into the active command buffer. + bool pushBufferUploads(SIntendedSubmitInfo& intendedNextSubmit, ResourcesCollection& resourcesCollection); + + /// @brief Records GPU copy commands for all staged msdf images into the active command buffer. + bool pushMSDFImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, std::vector& msdfImagesState); - uint32_t addLineStyle_Internal(const LineStyleInfo& lineStyleInfo); + /// @brief binds cached images into their correct descriptor set slot if not already resident. + bool bindImagesToArrayIndices(ImagesCache& imagesCache); - // Gets the current clip projection data (the top of stack) gpu addreess inside the geometryBuffer - // If it's been invalidated then it will request to upload again with a possible auto-submit on low geometry buffer memory. - uint64_t acquireCurrentClipProjectionAddress(SIntendedSubmitInfo& intendedNextSubmit); + /// @brief Records GPU copy commands for all staged images into the active command buffer. + bool pushStaticImagesUploads(SIntendedSubmitInfo& intendedNextSubmit, ImagesCache& imagesCache); - uint64_t addClipProjectionData_SubmitIfNeeded(const ClipProjectionData& clipProjectionData, SIntendedSubmitInfo& intendedNextSubmit); + /// @brief copies the queued up streamed copies. 
+ bool pushStreamedImagesUploads(SIntendedSubmitInfo& intendedNextSubmit); - uint64_t addClipProjectionData_Internal(const ClipProjectionData& clipProjectionData); + const size_t calculateRemainingResourcesSize() const; - static constexpr uint32_t getCageCountPerPolylineObject(ObjectType type) - { - if (type == ObjectType::LINE) - return 1u; - else if (type == ObjectType::QUAD_BEZIER) - return 3u; - return 0u; - }; + /// @brief Internal Function to call whenever we overflow when we can't fill all of mainObject's drawObjects + /// @param intendedNextSubmit + /// @param mainObjectIndex: function updates mainObjectIndex after submitting, clearing everything and acquiring mainObjectIndex again. + void submitCurrentDrawObjectsAndReset(SIntendedSubmitInfo& intendedNextSubmit, uint32_t& mainObjectIndex); - void addPolylineObjects_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); + // Gets resource index to the active linestyle data from the top of stack + // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) + uint32_t acquireActiveLineStyleIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); + + // Gets resource index to the active linestyle data from the top of stack + // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) + uint32_t acquireActiveDTMSettingsIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); - void addPolylineConnectors_Internal(const CPolylineBase& polyline, uint32_t& currentPolylineConnectorObj, uint32_t mainObjIdx); + // Gets resource index to the active projection data from the top of stack + // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) + uint32_t acquireActiveCustomProjectionIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); + + // Gets resource index to the active clip data from the top of stack + // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) + uint32_t acquireActiveCustomClipRectIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); + + // Gets resource index to the active main object data + // If it's been invalidated then it will request to add to resources again ( auto-submission happens If there is not enough memory to add again) + uint32_t acquireActiveMainObjectIndex_SubmitIfNeeded(SIntendedSubmitInfo& intendedNextSubmit); + /// Attempts to add lineStyle to resources. If it fails to do, due to resource limitations, auto-submits and tries again. + uint32_t addLineStyle_SubmitIfNeeded(const LineStyleInfo& lineStyle, SIntendedSubmitInfo& intendedNextSubmit); + + /// Attempts to add dtmSettings to resources. If it fails to do, due to resource limitations, auto-submits and tries again. + uint32_t addDTMSettings_SubmitIfNeeded(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); + + /// Attempts to add custom projection to gpu resources. If it fails to do, due to resource limitations, auto-submits and tries again. + uint32_t addCustomProjection_SubmitIfNeeded(const float64_t3x3& projection, SIntendedSubmitInfo& intendedNextSubmit); + + /// Attempts to add custom clip to gpu resources. 
If it fails to do, due to resource limitations, auto-submits and tries again. + uint32_t addCustomClipRect_SubmitIfNeeded(const WorldClipRect& clipRect, SIntendedSubmitInfo& intendedNextSubmit); + + /// returns index to added LineStyleInfo, returns Invalid index if it exceeds resource limitations + uint32_t addLineStyle_Internal(const LineStyleInfo& lineStyleInfo); + + /// returns index to added DTMSettingsInfo, returns Invalid index if it exceeds resource limitations + uint32_t addDTMSettings_Internal(const DTMSettingsInfo& dtmSettings, SIntendedSubmitInfo& intendedNextSubmit); + + /** + * @brief Computes the final transformation matrix for fixed geometry rendering, + * considering any active custom projections and the transformation type. + * + * This function handles how a given transformation should be applied depending on the + * current transformation type and the presence of any active projection matrices. + * + * - If no active projection exists, the input transformation is returned unmodified. + * + * - If an active projection exists: + * - For TT_NORMAL, the input transformation is simply multiplied by the top of the projection stack. + * - For TT_FIXED_SCREENSPACE_SIZE, the input transformation is multiplied by the top of the projection stack, + * but the resulting scale is replaced with the screen-space scale from the original input `transformation`. + * + * @param transformation The input 3x3 transformation matrix to apply. + * @param transformationType The type of transformation to apply (e.g., TT_NORMAL or TT_FIXED_SCREENSPACE_SIZE). + * + */ + float64_t3x3 getFixedGeometryFinalTransformationMatrix(const float64_t3x3& transformation, TransformationType transformationType) const; + + /// Attempts to upload as many draw objects as possible within the given polyline section considering resource limitations + void addPolylineObjects_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); + + /// Attempts to upload as many draw objects as possible within the given polyline connectors considering resource limitations + void addPolylineConnectors_Internal(const CPolylineBase& polyline, uint32_t& currentPolylineConnectorObj, uint32_t mainObjIdx); + + /// Attempts to upload as many draw objects as possible within the given polyline section considering resource limitations void addLines_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); - + + /// Attempts to upload as many draw objects as possible within the given polyline section considering resource limitations void addQuadBeziers_Internal(const CPolylineBase& polyline, const CPolylineBase::SectionInfo& section, uint32_t& currentObjectInSection, uint32_t mainObjIdx); - + + /// Attempts to upload as many draw objects as possible within the given hatch considering resource limitations void addHatch_Internal(const Hatch& hatch, uint32_t& currentObjectInSection, uint32_t mainObjIndex); + /// Attempts to upload a single GlyphInfo considering resource limitations bool addFontGlyph_Internal(const GlyphInfo& glyphInfo, uint32_t mainObjIdx); - void resetMainObjectCounters() + /// Attempts to upload a single GridDTMInfo considering resource limitations + bool addGridDTM_Internal(const GridDTMInfo& gridDTMInfo, uint32_t mainObjIdx); + /// Attempts to upload a single image object considering resource limitations (not accounting for the resource image added using 
ensureStaticImageAvailability function) + bool addImageObject_Internal(const ImageObjectInfo& imageObjectInfo, uint32_t mainObjIdx);; + + /// Attempts to upload a georeferenced image info considering resource limitations (not accounting for the resource image added using ensureStaticImageAvailability function) + bool addGeoreferencedImageInfo_Internal(const GeoreferencedImageInfo& georeferencedImageInfo, uint32_t mainObjIdx);; + + uint32_t getImageIndexFromID(image_id imageID, const SIntendedSubmitInfo& intendedNextSubmit); + + /** + * @brief Evicts a GPU image and deallocates its associated descriptor and memory, flushing draws if needed. + * + * This function is called when an image must be removed from GPU memory (typically due to VRAM pressure). + * If the evicted image is scheduled to be used in the next draw submission, a flush is performed to avoid + * use-after-free issues. Otherwise, it proceeds with deallocation immediately. + * + * It prepares a cleanup object that ensures the memory range used by the image will be returned to the suballocator + * only after the GPU has finished using it, guarded by a semaphore wait. + * + * @param imageID The unique ID of the image being evicted. + * @param evicted A reference to the evicted image, containing metadata such as allocation offset, size, usage frame, etc. + * @param intendedNextSubmit Reference to the intended submit information. Used for synchronizing draw submission and safe deallocation. + * + * @warning Deallocation may use a conservative semaphore wait value if exact usage information is unavailable. [future todo: fix] + */ + void evictImage_SubmitIfNeeded(image_id imageID, const CachedImageRecord& evicted, SIntendedSubmitInfo& intendedNextSubmit); + + struct ImageAllocateResults { - inMemMainObjectCount = 0u; - currentMainObjectCount = 0u; - } + nbl::core::smart_refctd_ptr gpuImageView = nullptr; + uint64_t allocationOffset = ImagesMemorySubAllocator::InvalidAddress; + uint64_t allocationSize = 0ull; + bool isValid() const { return (gpuImageView && (allocationOffset != ImagesMemorySubAllocator::InvalidAddress)); } + }; + + /** + * @brief Attempts to create and allocate a GPU image and its view, with fallback eviction on failure. + * + * This function tries to create a GPU image using the specified creation parameters, allocate memory + * from the shared image memory arena, bind it to device-local memory, and create an associated image view. + * If memory allocation fails (e.g. due to VRAM exhaustion), the function will evict textures from the internal + * LRU cache and retry the operation until successful, or until only the currently-inserted image remains. + * + * This is primarily used by the draw resource filler to manage GPU image memory for streamed or cached images. + * + * @param imageParams Creation parameters for the image. Should match `nbl::asset::IImage::SCreationParams`. + * @param imageViewFormatOverride Specifies whether the image view format should differ from the image format. If set to asset::E_FORMAT_ET_COUNT, the image view uses the same format as the image + * @param intendedNextSubmit Reference to the current intended submit info. Used for synchronizing evictions. + * @param imageDebugName Debug name assigned to the image and its view for easier profiling/debugging. + * + * @return ImageAllocateResults A struct containing: + * - `allocationOffset`: Offset into the memory arena (or InvalidAddress on failure). + * - `allocationSize`: Size of the allocated memory region. 
+ * - `gpuImageView`: The created GPU image view (nullptr if creation failed). + */ + ImageAllocateResults tryCreateAndAllocateImage_SubmitIfNeeded(const nbl::asset::IImage::SCreationParams& imageParams, + const asset::E_FORMAT imageViewFormatOverride, + nbl::video::SIntendedSubmitInfo& intendedNextSubmit, + std::string imageDebugName); + + /** + * @brief Determines creation parameters for a georeferenced image based on heuristics. + * + * This function decides whether a georeferenced image should be treated as a fully resident GPU texture + * or as a streamable image based on the relationship between its total resolution and the viewport size. + * It then fills out the appropriate Nabla image creation parameters. + * + * @param[out] outImageParams Structure to be filled with image creation parameters (format, size, etc.). + * @param[out] outImageType Indicates whether the image should be fully resident or streamed. + * @param[in] georeferencedImageParams Parameters describing the full image extents, viewport extents, and format. + */ + void determineGeoreferencedImageCreationParams(nbl::asset::IImage::SCreationParams& outImageParams, ImageType& outImageType, const GeoreferencedImageParams& georeferencedImageParams); + + /** + * @brief Used to implement both `drawHatch` and `drawFixedGeometryHatch` without exposing the transformation type parameter + */ + void drawHatch_impl( + const Hatch& hatch, + const float32_t4& color, + const HatchFillPattern fillPattern, + SIntendedSubmitInfo& intendedNextSubmit, + TransformationType transformationType = TransformationType::TT_NORMAL); - // WARN: If you plan to use this, make sure you either reset the mainObjectCounters as well - // Or if you want to keep your mainObject around, make sure you're using the `submitCurrentObjectsAndReset` function instead of calling this directly - // So that it makes your mainObject point to the correct clipProjectionData (which exists in the geometry buffer) - void resetGeometryCounters() + void resetMainObjects() { - inMemDrawObjectCount = 0u; - currentDrawObjectCount = 0u; + resourcesCollection.mainObjects.vector.clear(); + activeMainObjectIndex = InvalidMainObjectIdx; + } - inMemGeometryBufferSize = 0u; - currentGeometryBufferSize = 0u; + // these resources are data related to chunks of a whole mainObject + void resetDrawObjects() + { + resourcesCollection.drawObjects.vector.clear(); + resourcesCollection.indexBuffer.vector.clear(); + resourcesCollection.geometryInfo.vector.clear(); + } - // Invalidate all the clip projection addresses because geometry buffer got reset - for (auto& clipProjAddr : clipProjectionAddresses) - clipProjAddr = InvalidClipProjectionAddress; + void resetCustomProjections() + { + resourcesCollection.customProjections.vector.clear(); + + // Invalidate all the clip projection addresses because activeProjections buffer got reset + for (auto& addr : activeProjectionIndices) + addr = InvalidCustomProjectionIndex; } - void resetLineStyleCounters() + void resetCustomClipRects() { - currentLineStylesCount = 0u; - inMemLineStylesCount = 0u; + resourcesCollection.customClipRects.vector.clear(); + + // Invalidate all the clip projection addresses because activeProjections buffer got reset + for (auto& addr : activeClipRectIndices) + addr = InvalidCustomClipRectIndex; } - MainObject* getMainObject(uint32_t idx) + void resetLineStyles() { - MainObject* mainObjsArray = reinterpret_cast(cpuDrawBuffers.mainObjectsBuffer->getPointer()); - return &mainObjsArray[idx]; + 
resourcesCollection.lineStyles.vector.clear(); + activeLineStyleIndex = InvalidStyleIdx; } + void resetDTMSettings() + { + resourcesCollection.dtmSettings.vector.clear(); + activeDTMSettingsIndex = InvalidDTMSettingsIdx; + } + // MSDF Hashing and Caching Internal Functions enum class MSDFType : uint8_t { @@ -377,75 +778,90 @@ struct DrawResourcesFiller }; struct MSDFInputInfoHash { std::size_t operator()(const MSDFInputInfo& info) const { return info.lookupHash; } }; - + struct MSDFReference { uint32_t alloc_idx; - uint64_t lastUsedSemaphoreValue; + uint64_t lastUsedFrameIndex; - MSDFReference(uint32_t alloc_idx, uint64_t semaphoreVal) : alloc_idx(alloc_idx), lastUsedSemaphoreValue(semaphoreVal) {} - MSDFReference(uint64_t semaphoreVal) : MSDFReference(InvalidTextureIdx, semaphoreVal) {} - MSDFReference() : MSDFReference(InvalidTextureIdx, ~0ull) {} + MSDFReference(uint32_t alloc_idx, uint64_t semaphoreVal) : alloc_idx(alloc_idx), lastUsedFrameIndex(semaphoreVal) {} + MSDFReference(uint64_t currentFrameIndex) : MSDFReference(InvalidTextureIndex, currentFrameIndex) {} + MSDFReference() : MSDFReference(InvalidTextureIndex, ~0ull) {} // In LRU Cache `insert` function, in case of cache hit, we need to assign semaphore value to MSDFReference without changing `alloc_idx` - inline MSDFReference& operator=(uint64_t semamphoreVal) { lastUsedSemaphoreValue = semamphoreVal; return *this; } + inline MSDFReference& operator=(uint64_t currentFrameIndex) { lastUsedFrameIndex = currentFrameIndex; return *this; } }; - uint32_t getMSDFIndexFromInputInfo(const MSDFInputInfo& msdfInfo, SIntendedSubmitInfo& intendedNextSubmit) - { - uint32_t textureIdx = InvalidTextureIdx; - MSDFReference* tRef = msdfLRUCache->get(msdfInfo); - if (tRef) - { - textureIdx = tRef->alloc_idx; - tRef->lastUsedSemaphoreValue = intendedNextSubmit.getFutureScratchSemaphore().value; // update this because the texture will get used on the next submit - } - return textureIdx; - } + uint32_t getMSDFIndexFromInputInfo(const MSDFInputInfo& msdfInfo, const SIntendedSubmitInfo& intendedNextSubmit); - // ! mainObjIdx: make sure to pass your mainObjIdx to it if you want it to stay synced/updated if some overflow submit occured which would potentially erase what your mainObject points at. 
- // If you haven't created a mainObject yet, then pass InvalidMainObjectIdx - uint32_t addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, uint32_t mainObjIdx, SIntendedSubmitInfo& intendedNextSubmit); + uint32_t addMSDFTexture(const MSDFInputInfo& msdfInput, core::smart_refctd_ptr&& cpuImage, SIntendedSubmitInfo& intendedNextSubmit); + // Flushes Current Draw Call and adds to drawCalls + void flushDrawObjects(); + + // Logger + nbl::system::logger_opt_smart_ptr m_logger = nullptr; + + // FrameIndex used as a criteria for resource/image eviction in case of limitations + uint32_t currentFrameIndex = 0u; + + // Replay Cache override + ReplayCache* currentReplayCache = nullptr; + + // DrawCalls Data + uint64_t drawObjectsFlushedToDrawCalls = 0ull; + std::vector drawCalls; // either dtms or objects + + // ResourcesCollection and packed into GPUBuffer + ResourcesCollection resourcesCollection; + IDeviceMemoryAllocator::SAllocation buffersMemoryArena; + nbl::core::smart_refctd_ptr resourcesGPUBuffer; + size_t copiedResourcesSize; + + // GPUImages Memory Arena + AddressAllocator + IDeviceMemoryAllocator::SAllocation imagesMemoryArena; + smart_refctd_ptr imagesMemorySubAllocator; + // Members smart_refctd_ptr m_utilities; IQueue* m_copyQueue; - uint32_t maxIndexCount; - - uint32_t inMemMainObjectCount = 0u; - uint32_t currentMainObjectCount = 0u; - uint32_t maxMainObjects = 0u; - - uint32_t inMemDrawObjectCount = 0u; - uint32_t currentDrawObjectCount = 0u; - uint32_t maxDrawObjects = 0u; + // Active Resources we need to keep track of and push to resources buffer if needed. + LineStyleInfo activeLineStyle; + uint32_t activeLineStyleIndex = InvalidStyleIdx; - uint64_t inMemGeometryBufferSize = 0u; - uint64_t currentGeometryBufferSize = 0u; - uint64_t maxGeometryBufferSize = 0u; + DTMSettingsInfo activeDTMSettings; + uint32_t activeDTMSettingsIndex = InvalidDTMSettingsIdx; - uint32_t inMemLineStylesCount = 0u; - uint32_t currentLineStylesCount = 0u; - uint32_t maxLineStyles = 0u; + MainObjectType activeMainObjectType; + TransformationType activeMainObjectTransformationType; - uint64_t geometryBufferAddress = 0u; // Actual BDA offset 0 of the gpu buffer + uint32_t activeMainObjectIndex = InvalidMainObjectIdx; - std::deque clipProjections; // stack of clip projectios stored so we can resubmit them if geometry buffer got reset. - std::deque clipProjectionAddresses; // stack of clip projection gpu addresses in geometry buffer. to keep track of them in push/pops + // The ClipRects & Projections are stack, because user can push/pop ClipRects & Projections in any order + std::deque activeProjections; // stack of projections stored so we can resubmit them if geometry buffer got reset. + std::deque activeProjectionIndices; // stack of projection gpu addresses in geometry buffer. to keep track of them in push/pops + + std::deque activeClipRects; // stack of clips stored so we can resubmit them if geometry buffer got reset. + std::deque activeClipRectIndices; // stack of clips gpu addresses in geometry buffer. 
to keep track of them in push/pops - // MSDF GetGlyphMSDFTextureFunc getGlyphMSDF; GetHatchFillPatternMSDFTextureFunc getHatchFillPatternMSDF; - using MSDFsLRUCache = core::LRUCache; + using MSDFsLRUCache = core::ResizableLRUCache; smart_refctd_ptr msdfTextureArray; // view to the resource holding all the msdfs in it's layers smart_refctd_ptr msdfTextureArrayIndexAllocator; - std::set msdfTextureArrayIndicesUsed = {}; // indices in the msdf texture array allocator that have been used in the current frame // TODO: make this a dynamic bitset - std::vector msdfTextureCopies = {}; // queued up texture copies std::unique_ptr msdfLRUCache; // LRU Cache to evict Least Recently Used in case of overflow - static constexpr asset::E_FORMAT MSDFTextureFormat = asset::E_FORMAT::EF_R8G8B8A8_SNORM; + std::vector msdfImagesState = {}; // cached cpu imaged + their status, size equals to LRUCache size + static constexpr asset::E_FORMAT MSDFTextureFormat = asset::E_FORMAT::EF_R8G8B8A8_SNORM; bool m_hasInitializedMSDFTextureArrays = false; + + // Images: + std::unique_ptr imagesCache; + smart_refctd_ptr suballocatedDescriptorSet; + uint32_t imagesArrayBinding = 0u; + + std::unordered_map> streamedImageCopies; }; diff --git a/62_CAD/GeoTexture.cpp b/62_CAD/GeoTexture.cpp index 71cbcef34..de8a974d0 100644 --- a/62_CAD/GeoTexture.cpp +++ b/62_CAD/GeoTexture.cpp @@ -1,8 +1,8 @@ #include "GeoTexture.h" bool GeoTextureRenderer::initialize( - IGPUShader* vertexShader, - IGPUShader* fragmentShader, + IShader* vertexShader, + IShader* fragmentShader, IGPURenderpass* compatibleRenderPass, const smart_refctd_ptr& globalsBuffer) { @@ -87,14 +87,15 @@ bool GeoTextureRenderer::initialize( // Create Main Graphics Pipelines { - IGPUShader::SSpecInfo specInfo[2] = { + video::IGPUPipelineBase::SShaderSpecInfo specInfo[2] = { {.shader=vertexShader }, {.shader=fragmentShader }, }; IGPUGraphicsPipeline::SCreationParams params[1] = {}; params[0].layout = m_pipelineLayout.get(); - params[0].shaders = specInfo; + params[0].vertexShader = specInfo[0]; + params[0].fragmentShader = specInfo[1]; params[0].cached = { .vertexInput = {}, .primitiveAssembly = { diff --git a/62_CAD/GeoTexture.h b/62_CAD/GeoTexture.h index c43208e32..f471009fc 100644 --- a/62_CAD/GeoTexture.h +++ b/62_CAD/GeoTexture.h @@ -29,8 +29,8 @@ class GeoTextureRenderer {} bool initialize( - IGPUShader* vertexShader, - IGPUShader* fragmentShader, + IShader* vertexShader, + IShader* fragmentShader, IGPURenderpass* compatibleRenderPass, const smart_refctd_ptr& globalsBuffer); diff --git a/62_CAD/Images.h b/62_CAD/Images.h new file mode 100644 index 000000000..a341eadd6 --- /dev/null +++ b/62_CAD/Images.h @@ -0,0 +1,219 @@ +#pragma once +using namespace nbl; +using namespace nbl::video; +using namespace nbl::core; +using namespace nbl::asset; + +using image_id = uint64_t; // Could later be templated or replaced with a stronger type or hash key. 
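The keying scheme for `image_id` is left to the caller. As a minimal sketch (an assumption for illustration, not part of this header), an ID could be minted by hashing the image's source identity, e.g. its file path:

#include <functional>
#include <string>

// Illustrative helper (hypothetical): derive an image_id from a source path via std::hash.
// Callers are free to key images however they like (tile coordinates, GUIDs, etc.).
inline image_id makeImageIDFromPath(const std::string& path)
{
    return static_cast<image_id>(std::hash<std::string>{}(path));
}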
+ +enum class ImageState : uint8_t +{ + INVALID = 0, + CREATED_AND_MEMORY_BOUND, // GPU image created, not bound to descriptor set yet + BOUND_TO_DESCRIPTOR_SET, // Bound to descriptor set, GPU resident, but may contain uninitialized or partial data + GPU_RESIDENT_WITH_VALID_STATIC_DATA, // When data for static images gets issued for upload successfully +}; + +enum class ImageType : uint8_t +{ + INVALID = 0, + STATIC, // Regular non-georeferenced image, fully loaded once + GEOREFERENCED_STREAMED, // Streamed image, resolution depends on camera/view + GEOREFERENCED_FULL_RESOLUTION // For smaller georeferenced images, entire image is eventually loaded and not streamed or view-dependent +}; + +struct GeoreferencedImageParams +{ + OrientedBoundingBox2D worldspaceOBB = {}; + uint32_t2 imageExtents = {}; + uint32_t2 viewportExtents = {}; + asset::E_FORMAT format = {}; + // TODO: Need to add other stuff later. +}; + +/** + * @class ImagesMemorySubAllocator + * @brief A memory sub-allocator designed for managing sub-allocations within a pre-allocated GPU memory arena for images. + * + * This class wraps around `nbl::core::GeneralpurposeAddressAllocator` to provide offset-based memory allocation + * for image resources within a contiguous block of GPU memory. + * + * @note This class only manages address offsets. The actual memory must be bound separately. + */ +class ImagesMemorySubAllocator : public core::IReferenceCounted +{ +public: + using AddressAllocator = nbl::core::GeneralpurposeAddressAllocator; + using ReservedAllocator = nbl::core::allocator; + static constexpr uint64_t InvalidAddress = AddressAllocator::invalid_address; + static constexpr uint64_t MaxMemoryAlignment = 4096u; // safe choice based on hardware reports + static constexpr uint64_t MinAllocSize = 128 * 1024u; // 128KB, the larger this is the better + + ImagesMemorySubAllocator(uint64_t memoryArenaSize) + { + m_reservedAllocSize = AddressAllocator::reserved_size(MaxMemoryAlignment, memoryArenaSize, MinAllocSize); + m_reservedAllocator = std::unique_ptr(new ReservedAllocator()); + m_reservedAlloc = m_reservedAllocator->allocate(m_reservedAllocSize, _NBL_SIMD_ALIGNMENT); + m_addressAllocator = std::unique_ptr(new AddressAllocator( + m_reservedAlloc, 0u, 0u, MaxMemoryAlignment, memoryArenaSize, MinAllocSize + )); + } + + // returns the offset, or InvalidAddress if the allocation failed + uint64_t allocate(uint64_t size, uint64_t alignment) + { + return m_addressAllocator->alloc_addr(size, alignment); + } + + void deallocate(uint64_t addr, uint64_t size) + { + m_addressAllocator->free_addr(addr, size); + } + + ~ImagesMemorySubAllocator() + { + if (m_reservedAlloc) + m_reservedAllocator->deallocate(reinterpret_cast(m_reservedAlloc), m_reservedAllocSize); + } + +private: + std::unique_ptr m_addressAllocator = nullptr; + + // Memory Allocation Required for the AddressAllocator + std::unique_ptr m_reservedAllocator = nullptr; + void* m_reservedAlloc = nullptr; + size_t m_reservedAllocSize = 0; + +}; + +// This will be dropped when the descriptor gets dropped from SuballocatedDescriptorSet. +// Destructor will then deallocate from GeneralPurposeAllocator, making the previously allocated range of the image available/free again.
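A rough caller-side sketch of the wiring described above (an assumption for illustration: the offset and size are values previously returned by `ImagesMemorySubAllocator::allocate`, and the resulting object is handed to whatever deferred-destruction path frees the image's descriptor slot):

// Illustrative only: package a suballocated range so it is returned to the arena
// once the descriptor set (and therefore the GPU) is done with the image.
inline core::smart_refctd_ptr<ImageCleanup> makeImageCleanup(
    const core::smart_refctd_ptr<ImagesMemorySubAllocator>& suballocator,
    const uint64_t allocationOffset, const uint64_t allocationSize)
{
    auto cleanup = core::make_smart_refctd_ptr<ImageCleanup>();
    cleanup->imagesMemorySuballocator = suballocator; // arena the image was placed in
    cleanup->addr = allocationOffset;                 // offset returned by allocate()
    cleanup->size = allocationSize;
    // the caller passes `cleanup` along when the descriptor slot is released; its destructor
    // then calls deallocate(addr, size) only after the GPU has finished with the image
    return cleanup;
}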
+struct ImageCleanup : public core::IReferenceCounted +{ + ImageCleanup() + : imagesMemorySuballocator(nullptr) + , addr(ImagesMemorySubAllocator::InvalidAddress) + , size(0ull) + {} + + ~ImageCleanup() override + { + // printf(std::format("Actual Eviction size={}, offset={} \n", size, addr).c_str()); + if (imagesMemorySuballocator && addr != ImagesMemorySubAllocator::InvalidAddress) + imagesMemorySuballocator->deallocate(addr, size); + } + + smart_refctd_ptr imagesMemorySuballocator; + uint64_t addr; + uint64_t size; + +}; + +struct CachedImageRecord +{ + static constexpr uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; + + uint32_t arrayIndex = InvalidTextureIndex; // index in our array of textures binding + ImageType type = ImageType::INVALID; + ImageState state = ImageState::INVALID; + uint64_t lastUsedFrameIndex = 0ull; // frame index at which this image was last used + uint64_t allocationOffset = ImagesMemorySubAllocator::InvalidAddress; + uint64_t allocationSize = 0ull; + core::smart_refctd_ptr gpuImageView = nullptr; + core::smart_refctd_ptr staticCPUImage = nullptr; // cached cpu image for uploading to gpuImageView when needed. + + // In LRU Cache `insert` function, in case of cache miss, we need to construct the reference with the current frame index + CachedImageRecord(uint64_t currentFrameIndex) + : arrayIndex(InvalidTextureIndex) + , type(ImageType::INVALID) + , state(ImageState::INVALID) + , lastUsedFrameIndex(currentFrameIndex) + , allocationOffset(ImagesMemorySubAllocator::InvalidAddress) + , allocationSize(0ull) + , gpuImageView(nullptr) + , staticCPUImage(nullptr) + {} + + CachedImageRecord() + : CachedImageRecord(0ull) + {} + + // In LRU Cache `insert` function, in case of cache hit, we need to assign the current frame index without changing `arrayIndex` + inline CachedImageRecord& operator=(uint64_t currentFrameIndex) { lastUsedFrameIndex = currentFrameIndex; return *this; } +}; + +// A resource-aware image cache with an LRU eviction policy. +// This cache tracks image usage by ID and provides hooks for eviction logic (such as releasing descriptor slots and deallocating GPU memory, which is done by the user of this class) +// Currently, eviction is purely LRU-based. In the future, eviction decisions may incorporate additional factors: +// - memory usage per image. +// - lastUsedFrameIndex. +// This class helps coordinate images' lifetimes in sync with GPU usage via eviction callbacks. +class ImagesCache : public core::ResizableLRUCache +{ +public: + using base_t = core::ResizableLRUCache; + + ImagesCache(size_t capacity) + : base_t(capacity) + {} + + // Attempts to insert a new image into the cache. + // If the cache is full, invokes the provided `evictCallback` to evict an image. + // Returns a pointer to the inserted or existing CachedImageRecord. + template EvictionCallback> + inline CachedImageRecord* insert(image_id imageID, uint64_t lastUsedSema, EvictionCallback&& evictCallback) + { + return base_t::insert(imageID, lastUsedSema, evictCallback); + } + + // Retrieves the image associated with `imageID`, updating its LRU position. + inline CachedImageRecord* get(image_id imageID) + { + return base_t::get(imageID); + } + + // Retrieves the CachedImageRecord without updating LRU order. + inline CachedImageRecord* peek(image_id imageID) + { + return base_t::peek(imageID); + } + + inline size_t size() const { return base_t::size(); } + + // Selects an eviction candidate based on LRU policy. + // In the future, this could factor in memory pressure or semaphore sync requirements.
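To tie the pieces together, here is a sketch of the allocation-retry loop that the `tryCreateAndAllocateImage_SubmitIfNeeded` documentation describes, expressed against this cache. It is illustrative only: `memReqs`, `imagesCache`, `imagesMemorySubAllocator` and `intendedNextSubmit` are placeholder names assumed from the declarations elsewhere in this PR, not a verbatim excerpt.

// Illustrative: evict least-recently-used images until the suballocation succeeds,
// or until only the image currently being inserted remains.
uint64_t offset = ImagesMemorySubAllocator::InvalidAddress;
while (true)
{
    offset = imagesMemorySubAllocator->allocate(memReqs.size, memReqs.alignment);
    if (offset != ImagesMemorySubAllocator::InvalidAddress)
        break; // success
    if (imagesCache->size() <= 1u)
        break; // nothing left to evict, give up
    const image_id victim = imagesCache->select_eviction_candidate();
    evictImage_SubmitIfNeeded(victim, *imagesCache->peek(victim), intendedNextSubmit); // flushes draws if the victim is still in flight
    imagesCache->erase(victim);
}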
+ inline image_id select_eviction_candidate() + { + const image_id* lru = base_t::get_least_recently_used(); + if (lru) + return *lru; + else + { + // we shouldn't select eviction candidate if lruCache is empty + _NBL_DEBUG_BREAK_IF(true); + return ~0ull; + } + } + + // Removes a specific image from the cache (manual eviction). + inline void erase(image_id imageID) + { + base_t::erase(imageID); + } +}; + +struct StreamedImageCopy +{ + asset::E_FORMAT srcFormat; + core::smart_refctd_ptr srcBuffer; // Make it 'std::future' later? + asset::IImage::SBufferCopy region; +}; + +// TODO: Rename to StaticImageAvailabilityRequest? +struct StaticImageInfo +{ + image_id imageID = ~0ull; + core::smart_refctd_ptr cpuImage = nullptr; + bool forceUpdate = false; // If true, bypasses the existing GPU-side cache and forces an update of the image data; Useful when replacing the contents of a static image that may already be resident. + asset::E_FORMAT imageViewFormatOverride = asset::E_FORMAT::EF_COUNT; // if asset::E_FORMAT::EF_COUNT then image view will have the same format as `cpuImage` +}; diff --git a/62_CAD/Polyline.h b/62_CAD/Polyline.h index 03b2f2c30..31ba9eb15 100644 --- a/62_CAD/Polyline.h +++ b/62_CAD/Polyline.h @@ -66,8 +66,6 @@ struct LineStyleInfo rigidSegmentIdx = InvalidRigidSegmentIndex; phaseShift = 0.0f; - assert(stipplePatternUnnormalizedRepresentation.size() <= StipplePatternMaxSize); - if (stipplePatternUnnormalizedRepresentation.size() == 0) { stipplePatternSize = 0; @@ -110,6 +108,8 @@ struct LineStyleInfo stipplePatternTransformed[0] += stipplePatternTransformed[stipplePatternTransformed.size() - 1]; stipplePatternTransformed.pop_back(); } + + assert(stipplePatternTransformed.size() <= StipplePatternMaxSize); if (stipplePatternTransformed.size() != 1) { @@ -1116,9 +1116,6 @@ class CPolyline : public CPolylineBase if (crossProductZ < 0.0f) res.v = -res.v; - // Negating y to avoid doing it in vertex shader when working in screen space, where y is in the opposite direction of worldspace y direction - res.v.y = -res.v.y; - m_polylineConnector.push_back(res); } } diff --git a/62_CAD/SingleLineText.cpp b/62_CAD/SingleLineText.cpp index 4b41cb628..76eb797e7 100644 --- a/62_CAD/SingleLineText.cpp +++ b/62_CAD/SingleLineText.cpp @@ -1,6 +1,6 @@ #include "SingleLineText.h" -SingleLineText::SingleLineText(nbl::ext::TextRendering::FontFace* face, const std::string& text) +SingleLineText::SingleLineText(nbl::ext::TextRendering::FontFace* face, const std::wstring& text) { m_glyphBoxes.reserve(text.length()); @@ -11,7 +11,7 @@ SingleLineText::SingleLineText(nbl::ext::TextRendering::FontFace* face, const st float64_t2 currentPos = float32_t2(0.0, 0.0); for (uint32_t i = 0; i < text.length(); i++) { - const auto glyphIndex = face->getGlyphIndex(wchar_t(text.at(i))); + const auto glyphIndex = face->getGlyphIndex(text.at(i)); const auto glyphMetrics = face->getGlyphMetrics(glyphIndex); const bool skipGenerateGlyph = (glyphIndex == 0 || (glyphMetrics.size.x == 0.0 && glyphMetrics.size.y == 0.0)); @@ -63,8 +63,8 @@ void SingleLineText::Draw( lineStyle.color = color; lineStyle.screenSpaceLineWidth = tan(tiltTiltAngle); lineStyle.worldSpaceLineWidth = boldInPixels; - const uint32_t styleIdx = drawResourcesFiller.addLineStyle_SubmitIfNeeded(lineStyle, intendedNextSubmit); - auto glyphObjectIdx = drawResourcesFiller.addMainObject_SubmitIfNeeded(styleIdx, intendedNextSubmit); + drawResourcesFiller.setActiveLineStyle(lineStyle); + drawResourcesFiller.beginMainObject(MainObjectType::TEXT); for (const auto& 
glyphBox : m_glyphBoxes) { @@ -75,7 +75,8 @@ void SingleLineText::Draw( // float32_t3 xx = float64_t3(0.0, -glyphBox.size.y, 0.0); const float32_t aspectRatio = static_cast(glm::length(dirV) / glm::length(dirU)); // check if you can just do: (glyphBox.size.y * scale.y) / glyphBox.size.x * scale.x) const float32_t2 minUV = face->getUV(float32_t2(0.0f,0.0f), glyphBox.size, drawResourcesFiller.getMSDFResolution(), MSDFPixelRange); - drawResourcesFiller.drawFontGlyph(face, glyphBox.glyphIdx, topLeft, dirU, aspectRatio, minUV, glyphObjectIdx, intendedNextSubmit); + drawResourcesFiller.drawFontGlyph(face, glyphBox.glyphIdx, topLeft, dirU, aspectRatio, minUV, intendedNextSubmit); } + drawResourcesFiller.endMainObject(); } \ No newline at end of file diff --git a/62_CAD/SingleLineText.h b/62_CAD/SingleLineText.h index aef22892a..624f3399f 100644 --- a/62_CAD/SingleLineText.h +++ b/62_CAD/SingleLineText.h @@ -12,7 +12,7 @@ class SingleLineText { public: // constructs and fills the `glyphBoxes` - SingleLineText(nbl::ext::TextRendering::FontFace* face, const std::string& text); + SingleLineText(nbl::ext::TextRendering::FontFace* face, const std::wstring& text); struct BoundingBox { diff --git a/62_CAD/main.cpp b/62_CAD/main.cpp index 637c88eda..f4a886791 100644 --- a/62_CAD/main.cpp +++ b/62_CAD/main.cpp @@ -1,4 +1,8 @@ - +// TODO: Copyright notice + + +#include "nbl/examples/examples.hpp" + using namespace nbl::hlsl; using namespace nbl; using namespace core; @@ -7,10 +11,9 @@ using namespace asset; using namespace ui; using namespace video; - -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" -#include "SimpleWindowedApplication.hpp" -#include "InputSystem.hpp" +#include "nbl/examples/common/BuiltinResourcesApplication.hpp" +#include "nbl/examples/common/SimpleWindowedApplication.hpp" +#include "nbl/examples/common/InputSystem.hpp" #include "nbl/video/utilities/CSimpleResizeSurface.h" #include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" @@ -45,6 +48,7 @@ static constexpr bool DebugModeWireframe = false; static constexpr bool DebugRotatingViewProj = false; static constexpr bool FragmentShaderPixelInterlock = true; static constexpr bool LargeGeoTextureStreaming = true; +static constexpr bool CacheAndReplay = false; // caches first frame resources (buffers and images) from DrawResourcesFiller and replays in future frames, skiping CPU Logic enum class ExampleMode { @@ -57,6 +61,9 @@ enum class ExampleMode CASE_6, // Custom Clip Projections CASE_7, // Images CASE_8, // MSDF and Text + CASE_9, // DTM + CASE_10, // testing fixed geometry and emulated fp64 corner cases + CASE_11, // grid DTM CASE_COUNT }; @@ -71,9 +78,12 @@ constexpr std::array cameraExtents = 10.0, // CASE_6 10.0, // CASE_7 600.0, // CASE_8 + 600.0, // CASE_9 + 10.0, // CASE_10 + 1000.0 // CASE_11 }; -constexpr ExampleMode mode = ExampleMode::CASE_4; +constexpr ExampleMode mode = ExampleMode::CASE_8; class Camera2D { @@ -163,14 +173,14 @@ class Camera2D class CEventCallback : public ISimpleManagedSurface::ICallback { public: - CEventCallback(nbl::core::smart_refctd_ptr&& m_inputSystem, nbl::system::logger_opt_smart_ptr&& logger) : m_inputSystem(std::move(m_inputSystem)), m_logger(std::move(logger)){} + CEventCallback(nbl::core::smart_refctd_ptr&& m_inputSystem, nbl::system::logger_opt_smart_ptr&& logger) : m_inputSystem(std::move(m_inputSystem)), m_logger(std::move(logger)){} CEventCallback() {} void setLogger(nbl::system::logger_opt_smart_ptr& logger) { m_logger = logger; } - void 
setInputSystem(nbl::core::smart_refctd_ptr&& m_inputSystem) + void setInputSystem(nbl::core::smart_refctd_ptr&& m_inputSystem) { m_inputSystem = std::move(m_inputSystem); } @@ -198,7 +208,7 @@ class CEventCallback : public ISimpleManagedSurface::ICallback } private: - nbl::core::smart_refctd_ptr m_inputSystem = nullptr; + nbl::core::smart_refctd_ptr m_inputSystem = nullptr; nbl::system::logger_opt_smart_ptr m_logger = nullptr; }; @@ -236,7 +246,7 @@ class CSwapchainResources : public ISimpleManagedSurface::ISwapchainResources std::fill(m_framebuffers.begin(),m_framebuffers.end(),nullptr); } - // For creating extra per-image or swapchain resources you might need + // For creating extra per-image or swapchain resources you might need virtual inline bool onCreateSwapchain_impl(const uint8_t qFam) { auto device = const_cast(m_renderpass->getOriginDevice()); @@ -270,10 +280,91 @@ class CSwapchainResources : public ISimpleManagedSurface::ISwapchainResources std::array,ISwapchain::MaxImages> m_framebuffers; }; -class ComputerAidedDesign final : public examples::SimpleWindowedApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication + +// TODO: Move these functions that help with creating a new promoted CPUImage +template +struct PromotionComponentSwizzle +{ + template + void operator()(const InT* in, OutT* out) const + { + using in_t = std::conditional_t, uint64_t, InT>; + using out_t = std::conditional_t, uint64_t, OutT>; + + reinterpret_cast(out)[0u] = reinterpret_cast(in)[0u]; + + if constexpr (SRC_CHANNELS > 1) + reinterpret_cast(out)[1u] = reinterpret_cast(in)[1u]; + else + reinterpret_cast(out)[1u] = static_cast(0); + + if constexpr (SRC_CHANNELS > 2) + reinterpret_cast(out)[2u] = reinterpret_cast(in)[2u]; + else + reinterpret_cast(out)[2u] = static_cast(0); + + if constexpr (SRC_CHANNELS > 3) + reinterpret_cast(out)[3u] = reinterpret_cast(in)[3u]; + else + reinterpret_cast(out)[3u] = static_cast(1); + } +}; +template +bool performCopyUsingImageFilter( + const core::smart_refctd_ptr& inCPUImage, + const core::smart_refctd_ptr& outCPUImage) { - using device_base_t = examples::SimpleWindowedApplication; - using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + Filter filter; + + const uint32_t mipLevels = inCPUImage->getCreationParameters().mipLevels; + + for (uint32_t level = 0u; level < mipLevels; ++level) + { + const auto regions = inCPUImage->getRegions(level); + + for (auto& region : regions) + { + typename Filter::state_type state = {}; + state.extent = region.imageExtent; + state.layerCount = region.imageSubresource.layerCount; + state.inImage = inCPUImage.get(); + state.outImage = outCPUImage.get(); + state.inOffsetBaseLayer = core::vectorSIMDu32(region.imageOffset.x, region.imageOffset.y, region.imageOffset.z, region.imageSubresource.baseArrayLayer); + state.outOffsetBaseLayer = core::vectorSIMDu32(0u); + state.inMipLevel = region.imageSubresource.mipLevel; + state.outMipLevel = region.imageSubresource.mipLevel; + + if (!filter.execute(core::execution::par_unseq, &state)) + return false; + } + } + return true; +} + +bool performImageFormatPromotionCopy(const core::smart_refctd_ptr& inCPUImage, const core::smart_refctd_ptr& outCPUImage) +{ + asset::E_FORMAT srcImageFormat = inCPUImage->getCreationParameters().format; + asset::E_FORMAT dstImageFormat = outCPUImage->getCreationParameters().format; + + // In = srcData, Out = stagingBuffer + if (srcImageFormat == dstImageFormat) + return false; + + auto
srcChannelCount = asset::getFormatChannelCount(srcImageFormat); + if (srcChannelCount == 1u) + return performCopyUsingImageFilter>>(inCPUImage, outCPUImage); + else if (srcChannelCount == 2u) + return performCopyUsingImageFilter>>(inCPUImage, outCPUImage); + else if (srcChannelCount == 3u) + return performCopyUsingImageFilter>>(inCPUImage, outCPUImage); + else + return performCopyUsingImageFilter>>(inCPUImage, outCPUImage); +} + +class ComputerAidedDesign final : public nbl::examples::SimpleWindowedApplication, public nbl::examples::BuiltinResourcesApplication +{ + using device_base_t = nbl::examples::SimpleWindowedApplication; + using asset_base_t = nbl::examples::BuiltinResourcesApplication; using clock_t = std::chrono::steady_clock; constexpr static uint32_t WindowWidthRequest = 1600u; @@ -282,22 +373,13 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu constexpr static uint32_t MaxSubmitsInFlight = 16u; public: - void allocateResources(uint32_t maxObjects) + void allocateResources() { - drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_utils), getGraphicsQueue()); - - // TODO: move individual allocations to DrawResourcesFiller::allocateResources(memory) - // Issue warning error, if we can't store our largest geomm struct + clip proj data inside geometry buffer along linestyle and mainObject - uint32_t maxIndices = maxObjects * 6u * 2u; - drawResourcesFiller.allocateIndexBuffer(m_device.get(), maxIndices); - drawResourcesFiller.allocateMainObjectsBuffer(m_device.get(), maxObjects); - drawResourcesFiller.allocateDrawObjectsBuffer(m_device.get(), maxObjects * 5u); - drawResourcesFiller.allocateStylesBuffer(m_device.get(), 512u); - - // * 3 because I just assume there is on average 3x beziers per actual object (cause we approximate other curves/arcs with beziers now) - // + 128 ClipProjData - size_t geometryBufferSize = maxObjects * sizeof(QuadraticBezierInfo) * 3 + 128 * sizeof(ClipProjectionData); - drawResourcesFiller.allocateGeometryBuffer(m_device.get(), geometryBufferSize); + drawResourcesFiller = DrawResourcesFiller(core::smart_refctd_ptr(m_utils), getGraphicsQueue(), core::smart_refctd_ptr(m_logger)); + + size_t maxImagesMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB + size_t maxBufferMemSize = 1024ull * 1024ull * 1024ull; // 1024 MB + drawResourcesFiller.allocateDrawResourcesWithinAvailableVRAM(m_device.get(), maxImagesMemSize, maxBufferMemSize); drawResourcesFiller.allocateMSDFTextures(m_device.get(), 256u, uint32_t2(MSDFSize, MSDFSize)); { @@ -311,14 +393,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu auto globalsBufferMem = m_device->allocate(memReq, m_globalsBuffer.get()); } - size_t sumBufferSizes = - drawResourcesFiller.gpuDrawBuffers.drawObjectsBuffer->getSize() + - drawResourcesFiller.gpuDrawBuffers.geometryBuffer->getSize() + - drawResourcesFiller.gpuDrawBuffers.indexBuffer->getSize() + - drawResourcesFiller.gpuDrawBuffers.lineStylesBuffer->getSize() + - drawResourcesFiller.gpuDrawBuffers.mainObjectsBuffer->getSize(); - m_logger->log("Buffers Size = %.2fKB", ILogger::E_LOG_LEVEL::ELL_INFO, sumBufferSizes / 1024.0f); - // pseudoStencil { asset::E_FORMAT pseudoStencilFormat = asset::EF_R32_UINT; @@ -399,22 +473,44 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } } - IGPUSampler::SParams samplerParams = {}; - samplerParams.TextureWrapU = IGPUSampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_BORDER; - samplerParams.TextureWrapV = 
IGPUSampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_BORDER; - samplerParams.TextureWrapW = IGPUSampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_BORDER; - samplerParams.BorderColor = IGPUSampler::ETBC_FLOAT_OPAQUE_WHITE; // positive means outside shape - samplerParams.MinFilter = IGPUSampler::ETF_LINEAR; - samplerParams.MaxFilter = IGPUSampler::ETF_LINEAR; - samplerParams.MipmapMode = IGPUSampler::ESMM_LINEAR; - samplerParams.AnisotropicFilter = 3; - samplerParams.CompareEnable = false; - samplerParams.CompareFunc = ECO_GREATER; - samplerParams.LodBias = 0.f; - samplerParams.MinLod = -1000.f; - samplerParams.MaxLod = 1000.f; - msdfTextureSampler = m_device->createSampler(samplerParams); - + // MSDF Image Sampler + { + IGPUSampler::SParams samplerParams = {}; + samplerParams.TextureWrapU = IGPUSampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_BORDER; + samplerParams.TextureWrapV = IGPUSampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_BORDER; + samplerParams.TextureWrapW = IGPUSampler::E_TEXTURE_CLAMP::ETC_CLAMP_TO_BORDER; + samplerParams.BorderColor = IGPUSampler::ETBC_FLOAT_OPAQUE_WHITE; // positive means outside shape + samplerParams.MinFilter = IGPUSampler::ETF_LINEAR; + samplerParams.MaxFilter = IGPUSampler::ETF_LINEAR; + samplerParams.MipmapMode = IGPUSampler::ESMM_LINEAR; + samplerParams.AnisotropicFilter = 3; + samplerParams.CompareEnable = false; + samplerParams.CompareFunc = ECO_GREATER; + samplerParams.LodBias = 0.f; + samplerParams.MinLod = -1000.f; + samplerParams.MaxLod = 1000.f; + msdfImageSampler = m_device->createSampler(samplerParams); + } + + // Static Image Sampler + { + IGPUSampler::SParams samplerParams = {}; + samplerParams.TextureWrapU = IGPUSampler::E_TEXTURE_CLAMP::ETC_MIRROR; + samplerParams.TextureWrapV = IGPUSampler::E_TEXTURE_CLAMP::ETC_MIRROR; + samplerParams.TextureWrapW = IGPUSampler::E_TEXTURE_CLAMP::ETC_MIRROR; + samplerParams.BorderColor = IGPUSampler::ETBC_FLOAT_TRANSPARENT_BLACK; + samplerParams.MinFilter = IGPUSampler::ETF_LINEAR; + samplerParams.MaxFilter = IGPUSampler::ETF_LINEAR; + samplerParams.MipmapMode = IGPUSampler::ESMM_LINEAR; + samplerParams.AnisotropicFilter = 3; + samplerParams.CompareEnable = false; + samplerParams.CompareFunc = ECO_GREATER; + samplerParams.LodBias = 0.f; + samplerParams.MinLod = -1000.f; + samplerParams.MaxLod = 1000.f; + staticImageSampler = m_device->createSampler(samplerParams); + } + // Initial Pipeline Transitions and Clearing of PseudoStencil and ColorStorage // Recorded to Temporary CommandBuffer, Submitted to Graphics Queue, and Blocked on here { @@ -640,11 +736,12 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu double dt = 0; double m_timeElapsed = 0.0; std::chrono::steady_clock::time_point lastTime; - uint32_t m_hatchDebugStep = 0u; + uint32_t m_hatchDebugStep = 10u; + E_HEIGHT_SHADING_MODE m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; inline bool onAppInitialized(smart_refctd_ptr&& system) override { - m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); + m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); // Remember to call the base class initialization! 
if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) @@ -670,7 +767,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu if (!m_surface->init(getGraphicsQueue(),std::move(scResources),{})) return logFail("Could not initialize the Surface!"); - allocateResources(1024 * 1024u); + allocateResources(); const bitflag bindlessTextureFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT | @@ -678,6 +775,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_PARTIALLY_BOUND_BIT; // Create DescriptorSetLayout, PipelineLayout and update DescriptorSets + const uint32_t imagesBinding = 3u; { video::IGPUDescriptorSetLayout::SBinding bindingsSet0[] = { { @@ -689,45 +787,24 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu }, { .binding = 1u, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_VERTEX | asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - }, - { - .binding = 2u, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_VERTEX | asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - }, - { - .binding = 3u, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_BUFFER, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_VERTEX | asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - }, - { - .binding = 4u, .type = asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, .count = 1u, }, { - .binding = 5u, + .binding = 2u, .type = asset::IDescriptor::E_TYPE::ET_SAMPLER, .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, .count = 1u, }, { - .binding = 6u, + .binding = imagesBinding, .type = asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE, .createFlags = bindlessTextureFlags, .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 128u, + .count = ImagesBindingArraySize, }, }; descriptorSetLayout0 = m_device->createDescriptorSetLayout(bindingsSet0); @@ -767,7 +844,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { descriptorSet0 = descriptorPool->createDescriptorSet(smart_refctd_ptr(descriptorSetLayout0)); descriptorSet1 = descriptorPool->createDescriptorSet(smart_refctd_ptr(descriptorSetLayout1)); - constexpr uint32_t DescriptorCountSet0 = 6u; + constexpr uint32_t DescriptorCountSet0 = 3u; video::IGPUDescriptorSet::SDescriptorInfo descriptorInfosSet0[DescriptorCountSet0] = {}; // Descriptors For Set 0: @@ -775,27 +852,15 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu descriptorInfosSet0[0u].info.buffer.size = m_globalsBuffer->getCreationParams().size; descriptorInfosSet0[0u].desc = m_globalsBuffer; - descriptorInfosSet0[1u].info.buffer.offset = 0u; - descriptorInfosSet0[1u].info.buffer.size = drawResourcesFiller.gpuDrawBuffers.drawObjectsBuffer->getCreationParams().size; - descriptorInfosSet0[1u].desc = 
drawResourcesFiller.gpuDrawBuffers.drawObjectsBuffer; - - descriptorInfosSet0[2u].info.buffer.offset = 0u; - descriptorInfosSet0[2u].info.buffer.size = drawResourcesFiller.gpuDrawBuffers.mainObjectsBuffer->getCreationParams().size; - descriptorInfosSet0[2u].desc = drawResourcesFiller.gpuDrawBuffers.mainObjectsBuffer; - - descriptorInfosSet0[3u].info.buffer.offset = 0u; - descriptorInfosSet0[3u].info.buffer.size = drawResourcesFiller.gpuDrawBuffers.lineStylesBuffer->getCreationParams().size; - descriptorInfosSet0[3u].desc = drawResourcesFiller.gpuDrawBuffers.lineStylesBuffer; + descriptorInfosSet0[1u].info.combinedImageSampler.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfosSet0[1u].info.combinedImageSampler.sampler = msdfImageSampler; + descriptorInfosSet0[1u].desc = drawResourcesFiller.getMSDFsTextureArray(); - descriptorInfosSet0[4u].info.combinedImageSampler.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - descriptorInfosSet0[4u].info.combinedImageSampler.sampler = msdfTextureSampler; - descriptorInfosSet0[4u].desc = drawResourcesFiller.getMSDFsTextureArray(); - - descriptorInfosSet0[5u].desc = msdfTextureSampler; // TODO[Erfan]: different sampler and make immutable? + descriptorInfosSet0[2u].desc = staticImageSampler; // TODO[Erfan]: different sampler and make immutable? // This is bindless to we write to it later. - // descriptorInfosSet0[6u].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - // descriptorInfosSet0[6u].desc = drawResourcesFiller.getMSDFsTextureArray(); + // descriptorInfosSet0[3u].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + // descriptorInfosSet0[3u].desc = drawResourcesFiller.getMSDFsTextureArray(); // Descriptors For Set 1: constexpr uint32_t DescriptorCountSet1 = 2u; @@ -812,65 +877,57 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu video::IGPUDescriptorSet::SWriteDescriptorSet descriptorUpdates[DescriptorUpdatesCount] = {}; // Set 0 Updates: + // globals descriptorUpdates[0u].dstSet = descriptorSet0.get(); descriptorUpdates[0u].binding = 0u; descriptorUpdates[0u].arrayElement = 0u; descriptorUpdates[0u].count = 1u; descriptorUpdates[0u].info = &descriptorInfosSet0[0u]; + // mdfs textures descriptorUpdates[1u].dstSet = descriptorSet0.get(); descriptorUpdates[1u].binding = 1u; descriptorUpdates[1u].arrayElement = 0u; descriptorUpdates[1u].count = 1u; descriptorUpdates[1u].info = &descriptorInfosSet0[1u]; - + + // general texture sampler descriptorUpdates[2u].dstSet = descriptorSet0.get(); descriptorUpdates[2u].binding = 2u; descriptorUpdates[2u].arrayElement = 0u; descriptorUpdates[2u].count = 1u; descriptorUpdates[2u].info = &descriptorInfosSet0[2u]; - descriptorUpdates[3u].dstSet = descriptorSet0.get(); - descriptorUpdates[3u].binding = 3u; + // Set 1 Updates: + descriptorUpdates[3u].dstSet = descriptorSet1.get(); + descriptorUpdates[3u].binding = 0u; descriptorUpdates[3u].arrayElement = 0u; descriptorUpdates[3u].count = 1u; - descriptorUpdates[3u].info = &descriptorInfosSet0[3u]; - - descriptorUpdates[4u].dstSet = descriptorSet0.get(); - descriptorUpdates[4u].binding = 4u; + descriptorUpdates[3u].info = &descriptorInfosSet1[0u]; + + descriptorUpdates[4u].dstSet = descriptorSet1.get(); + descriptorUpdates[4u].binding = 1u; descriptorUpdates[4u].arrayElement = 0u; descriptorUpdates[4u].count = 1u; - descriptorUpdates[4u].info = &descriptorInfosSet0[4u]; - - descriptorUpdates[5u].dstSet = descriptorSet0.get(); - descriptorUpdates[5u].binding = 5u; - 
descriptorUpdates[5u].arrayElement = 0u; - descriptorUpdates[5u].count = 1u; - descriptorUpdates[5u].info = &descriptorInfosSet0[5u]; - - // Set 1 Updates: - descriptorUpdates[6u].dstSet = descriptorSet1.get(); - descriptorUpdates[6u].binding = 0u; - descriptorUpdates[6u].arrayElement = 0u; - descriptorUpdates[6u].count = 1u; - descriptorUpdates[6u].info = &descriptorInfosSet1[0u]; - - descriptorUpdates[7u].dstSet = descriptorSet1.get(); - descriptorUpdates[7u].binding = 1u; - descriptorUpdates[7u].arrayElement = 0u; - descriptorUpdates[7u].count = 1u; - descriptorUpdates[7u].info = &descriptorInfosSet1[1u]; - + descriptorUpdates[4u].info = &descriptorInfosSet1[1u]; m_device->updateDescriptorSets(DescriptorUpdatesCount, descriptorUpdates, 0u, nullptr); } - pipelineLayout = m_device->createPipelineLayout({}, core::smart_refctd_ptr(descriptorSetLayout0), core::smart_refctd_ptr(descriptorSetLayout1), nullptr, nullptr); + const asset::SPushConstantRange range = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .offset = 0, + .size = sizeof(PushConstants) + }; + + pipelineLayout = m_device->createPipelineLayout({ &range,1 }, core::smart_refctd_ptr(descriptorSetLayout0), core::smart_refctd_ptr(descriptorSetLayout1), nullptr, nullptr); } - smart_refctd_ptr mainPipelineFragmentShaders = {}; - smart_refctd_ptr mainPipelineVertexShader = {}; - std::array, 2u> geoTexturePipelineShaders = {}; + drawResourcesFiller.setTexturesDescriptorSetAndBinding(core::smart_refctd_ptr(descriptorSet0), imagesBinding); + + smart_refctd_ptr mainPipelineFragmentShaders = {}; + smart_refctd_ptr mainPipelineVertexShader = {}; + std::array, 2u> geoTexturePipelineShaders = {}; { smart_refctd_ptr shaderReadCache = nullptr; smart_refctd_ptr shaderWriteCache = core::make_smart_refctd_ptr(); @@ -904,7 +961,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } // Load Custom Shader - auto loadCompileShader = [&](const std::string& relPath, IShader::E_SHADER_STAGE stage) -> smart_refctd_ptr + auto loadCompileShader = [&](const std::string& relPath) -> smart_refctd_ptr { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = m_logger.get(); @@ -915,24 +972,15 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu return nullptr; // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader - auto cpuShader = IAsset::castDown(assets[0]); - if (!cpuShader) + auto source = IAsset::castDown(assets[0]); + if (!source) return nullptr; - - cpuShader->setShaderStage(stage); - return m_device->compileShader({ cpuShader.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); + + return m_device->compileShader( ILogicalDevice::SShaderCreationParameters { .source = source.get(), .readCache = shaderReadCache.get(), .writeCache = shaderWriteCache.get(), .stage = IShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY }); }; - auto mainPipelineFragmentCpuShader = loadCompileShader("../shaders/main_pipeline/fragment.hlsl", IShader::E_SHADER_STAGE::ESS_ALL_OR_LIBRARY); - auto mainPipelineVertexCpuShader = loadCompileShader("../shaders/main_pipeline/vertex_shader.hlsl", IShader::E_SHADER_STAGE::ESS_VERTEX); - auto geoTexturePipelineVertCpuShader = loadCompileShader(GeoTextureRenderer::VertexShaderRelativePath, IShader::E_SHADER_STAGE::ESS_VERTEX); - auto geoTexturePipelineFragCpuShader = loadCompileShader(GeoTextureRenderer::FragmentShaderRelativePath, IShader::E_SHADER_STAGE::ESS_FRAGMENT); - 
mainPipelineFragmentCpuShader->setShaderStage(IShader::E_SHADER_STAGE::ESS_FRAGMENT); - - mainPipelineFragmentShaders = m_device->createShader({ mainPipelineFragmentCpuShader.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); - mainPipelineVertexShader = m_device->createShader({ mainPipelineVertexCpuShader.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); - geoTexturePipelineShaders[0] = m_device->createShader({ geoTexturePipelineVertCpuShader.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); - geoTexturePipelineShaders[1] = m_device->createShader({ geoTexturePipelineFragCpuShader.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); + mainPipelineFragmentShaders = loadCompileShader("../shaders/main_pipeline/fragment.hlsl"); + mainPipelineVertexShader = loadCompileShader("../shaders/main_pipeline/vertex_shader.hlsl"); core::smart_refctd_ptr shaderWriteCacheFile; { @@ -976,7 +1024,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // Load FSTri Shader ext::FullScreenTriangle::ProtoPipeline fsTriangleProtoPipe(m_assetMgr.get(),m_device.get(),m_logger.get()); - const IGPUShader::SSpecInfo fragSpec = { .entryPoint = "resolveAlphaMain", .shader = mainPipelineFragmentShaders.get() }; + const video::IGPUPipelineBase::SShaderSpecInfo fragSpec = { .shader = mainPipelineFragmentShaders.get(), .entryPoint = "resolveAlphaMain" }; resolveAlphaGraphicsPipeline = fsTriangleProtoPipe.createPipeline(fragSpec, pipelineLayout.get(), compatibleRenderPass.get(), 0u, blendParams); if (!resolveAlphaGraphicsPipeline) @@ -987,20 +1035,21 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // Create Main Graphics Pipelines { - IGPUShader::SSpecInfo specInfo[2] = { + video::IGPUPipelineBase::SShaderSpecInfo specInfo[2] = { { - .entryPoint = "main", - .shader = mainPipelineVertexShader.get() + .shader = mainPipelineVertexShader.get(), + .entryPoint = "vtxMain" }, { - .entryPoint = "fragMain", - .shader = mainPipelineFragmentShaders.get() + .shader = mainPipelineFragmentShaders.get(), + .entryPoint = "fragMain" }, }; IGPUGraphicsPipeline::SCreationParams params[1] = {}; params[0].layout = pipelineLayout.get(); - params[0].shaders = specInfo; + params[0].vertexShader = specInfo[0]; + params[0].fragmentShader = specInfo[1]; params[0].cached = { .vertexInput = {}, .primitiveAssembly = { @@ -1049,10 +1098,10 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu if (m_font->getFreetypeFace()->num_charmaps > 0) FT_Set_Charmap(m_font->getFreetypeFace(), m_font->getFreetypeFace()->charmaps[0]); - const auto str = "MSDF: ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnoprstuvwxyz '1234567890-=\"!@#$%&*()_+"; + const std::wstring str = L"MSDF: ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnoprstuvwxyz '1234567890-=\"!@#$%&*()_+"; singleLineText = std::unique_ptr(new SingleLineText( m_font.get(), - std::string(str))); + str)); drawResourcesFiller.setGlyphMSDFTextureFunction( [&](nbl::ext::TextRendering::FontFace* face, uint32_t glyphIdx) -> core::smart_refctd_ptr @@ -1069,7 +1118,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu ); m_geoTextureRenderer = std::unique_ptr(new GeoTextureRenderer(smart_refctd_ptr(m_device), smart_refctd_ptr(m_logger))); - m_geoTextureRenderer->initialize(geoTexturePipelineShaders[0].get(), geoTexturePipelineShaders[1].get(), compatibleRenderPass.get(), m_globalsBuffer); + // 
m_geoTextureRenderer->initialize(geoTexturePipelineShaders[0].get(), geoTexturePipelineShaders[1].get(), compatibleRenderPass.get(), m_globalsBuffer); // Create the Semaphores m_renderSemaphore = m_device->createSemaphore(0ull); @@ -1090,6 +1139,158 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu m_intendedNextSubmit.scratchCommandBuffers = m_commandBufferInfos; m_currentRecordingCommandBufferInfo = &m_commandBufferInfos[0]; + // Load image + system::path m_loadCWD = ".."; + std::string imagePaths[] = + { + "../../media/color_space_test/R8G8B8_1.jpg", + "../../media/color_space_test/R8G8B8_1.png", + "../../media/color_space_test/R8G8B8A8_2.png", + "../../media/color_space_test/R8G8B8A8_1.png", + }; + + /** + * @param formatOverride override format of an image view, use special argument asset::E_FORMAT::EF_COUNT to don't override image view format and use one retrieved from the loaded image + */ + auto loadImage = [&](const std::string& imagePath) -> smart_refctd_ptr + { + constexpr auto cachingFlags = static_cast(IAssetLoader::ECF_DONT_CACHE_REFERENCES & IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL); + const IAssetLoader::SAssetLoadParams loadParams(0ull, nullptr, cachingFlags, IAssetLoader::ELPF_NONE, m_logger.get(), m_loadCWD); + auto bundle = m_assetMgr->getAsset(imagePath, loadParams); + auto contents = bundle.getContents(); + if (contents.empty()) + { + m_logger->log("Failed to load image with path %s, skipping!", ILogger::ELL_ERROR, (m_loadCWD / imagePath).c_str()); + return nullptr; + } + + smart_refctd_ptr cpuImgView; + const auto& asset = contents[0]; + switch (asset->getAssetType()) + { + case IAsset::ET_IMAGE: + { + auto image = smart_refctd_ptr_static_cast(asset); + auto& flags = image->getCreationParameters().flags; + // assert if asset is mutable + const_cast&>(flags) |= asset::IImage::E_CREATE_FLAGS::ECF_MUTABLE_FORMAT_BIT; + const auto format = image->getCreationParameters().format; + + ICPUImageView::SCreationParams viewParams = { + .flags = ICPUImageView::E_CREATE_FLAGS::ECF_NONE, + .image = std::move(image), + .viewType = IImageView::E_TYPE::ET_2D, + .format = format, + .subresourceRange = { + .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = ICPUImageView::remaining_mip_levels, + .baseArrayLayer = 0u, + .layerCount = ICPUImageView::remaining_array_layers + } + }; + + cpuImgView = ICPUImageView::create(std::move(viewParams)); + } break; + + case IAsset::ET_IMAGE_VIEW: + cpuImgView = smart_refctd_ptr_static_cast(asset); + break; + default: + m_logger->log("Failed to load ICPUImage or ICPUImageView got some other Asset Type, skipping!", ILogger::ELL_ERROR); + return nullptr; + } + + const auto loadedCPUImage = cpuImgView->getCreationParameters().image; + const auto loadedCPUImageCreationParams = loadedCPUImage->getCreationParameters(); + + // Promoting the image to a format GPU supports. 
(so that updateImageViaStagingBuffer doesn't have to handle that each frame if overflow-submit needs to happen) + auto promotedCPUImageCreationParams = loadedCPUImage->getCreationParameters(); + + promotedCPUImageCreationParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; + // promote format because RGB8 and friends don't actually exist in HW + { + const IPhysicalDevice::SImageFormatPromotionRequest request = { + .originalFormat = promotedCPUImageCreationParams.format, + .usages = IPhysicalDevice::SFormatImageUsages::SUsage(promotedCPUImageCreationParams.usage) + }; + promotedCPUImageCreationParams.format = m_physicalDevice->promoteImageFormat(request,video::IGPUImage::TILING::OPTIMAL); + } + + if (loadedCPUImageCreationParams.format != promotedCPUImageCreationParams.format) + { + smart_refctd_ptr promotedCPUImage = ICPUImage::create(promotedCPUImageCreationParams); + core::rational bytesPerPixel = asset::getBytesPerPixel(promotedCPUImageCreationParams.format); + + const auto extent = loadedCPUImageCreationParams.extent; + const uint32_t mipLevels = loadedCPUImageCreationParams.mipLevels; + const uint32_t arrayLayers = loadedCPUImageCreationParams.arrayLayers; + + // Only supporting 1 mip, it's just for test.. + const size_t byteSize = (bytesPerPixel * extent.width * extent.height * extent.depth * arrayLayers).getIntegerApprox(); // TODO: consider mips + ICPUBuffer::SCreationParams bufferCreationParams = {}; + bufferCreationParams.size = byteSize; + smart_refctd_ptr promotedCPUImageBuffer = ICPUBuffer::create(std::move(bufferCreationParams)); + + auto newRegions = core::make_refctd_dynamic_array>(1u); + ICPUImage::SBufferCopy& region = newRegions->front(); + region.imageSubresource.aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; + region.imageSubresource.mipLevel = 0u; // TODO + region.imageSubresource.baseArrayLayer = 0u; + region.imageSubresource.layerCount = arrayLayers; + region.bufferOffset = 0u; + region.bufferRowLength = 0u; + region.bufferImageHeight = 0u; + region.imageOffset = { 0u, 0u, 0u }; + region.imageExtent = extent; + promotedCPUImage->setBufferAndRegions(std::move(promotedCPUImageBuffer), newRegions); + + performImageFormatPromotionCopy(loadedCPUImage, promotedCPUImage); + return promotedCPUImage; + } + else + { + return loadedCPUImage; + } + }; + + for (const auto& imagePath : imagePaths) + { + auto image = loadImage(imagePath); + if (image) + sampleImages.push_back(image); + } + + gridDTMHeightMap = loadImage("../../media/gridDTMHeightMap.exr"); + + // set diagonals of cells to TOP_LEFT_TO_BOTTOM_RIGHT or BOTTOM_LEFT_TO_TOP_RIGHT randomly + { + // assumption is that format of the grid DTM height map is *_SRGB, I don't think we need any code to ensure that + + auto* region = gridDTMHeightMap->getRegion(0, core::vectorSIMDu32(0.0f)); + auto imageExtent = region->getExtent(); + auto imagePixelSize = asset::getBytesPerPixel(gridDTMHeightMap->getCreationParameters().format).getIntegerApprox(); + float* imageData = static_cast(gridDTMHeightMap->getBuffer()->getPointer()) + region->bufferOffset; + const size_t imageByteSize = gridDTMHeightMap->getImageDataSizeInBytes(); + assert(imageByteSize % sizeof(float) == 0); + + std::random_device rd; + std::mt19937 mt(rd()); + std::uniform_int_distribution dist(0, 1); + + for (int i = 0; i < imageByteSize; i += sizeof(float)) + { + const bool isTexelEven = static_cast(dist(mt)); + E_CELL_DIAGONAL diagonal = isTexelEven ? 
TOP_LEFT_TO_BOTTOM_RIGHT : BOTTOM_LEFT_TO_TOP_RIGHT; + + setDiagonalModeBit(imageData, diagonal); + imageData++; + } + + } + + assert(gridDTMHeightMap); + return true; } @@ -1129,10 +1330,30 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { m_hatchDebugStep--; } + if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_1) + { + m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + } + if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_2) + { + m_shadingModeExample = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + } + if (ev.action == nbl::ui::SKeyboardEvent::E_KEY_ACTION::ECA_PRESSED && ev.keyCode == nbl::ui::E_KEY_CODE::EKC_3) + { + m_shadingModeExample = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + } } } , m_logger.get()); + const bool isCachingDraw = CacheAndReplay && m_realFrameIx == 0u; + if (isCachingDraw) + { + SIntendedSubmitInfo invalidSubmit = {}; + addObjects(invalidSubmit); // if any overflows happen here, it will add to our replay cache and not submit anything + replayCaches.push_back(drawResourcesFiller.createReplayCache()); + finishedCachingDraw = true; + } if (!beginFrameRender()) return; @@ -1153,10 +1374,28 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu IQueue::SSubmitInfo::SSemaphoreInfo waitSems[2u] = { acquired, prevFrameRendered }; m_intendedNextSubmit.waitSemaphores = waitSems; - addObjects(m_intendedNextSubmit); - + if (CacheAndReplay) + { + // to size-1u because we only want to submit overflows here. + for (uint32_t i = 0u; i < replayCaches.size() - 1u; ++i) + { + drawResourcesFiller.setReplayCache(replayCaches[i].get()); + submitDraws(m_intendedNextSubmit, true); + drawResourcesFiller.unsetReplayCache(); + } + if (!replayCaches.empty()) + drawResourcesFiller.setReplayCache(replayCaches.back().get()); + } + else + { + addObjects(m_intendedNextSubmit); + } + endFrameRender(m_intendedNextSubmit); + if (CacheAndReplay) + drawResourcesFiller.unsetReplayCache(); + #ifdef BENCHMARK_TILL_FIRST_FRAME if (!stopBenchamrkFlag) { @@ -1201,23 +1440,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // cb->reset(video::IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); // cb->begin(video::IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); cb->beginDebugMarker("Frame"); - - float64_t3x3 projectionToNDC; - projectionToNDC = m_Camera.constructViewProjection(); - - Globals globalData = {}; - globalData.antiAliasingFactor = 1.0;// +abs(cos(m_timeElapsed * 0.0008)) * 20.0f; - globalData.resolution = uint32_t2{ m_window->getWidth(), m_window->getHeight() }; - globalData.defaultClipProjection.projectionToNDC = projectionToNDC; - globalData.defaultClipProjection.minClipNDC = float32_t2(-1.0, -1.0); - globalData.defaultClipProjection.maxClipNDC = float32_t2(+1.0, +1.0); - auto screenToWorld = getScreenToWorldRatio(globalData.defaultClipProjection.projectionToNDC, globalData.resolution); - globalData.screenToWorldRatio = screenToWorld; - globalData.worldToScreenRatio = (1.0/screenToWorld); - globalData.miterLimit = 10.0f; - SBufferRange globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer.get() }; - bool updateSuccess = cb->updateBuffer(globalBufferUpdateRange, &globalData); - assert(updateSuccess); nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo beginInfo; auto scRes = 
static_cast(m_surface->getSwapchainResources()); @@ -1248,10 +1470,63 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu void submitDraws(SIntendedSubmitInfo& intendedSubmitInfo, bool inBetweenSubmit) { + const bool isCachingDraw = CacheAndReplay && m_realFrameIx == 0u && !finishedCachingDraw; + if (isCachingDraw) + { + drawResourcesFiller.markFrameUsageComplete(intendedSubmitInfo.getFutureScratchSemaphore().value); + replayCaches.push_back(drawResourcesFiller.createReplayCache()); + return; // we don't record, submit or do anything, just caching the draw resources + } + + drawResourcesFiller.pushAllUploads(intendedSubmitInfo); + + m_currentRecordingCommandBufferInfo = intendedSubmitInfo.getCommandBufferForRecording(); // drawResourcesFiller.pushAllUploads might've overflow submitted and changed the current recording command buffer + // Use the current recording command buffer of the intendedSubmitInfos scratchCommandBuffers, it should be in recording state auto* cb = m_currentRecordingCommandBufferInfo->cmdbuf; - auto&r = drawResourcesFiller; + const auto& resourcesCollection = drawResourcesFiller.getResourcesCollection(); + const auto& resourcesGPUBuffer = drawResourcesFiller.getResourcesGPUBuffer(); + + float64_t3x3 projectionToNDC; + projectionToNDC = m_Camera.constructViewProjection(); + + // TEST CAMERA ROTATION +#if 1 + // double rotation = 0.25 * PI(); + double rotation = abs(cos(m_timeElapsed * 0.0004)) * 0.25 * PI() ; + float64_t2 rotationVec = float64_t2(cos(rotation), sin(rotation)); + float64_t3x3 rotationParameter = float64_t3x3 { + rotationVec.x, rotationVec.y, 0.0, + -rotationVec.y, rotationVec.x, 0.0, + 0.0, 0.0, 1.0 + }; + projectionToNDC = nbl::hlsl::mul(projectionToNDC, rotationParameter); +#endif + Globals globalData = {}; + uint64_t baseAddress = resourcesGPUBuffer->getDeviceAddress(); + globalData.pointers = { + .lineStyles = baseAddress + resourcesCollection.lineStyles.bufferOffset, + .dtmSettings = baseAddress + resourcesCollection.dtmSettings.bufferOffset, + .customProjections = baseAddress + resourcesCollection.customProjections.bufferOffset, + .customClipRects = baseAddress + resourcesCollection.customClipRects.bufferOffset, + .mainObjects = baseAddress + resourcesCollection.mainObjects.bufferOffset, + .drawObjects = baseAddress + resourcesCollection.drawObjects.bufferOffset, + .geometryBuffer = baseAddress + resourcesCollection.geometryInfo.bufferOffset, + }; + globalData.antiAliasingFactor = 1.0;// +abs(cos(m_timeElapsed * 0.0008)) * 20.0f; + globalData.resolution = uint32_t2{ m_window->getWidth(), m_window->getHeight() }; + globalData.defaultProjectionToNDC = projectionToNDC; + float screenToWorld = getScreenToWorldRatio(globalData.defaultProjectionToNDC, globalData.resolution); + globalData.screenToWorldScaleTransform = float64_t3x3( 1.0f / screenToWorld, 0.0f, 0.0f, + 0.0f, 1.0f / screenToWorld, 0.0f, + 0.0f, 0.0f, 1.0f); + globalData.miterLimit = 10.0f; + globalData.currentlyActiveMainObjectIndex = drawResourcesFiller.getActiveMainObjectIndex(); + SBufferRange globalBufferUpdateRange = { .offset = 0ull, .size = sizeof(Globals), .buffer = m_globalsBuffer.get() }; + bool updateSuccess = cb->updateBuffer(globalBufferUpdateRange, &globalData); + assert(updateSuccess); + asset::SViewport vp = { .x = 0u, @@ -1272,25 +1547,12 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // pipelineBarriersBeforeDraw { - constexpr uint32_t MaxBufferBarriersCount = 6u; + constexpr uint32_t 
MaxBufferBarriersCount = 2u; uint32_t bufferBarriersCount = 0u; IGPUCommandBuffer::SPipelineBarrierDependencyInfo::buffer_barrier_t bufferBarriers[MaxBufferBarriersCount]; + + const auto& resourcesCollection = drawResourcesFiller.getResourcesCollection(); - // Index Buffer Copy Barrier -> Only do once at the beginning of the frames - if (m_realFrameIx == 0u) - { - auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; - bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; - bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; - bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_INPUT_BITS; - bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::INDEX_READ_BIT; - bufferBarrier.range = - { - .offset = 0u, - .size = drawResourcesFiller.gpuDrawBuffers.indexBuffer->getSize(), - .buffer = drawResourcesFiller.gpuDrawBuffers.indexBuffer, - }; - } if (m_globalsBuffer->getSize() > 0u) { auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; @@ -1305,60 +1567,18 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu .buffer = m_globalsBuffer, }; } - if (drawResourcesFiller.getCurrentDrawObjectsBufferSize() > 0u) - { - auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; - bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; - bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; - bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT; - bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS; - bufferBarrier.range = - { - .offset = 0u, - .size = drawResourcesFiller.getCurrentDrawObjectsBufferSize(), - .buffer = drawResourcesFiller.gpuDrawBuffers.drawObjectsBuffer, - }; - } - if (drawResourcesFiller.getCurrentGeometryBufferSize() > 0u) + if (drawResourcesFiller.getCopiedResourcesSize() > 0u) { auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; - bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT; - bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS; + bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_INPUT_BITS | PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; + bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::MEMORY_READ_BITS | ACCESS_FLAGS::MEMORY_WRITE_BITS; bufferBarrier.range = { .offset = 0u, - .size = drawResourcesFiller.getCurrentGeometryBufferSize(), - .buffer = drawResourcesFiller.gpuDrawBuffers.geometryBuffer, - }; - } - if (drawResourcesFiller.getCurrentMainObjectsBufferSize() > 0u) - { - auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; - bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; - bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; - bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; - bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS; - bufferBarrier.range = - { - .offset = 0u, - .size = drawResourcesFiller.getCurrentMainObjectsBufferSize(), - .buffer = drawResourcesFiller.gpuDrawBuffers.mainObjectsBuffer, - }; - } - if (drawResourcesFiller.getCurrentLineStylesBufferSize() > 0u) - { - auto& bufferBarrier = bufferBarriers[bufferBarriersCount++]; - 
bufferBarrier.barrier.dep.srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT; - bufferBarrier.barrier.dep.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; - bufferBarrier.barrier.dep.dstStageMask = PIPELINE_STAGE_FLAGS::VERTEX_SHADER_BIT | PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT; - bufferBarrier.barrier.dep.dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS; - bufferBarrier.range = - { - .offset = 0u, - .size = drawResourcesFiller.getCurrentLineStylesBufferSize(), - .buffer = drawResourcesFiller.gpuDrawBuffers.lineStylesBuffer, + .size = drawResourcesFiller.getCopiedResourcesSize(), + .buffer = drawResourcesFiller.getResourcesGPUBuffer(), }; } cb->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .bufBarriers = {bufferBarriers, bufferBarriersCount}, .imgBarriers = {} }); @@ -1383,38 +1603,75 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu }; } cb->beginRenderPass(beginInfo, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - - const uint32_t currentIndexCount = drawResourcesFiller.getDrawObjectCount() * 6u; + IGPUDescriptorSet* descriptorSets[] = { descriptorSet0.get(), descriptorSet1.get() }; cb->bindDescriptorSets(asset::EPBP_GRAPHICS, pipelineLayout.get(), 0u, 2u, descriptorSets); + + cb->bindGraphicsPipeline(graphicsPipeline.get()); - // TODO[Przemek]: based on our call bind index buffer you uploaded to part of the `drawResourcesFiller.gpuDrawBuffers.geometryBuffer` - // Vertices will be pulled based on baseBDAPointer of where you uploaded the vertex + the VertexID in the vertex shader. - cb->bindIndexBuffer({ .offset = 0u, .buffer = drawResourcesFiller.gpuDrawBuffers.indexBuffer.get() }, asset::EIT_32BIT); + for (auto& drawCall : drawResourcesFiller.getDrawCalls()) + { + if (drawCall.isDTMRendering) + { + cb->bindIndexBuffer({ .offset = resourcesCollection.geometryInfo.bufferOffset + drawCall.dtm.indexBufferOffset, .buffer = drawResourcesFiller.getResourcesGPUBuffer().get()}, asset::EIT_32BIT); - // TODO[Przemek]: binding the same pipelie, no need to change. - cb->bindGraphicsPipeline(graphicsPipeline.get()); - - // TODO[Przemek]: contour settings, height shading settings, base bda pointers will need to be pushed via pushConstants before the draw currently as it's the easiest thing to do. 
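+                    // Vertices for the DTM mesh are pulled in the vertex shader via BDA (triangleMeshVerticesBaseAddress + VertexID), so only the index buffer is bound for this draw.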
+ PushConstants pc = { + .triangleMeshVerticesBaseAddress = drawCall.dtm.triangleMeshVerticesBaseAddress + resourcesGPUBuffer->getDeviceAddress() + resourcesCollection.geometryInfo.bufferOffset, + .triangleMeshMainObjectIndex = drawCall.dtm.triangleMeshMainObjectIndex, + .isDTMRendering = true + }; + cb->pushConstants(graphicsPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc); + + cb->drawIndexed(drawCall.dtm.indexCount, 1u, 0u, 0u, 0u); + } + else + { + PushConstants pc = { + .isDTMRendering = false + }; + cb->pushConstants(graphicsPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc); + + const uint64_t indexOffset = drawCall.drawObj.drawObjectStart * 6u; + const uint64_t indexCount = drawCall.drawObj.drawObjectCount * 6u; + + // assert(currentIndexCount == resourcesCollection.indexBuffer.getCount()); + cb->bindIndexBuffer({ .offset = resourcesCollection.indexBuffer.bufferOffset + indexOffset * sizeof(uint32_t), .buffer = resourcesGPUBuffer.get()}, asset::EIT_32BIT); + cb->drawIndexed(indexCount, 1u, 0u, 0u, 0u); + } + } - // TODO[Przemek]: draw parameters needs to reflect the mesh involved - cb->drawIndexed(currentIndexCount, 1u, 0u, 0u, 0u); if (fragmentShaderInterlockEnabled) { cb->bindGraphicsPipeline(resolveAlphaGraphicsPipeline.get()); nbl::ext::FullScreenTriangle::recordDrawCall(cb); } - + if constexpr (DebugModeWireframe) { cb->bindGraphicsPipeline(debugGraphicsPipeline.get()); - cb->drawIndexed(currentIndexCount, 1u, 0u, 0u, 0u); + + for (auto& drawCall : drawResourcesFiller.getDrawCalls()) + { + PushConstants pc = { + .isDTMRendering = false + }; + cb->pushConstants(debugGraphicsPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, 0, sizeof(PushConstants), &pc); + + const uint64_t indexOffset = drawCall.drawObj.drawObjectStart * 6u; + const uint64_t indexCount = drawCall.drawObj.drawObjectCount * 6u; + + // assert(currentIndexCount == resourcesCollection.indexBuffer.getCount()); + cb->bindIndexBuffer({ .offset = resourcesCollection.indexBuffer.bufferOffset + indexOffset * sizeof(uint32_t), .buffer = resourcesGPUBuffer.get()}, asset::EIT_32BIT); + + cb->drawIndexed(indexCount, 1u, 0u, 0u, 0u); + } } - cb->endRenderPass(); if (!inBetweenSubmit) cb->endDebugMarker(); + + drawResourcesFiller.markFrameUsageComplete(intendedSubmitInfo.getFutureScratchSemaphore().value); if (inBetweenSubmit) { @@ -1480,6 +1737,15 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { auto retval = device_base_t::getRequiredDeviceFeatures(); retval.fragmentShaderPixelInterlock = FragmentShaderPixelInterlock; + retval.nullDescriptor = true; + return retval; + } + + virtual video::SPhysicalDeviceLimits getRequiredDeviceLimits() const override + { + video::SPhysicalDeviceLimits retval = base_t::getRequiredDeviceLimits(); + retval.fragmentShaderBarycentric = true; + return retval; } @@ -1489,32 +1755,13 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // We only support one swapchain mode, surface, the other one is Display which we have not implemented yet. 
retval.swapchainMode = video::E_SWAPCHAIN_MODE::ESM_SURFACE; retval.validations = true; - retval.synchronizationValidation = true; + retval.synchronizationValidation = false; return retval; } protected: void addObjects(SIntendedSubmitInfo& intendedNextSubmit) { - - // TODO[Przemek]: add your own case, you won't call any other drawResourcesFiller function, only drawMesh with your custom made Mesh (for start it can be a single triangle) - - // we record upload of our objects and if we failed to allocate we submit everything - if (!intendedNextSubmit.valid()) - { - // log("intendedNextSubmit is invalid.", nbl::system::ILogger::ELL_ERROR); - assert(false); - return; - } - - // Use the current recording command buffer of the intendedSubmitInfos scratchCommandBuffers, it should be in recording state - auto* cmdbuf = m_currentRecordingCommandBufferInfo->cmdbuf; - - assert(cmdbuf->getState() == video::IGPUCommandBuffer::STATE::RECORDING && cmdbuf->isResettable()); - assert(cmdbuf->getRecordingFlags().hasFlags(video::IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT)); - - auto* cmdpool = cmdbuf->getPool(); - drawResourcesFiller.setSubmitDrawsFunction( [&](SIntendedSubmitInfo& intendedNextSubmit) { @@ -1951,8 +2198,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu LineStyleInfo style = {}; style.screenSpaceLineWidth = 4.0f; - style.worldSpaceLineWidth = 0.0f; - style.color = float32_t4(0.7f, 0.3f, 0.1f, 0.5f); + style.worldSpaceLineWidth = 2.0f; + style.color = float32_t4(0.7f, 0.3f, 0.1f, 0.1f); LineStyleInfo style2 = {}; style2.screenSpaceLineWidth = 2.0f; @@ -2025,7 +2272,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu myCurve.majorAxis = { -10.0, 5.0 }; myCurve.center = { 0, -5.0 }; myCurve.angleBounds = { - nbl::core::PI() * 2.0, + nbl::core::PI() * 1.0, nbl::core::PI() * 0.0 }; myCurve.eccentricity = 1.0; @@ -2053,10 +2300,10 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } drawResourcesFiller.drawPolyline(originalPolyline, style, intendedNextSubmit); - //CPolyline offsettedPolyline = originalPolyline.generateParallelPolyline(+0.0 - 3.0 * abs(cos(m_timeElapsed * 0.0009))); - //CPolyline offsettedPolyline2 = originalPolyline.generateParallelPolyline(+0.0 + 3.0 * abs(cos(m_timeElapsed * 0.0009))); - //drawResourcesFiller.drawPolyline(offsettedPolyline, style2, intendedNextSubmit); - //drawResourcesFiller.drawPolyline(offsettedPolyline2, style2, intendedNextSubmit); + CPolyline offsettedPolyline = originalPolyline.generateParallelPolyline(+0.0 - 3.0 * abs(cos(10.0 * 0.0009))); + CPolyline offsettedPolyline2 = originalPolyline.generateParallelPolyline(+0.0 + 3.0 * abs(cos(10.0 * 0.0009))); + drawResourcesFiller.drawPolyline(offsettedPolyline, style2, intendedNextSubmit); + drawResourcesFiller.drawPolyline(offsettedPolyline2, style2, intendedNextSubmit); } else if (mode == ExampleMode::CASE_4) { @@ -2210,11 +2457,11 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu else if (mode == ExampleMode::CASE_5) { //#define CASE_5_POLYLINE_1 // animated stipple pattern -//#define CASE_5_POLYLINE_2 // miter test static +#define CASE_5_POLYLINE_2 // miter test static //#define CASE_5_POLYLINE_3 // miter test animated //#define CASE_5_POLYLINE_4 // miter test animated (every angle) //#define CASE_5_POLYLINE_5 // closed polygon -#define CASE_5_POLYLINE_6 // stretching +// #define CASE_5_POLYLINE_6 // stretching //#define CASE_5_POLYLINE_7 // wide non solid lines #if 
defined(CASE_5_POLYLINE_1) @@ -2330,7 +2577,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu /*quadratics2[3].P0 = {20.0, 50.0}; quadratics2[3].P1 = { -80.0, 100.0 }; quadratics2[3].P2 = { -100.0, 90.0 };*/ - polyline.addQuadBeziers(core::SRange>(quadratics2.data(), quadratics2.data() + quadratics2.size())); + polyline.addQuadBeziers(quadratics2); // section 3: lines std::vector linePoints2; @@ -2768,16 +3015,20 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } else if (mode == ExampleMode::CASE_6) { - // left half of screen should be red and right half should be green - const auto& cameraProj = m_Camera.constructViewProjection(); - ClipProjectionData showLeft = {}; - showLeft.projectionToNDC = cameraProj; - showLeft.minClipNDC = float32_t2(-1.0, -1.0); - showLeft.maxClipNDC = float32_t2(0.0, +1.0); - ClipProjectionData showRight = {}; - showRight.projectionToNDC = cameraProj; - showRight.minClipNDC = float32_t2(0.0, -1.0); - showRight.maxClipNDC = float32_t2(+1.0, +1.0); + float64_t3x3 customProjection = float64_t3x3{ + 1.0, 0.0, cos(m_timeElapsed * 0.0005) * 100.0, + 0.0, 1.0, 0.0, + 0.0, 0.0, 1.0 + }; + + /// [NOTE]: We set minClip and maxClip (in default worldspace) in such a way that minClip.y > maxClip.y so that minClipNDC.y < maxClipNDC.y + // left half should be red and right half should be green + WorldClipRect showLeft = {}; + showLeft.minClip = float64_t2(-100.0, +1000.0); + showLeft.maxClip = float64_t2(0.0, -1000.0); + WorldClipRect showRight = {}; + showRight.minClip = float64_t2(0.0, +1000.0); + showRight.maxClip = float64_t2(100.0, -1000.0); LineStyleInfo leftLineStyle = {}; leftLineStyle.screenSpaceLineWidth = 3.0f; @@ -2832,181 +3083,55 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } // we do redundant and nested push/pops to test - drawResourcesFiller.pushClipProjectionData(showLeft); + drawResourcesFiller.pushCustomClipRect(showLeft); { drawResourcesFiller.drawPolyline(polyline1, leftLineStyle, intendedNextSubmit); - drawResourcesFiller.pushClipProjectionData(showRight); + drawResourcesFiller.pushCustomClipRect(showRight); + drawResourcesFiller.pushCustomProjection(customProjection); { drawResourcesFiller.drawPolyline(polyline1, rightLineStyle, intendedNextSubmit); drawResourcesFiller.drawPolyline(polyline2, rightLineStyle, intendedNextSubmit); } - drawResourcesFiller.popClipProjectionData(); + drawResourcesFiller.popCustomProjection(); + drawResourcesFiller.popCustomClipRect(); drawResourcesFiller.drawPolyline(polyline2, leftLineStyle, intendedNextSubmit); - drawResourcesFiller.pushClipProjectionData(showRight); + drawResourcesFiller.pushCustomClipRect(showRight); { drawResourcesFiller.drawPolyline(polyline3, rightLineStyle, intendedNextSubmit); drawResourcesFiller.drawPolyline(polyline2, rightLineStyle, intendedNextSubmit); - drawResourcesFiller.pushClipProjectionData(showLeft); + drawResourcesFiller.pushCustomClipRect(showLeft); { drawResourcesFiller.drawPolyline(polyline1, leftLineStyle, intendedNextSubmit); } - drawResourcesFiller.popClipProjectionData(); + drawResourcesFiller.popCustomClipRect(); } - drawResourcesFiller.popClipProjectionData(); + drawResourcesFiller.popCustomClipRect(); drawResourcesFiller.drawPolyline(polyline2, leftLineStyle, intendedNextSubmit); } - drawResourcesFiller.popClipProjectionData(); + drawResourcesFiller.popCustomClipRect(); } else if (mode == ExampleMode::CASE_7) { - if (m_realFrameIx == 0u) + for (uint32_t i = 0; i < 
sampleImages.size(); ++i) { - // Load image - system::path m_loadCWD = ".."; - std::string imagePath = "../../media/color_space_test/R8G8B8A8_1.png"; - - constexpr auto cachingFlags = static_cast(IAssetLoader::ECF_DONT_CACHE_REFERENCES & IAssetLoader::ECF_DONT_CACHE_TOP_LEVEL); - const IAssetLoader::SAssetLoadParams loadParams(0ull, nullptr, cachingFlags, IAssetLoader::ELPF_NONE, m_logger.get(),m_loadCWD); - auto bundle = m_assetMgr->getAsset(imagePath,loadParams); - auto contents = bundle.getContents(); - if (contents.empty()) - { - m_logger->log("Failed to load image with path %s, skipping!",ILogger::ELL_ERROR,(m_loadCWD/imagePath).c_str()); - } - - smart_refctd_ptr cpuImgView; - const auto& asset = contents[0]; - switch (asset->getAssetType()) - { - case IAsset::ET_IMAGE: - { - auto image = smart_refctd_ptr_static_cast(asset); - const auto format = image->getCreationParameters().format; - - ICPUImageView::SCreationParams viewParams = { - .flags = ICPUImageView::E_CREATE_FLAGS::ECF_NONE, - .image = std::move(image), - .viewType = IImageView::E_TYPE::ET_2D, - .format = format, - .subresourceRange = { - .aspectMask = IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT, - .baseMipLevel = 0u, - .levelCount = ICPUImageView::remaining_mip_levels, - .baseArrayLayer = 0u, - .layerCount = ICPUImageView::remaining_array_layers - } - }; - - cpuImgView = ICPUImageView::create(std::move(viewParams)); - } break; - - case IAsset::ET_IMAGE_VIEW: - cpuImgView = smart_refctd_ptr_static_cast(asset); - break; - default: - m_logger->log("Failed to load ICPUImage or ICPUImageView got some other Asset Type, skipping!",ILogger::ELL_ERROR); - } - - - // create matching size gpu image - smart_refctd_ptr gpuImg; - const auto& origParams = cpuImgView->getCreationParameters(); - const auto origImage = origParams.image; - IGPUImage::SCreationParams imageParams = {}; - imageParams = origImage->getCreationParameters(); - imageParams.usage |= IGPUImage::EUF_TRANSFER_DST_BIT|IGPUImage::EUF_SAMPLED_BIT; - // promote format because RGB8 and friends don't actually exist in HW - { - const IPhysicalDevice::SImageFormatPromotionRequest request = { - .originalFormat = imageParams.format, - .usages = IPhysicalDevice::SFormatImageUsages::SUsage(imageParams.usage) - }; - imageParams.format = m_physicalDevice->promoteImageFormat(request,imageParams.tiling); - } - gpuImg = m_device->createImage(std::move(imageParams)); - if (!gpuImg || !m_device->allocate(gpuImg->getMemoryReqs(),gpuImg.get()).isValid()) - m_logger->log("Failed to create or allocate gpu image!",ILogger::ELL_ERROR); - gpuImg->setObjectDebugName(imagePath.c_str()); - - IGPUImageView::SCreationParams viewParams = { - .image = gpuImg, - .viewType = IGPUImageView::ET_2D, - .format = gpuImg->getCreationParameters().format - }; - auto gpuImgView = m_device->createImageView(std::move(viewParams)); - - // Bind gpu image view to descriptor set - video::IGPUDescriptorSet::SDescriptorInfo dsInfo; - dsInfo.info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; - dsInfo.desc = gpuImgView; - - IGPUDescriptorSet::SWriteDescriptorSet dsWrites[1u] = - { - { - .dstSet = descriptorSet0.get(), - .binding = 6u, - .arrayElement = 0u, - .count = 1u, - .info = &dsInfo, - } - }; - m_device->updateDescriptorSets(1u, dsWrites, 0u, nullptr); - - // Upload Loaded CPUImageData to GPU - IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t beforeCopyImageBarriers[] = - { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, // previous top of pipe -> top_of_pipe in first 
scope = none - .srcAccessMask = ACCESS_FLAGS::NONE, - .dstStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, - .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - } - // .ownershipOp. No queueFam ownership transfer - }, - .image = gpuImg.get(), - .subresourceRange = origParams.subresourceRange, - .oldLayout = IImage::LAYOUT::UNDEFINED, - .newLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - } - }; - - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = beforeCopyImageBarriers }); - m_utils->updateImageViaStagingBuffer( - intendedNextSubmit, - origImage->getBuffer()->getPointer(), origImage->getCreationParameters().format, - gpuImg.get(), IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - origImage->getRegions()); - - IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t afterCopyImageBarriers[] = - { - { - .barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COPY_BIT, // previous top of pipe -> top_of_pipe in first scope = none - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, - } - // .ownershipOp. No queueFam ownership transfer - }, - .image = gpuImg.get(), - .subresourceRange = origParams.subresourceRange, - .oldLayout = IImage::LAYOUT::TRANSFER_DST_OPTIMAL, - .newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL, - } - }; - cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = afterCopyImageBarriers }); + uint64_t imageID = i * 69ull; // it can be hash or something of the file path the image was loaded from + //printf(std::format("\n Image {} \n", i).c_str()); + drawResourcesFiller.ensureStaticImageAvailability({ imageID, sampleImages[i] }, intendedNextSubmit); + drawResourcesFiller.addImageObject(imageID, { .topLeft = { 0.0 + (i) * 3.0, 0.0 }, .dirU = { 3.0 , 0.0 }, .aspectRatio = 1.0 }, intendedNextSubmit); + //printf("\n"); } - drawResourcesFiller._test_addImageObject({ 0.0, 0.0 }, { 100.0, 100.0 }, 0.0, intendedNextSubmit); - drawResourcesFiller._test_addImageObject({ 40.0, +40.0 }, { 100.0, 100.0 }, 0.0, intendedNextSubmit); + + GeoreferencedImageParams geoRefParams = {}; + geoRefParams.format = asset::EF_R8G8B8A8_SRGB; + geoRefParams.imageExtents = uint32_t2 (2048, 2048); + geoRefParams.viewportExtents = (m_realFrameIx <= 5u) ? 
uint32_t2(1280, 720) : uint32_t2(3840, 2160); // to test triggering resize/recreation + // drawResourcesFiller.ensureGeoreferencedImageAvailability_AllocateIfNeeded(6996, geoRefParams, intendedNextSubmit); LineStyleInfo lineStyle = { @@ -3019,8 +3144,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu { std::vector linePoints; linePoints.push_back({ 0.0, 0.0 }); - linePoints.push_back({ 100.0, 0.0 }); - linePoints.push_back({ 100.0, -100.0 }); + linePoints.push_back({ 1.0, 0.0 }); + linePoints.push_back({ 1.0, -1.0 }); polyline.addLinePoints(linePoints); } drawResourcesFiller.drawPolyline(polyline, lineStyle, intendedNextSubmit); @@ -3076,7 +3201,7 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu singleLineText->Draw(drawResourcesFiller, intendedNextSubmit, m_font.get(), float64_t2(0.0,-200.0), float32_t2(1.0, 1.0), rotation, float32_t4(1.0, 1.0, 1.0, 1.0), italicTiltAngle, 0.0f); singleLineText->Draw(drawResourcesFiller, intendedNextSubmit, m_font.get(), float64_t2(0.0,-250.0), float32_t2(1.0, 1.0), rotation, float32_t4(1.0, 1.0, 1.0, 1.0), italicTiltAngle, 0.5f); // singleLineText->Draw(drawResourcesFiller, intendedNextSubmit, float64_t2(0.0,-200.0), float32_t2(1.0, 1.0), nbl::core::PI() * abs(cos(m_timeElapsed * 0.00005))); - // Smaller text to test mip maps + // Smaller text to test level maps //singleLineText->Draw(drawResourcesFiller, intendedNextSubmit, float64_t2(0.0,-130.0), float32_t2(0.4, 0.4), rotation); //singleLineText->Draw(drawResourcesFiller, intendedNextSubmit, float64_t2(0.0,-150.0), float32_t2(0.2, 0.2), rotation); } @@ -3090,15 +3215,6 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu auto penY = -500.0; auto previous = 0; - uint32_t glyphObjectIdx; - { - LineStyleInfo lineStyle = {}; - lineStyle.color = float32_t4(1.0, 1.0, 1.0, 1.0); - const uint32_t styleIdx = drawResourcesFiller.addLineStyle_SubmitIfNeeded(lineStyle, intendedNextSubmit); - - glyphObjectIdx = drawResourcesFiller.addMainObject_SubmitIfNeeded(styleIdx, intendedNextSubmit); - } - float64_t2 currentBaselineStart = float64_t2(0.0, 0.0); float64_t scale = 1.0 / 64.0; @@ -3231,27 +3347,366 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu } } - drawResourcesFiller.finalizeAllCopiesToGPU(intendedNextSubmit); + else if (mode == ExampleMode::CASE_9) + { + // PYRAMID + core::vector vertices = { + //{ float64_t2(0.0, 0.0), 100.0 }, //0 + //{ float64_t2(-200.0, -200.0), 10.0 }, //1 + //{ float64_t2(200.0, -200.0), 10.0 }, //2 + //{ float64_t2(200.0, 200.0), -20.0 }, //3 + //{ float64_t2(-200.0, 200.0), 10.0 }, //4 + + { float64_t2(0.0, 0.0), 100.0 }, + { float64_t2(-200.0, -200.0), 10.0 }, + { float64_t2(200.0, -100.0), 10.0 }, + { float64_t2(0.0, 0.0), 100.0 }, + { float64_t2(200.0, -100.0), 10.0 }, + { float64_t2(200.0, 200.0), -20.0 }, + { float64_t2(0.0, 0.0), 100.0 }, + { float64_t2(200.0, 200.0), -20.0 }, + { float64_t2(-200.0, 200.0), 10.0 }, + { float64_t2(0.0, 0.0), 100.0 }, + { float64_t2(-200.0, 200.0), 10.0 }, + { float64_t2(-200.0, -200.0), 10.0 }, + }; + + core::vector indices = { + 0, 1, 2, + 3, 4, 5, + 6, 7, 8, + 9, 10, 11 + }; + + // SINGLE TRIANGLE + /*core::vector vertices = { + { float64_t2(0.0, 0.0), -20.0 }, + { float64_t2(-200.0, -200.0), 100.0 }, + { float64_t2(200.0, -100.0), 80.0 }, + }; + + core::vector indices = { + 0, 1, 2 + };*/ + + CTriangleMesh mesh; + mesh.setVertices(std::move(vertices)); + mesh.setIndices(std::move(indices)); + + 
DTMSettingsInfo dtmInfo{}; + //dtmInfo.mode |= E_DTM_MODE::OUTLINE; + dtmInfo.mode |= E_DTM_MODE::HEIGHT_SHADING; + dtmInfo.mode |= E_DTM_MODE::CONTOUR; + + dtmInfo.outlineStyleInfo.screenSpaceLineWidth = 0.0f; + dtmInfo.outlineStyleInfo.worldSpaceLineWidth = 1.0f; + dtmInfo.outlineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); + std::array outlineStipplePattern = { 0.0f, -5.0f, 20.0f, -5.0f }; + dtmInfo.outlineStyleInfo.setStipplePatternData(outlineStipplePattern); + + dtmInfo.contourSettingsCount = 2u; + dtmInfo.contourSettings[0u].startHeight = 20; + dtmInfo.contourSettings[0u].endHeight = 90; + dtmInfo.contourSettings[0u].heightInterval = 9.98; + dtmInfo.contourSettings[0u].lineStyleInfo.screenSpaceLineWidth = 0.0f; + dtmInfo.contourSettings[0u].lineStyleInfo.worldSpaceLineWidth = 1.0f; + dtmInfo.contourSettings[0u].lineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 0.7f); + std::array contourStipplePattern = { 0.0f, -5.0f, 10.0f, -5.0f }; + dtmInfo.contourSettings[0u].lineStyleInfo.setStipplePatternData(contourStipplePattern); + + dtmInfo.contourSettings[1u] = dtmInfo.contourSettings[0u]; + dtmInfo.contourSettings[1u].startHeight += 5.0f; + dtmInfo.contourSettings[1u].heightInterval = 13.0f; + dtmInfo.contourSettings[1u].lineStyleInfo.color = float32_t4(0.8f, 0.4f, 0.3f, 1.0f); + + // PRESS 1, 2, 3 TO SWITCH HEIGHT SHADING MODE + // 1 - DISCRETE_VARIABLE_LENGTH_INTERVALS + // 2 - DISCRETE_FIXED_LENGTH_INTERVALS + // 3 - CONTINOUS_INTERVALS + float animatedAlpha = (std::cos(m_timeElapsed * 0.0005) + 1.0) * 0.5; + switch (m_shadingModeExample) + { + case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: + { + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + + dtmInfo.heightShadingInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(20.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + + break; + } + case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: + { + dtmInfo.heightShadingInfo.intervalLength = 10.0f; + dtmInfo.heightShadingInfo.intervalIndexToHeightMultiplier = dtmInfo.heightShadingInfo.intervalLength; + dtmInfo.heightShadingInfo.isCenteredShading = false; + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + + break; + } + case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: + { + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + 
dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + + break; + } + } + + drawResourcesFiller.drawTriangleMesh(mesh, dtmInfo, intendedNextSubmit); + + dtmInfo.contourSettings[0u].lineStyleInfo.color = float32_t4(1.0f, 0.39f, 0.0f, 1.0f); + dtmInfo.outlineStyleInfo.color = float32_t4(0.0f, 0.39f, 1.0f, 1.0f); + for (auto& v : mesh.m_vertices) + { + v.pos += float64_t2(450.0, 200.0); + v.height -= 10.0; + } + + drawResourcesFiller.drawTriangleMesh(mesh, dtmInfo, intendedNextSubmit); + } + else if (mode == ExampleMode::CASE_10) + { + CPolyline polyline; + + LineStyleInfo style = {}; + style.screenSpaceLineWidth = 4.0f; + style.color = float32_t4(0.619f, 0.325f, 0.709f, 0.5f); + + for (uint32_t i = 0; i < 128u; ++i) + { + std::vector> quadBeziers; + curves::EllipticalArcInfo myCircle; + { + myCircle.majorAxis = { 0.05 , 0.0}; + myCircle.center = { 0.0 + i * 0.1, i * 0.1 }; + myCircle.angleBounds = { + nbl::core::PI() * 0.0, + nbl::core::PI() * 2.0 + }; + myCircle.eccentricity = 1.0; + } + + curves::Subdivision::AddBezierFunc addToBezier = [&](shapes::QuadraticBezier&& info) -> void + { + quadBeziers.push_back(info); + }; + + curves::Subdivision::adaptive(myCircle, 1e-5, addToBezier, 10u); + polyline.addQuadBeziers(quadBeziers); + // drawResourcesFiller.drawPolyline(polyline, style, intendedNextSubmit); + polyline.clearEverything(); + } + + // Testing Fixed Geometry + { + float64_t2 line0[2u] = + { + float64_t2(-1.0, 0.0), + float64_t2(+1.0, 0.0), + }; + float64_t2 line1[3u] = + { + float64_t2(0.0, -1.0), + float64_t2(0.0, +1.0), + float64_t2(+1.0, +1.0), + }; + + float64_t3x3 translateMat = + { + 1.0, 0.0, 0.0, + 0.0, 1.0, 0.0, + 0.0, 0.0, 1.0 + }; + + float64_t angle = m_timeElapsed * 0.001; + float64_t2 dir = float64_t2{ cos(angle), sin(angle) }; + float64_t3x3 rotateMat = + { + dir.x, -dir.y, 0.0, + dir.y, dir.x, 0.0, + 0.0, 0.0, 1.0 + }; + + float64_t2 scale = float64_t2{ 100.0, 100.0 }; + float64_t3x3 scaleMat = + { + scale.x, 0.0, 0.0, + 0.0, scale.y, 0.0, + 0.0, 0.0, 1.0 + }; + + float64_t3x3 transformation = nbl::hlsl::mul(translateMat, nbl::hlsl::mul(rotateMat, scaleMat)); + polyline.addLinePoints(line0); + polyline.addLinePoints(line1); + polyline.preprocessPolylineWithStyle(style); + // drawResourcesFiller.drawPolyline(polyline, intendedNextSubmit); + drawResourcesFiller.drawFixedGeometryPolyline(polyline, style, transformation, TransformationType::TT_FIXED_SCREENSPACE_SIZE, intendedNextSubmit); + } + } + else if (mode == ExampleMode::CASE_11) + { + DTMSettingsInfo dtmInfo{}; + dtmInfo.mode |= E_DTM_MODE::OUTLINE; + dtmInfo.mode |= E_DTM_MODE::HEIGHT_SHADING; + dtmInfo.mode |= E_DTM_MODE::CONTOUR; + + dtmInfo.outlineStyleInfo.screenSpaceLineWidth = 0.0f; + dtmInfo.outlineStyleInfo.worldSpaceLineWidth = 1.0f; + dtmInfo.outlineStyleInfo.color = float32_t4(0.0f, 0.39f, 0.0f, 1.0f); + //std::array outlineStipplePattern = { 0.0f, -5.0f, 20.0f, -5.0f }; + std::array outlineStipplePattern = { -10.0f, 10.0f }; + dtmInfo.outlineStyleInfo.setStipplePatternData(outlineStipplePattern); + + dtmInfo.contourSettingsCount = 2u; + dtmInfo.contourSettings[0u].startHeight = 20; + dtmInfo.contourSettings[0u].endHeight = 90; + dtmInfo.contourSettings[0u].heightInterval = 10; + dtmInfo.contourSettings[0u].lineStyleInfo.screenSpaceLineWidth = 0.0f; + 
dtmInfo.contourSettings[0u].lineStyleInfo.worldSpaceLineWidth = 3.0f; + dtmInfo.contourSettings[0u].lineStyleInfo.color = float32_t4(0.0f, 0.0f, 1.0f, 0.7f); + std::array contourStipplePattern = { 0.0f, -5.0f, 10.0f, -5.0f }; + dtmInfo.contourSettings[0u].lineStyleInfo.setStipplePatternData(contourStipplePattern); + + dtmInfo.contourSettings[1u] = dtmInfo.contourSettings[0u]; + dtmInfo.contourSettings[1u].startHeight += 5.0f; + dtmInfo.contourSettings[1u].heightInterval = 13.0f; + dtmInfo.contourSettings[1u].lineStyleInfo.color = float32_t4(0.8f, 0.4f, 0.3f, 1.0f); + + // PRESS 1, 2, 3 TO SWITCH HEIGHT SHADING MODE + // 1 - DISCRETE_VARIABLE_LENGTH_INTERVALS + // 2 - DISCRETE_FIXED_LENGTH_INTERVALS + // 3 - CONTINOUS_INTERVALS + float animatedAlpha = (std::cos(m_timeElapsed * 0.0005) + 1.0) * 0.5; + animatedAlpha = 1.0f; + switch (m_shadingModeExample) + { + case E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS: + { + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + + dtmInfo.heightShadingInfo.addHeightColorMapEntry(-10.0f, float32_t4(0.5f, 1.0f, 1.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(20.0f, float32_t4(0.0f, 1.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(70.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, 1.0f)); + + break; + } + case E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS: + { + dtmInfo.heightShadingInfo.intervalLength = 10.0f; + dtmInfo.heightShadingInfo.intervalIndexToHeightMultiplier = dtmInfo.heightShadingInfo.intervalLength; + dtmInfo.heightShadingInfo.isCenteredShading = false; + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + dtmInfo.heightShadingInfo.addHeightColorMapEntry(-20.0f, float32_t4(0.0f, 0.5f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 0.7f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(100.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + + break; + } + case E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS: + { + dtmInfo.heightShadingInfo.heightShadingMode = E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + dtmInfo.heightShadingInfo.addHeightColorMapEntry(0.0f, float32_t4(0.0f, 0.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(25.0f, float32_t4(0.0f, 1.0f, 1.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(50.0f, float32_t4(0.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(75.0f, float32_t4(1.0f, 1.0f, 0.0f, animatedAlpha)); + dtmInfo.heightShadingInfo.addHeightColorMapEntry(90.0f, float32_t4(1.0f, 0.0f, 0.0f, animatedAlpha)); + + break; + } + } + + constexpr float HeightMapCellWidth = 20.0f; + const auto heightMapExtent = gridDTMHeightMap->getCreationParameters().extent; + assert(heightMapExtent.width > 0 && heightMapExtent.height > 0); + + float64_t2 worldSpaceExtents; + const float64_t2 topLeft = { -400.0f, 400.0f }; + worldSpaceExtents.x = (heightMapExtent.width - 1) * HeightMapCellWidth; + worldSpaceExtents.y = (heightMapExtent.height 
- 1) * HeightMapCellWidth; + const uint64_t heightMapTextureID = 0ull; + + constexpr bool DrawGridOnly = false; + + if(DrawGridOnly) + { + dtmInfo.mode = E_DTM_MODE::OUTLINE; + drawResourcesFiller.drawGridDTM(topLeft, worldSpaceExtents, HeightMapCellWidth, InvalidTextureIndex, dtmInfo, intendedNextSubmit); + } + else + { + StaticImageInfo heightMapStaticImageInfo = { + .imageID = heightMapTextureID, + .cpuImage = gridDTMHeightMap, + .forceUpdate = false, + .imageViewFormatOverride = asset::E_FORMAT::EF_R32G32B32A32_UINT // for now we use only R32G32B32A32_* anyway + }; + + if (!drawResourcesFiller.ensureStaticImageAvailability(heightMapStaticImageInfo, intendedNextSubmit)) + m_logger->log("Grid DTM height map texture unavailable!", ILogger::ELL_ERROR); + drawResourcesFiller.drawGridDTM(topLeft, worldSpaceExtents, HeightMapCellWidth, heightMapTextureID, dtmInfo, intendedNextSubmit); + } + + // draw test polyline +#if 0 + { + LineStyleInfo style = {}; + style.screenSpaceLineWidth = 0.0f; + style.worldSpaceLineWidth = 15.0f; + style.color = float32_t4(0.7f, 0.3f, 0.1f, 0.5f); + + CPolyline polyline; + { + std::vector linePoints; + linePoints.push_back(topLeft); + linePoints.push_back(topLeft + float64_t2(worldSpaceExtents.x, 0.0)); + linePoints.push_back(topLeft + float64_t2(worldSpaceExtents.x, -worldSpaceExtents.y)); + linePoints.push_back(topLeft + float64_t2(0.0, -worldSpaceExtents.y)); + linePoints.push_back(topLeft); + polyline.addLinePoints(linePoints); + } + + drawResourcesFiller.drawPolyline(polyline, style, intendedNextSubmit); + } +#endif + } } double getScreenToWorldRatio(const float64_t3x3& viewProjectionMatrix, uint32_t2 windowSize) { double idx_0_0 = viewProjectionMatrix[0u][0u] * (windowSize.x / 2.0); - double idx_1_1 = viewProjectionMatrix[1u][1u] * (windowSize.y / 2.0); - double det_2x2_mat = idx_0_0 * idx_1_1; - return static_cast(core::sqrt(core::abs(det_2x2_mat))); + double idx_1_0 = viewProjectionMatrix[1u][0u] * (windowSize.y / 2.0); + return hlsl::length(float64_t2(idx_0_0, idx_1_0)); } protected: - std::chrono::seconds timeout = std::chrono::seconds(0x7fffFFFFu); clock_t::time_point start; + std::vector> replayCaches = {}; // vector because there can be overflow submits + bool finishedCachingDraw = false; + bool fragmentShaderInterlockEnabled = false; - core::smart_refctd_ptr m_inputSystem; - InputSystem::ChannelReader mouse; - InputSystem::ChannelReader keyboard; + core::smart_refctd_ptr m_inputSystem; + nbl::examples::InputSystem::ChannelReader mouse; + nbl::examples::InputSystem::ChannelReader keyboard; smart_refctd_ptr renderpassInitial; // this renderpass will clear the attachment and transition it to COLOR_ATTACHMENT_OPTIMAL smart_refctd_ptr renderpassInBetween; // this renderpass will load the attachment and transition it to COLOR_ATTACHMENT_OPTIMAL @@ -3264,7 +3719,8 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu // pointer to one of the command buffer infos from above, this is the only command buffer used to record current submit in current frame, it will be updated by SIntendedSubmitInfo IQueue::SSubmitInfo::SCommandBufferInfo const * m_currentRecordingCommandBufferInfo; // pointer can change, value cannot - smart_refctd_ptr msdfTextureSampler; + smart_refctd_ptr msdfImageSampler; + smart_refctd_ptr staticImageSampler; smart_refctd_ptr m_globalsBuffer; smart_refctd_ptr descriptorSet0; @@ -3300,6 +3756,9 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu std::vector> m_shapeMSDFImages = {}; 
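+	// CPU-side images: sampleImages become static image objects in CASE_7, gridDTMHeightMap provides the height map sampled by the grid DTM in CASE_11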
+ std::vector> sampleImages; + smart_refctd_ptr gridDTMHeightMap; + static constexpr char FirstGeneratedCharacter = ' '; static constexpr char LastGeneratedCharacter = '~'; @@ -3314,3 +3773,4 @@ class ComputerAidedDesign final : public examples::SimpleWindowedApplication, pu }; NBL_MAIN_FUNC(ComputerAidedDesign) + diff --git a/62_CAD/shaders/geotexture/common.hlsl b/62_CAD/shaders/geotexture/common.hlsl index 82a646319..691cd3d3b 100644 --- a/62_CAD/shaders/geotexture/common.hlsl +++ b/62_CAD/shaders/geotexture/common.hlsl @@ -25,7 +25,7 @@ struct PSInput [[vk::push_constant]] GeoTextureOBB geoTextureOBB; // Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated -[[vk::binding(0, 0)]] ConstantBuffer globals : register(b0); +// [[vk::binding(0, 0)]] ConstantBuffer globals; ---> moved to globals.hlsl // Set 1 - Window dependant data which has higher update frequency due to multiple windows and resize need image recreation and descriptor writes [[vk::binding(0, 1)]] Texture2D geoTexture : register(t0); diff --git a/62_CAD/shaders/globals.hlsl b/62_CAD/shaders/globals.hlsl index 392e796f4..5c3681910 100644 --- a/62_CAD/shaders/globals.hlsl +++ b/62_CAD/shaders/globals.hlsl @@ -1,7 +1,14 @@ #ifndef _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_ #define _CAD_EXAMPLE_GLOBALS_HLSL_INCLUDED_ -#define NBL_FORCE_EMULATED_FLOAT_64 +#ifdef __HLSL_VERSION +#ifndef NBL_USE_SPIRV_BUILTINS +#include "runtimeDeviceConfigCaps.hlsl" // defines DeviceConfigCaps, uses JIT device caps +#endif +#endif + +// TODO[Erfan]: Turn off in the future, but keep enabled to test +// #define NBL_FORCE_EMULATED_FLOAT_64 #include #include @@ -13,17 +20,14 @@ #ifdef __HLSL_VERSION #include -#include #endif using namespace nbl::hlsl; - -// because we can't use jit/device_capabilities.hlsl in c++ code #ifdef __HLSL_VERSION -using pfloat64_t = portable_float64_t; -using pfloat64_t2 = portable_float64_t2; -using pfloat64_t3 = portable_float64_t3; +using pfloat64_t = portable_float64_t; +using pfloat64_t2 = portable_float64_t2; +using pfloat64_t3 = portable_float64_t3; #else using pfloat64_t = float64_t; using pfloat64_t2 = nbl::hlsl::vector; @@ -32,40 +36,46 @@ using pfloat64_t3 = nbl::hlsl::vector; using pfloat64_t3x3 = portable_matrix_t3x3; -// TODO: Compute this in a compute shader from the world counterparts -// because this struct includes NDC coordinates, the values will change based camera zoom and move -// of course we could have the clip values to be in world units and also the matrix to transform to world instead of ndc but that requires extra computations(matrix multiplications) per vertex -struct ClipProjectionData +struct PushConstants +{ + uint64_t triangleMeshVerticesBaseAddress; + uint32_t triangleMeshMainObjectIndex; + uint32_t isDTMRendering; +}; + +struct WorldClipRect { - pfloat64_t3x3 projectionToNDC; // 72 -> because we use scalar_layout - float32_t2 minClipNDC; // 80 - float32_t2 maxClipNDC; // 88 + pfloat64_t2 minClip; // min clip of a rect in worldspace coordinates of the original space (globals.defaultProjectionToNDC) + pfloat64_t2 maxClip; // max clip of a rect in worldspace coordinates of the original space (globals.defaultProjectionToNDC) }; +struct Pointers +{ + uint64_t lineStyles; + uint64_t dtmSettings; + uint64_t customProjections; + uint64_t customClipRects; + uint64_t mainObjects; + uint64_t drawObjects; + uint64_t geometryBuffer; +}; #ifndef __HLSL_VERSION -static_assert(offsetof(ClipProjectionData, projectionToNDC) == 0u); -static_assert(offsetof(ClipProjectionData, 
minClipNDC) == 72u); -static_assert(offsetof(ClipProjectionData, maxClipNDC) == 80u); +static_assert(sizeof(Pointers) == 56u); #endif struct Globals { - ClipProjectionData defaultClipProjection; // 88 - pfloat64_t screenToWorldRatio; // 96 - pfloat64_t worldToScreenRatio; // 100 - uint32_t2 resolution; // 108 - float antiAliasingFactor; // 112 - float miterLimit; // 116 - float32_t2 _padding; // 128 + Pointers pointers; + pfloat64_t3x3 defaultProjectionToNDC; + pfloat64_t3x3 screenToWorldScaleTransform; // Pre-multiply your transform with this to scale in screen space (e.g., scale 100.0 means 100 screen pixels). + uint32_t2 resolution; + float antiAliasingFactor; + uint32_t miterLimit; + uint32_t currentlyActiveMainObjectIndex; // for alpha resolve to skip resolving activeMainObjectIdx and prep it for next submit + float32_t _padding; }; - #ifndef __HLSL_VERSION -static_assert(offsetof(Globals, defaultClipProjection) == 0u); -static_assert(offsetof(Globals, screenToWorldRatio) == 88u); -static_assert(offsetof(Globals, worldToScreenRatio) == 96u); -static_assert(offsetof(Globals, resolution) == 104u); -static_assert(offsetof(Globals, antiAliasingFactor) == 112u); -static_assert(offsetof(Globals, miterLimit) == 116u); +static_assert(sizeof(Globals) == 224u); #endif #ifdef __HLSL_VERSION @@ -100,6 +110,18 @@ pfloat64_t2 transformVectorNdc(NBL_CONST_REF_ARG(pfloat64_t3x3) transformation, } #endif +enum class MainObjectType : uint32_t +{ + NONE = 0u, + POLYLINE, + HATCH, + TEXT, + STATIC_IMAGE, + DTM, + GRID_DTM, + STREAMED_IMAGE, +}; + enum class ObjectType : uint32_t { LINE = 0u, @@ -107,7 +129,10 @@ enum class ObjectType : uint32_t CURVE_BOX = 2u, POLYLINE_CONNECTOR = 3u, FONT_GLYPH = 4u, - IMAGE = 5u + STATIC_IMAGE = 5u, + TRIANGLE_MESH = 6u, + GRID_DTM = 7u, + STREAMED_IMAGE = 8u, }; enum class MajorAxis : uint32_t @@ -116,12 +141,23 @@ enum class MajorAxis : uint32_t MAJOR_Y = 1u, }; +enum TransformationType +{ + TT_NORMAL = 0, + TT_FIXED_SCREENSPACE_SIZE +}; + + // Consists of multiple DrawObjects +// [IDEA]: In GPU-driven rendering, to save mem for MainObject data fetching: many of these can be shared amongst different main objects, we could find these styles, settings, etc indices with upper_bound +// [TODO]: pack indices and members of mainObject and DrawObject + enforce max size for autosubmit --> but do it only after the mainobject definition is finalized in gpu-driven rendering work struct MainObject { uint32_t styleIdx; - uint32_t pad; // do I even need this on the gpu side? 
it's stored in structured buffer not bda - uint64_t clipProjectionAddress; + uint32_t dtmSettingsIdx; + uint32_t customProjectionIndex; + uint32_t customClipRectIndex; + uint32_t transformationType; // todo pack later, it's just 2 possible values atm }; struct DrawObject @@ -131,6 +167,7 @@ struct DrawObject uint64_t geometryAddress; }; +// Goes into geometry buffer, needs to be aligned by 8 struct LinePointInfo { pfloat64_t2 p; @@ -138,6 +175,7 @@ struct LinePointInfo float32_t stretchValue; }; +// Goes into geometry buffer, needs to be aligned by 8 struct QuadraticBezierInfo { nbl::hlsl::shapes::QuadraticBezier shape; // 48bytes = 3 (control points) x 16 (float64_t2) @@ -148,6 +186,7 @@ struct QuadraticBezierInfo static_assert(offsetof(QuadraticBezierInfo, phaseShift) == 48u); #endif +// Goes into geometry buffer, needs to be aligned by 8 struct GlyphInfo { pfloat64_t2 topLeft; // 2 * 8 = 16 bytes @@ -192,14 +231,68 @@ struct GlyphInfo } }; +// Goes into geometry buffer, needs to be aligned by 8 struct ImageObjectInfo { - pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) + pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) + float32_t2 dirU; // 2 * 4 = 8 bytes (24) + float32_t aspectRatio; // 4 bytes (28) + uint32_t textureID; // 4 bytes (32) +}; + +// Goes into geometry buffer, needs to be aligned by 8 +// Currently a simple OBB like ImageObject, but later will be fullscreen with additional info about UV offset for toroidal(mirror) addressing +struct GeoreferencedImageInfo +{ + pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) float32_t2 dirU; // 2 * 4 = 8 bytes (24) float32_t aspectRatio; // 4 bytes (28) uint32_t textureID; // 4 bytes (32) }; +// Goes into geometry buffer, needs to be aligned by 8 +struct GridDTMInfo +{ + pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) + pfloat64_t2 worldSpaceExtents; // 16 bytes (32) + uint32_t textureID; // 4 bytes (36) + float gridCellWidth; // 4 bytes (40) + float thicknessOfTheThickestLine; // 4 bytes (44) + float _padding; // 4 bytes (48) +}; + +enum E_CELL_DIAGONAL : uint32_t +{ + TOP_LEFT_TO_BOTTOM_RIGHT = 0u, + BOTTOM_LEFT_TO_TOP_RIGHT = 1u, + INVALID = 2u +}; + +#ifndef __HLSL_VERSION + +// sets last bit of data to 1 or 0 depending on diagonalMode +static void setDiagonalModeBit(float* data, E_CELL_DIAGONAL diagonalMode) +{ + if (diagonalMode == E_CELL_DIAGONAL::INVALID) + return; + + uint32_t dataAsUint = reinterpret_cast(*data); + constexpr uint32_t HEIGHT_VALUE_MASK = 0xFFFFFFFEu; + dataAsUint &= HEIGHT_VALUE_MASK; + dataAsUint |= static_cast(diagonalMode); + *data = reinterpret_cast(dataAsUint); + + uint32_t dataAsUintDbg = reinterpret_cast(*data); +} + +#endif + +// Top left corner holds diagonal mode info of a cell +static E_CELL_DIAGONAL getDiagonalModeFromCellCornerData(uint32_t cellCornerData) +{ + return (cellCornerData & 0x1u) ? 
BOTTOM_LEFT_TO_TOP_RIGHT : TOP_LEFT_TO_BOTTOM_RIGHT; +} + static uint32_t packR11G11B10_UNORM(float32_t3 color) { // Scale and convert to integers @@ -235,12 +328,13 @@ static float32_t3 unpackR11G11B10_UNORM(uint32_t packed) struct PolylineConnector { pfloat64_t2 circleCenter; - float32_t2 v; + float32_t2 v; // the vector from circle center to the intersection of the line ends, it's normalized such that the radius of the circle is equal to 1 float32_t cosAngleDifferenceHalf; float32_t _reserved_pad; }; // NOTE: Don't attempt to pack curveMin/Max to uints because of limited range of values, we need the logarithmic precision of floats (more precision near 0) +// Goes into geometry buffer, needs to be aligned by 8 struct CurveBox { // will get transformed in the vertex shader, and will be calculated on the cpu when generating these boxes @@ -262,9 +356,15 @@ NBL_CONSTEXPR uint32_t InvalidRigidSegmentIndex = 0xffffffff; NBL_CONSTEXPR float InvalidStyleStretchValue = nbl::hlsl::numeric_limits::infinity; -// TODO[Przemek]: we will need something similar to LineStyles but related to heigh shading settings which is user customizable (like LineStyle stipple patterns) and requires upper_bound to figure out the color based on height value. +// TODO[Przemek]: we will need something similar to LineStyles but related to height shading settings which are user customizable (like stipple patterns) and require upper_bound to figure out the color based on height value. // We'll discuss that later or what it will be looking like and how it's gonna get passed to our shaders. +struct TriangleMeshVertex +{ + pfloat64_t2 pos; + pfloat64_t height; // TODO: can be of type float32_t instead +}; + // The color parameter is also used for styling non-curve objects such as text glyphs and hatches with solid color struct LineStyle { @@ -316,6 +416,73 @@ } }; +enum E_DTM_MODE +{ + OUTLINE = 1 << 0, + CONTOUR = 1 << 1, + HEIGHT_SHADING = 1 << 2, +}; + +enum class E_HEIGHT_SHADING_MODE : uint32_t +{ + DISCRETE_VARIABLE_LENGTH_INTERVALS, + DISCRETE_FIXED_LENGTH_INTERVALS, + CONTINOUS_INTERVALS +}; + +struct DTMContourSettings +{ + uint32_t contourLineStyleIdx; // index into line styles + float contourLinesStartHeight; + float contourLinesEndHeight; + float contourLinesHeightInterval; +}; + +struct DTMHeightShadingSettings +{ + const static uint32_t HeightColorMapMaxEntries = 16u; + + // height-color map + float intervalLength; + float intervalIndexToHeightMultiplier; + int isCenteredShading; + + uint32_t heightColorEntryCount; + float heightColorMapHeights[HeightColorMapMaxEntries]; + float32_t4 heightColorMapColors[HeightColorMapMaxEntries]; + + E_HEIGHT_SHADING_MODE determineHeightShadingMode() + { + if (nbl::hlsl::isinf(intervalLength)) + return E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS; + if (intervalLength == 0.0f) + return E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS; + return E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS; + } +}; + +// Documentation and explanation of variables in DTMSettingsInfo +struct DTMSettings +{ + const static uint32_t MaxContourSettings = 8u; + + uint32_t mode; // E_DTM_MODE + + // outline + uint32_t outlineLineStyleIdx; + + // contour lines + uint32_t contourSettingsCount; + DTMContourSettings contourSettings[MaxContourSettings]; + + // height shading + DTMHeightShadingSettings heightShadingSettings; + + bool drawOutlineEnabled() NBL_CONST_MEMBER_FUNC { return (mode & E_DTM_MODE::OUTLINE) != 0u; } + bool drawContourEnabled() NBL_CONST_MEMBER_FUNC { 
return (mode & E_DTM_MODE::CONTOUR) != 0u; } + bool drawHeightShadingEnabled() NBL_CONST_MEMBER_FUNC { return (mode & E_DTM_MODE::HEIGHT_SHADING) != 0u; } +}; + #ifndef __HLSL_VERSION inline bool operator==(const LineStyle& lhs, const LineStyle& rhs) { @@ -338,22 +505,118 @@ inline bool operator==(const LineStyle& lhs, const LineStyle& rhs) return isStipplePatternArrayEqual; } + +inline bool operator==(const DTMSettings& lhs, const DTMSettings& rhs) +{ + if (lhs.mode != rhs.mode) + return false; + + if (lhs.drawOutlineEnabled()) + { + if (lhs.outlineLineStyleIdx != rhs.outlineLineStyleIdx) + return false; + } + + if (lhs.drawContourEnabled()) + { + if (lhs.contourSettingsCount != rhs.contourSettingsCount) + return false; + if (memcmp(lhs.contourSettings, rhs.contourSettings, lhs.contourSettingsCount * sizeof(DTMContourSettings))) + return false; + } + + if (lhs.drawHeightShadingEnabled()) + { + if (lhs.heightShadingSettings.intervalLength != rhs.heightShadingSettings.intervalLength) + return false; + if (lhs.heightShadingSettings.intervalIndexToHeightMultiplier != rhs.heightShadingSettings.intervalIndexToHeightMultiplier) + return false; + if (lhs.heightShadingSettings.isCenteredShading != rhs.heightShadingSettings.isCenteredShading) + return false; + if (lhs.heightShadingSettings.heightColorEntryCount != rhs.heightShadingSettings.heightColorEntryCount) + return false; + + + if(memcmp(lhs.heightShadingSettings.heightColorMapHeights, rhs.heightShadingSettings.heightColorMapHeights, lhs.heightShadingSettings.heightColorEntryCount * sizeof(float))) + return false; + if(memcmp(lhs.heightShadingSettings.heightColorMapColors, rhs.heightShadingSettings.heightColorMapColors, lhs.heightShadingSettings.heightColorEntryCount * sizeof(float32_t4))) + return false; + } + + return true; +} #endif +NBL_CONSTEXPR uint32_t ImagesBindingArraySize = 128; NBL_CONSTEXPR uint32_t MainObjectIdxBits = 24u; // It will be packed next to alpha in a texture NBL_CONSTEXPR uint32_t AlphaBits = 32u - MainObjectIdxBits; NBL_CONSTEXPR uint32_t MaxIndexableMainObjects = (1u << MainObjectIdxBits) - 1u; NBL_CONSTEXPR uint32_t InvalidStyleIdx = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR uint32_t InvalidDTMSettingsIdx = nbl::hlsl::numeric_limits::max; NBL_CONSTEXPR uint32_t InvalidMainObjectIdx = MaxIndexableMainObjects; -NBL_CONSTEXPR uint64_t InvalidClipProjectionAddress = nbl::hlsl::numeric_limits::max; -NBL_CONSTEXPR uint32_t InvalidTextureIdx = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR uint32_t InvalidCustomProjectionIndex = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR uint32_t InvalidCustomClipRectIndex = nbl::hlsl::numeric_limits::max; +NBL_CONSTEXPR uint32_t InvalidTextureIndex = nbl::hlsl::numeric_limits::max; + +// Hatches NBL_CONSTEXPR MajorAxis SelectedMajorAxis = MajorAxis::MAJOR_Y; -// TODO: get automatic version working on HLSL NBL_CONSTEXPR MajorAxis SelectedMinorAxis = MajorAxis::MAJOR_X; //(MajorAxis) (1 - (uint32_t) SelectedMajorAxis); + +// Text or MSDF Hatches NBL_CONSTEXPR float MSDFPixelRange = 4.0f; NBL_CONSTEXPR float MSDFPixelRangeHalf = MSDFPixelRange / 2.0f; -NBL_CONSTEXPR float MSDFSize = 32.0f; +NBL_CONSTEXPR float MSDFSize = 64.0f; NBL_CONSTEXPR uint32_t MSDFMips = 4; NBL_CONSTEXPR float HatchFillMSDFSceenSpaceSize = 8.0; +inline bool isInvalidGridDtmHeightValue(float value) +{ + return nbl::hlsl::isnan(value); +} + +// Used in CPU-side only for now +struct OrientedBoundingBox2D +{ + pfloat64_t2 topLeft; // 2 * 8 = 16 bytes (16) + float32_t2 dirU; // 2 * 4 = 8 bytes (24) + float32_t 
aspectRatio; // 4 bytes (28) +}; + +#ifdef __HLSL_VERSION +[[vk::binding(0, 0)]] ConstantBuffer globals : register(b0); + +// BDA fetch helpers: each struct below is read straight out of the single resources GPU buffer, at the byte offsets recorded in globals.pointers +LineStyle loadLineStyle(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.lineStyles + index * sizeof(LineStyle), 4u); +} +DTMSettings loadDTMSettings(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.dtmSettings + index * sizeof(DTMSettings), 4u); +} +pfloat64_t3x3 loadCustomProjection(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.customProjections + index * sizeof(pfloat64_t3x3), 8u); +} +WorldClipRect loadCustomClipRect(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.customClipRects + index * sizeof(WorldClipRect), 8u); +} +MainObject loadMainObject(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.mainObjects + index * sizeof(MainObject), 4u); +} +DrawObject loadDrawObject(const uint32_t index) +{ + return vk::RawBufferLoad(globals.pointers.drawObjects + index * sizeof(DrawObject), 8u); +} +#else +static_assert(alignof(LineStyle)==4u); +static_assert(alignof(DTMSettings)==4u); +static_assert(alignof(pfloat64_t3x3)==8u); +static_assert(alignof(WorldClipRect)==8u); +static_assert(alignof(MainObject)==4u); +static_assert(alignof(DrawObject)==8u); +#endif + + #endif diff --git a/62_CAD/shaders/main_pipeline/common.hlsl b/62_CAD/shaders/main_pipeline/common.hlsl index 17c851a19..fc80c67f8 100644 --- a/62_CAD/shaders/main_pipeline/common.hlsl +++ b/62_CAD/shaders/main_pipeline/common.hlsl @@ -3,6 +3,15 @@ #include "../globals.hlsl" +// This function solely exists to match n4ce's behaviour: colors and color operations for DTMs, Curves, Lines and Hatches are done in linear space and then output to a linear surface (as if the surface had a UNORM format, but ours is SRGB) +// We should do gamma "uncorrection" to account for the fact that our surface format is SRGB and will do gamma correction +void gammaUncorrect(inout float3 col) +{ + bool outputToSRGB = true; // TODO + float gamma = (outputToSRGB) ?
2.2f : 1.0f; + col.rgb = pow(col.rgb, gamma); +} + // TODO: Use these in C++ as well once numeric_limits compiles on C++ float32_t2 unpackCurveBoxUnorm(uint32_t2 value) { @@ -66,14 +75,22 @@ struct PrecomputedRootFinder // As always try to reuse parameters and try not to introduce new ones struct PSInput { - float4 position : SV_Position; - float4 clip : SV_ClipDistance; - [[vk::location(0)]] nointerpolation uint4 data1 : COLOR1; - [[vk::location(1)]] nointerpolation float4 data2 : COLOR2; - [[vk::location(2)]] nointerpolation float4 data3 : COLOR3; - [[vk::location(3)]] nointerpolation float4 data4 : COLOR4; + [[vk::location(0)]] float4 position : SV_Position; + [[vk::location(1)]] float4 clip : SV_ClipDistance; + + [[vk::location(2)]] nointerpolation uint4 data1 : COLOR1; + [[vk::location(3)]] nointerpolation float4 data2 : COLOR2; + [[vk::location(4)]] nointerpolation float4 data3 : COLOR3; + [[vk::location(5)]] nointerpolation float4 data4 : COLOR4; // Data segments that need interpolation, mostly for hatches - [[vk::location(5)]] float2 interp_data5 : COLOR5; + [[vk::location(6)]] float4 interp_data5 : COLOR5; + [[vk::location(7)]] nointerpolation float data6 : COLOR6; + +#ifdef FRAGMENT_SHADER_INPUT + [[vk::location(8)]] [[vk::ext_decorate(/*spv::DecoratePerVertexKHR*/5285)]] float3 vertexScreenSpacePos[3] : COLOR7; +#else + [[vk::location(8)]] float3 vertexScreenSpacePos : COLOR7; +#endif // ArcLenCalculator // Set functions used in vshader, get functions used in fshader @@ -96,9 +113,6 @@ struct PSInput void setCurrentPhaseShift(float phaseShift) { interp_data5.x = phaseShift; } float getCurrentPhaseShift() { return interp_data5.x; } - void setCurrentWorldToScreenRatio(float worldToScreen) { interp_data5.y = worldToScreen; } - float getCurrentWorldToScreenRatio() { return interp_data5.y; } - /* LINE */ float2 getLineStart() { return data2.xy; } float2 getLineEnd() { return data2.zw; } @@ -208,19 +222,41 @@ struct PSInput void setImageUV(float2 uv) { interp_data5.xy = uv; } void setImageTextureId(uint32_t textureId) { data2.x = asfloat(textureId); } + + /* TRIANGLE MESH */ + +#ifndef FRAGMENT_SHADER_INPUT // vertex shader + void setScreenSpaceVertexAttribs(float3 pos) { vertexScreenSpacePos = pos; } +#else // fragment shader + float3 getScreenSpaceVertexAttribs(uint32_t vertexIndex) { return vertexScreenSpacePos[vertexIndex]; } +#endif + + /* GRID DTM */ + uint getGridDTMHeightTextureID() { return data1.z; } + float2 getGridDTMScreenSpaceGridExtents() { return data2.xy; } + float getGridDTMScreenSpaceCellWidth() { return data2.z; } + + void setGridDTMHeightTextureID(uint textureID) { data1.z = textureID; } + void setGridDTMScreenSpaceGridExtents(float2 screenSpaceGridExtends) { data2.xy = screenSpaceGridExtends; } + void setGridDTMScreenSpaceCellWidth(float screenSpaceGridWidth) { data2.z = screenSpaceGridWidth; } + + void setCurrentWorldToScreenRatio(float worldToScreen) { data6.x = worldToScreen; } + float getCurrentWorldToScreenRatio() { return data6.x; } + }; // Set 0 - Scene Data and Globals, buffer bindings don't change the buffers only get updated -[[vk::binding(0, 0)]] ConstantBuffer globals : register(b0); -[[vk::binding(1, 0)]] StructuredBuffer drawObjects : register(t0); -[[vk::binding(2, 0)]] StructuredBuffer mainObjects : register(t1); -[[vk::binding(3, 0)]] StructuredBuffer lineStyles : register(t2); -[[vk::combinedImageSampler]][[vk::binding(4, 0)]] Texture2DArray msdfTextures : register(t3); -[[vk::combinedImageSampler]][[vk::binding(4, 0)]] SamplerState msdfSampler 
: register(s3); +// [[vk::binding(0, 0)]] ConstantBuffer globals; ---> moved to globals.hlsl + +[[vk::push_constant]] PushConstants pc; + +[[vk::combinedImageSampler]][[vk::binding(1, 0)]] Texture2DArray msdfTextures : register(t4); +[[vk::combinedImageSampler]][[vk::binding(1, 0)]] SamplerState msdfSampler : register(s4); -[[vk::binding(5, 0)]] SamplerState textureSampler : register(s4); -[[vk::binding(6, 0)]] Texture2D textures[128] : register(t4); +[[vk::binding(2, 0)]] SamplerState textureSampler : register(s5); +[[vk::binding(3, 0)]] Texture2D textures[ImagesBindingArraySize] : register(t5); +[[vk::binding(3, 0)]] Texture2D texturesU32[ImagesBindingArraySize] : register(t5); // Set 1 - Window dependant data which has higher update frequency due to multiple windows and resize need image recreation and descriptor writes [[vk::binding(0, 1)]] globallycoherent RWTexture2D pseudoStencil : register(u0); diff --git a/62_CAD/shaders/main_pipeline/dtm.hlsl b/62_CAD/shaders/main_pipeline/dtm.hlsl new file mode 100644 index 000000000..749d0fd6f --- /dev/null +++ b/62_CAD/shaders/main_pipeline/dtm.hlsl @@ -0,0 +1,524 @@ +#ifndef _CAD_EXAMPLE_DTM_HLSL_INCLUDED_ +#define _CAD_EXAMPLE_DTM_HLSL_INCLUDED_ + +#include "line_style.hlsl" + +namespace dtm +{ + +// for usage in upper_bound function +struct DTMSettingsHeightsAccessor +{ + DTMHeightShadingSettings settings; + using value_type = float; + + float operator[](const uint32_t ix) + { + return settings.heightColorMapHeights[ix]; + } +}; + +float dot2(in float2 vec) +{ + return dot(vec, vec); +} + +struct HeightSegmentTransitionData +{ + float currentHeight; + float4 currentSegmentColor; + float boundaryHeight; + float4 otherSegmentColor; +}; + +// This function interpolates between the current and nearest segment colors based on the +// screen-space distance to the segment boundary. The result is a smoothly blended color +// useful for visualizing discrete height levels without harsh edges. +float4 smoothHeightSegmentTransition(in HeightSegmentTransitionData transitionInfo, in float heightDeriv) +{ + float pxDistanceToNearestSegment = abs((transitionInfo.currentHeight - transitionInfo.boundaryHeight) / heightDeriv); + float nearestSegmentColorCoverage = smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, pxDistanceToNearestSegment); + float4 localHeightColor = lerp(transitionInfo.otherSegmentColor, transitionInfo.currentSegmentColor, nearestSegmentColorCoverage); + return localHeightColor; +} + +// Computes the continuous position of a height value within uniform intervals. +// flooring this value will give the interval index +// +// If `isCenteredShading` is true, the intervals are centered around `minHeight`, meaning the +// first interval spans [minHeight - intervalLength / 2.0, minHeight + intervalLength / 2.0]. +// Otherwise, intervals are aligned from `minHeight` upward, so the first interval spans +// [minHeight, minHeight + intervalLength]. +// +// Parameters: +// - height: The height value to classify. +// - minHeight: The reference starting height for interval calculation. +// - intervalLength: The length of each interval segment. +// - isCenteredShading: Whether to center the shading intervals around minHeight. +// +// Returns: +// - A float representing the continuous position within the interval grid. 
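+// A short worked example of the mapping described above (illustrative numbers only, not taken from the codebase):
+//   with minHeight = 10.0 and intervalLength = 5.0:
+//     height = 17.0, isCenteredShading = false -> (17 - 10) / 5       = 1.4, so flooring lands in interval 1, spanning [15.0, 20.0)
+//     height = 17.0, isCenteredShading = true  -> (17 - 10) / 5 + 0.5 = 1.9, so flooring lands in interval 1, spanning [12.5, 17.5)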
+float getIntervalPosition(in float height, in float minHeight, in float intervalLength, in bool isCenteredShading) +{ + if (isCenteredShading) + return ((height - minHeight) / intervalLength + 0.5f); + else + return ((height - minHeight) / intervalLength); +} + +void getIntervalHeightAndColor(in int intervalIndex, in DTMHeightShadingSettings settings, out float4 outIntervalColor, out float outIntervalHeight) +{ + float minShadingHeight = settings.heightColorMapHeights[0]; + float heightForColor = minShadingHeight + float(intervalIndex) * settings.intervalIndexToHeightMultiplier; + + if (settings.isCenteredShading) + outIntervalHeight = minShadingHeight + (float(intervalIndex) - 0.5) * settings.intervalLength; + else + outIntervalHeight = minShadingHeight + (float(intervalIndex)) * settings.intervalLength; + + DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; + int32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0, settings.heightColorEntryCount, heightForColor), settings.heightColorEntryCount - 1u); + int32_t lowerBoundHeightIndex = max(upperBoundHeightIndex - 1, 0); + + float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = settings.heightColorMapHeights[lowerBoundHeightIndex]; + + float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex]; + float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex]; + + if (upperBoundHeight == lowerBoundHeight) + { + outIntervalColor = upperBoundColor; + } + else + { + float interpolationVal = (heightForColor - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); + outIntervalColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); + } +} + +float3 calculateDTMTriangleBarycentrics(in float2 v1, in float2 v2, in float2 v3, in float2 p) +{ + float denom = (v2.x - v1.x) * (v3.y - v1.y) - (v3.x - v1.x) * (v2.y - v1.y); + float u = ((v2.y - v3.y) * (p.x - v3.x) + (v3.x - v2.x) * (p.y - v3.y)) / denom; + float v = ((v3.y - v1.y) * (p.x - v3.x) + (v1.x - v3.x) * (p.y - v3.y)) / denom; + float w = 1.0 - u - v; + return float3(u, v, w); +} + +float4 calculateDTMHeightColor(in DTMHeightShadingSettings settings, in float3 triangleVertices[3], in float heightDeriv, in float2 fragPos, in float height) +{ + float4 outputColor = float4(0.0f, 0.0f, 0.0f, 0.0f); + + // HEIGHT SHADING + const uint32_t heightMapSize = settings.heightColorEntryCount; + float minShadingHeight = settings.heightColorMapHeights[0]; + float maxShadingHeight = settings.heightColorMapHeights[heightMapSize - 1]; + + if (heightMapSize > 0) + { + // Do the triangle SDF: + float2 e0 = (triangleVertices[1] - triangleVertices[0]).xy; + float2 e1 = (triangleVertices[2] - triangleVertices[1]).xy; + float2 e2 = (triangleVertices[0] - triangleVertices[2]).xy; + + float2 v0 = fragPos - triangleVertices[0].xy; + float2 v1 = fragPos - triangleVertices[1].xy; + float2 v2 = fragPos - triangleVertices[2].xy; + + float distanceToLine0 = dot2(v0 - e0 * clamp(dot(v0, e0) / dot(e0, e0), 0.0, 1.0)); + float distanceToLine1 = dot2(v1 - e1 * clamp(dot(v1, e1) / dot(e1, e1), 0.0, 1.0)); + float distanceToLine2 = dot2(v2 - e2 * clamp(dot(v2, e2) / dot(e2, e2), 0.0, 1.0)); + + // TODO[Optization]: We can get the sign (whether inside or outside the triangle) from the barycentric coords we already compute outside this func + // So we can skip this part which tries to figure out which side of each triangle edge line the fragPos relies on + float o = e0.x * e2.y - e0.y * e2.x; + 
float2 d = min(min(float2(distanceToLine0, o * (v0.x * e0.y - v0.y * e0.x)), + float2(distanceToLine1, o * (v1.x * e1.y - v1.y * e1.x))), + float2(distanceToLine2, o * (v2.x * e2.y - v2.y * e2.x))); + + float triangleSDF = -sqrt(d.x) * sign(d.y); + + // Intersect with the region between min and max height shading. + float minHeightShadingLine = (minShadingHeight - height) / heightDeriv; + float maxHeightShadingLine = (height - maxShadingHeight) / heightDeriv; + + float convexPolygonSdf = triangleSDF; + convexPolygonSdf = max(convexPolygonSdf, minHeightShadingLine); + convexPolygonSdf = max(convexPolygonSdf, maxHeightShadingLine); + outputColor.a = 1.0f - smoothstep(0.0f, globals.antiAliasingFactor + globals.antiAliasingFactor, convexPolygonSdf); + + // calculate height color + E_HEIGHT_SHADING_MODE mode = settings.determineHeightShadingMode(); + if (mode == E_HEIGHT_SHADING_MODE::DISCRETE_VARIABLE_LENGTH_INTERVALS) + { + DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; + int upperBoundIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0u, heightMapSize, height), heightMapSize - 1u); + int mapIndex = max(upperBoundIndex - 1, 0); + int mapIndexPrev = max(mapIndex - 1, 0); + int mapIndexNext = min(mapIndex + 1, heightMapSize - 1); + + // logic explainer: if colorIdx is 0.0 then it means blend with next + // if color idx is >= length of the colours array then it means it's also > 0.0 and this blend with prev is true + // if color idx is > 0 and < len - 1, then it depends on the current pixel's height value and two closest height values + bool blendWithPrev = (mapIndex > 0) + && (mapIndex >= heightMapSize - 1 || (height * 2.0 < settings.heightColorMapHeights[upperBoundIndex] + settings.heightColorMapHeights[mapIndex])); + + HeightSegmentTransitionData transitionInfo; + transitionInfo.currentHeight = height; + transitionInfo.currentSegmentColor = settings.heightColorMapColors[mapIndex]; + transitionInfo.boundaryHeight = blendWithPrev ? settings.heightColorMapHeights[mapIndex] : settings.heightColorMapHeights[mapIndexNext]; + transitionInfo.otherSegmentColor = blendWithPrev ? 
settings.heightColorMapColors[mapIndexPrev] : settings.heightColorMapColors[mapIndexNext]; + + float4 localHeightColor = smoothHeightSegmentTransition(transitionInfo, heightDeriv); + outputColor.rgb = localHeightColor.rgb; + outputColor.a *= localHeightColor.a; + } + else if (mode == E_HEIGHT_SHADING_MODE::DISCRETE_FIXED_LENGTH_INTERVALS) + { + float intervalPosition = getIntervalPosition(height, minShadingHeight, settings.intervalLength, settings.isCenteredShading); + float positionWithinInterval = frac(intervalPosition); + int intervalIndex = nbl::hlsl::_static_cast(intervalPosition); + + float4 currentIntervalColor; + float currentIntervalHeight; + getIntervalHeightAndColor(intervalIndex, settings, currentIntervalColor, currentIntervalHeight); + + bool blendWithPrev = (positionWithinInterval < 0.5f); + + HeightSegmentTransitionData transitionInfo; + transitionInfo.currentHeight = height; + transitionInfo.currentSegmentColor = currentIntervalColor; + if (blendWithPrev) + { + int prevIntervalIdx = max(intervalIndex - 1, 0); + float prevIntervalHeight; // unused, the currentIntervalHeight is the boundary height between current and prev + getIntervalHeightAndColor(prevIntervalIdx, settings, transitionInfo.otherSegmentColor, prevIntervalHeight); + transitionInfo.boundaryHeight = currentIntervalHeight; + } + else + { + int nextIntervalIdx = intervalIndex + 1; + getIntervalHeightAndColor(nextIntervalIdx, settings, transitionInfo.otherSegmentColor, transitionInfo.boundaryHeight); + } + + float4 localHeightColor = smoothHeightSegmentTransition(transitionInfo, heightDeriv); + outputColor.rgb = localHeightColor.rgb; + outputColor.a *= localHeightColor.a; + } + else if (mode == E_HEIGHT_SHADING_MODE::CONTINOUS_INTERVALS) + { + DTMSettingsHeightsAccessor dtmHeightsAccessor = { settings }; + uint32_t upperBoundHeightIndex = min(nbl::hlsl::upper_bound(dtmHeightsAccessor, 0u, heightMapSize - 1u, height), heightMapSize - 1u); + uint32_t lowerBoundHeightIndex = upperBoundHeightIndex == 0 ? 
upperBoundHeightIndex : upperBoundHeightIndex - 1; + + float upperBoundHeight = settings.heightColorMapHeights[upperBoundHeightIndex]; + float lowerBoundHeight = settings.heightColorMapHeights[lowerBoundHeightIndex]; + + float4 upperBoundColor = settings.heightColorMapColors[upperBoundHeightIndex]; + float4 lowerBoundColor = settings.heightColorMapColors[lowerBoundHeightIndex]; + + float interpolationVal; + if (upperBoundHeightIndex == 0) + interpolationVal = 1.0f; + else + interpolationVal = (height - lowerBoundHeight) / (upperBoundHeight - lowerBoundHeight); + + float4 localHeightColor = lerp(lowerBoundColor, upperBoundColor, interpolationVal); + + outputColor.a *= localHeightColor.a; + outputColor.rgb = localHeightColor.rgb * outputColor.a + outputColor.rgb * (1.0f - outputColor.a); + } + } + + return outputColor; +} + +float calculateDTMContourSDF(in DTMContourSettings contourSettings, in LineStyle contourStyle, in float worldToScreenRatio, in float3 v[3], in float2 fragPos, in float height) +{ + float distance = nbl::hlsl::numeric_limits::max; + const float contourThickness = (contourStyle.screenSpaceLineWidth + contourStyle.worldSpaceLineWidth / worldToScreenRatio) * 0.5f; + const float stretch = 1.0f; + const float phaseShift = 0.0f; + + const float startHeight = contourSettings.contourLinesStartHeight; + const float endHeight = contourSettings.contourLinesEndHeight; + const float interval = contourSettings.contourLinesHeightInterval; + const int maxContourLineIdx = (endHeight - startHeight) / interval; + + // TODO: it actually can output a negative number, fix + int contourLineIdx = nbl::hlsl::_static_cast((height - startHeight) / interval + 0.5f); + contourLineIdx = clamp(contourLineIdx, 0, maxContourLineIdx); + float contourLineHeight = startHeight + interval * contourLineIdx; + + + // Sort so that v[0].z >= v[1].z >= v[2].z + if (v[0].z < v[1].z) + nbl::hlsl::swap(v[0], v[1]); + if (v[0].z < v[2].z) + nbl::hlsl::swap(v[0], v[2]); + if (v[1].z < v[2].z) + nbl::hlsl::swap(v[1], v[2]); + + int contourLinePointsIdx = 0; + float2 contourLinePoints[2]; + for (int i = 0; i < 3; ++i) + { + if (contourLinePointsIdx == 2) + break; + + int minvIdx = 0; + int maxvIdx = 0; + + if (i == 0) { minvIdx = 2; maxvIdx = 0; } + if (i == 1) { minvIdx = 1; maxvIdx = 0; } + if (i == 2) { minvIdx = 2; maxvIdx = 1; } + + float3 minV = v[minvIdx]; + float3 maxV = v[maxvIdx]; + + if (contourLineHeight >= minV.z && contourLineHeight <= maxV.z) + { + float interpolationVal = (contourLineHeight - minV.z) / (maxV.z - minV.z); + contourLinePoints[contourLinePointsIdx] = lerp(minV.xy, maxV.xy, clamp(interpolationVal, 0.0f, 1.0f)); + ++contourLinePointsIdx; + } + } + + if (contourLinePointsIdx == 2) + { + nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(contourLinePoints[0], contourLinePoints[1]); + + if (!contourStyle.hasStipples() || stretch == InvalidStyleStretchValue) + { + distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, fragPos, contourThickness, contourStyle.isRoadStyleFlag); + } + else + { + // TODO: + // It might be beneficial to calculate distance between pixel and contour line to early out some pixels and save yourself from stipple sdf computations! 
+ // where you only compute the complex sdf if abs((height - contourVal) / heightDeriv) <= aaFactor + nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); + LineStyleClipper clipper = LineStyleClipper::construct(contourStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, fragPos, contourThickness, contourStyle.isRoadStyleFlag, clipper); + } + } + + return distance; +} + +float4 calculateDTMOutlineColor(in uint outlineLineStyleIdx, in float worldToScreenRatio, in float3 v[3], in float2 fragPos) +{ + float4 outputColor; + + LineStyle outlineStyle = loadLineStyle(outlineLineStyleIdx); + const float outlineThickness = (outlineStyle.screenSpaceLineWidth + outlineStyle.worldSpaceLineWidth / worldToScreenRatio) * 0.5f; + const float phaseShift = 0.0f; // input.getCurrentPhaseShift(); + const float stretch = 1.0f; + + // index of vertex opposing an edge, needed for calculation of triangle heights + uint opposingVertexIdx[3]; + opposingVertexIdx[0] = 2; + opposingVertexIdx[1] = 0; + opposingVertexIdx[2] = 1; + + float minDistance = nbl::hlsl::numeric_limits::max; + if (!outlineStyle.hasStipples() || stretch == InvalidStyleStretchValue) + { + for (int i = 0; i < 3; ++i) + { + float3 p0 = v[i]; + float3 p1 = v[(i + 1) % 3]; + + float distance = nbl::hlsl::numeric_limits::max; + nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(float2(p0.x, p0.y), float2(p1.x, p1.y)); + distance = ClippedSignedDistance >::sdf(lineSegment, fragPos, outlineThickness, outlineStyle.isRoadStyleFlag); + + minDistance = min(minDistance, distance); + } + } + else + { + for (int i = 0; i < 3; ++i) + { + float3 p0 = v[i]; + float3 p1 = v[(i + 1) % 3]; + + // long story short, in order for stipple patterns to be consistent: + // - point with lesser x coord should be starting point + // - if x coord of both points are equal then point with lesser y value should be starting point + if (p1.x < p0.x) + nbl::hlsl::swap(p0, p1); + else if (p1.x == p0.x && p1.y < p0.y) + nbl::hlsl::swap(p0, p1); + + nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(float2(p0.x, p0.y), float2(p1.x, p1.y)); + + float distance = nbl::hlsl::numeric_limits::max; + nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); + LineStyleClipper clipper = LineStyleClipper::construct(outlineStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, fragPos, outlineThickness, outlineStyle.isRoadStyleFlag, clipper); + + minDistance = min(minDistance, distance); + } + } + + outputColor.a = 1.0f - smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, minDistance); + outputColor.a *= outlineStyle.color.a; + outputColor.rgb = outlineStyle.color.rgb; + + return outputColor; +} + +// TODO: +// It's literally sdf with a line shape +// so it should be moved somewhere else and used for every line maybe +float calculateLineSDF(in LineStyle lineStyle, in float worldToScreenRatio, in nbl::hlsl::shapes::Line lineSegment, in float2 fragPos, in float phaseShift) +{ + const float outlineThickness = (lineStyle.screenSpaceLineWidth + lineStyle.worldSpaceLineWidth / worldToScreenRatio) * 0.5f; + const float stretch = 1.0f; + + float minDistance = nbl::hlsl::numeric_limits::max; + if 
(!lineStyle.hasStipples() || stretch == InvalidStyleStretchValue) + { + float distance = nbl::hlsl::numeric_limits::max; + distance = ClippedSignedDistance >::sdf(lineSegment, fragPos, outlineThickness, lineStyle.isRoadStyleFlag); + minDistance = min(minDistance, distance); + } + else + { + float distance = nbl::hlsl::numeric_limits::max; + nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); + LineStyleClipper clipper = LineStyleClipper::construct(lineStyle, lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, fragPos, outlineThickness, lineStyle.isRoadStyleFlag, clipper); + + minDistance = min(minDistance, distance); + } + + return minDistance; +} + +float4 blendUnder(in float4 dstColor, in float4 srcColor) +{ + dstColor.rgb = dstColor.rgb + (1 - dstColor.a) * srcColor.a * srcColor.rgb; + dstColor.a = (1.0f - srcColor.a) * dstColor.a + srcColor.a; + + return dstColor; +} + +E_CELL_DIAGONAL resolveGridDTMCellDiagonal(in uint32_t4 cellData) +{ + float4 cellHeights = asfloat(cellData); + + const bool4 invalidHeights = bool4( + isInvalidGridDtmHeightValue(cellHeights.x), + isInvalidGridDtmHeightValue(cellHeights.y), + isInvalidGridDtmHeightValue(cellHeights.z), + isInvalidGridDtmHeightValue(cellHeights.w) + ); + + int invalidHeightsCount = 0; + for (int i = 0; i < 4; ++i) + invalidHeightsCount += int(invalidHeights[i]); + + if (invalidHeightsCount == 0) + return getDiagonalModeFromCellCornerData(cellData.w); + + if (invalidHeightsCount > 1) + return INVALID; + + if (invalidHeights.x || invalidHeights.z) + return TOP_LEFT_TO_BOTTOM_RIGHT; + else if (invalidHeights.y || invalidHeights.w) + return BOTTOM_LEFT_TO_TOP_RIGHT; + + return INVALID; +} + +struct GridDTMTriangle +{ + float3 vertices[3]; +}; + +/** +* grid consists of square cells and cells are divided into two triangles: +* depending on mode it is +* either: or: +* v2a-------v1 v0-------v2b +* | A / | | \ B | +* | / | | \ | +* | / B | | A \ | +* v0-------v2b v2a-------v1 +*/ +struct GridDTMCell +{ + GridDTMTriangle triangleA; + GridDTMTriangle triangleB; + bool validA; + bool validB; +}; + +struct GridDTMHeightMapData +{ + // heights.x - bottom left texel + // heights.y - bottom right texel + // heights.z - top right texel + // heights.w - top left texel + float4 heights; + E_CELL_DIAGONAL cellDiagonal; +}; + +GridDTMHeightMapData retrieveGridDTMCellDataFromHeightMap(in float2 gridDimensions, in float2 cellCoords, in Texture2D heightMap) +{ + GridDTMHeightMapData output; + + const float2 location = (cellCoords + float2(0.5f, 0.5f)) / gridDimensions; + uint32_t4 cellData = heightMap.Gather(textureSampler, float2(location.x, location.y), 0); + + // printf("%u %u %u %u", cellData.x, cellData.y, cellData.z, cellData.w); + + output.heights = asfloat(cellData); + output.cellDiagonal = dtm::resolveGridDTMCellDiagonal(cellData); + return output; +} + +GridDTMCell calculateCellTriangles(in dtm::GridDTMHeightMapData heightData, in float2 cellCoords, const float cellWidth) +{ + GridDTMCell output; + + // heightData.heights.x - bottom left texel + // heightData.heights.y - bottom right texel + // heightData.heights.z - top right texel + // heightData.heights.w - top left texel + float2 gridSpaceCellTopLeftCoords = cellCoords * cellWidth; + + if (heightData.cellDiagonal == E_CELL_DIAGONAL::TOP_LEFT_TO_BOTTOM_RIGHT) + { + output.triangleA.vertices[0] = float3(gridSpaceCellTopLeftCoords.x, 
gridSpaceCellTopLeftCoords.y, heightData.heights.w); + output.triangleA.vertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.y); + output.triangleA.vertices[2] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.x); + + output.triangleB.vertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, heightData.heights.w); + output.triangleB.vertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.y); + output.triangleB.vertices[2] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, heightData.heights.z); + } + else + { + output.triangleA.vertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.x); + output.triangleA.vertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, heightData.heights.z); + output.triangleA.vertices[2] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y, heightData.heights.w); + + output.triangleB.vertices[0] = float3(gridSpaceCellTopLeftCoords.x, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.x); + output.triangleB.vertices[1] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y, heightData.heights.z); + output.triangleB.vertices[2] = float3(gridSpaceCellTopLeftCoords.x + cellWidth, gridSpaceCellTopLeftCoords.y + cellWidth, heightData.heights.y); + } + + output.validA = !isInvalidGridDtmHeightValue(output.triangleA.vertices[0].z) && !isInvalidGridDtmHeightValue(output.triangleA.vertices[1].z) && !isInvalidGridDtmHeightValue(output.triangleA.vertices[2].z); + output.validB = !isInvalidGridDtmHeightValue(output.triangleB.vertices[0].z) && !isInvalidGridDtmHeightValue(output.triangleB.vertices[1].z) && !isInvalidGridDtmHeightValue(output.triangleB.vertices[2].z); + + return output; +} + +} + +#endif \ No newline at end of file diff --git a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl index e850622c3..1783cb145 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader.hlsl @@ -1,655 +1,707 @@ -#include "common.hlsl" -#include -#include -#include -#include -#include -#include -#include -#include - -template -struct DefaultClipper -{ - using float_t2 = vector; - NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.0; - - static DefaultClipper construct() - { - DefaultClipper ret; - return ret; - } - - inline float_t2 operator()(const float_t t) - { - const float_t ret = clamp(t, 0.0, 1.0); - return float_t2(ret, ret); - } -}; - -// for usage in upper_bound function -struct StyleAccessor -{ - LineStyle style; - using value_type = float; - - float operator[](const uint32_t ix) - { - return style.getStippleValue(ix); - } -}; - -template -struct StyleClipper -{ - using float_t = typename CurveType::scalar_t; - using float_t2 = typename CurveType::float_t2; - using float_t3 = typename CurveType::float_t3; - NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.000001; - - static StyleClipper construct( - LineStyle style, - CurveType curve, - typename CurveType::ArcLengthCalculator arcLenCalc, - float phaseShift, - float stretch, - float worldToScreenRatio) - { - StyleClipper ret = { style, curve, arcLenCalc, phaseShift, stretch, worldToScreenRatio, 0.0f, 0.0f, 0.0f, 0.0f }; - - // 
values for non-uniform stretching with a rigid segment - if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) - { - // rigidSegment info in old non stretched pattern - ret.rigidSegmentStart = (style.rigidSegmentIdx >= 1u) ? style.getStippleValue(style.rigidSegmentIdx - 1u) : 0.0f; - ret.rigidSegmentEnd = (style.rigidSegmentIdx < style.stipplePatternSize) ? style.getStippleValue(style.rigidSegmentIdx) : 1.0f; - ret.rigidSegmentLen = ret.rigidSegmentEnd - ret.rigidSegmentStart; - // stretch value for non rigid segments - ret.nonRigidSegmentStretchValue = (stretch - ret.rigidSegmentLen) / (1.0f - ret.rigidSegmentLen); - // rigidSegment info to new stretched pattern - ret.rigidSegmentStart *= ret.nonRigidSegmentStretchValue / stretch; // get the new normalized rigid segment start - ret.rigidSegmentLen /= stretch; // get the new rigid segment normalized len - ret.rigidSegmentEnd = ret.rigidSegmentStart + ret.rigidSegmentLen; // get the new normalized rigid segment end - } - else - { - ret.nonRigidSegmentStretchValue = stretch; - } - - return ret; - } - - // For non-uniform stretching with a rigid segment (the one segement that shouldn't stretch) the whole pattern changes - // instead of transforming each of the style.stipplePattern values (max 14 of them), we transform the normalized place in pattern - float getRealNormalizedPlaceInPattern(float normalizedPlaceInPattern) - { - if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) - { - float ret = min(normalizedPlaceInPattern, rigidSegmentStart) / nonRigidSegmentStretchValue; // unstretch parts before rigid segment - ret += max(normalizedPlaceInPattern - rigidSegmentEnd, 0.0f) / nonRigidSegmentStretchValue; // unstretch parts after rigid segment - ret += max(min(rigidSegmentLen, normalizedPlaceInPattern - rigidSegmentStart), 0.0f); // unstretch parts inside rigid segment - ret *= stretch; - return ret; - } - else - { - return normalizedPlaceInPattern; - } - } - - float_t2 operator()(float_t t) - { - // basicaly 0.0 and 1.0 but with a guardband to discard outside the range - const float_t minT = 0.0 - 1.0; - const float_t maxT = 1.0 + 1.0; - - StyleAccessor styleAccessor = { style }; - const float_t reciprocalStretchedStipplePatternLen = style.reciprocalStipplePatternLen / stretch; - const float_t patternLenInScreenSpace = 1.0 / (worldToScreenRatio * style.reciprocalStipplePatternLen); - - const float_t arcLen = arcLenCalc.calcArcLen(t); - const float_t worldSpaceArcLen = arcLen * float_t(worldToScreenRatio); - float_t normalizedPlaceInPattern = frac(worldSpaceArcLen * reciprocalStretchedStipplePatternLen + phaseShift); - normalizedPlaceInPattern = getRealNormalizedPlaceInPattern(normalizedPlaceInPattern); - uint32_t patternIdx = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPattern); - - const float_t InvalidT = nbl::hlsl::numeric_limits::infinity; - float_t2 ret = float_t2(InvalidT, InvalidT); - - // odd patternIdx means a "no draw section" and current candidate should split into two nearest draw sections - const bool notInDrawSection = patternIdx & 0x1; - - // TODO[Erfan]: Disable this piece of code after clipping, and comment the reason, that the bezier start and end at 0.0 and 1.0 should be in drawable sections - float_t minDrawT = 0.0; - float_t maxDrawT = 1.0; - { - float_t normalizedPlaceInPatternBegin = frac(phaseShift); - normalizedPlaceInPatternBegin = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternBegin); - uint32_t patternIdxBegin = 
nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternBegin); - const bool BeginInNonDrawSection = patternIdxBegin & 0x1; - - if (BeginInNonDrawSection) - { - float_t diffToRightDrawableSection = (patternIdxBegin == style.stipplePatternSize) ? 1.0 : styleAccessor[patternIdxBegin]; - diffToRightDrawableSection -= normalizedPlaceInPatternBegin; - float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * patternLenInScreenSpace * ((patternIdxBegin != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); - const float_t arcLenForT1 = 0.0 + scrSpcOffsetToArcLen1; - minDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, 0.0); - } - - // Completely in non-draw section -> clip away: - if (minDrawT >= 1.0) - return ret; - - const float_t arcLenEnd = arcLenCalc.calcArcLen(1.0); - const float_t worldSpaceArcLenEnd = arcLenEnd * float_t(worldToScreenRatio); - float_t normalizedPlaceInPatternEnd = frac(worldSpaceArcLenEnd * reciprocalStretchedStipplePatternLen + phaseShift); - normalizedPlaceInPatternEnd = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternEnd); - uint32_t patternIdxEnd = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternEnd); - const bool EndInNonDrawSection = patternIdxEnd & 0x1; - - if (EndInNonDrawSection) - { - float_t diffToLeftDrawableSection = (patternIdxEnd == 0) ? 0.0 : styleAccessor[patternIdxEnd - 1]; - diffToLeftDrawableSection -= normalizedPlaceInPatternEnd; - float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * patternLenInScreenSpace * ((patternIdxEnd != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); - const float_t arcLenForT0 = arcLenEnd + scrSpcOffsetToArcLen0; - maxDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, 1.0); - } - } - - if (notInDrawSection) - { - float toScreenSpaceLen = patternLenInScreenSpace * ((patternIdx != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); - - float_t diffToLeftDrawableSection = (patternIdx == 0) ? 0.0 : styleAccessor[patternIdx - 1]; - diffToLeftDrawableSection -= normalizedPlaceInPattern; - float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * toScreenSpaceLen; - const float_t arcLenForT0 = arcLen + scrSpcOffsetToArcLen0; - float_t t0 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, t); - t0 = clamp(t0, minDrawT, maxDrawT); - - float_t diffToRightDrawableSection = (patternIdx == style.stipplePatternSize) ? 
1.0 : styleAccessor[patternIdx]; - diffToRightDrawableSection -= normalizedPlaceInPattern; - float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * toScreenSpaceLen; - const float_t arcLenForT1 = arcLen + scrSpcOffsetToArcLen1; - float_t t1 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, t); - t1 = clamp(t1, minDrawT, maxDrawT); - - ret = float_t2(t0, t1); - } - else - { - t = clamp(t, minDrawT, maxDrawT); - ret = float_t2(t, t); - } - - return ret; - } - - LineStyle style; - CurveType curve; - typename CurveType::ArcLengthCalculator arcLenCalc; - float phaseShift; - float stretch; - float worldToScreenRatio; - // precomp value for non uniform stretching - float rigidSegmentStart; - float rigidSegmentEnd; - float rigidSegmentLen; - float nonRigidSegmentStretchValue; -}; - -template > -struct ClippedSignedDistance -{ - using float_t = typename CurveType::scalar_t; - using float_t2 = typename CurveType::float_t2; - using float_t3 = typename CurveType::float_t3; - - const static float_t sdf(CurveType curve, float_t2 pos, float_t thickness, bool isRoadStyle, Clipper clipper = DefaultClipper::construct()) - { - typename CurveType::Candidates candidates = curve.getClosestCandidates(pos); - - const float_t InvalidT = nbl::hlsl::numeric_limits::max; - // TODO: Fix and test, we're not working with squared distance anymore - const float_t MAX_DISTANCE_SQUARED = (thickness + 1.0f) * (thickness + 1.0f); // TODO: ' + 1' is too much? - - bool clipped = false; - float_t closestDistanceSquared = MAX_DISTANCE_SQUARED; - float_t closestT = InvalidT; - [[unroll(CurveType::MaxCandidates)]] - for (uint32_t i = 0; i < CurveType::MaxCandidates; i++) - { - const float_t candidateDistanceSquared = length(curve.evaluate(candidates[i]) - pos); - if (candidateDistanceSquared < closestDistanceSquared) - { - float_t2 snappedTs = clipper(candidates[i]); - - if (snappedTs[0] == InvalidT) - { - continue; - } - - if (snappedTs[0] != candidates[i]) - { - // left snapped or clamped - const float_t leftSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[0]) - pos); - if (leftSnappedCandidateDistanceSquared < closestDistanceSquared) - { - clipped = true; - closestT = snappedTs[0]; - closestDistanceSquared = leftSnappedCandidateDistanceSquared; - } - - if (snappedTs[0] != snappedTs[1]) - { - // right snapped or clamped - const float_t rightSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[1]) - pos); - if (rightSnappedCandidateDistanceSquared < closestDistanceSquared) - { - clipped = true; - closestT = snappedTs[1]; - closestDistanceSquared = rightSnappedCandidateDistanceSquared; - } - } - } - else - { - // no snapping - if (candidateDistanceSquared < closestDistanceSquared) - { - clipped = false; - closestT = candidates[i]; - closestDistanceSquared = candidateDistanceSquared; - } - } - } - } - - - float_t roundedDistance = closestDistanceSquared - thickness; - if(!isRoadStyle) - { - return roundedDistance; - } - else - { - const float_t aaWidth = globals.antiAliasingFactor; - float_t rectCappedDistance = roundedDistance; - - if (clipped) - { - float_t2 q = mul(curve.getLocalCoordinateSpace(closestT), pos - curve.evaluate(closestT)); - rectCappedDistance = capSquare(q, thickness, aaWidth); - } - - return rectCappedDistance; - } - } - - static float capSquare(float_t2 q, float_t th, float_t aaWidth) - { - float_t2 d = abs(q) - float_t2(aaWidth, th); - return length(max(d, 0.0)) + min(max(d.x, d.y), 0.0); - } -}; - -// sdf of Isosceles Trapezoid y-aligned by 
https://iquilezles.org/articles/distfunctions2d/ -float sdTrapezoid(float2 p, float r1, float r2, float he) -{ - float2 k1 = float2(r2, he); - float2 k2 = float2(r2 - r1, 2.0 * he); - - p.x = abs(p.x); - float2 ca = float2(max(0.0, p.x - ((p.y < 0.0) ? r1 : r2)), abs(p.y) - he); - float2 cb = p - k1 + k2 * clamp(dot(k1 - p, k2) / dot(k2,k2), 0.0, 1.0); - - float s = (cb.x < 0.0 && ca.y < 0.0) ? -1.0 : 1.0; - - return s * sqrt(min(dot(ca,ca), dot(cb,cb))); -} - -// line segment sdf which returns the distance vector specialized for usage in hatch box line boundaries -float2 sdLineDstVec(float2 P, float2 A, float2 B) -{ - const float2 PA = P - A; - const float2 BA = B - A; - float h = clamp(dot(PA, BA) / dot(BA, BA), 0.0, 1.0); - return PA - BA * h; -} - -float miterSDF(float2 p, float thickness, float2 a, float2 b, float ra, float rb) -{ - float h = length(b - a) / 2.0; - float2 d = normalize(b - a); - float2x2 rot = float2x2(d.y, -d.x, d.x, d.y); - p = mul(rot, p); - p.y -= h - thickness; - return sdTrapezoid(p, ra, rb, h); -} - -typedef StyleClipper< nbl::hlsl::shapes::Quadratic > BezierStyleClipper; -typedef StyleClipper< nbl::hlsl::shapes::Line > LineStyleClipper; - -// We need to specialize color calculation based on FragmentShaderInterlock feature availability for our transparency algorithm -// because there is no `if constexpr` in hlsl -// @params -// textureColor: color sampled from a texture -// useStyleColor: instead of writing and reading from colorStorage, use main object Idx to find the style color for the object. -template -float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 textureColor, bool colorFromTexture); - -template<> -float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 localTextureColor, bool colorFromTexture) -{ - uint32_t styleIdx = mainObjects[currentMainObjectIdx].styleIdx; - if (!colorFromTexture) - { - float32_t4 col = lineStyles[styleIdx].color; - col.w *= localAlpha; - return float4(col); - } - else - return float4(localTextureColor, localAlpha); -} -template<> -float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 localTextureColor, bool colorFromTexture) -{ - float32_t4 color; - nbl::hlsl::spirv::beginInvocationInterlockEXT(); - - const uint32_t packedData = pseudoStencil[fragCoord]; - - const uint32_t localQuantizedAlpha = (uint32_t)(localAlpha * 255.f); - const uint32_t storedQuantizedAlpha = nbl::hlsl::glsl::bitfieldExtract(packedData,0,AlphaBits); - const uint32_t storedMainObjectIdx = nbl::hlsl::glsl::bitfieldExtract(packedData,AlphaBits,MainObjectIdxBits); - // if geomID has changed, we resolve the SDF alpha (draw using blend), else accumulate - const bool differentMainObject = currentMainObjectIdx != storedMainObjectIdx; // meaning current pixel's main object is different than what is already stored - const bool resolve = differentMainObject && storedMainObjectIdx != InvalidMainObjectIdx; - uint32_t toResolveStyleIdx = InvalidStyleIdx; - - // load from colorStorage only if we want to resolve color from texture instead of style - // sampling from colorStorage needs to happen in critical section because another fragment may also want to store into it at the same time + need to happen before store - if (resolve) - { - toResolveStyleIdx = mainObjects[storedMainObjectIdx].styleIdx; - if (toResolveStyleIdx == InvalidStyleIdx) // if style idx to resolve 
is invalid, then it means we should resolve from color - color = float32_t4(unpackR11G11B10_UNORM(colorStorage[fragCoord]), 1.0f); - } - - // If current localAlpha is higher than what is already stored in pseudoStencil we will update the value in pseudoStencil or the color in colorStorage, this is equivalent to programmable blending MAX operation. - // OR If previous pixel has a different ID than current's (i.e. previous either empty/invalid or a differnet mainObject), we should update our alpha and color storages. - if (differentMainObject || localQuantizedAlpha > storedQuantizedAlpha) - { - pseudoStencil[fragCoord] = nbl::hlsl::glsl::bitfieldInsert(localQuantizedAlpha,currentMainObjectIdx,AlphaBits,MainObjectIdxBits); - if (colorFromTexture) // writing color from texture - colorStorage[fragCoord] = packR11G11B10_UNORM(localTextureColor); - } - - nbl::hlsl::spirv::endInvocationInterlockEXT(); - - if (!resolve) - discard; - - // draw with previous geometry's style's color or stored in texture buffer :kek: - // we don't need to load the style's color in critical section because we've already retrieved the style index from the stored main obj - if (toResolveStyleIdx != InvalidStyleIdx) // if toResolveStyleIdx is valid then that means our resolved color should come from line style - color = lineStyles[toResolveStyleIdx].color; - color.a *= float(storedQuantizedAlpha) / 255.f; - - return color; -} - -[[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]] -[shader("pixel")] -float4 fragMain(PSInput input) : SV_TARGET -{ - float localAlpha = 0.0f; - float3 textureColor = float3(0, 0, 0); // color sampled from a texture - - // TODO[Przemek]: Disable All the object rendering paths if you want. - ObjectType objType = input.getObjType(); - const uint32_t currentMainObjectIdx = input.getMainObjectIdx(); - const MainObject mainObj = mainObjects[currentMainObjectIdx]; - - // figure out local alpha with sdf - if (objType == ObjectType::LINE || objType == ObjectType::QUAD_BEZIER || objType == ObjectType::POLYLINE_CONNECTOR) - { - float distance = nbl::hlsl::numeric_limits::max; - if (objType == ObjectType::LINE) - { - const float2 start = input.getLineStart(); - const float2 end = input.getLineEnd(); - const uint32_t styleIdx = mainObj.styleIdx; - const float thickness = input.getLineThickness(); - const float phaseShift = input.getCurrentPhaseShift(); - const float stretch = input.getPatternStretch(); - const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); - - nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(start, end); - nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); - - LineStyle style = lineStyles[styleIdx]; - - if (!style.hasStipples() || stretch == InvalidStyleStretchValue) - { - distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag); - } - else - { - LineStyleClipper clipper = LineStyleClipper::construct(lineStyles[styleIdx], lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); - distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag, clipper); - } - } - else if (objType == ObjectType::QUAD_BEZIER) - { - nbl::hlsl::shapes::Quadratic quadratic = input.getQuadratic(); - nbl::hlsl::shapes::Quadratic::ArcLengthCalculator arcLenCalc = input.getQuadraticArcLengthCalculator(); - - const uint32_t styleIdx = 
mainObj.styleIdx; - const float thickness = input.getLineThickness(); - const float phaseShift = input.getCurrentPhaseShift(); - const float stretch = input.getPatternStretch(); - const float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); - - LineStyle style = lineStyles[styleIdx]; - if (!style.hasStipples() || stretch == InvalidStyleStretchValue) - { - distance = ClippedSignedDistance< nbl::hlsl::shapes::Quadratic >::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag); - } - else - { - BezierStyleClipper clipper = BezierStyleClipper::construct(lineStyles[styleIdx], quadratic, arcLenCalc, phaseShift, stretch, worldToScreenRatio); - distance = ClippedSignedDistance, BezierStyleClipper>::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag, clipper); - } - } - else if (objType == ObjectType::POLYLINE_CONNECTOR) - { - const float2 P = input.position.xy - input.getPolylineConnectorCircleCenter(); - distance = miterSDF( - P, - input.getLineThickness(), - input.getPolylineConnectorTrapezoidStart(), - input.getPolylineConnectorTrapezoidEnd(), - input.getPolylineConnectorTrapezoidLongBase(), - input.getPolylineConnectorTrapezoidShortBase()); - - } - localAlpha = smoothstep(+globals.antiAliasingFactor, -globals.antiAliasingFactor, distance); - } - else if (objType == ObjectType::CURVE_BOX) - { - const float minorBBoxUV = input.getMinorBBoxUV(); - const float majorBBoxUV = input.getMajorBBoxUV(); - - nbl::hlsl::math::equations::Quadratic curveMinMinor = input.getCurveMinMinor(); - nbl::hlsl::math::equations::Quadratic curveMinMajor = input.getCurveMinMajor(); - nbl::hlsl::math::equations::Quadratic curveMaxMinor = input.getCurveMaxMinor(); - nbl::hlsl::math::equations::Quadratic curveMaxMajor = input.getCurveMaxMajor(); - - // TODO(Optimization): Can we ignore this majorBBoxUV clamp and rely on the t clamp that happens next? then we can pass `PrecomputedRootFinder`s instead of computing the values per pixel. 
- nbl::hlsl::math::equations::Quadratic minCurveEquation = nbl::hlsl::math::equations::Quadratic::construct(curveMinMajor.a, curveMinMajor.b, curveMinMajor.c - clamp(majorBBoxUV, 0.0, 1.0)); - nbl::hlsl::math::equations::Quadratic maxCurveEquation = nbl::hlsl::math::equations::Quadratic::construct(curveMaxMajor.a, curveMaxMajor.b, curveMaxMajor.c - clamp(majorBBoxUV, 0.0, 1.0)); - - const float minT = clamp(PrecomputedRootFinder::construct(minCurveEquation).computeRoots(), 0.0, 1.0); - const float minEv = curveMinMinor.evaluate(minT); - - const float maxT = clamp(PrecomputedRootFinder::construct(maxCurveEquation).computeRoots(), 0.0, 1.0); - const float maxEv = curveMaxMinor.evaluate(maxT); - - const bool insideMajor = majorBBoxUV >= 0.0 && majorBBoxUV <= 1.0; - const bool insideMinor = minorBBoxUV >= minEv && minorBBoxUV <= maxEv; - - if (insideMinor && insideMajor) - { - localAlpha = 1.0; - } - else - { - // Find the true SDF of a hatch box boundary which is bounded by two curves, It requires knowing the distance from the current UV to the closest point on bounding curves and the limiting lines (in major direction) - // We also keep track of distance vector (minor, major) to convert to screenspace distance for anti-aliasing with screenspace aaFactor - const float InvalidT = nbl::hlsl::numeric_limits::max; - const float MAX_DISTANCE_SQUARED = nbl::hlsl::numeric_limits::max; - - const float2 boxScreenSpaceSize = input.getCurveBoxScreenSpaceSize(); - - - float closestDistanceSquared = MAX_DISTANCE_SQUARED; - const float2 pos = float2(minorBBoxUV, majorBBoxUV) * boxScreenSpaceSize; - - if (minorBBoxUV < minEv) - { - // DO SDF of Min Curve - nbl::hlsl::shapes::Quadratic minCurve = nbl::hlsl::shapes::Quadratic::construct( - float2(curveMinMinor.a, curveMinMajor.a) * boxScreenSpaceSize, - float2(curveMinMinor.b, curveMinMajor.b) * boxScreenSpaceSize, - float2(curveMinMinor.c, curveMinMajor.c) * boxScreenSpaceSize); - - nbl::hlsl::shapes::Quadratic::Candidates candidates = minCurve.getClosestCandidates(pos); - [[unroll(nbl::hlsl::shapes::Quadratic::MaxCandidates)]] - for (uint32_t i = 0; i < nbl::hlsl::shapes::Quadratic::MaxCandidates; i++) - { - candidates[i] = clamp(candidates[i], 0.0, 1.0); - const float2 distVector = minCurve.evaluate(candidates[i]) - pos; - const float candidateDistanceSquared = dot(distVector, distVector); - if (candidateDistanceSquared < closestDistanceSquared) - closestDistanceSquared = candidateDistanceSquared; - } - } - else if (minorBBoxUV > maxEv) - { - // Do SDF of Max Curve - nbl::hlsl::shapes::Quadratic maxCurve = nbl::hlsl::shapes::Quadratic::construct( - float2(curveMaxMinor.a, curveMaxMajor.a) * boxScreenSpaceSize, - float2(curveMaxMinor.b, curveMaxMajor.b) * boxScreenSpaceSize, - float2(curveMaxMinor.c, curveMaxMajor.c) * boxScreenSpaceSize); - nbl::hlsl::shapes::Quadratic::Candidates candidates = maxCurve.getClosestCandidates(pos); - [[unroll(nbl::hlsl::shapes::Quadratic::MaxCandidates)]] - for (uint32_t i = 0; i < nbl::hlsl::shapes::Quadratic::MaxCandidates; i++) - { - candidates[i] = clamp(candidates[i], 0.0, 1.0); - const float2 distVector = maxCurve.evaluate(candidates[i]) - pos; - const float candidateDistanceSquared = dot(distVector, distVector); - if (candidateDistanceSquared < closestDistanceSquared) - closestDistanceSquared = candidateDistanceSquared; - } - } - - if (!insideMajor) - { - const bool minLessThanMax = minEv < maxEv; - float2 majorDistVector = float2(MAX_DISTANCE_SQUARED, MAX_DISTANCE_SQUARED); - if (majorBBoxUV > 1.0) - { - const float2 
minCurveEnd = float2(minEv, 1.0) * boxScreenSpaceSize; - if (minLessThanMax) - majorDistVector = sdLineDstVec(pos, minCurveEnd, float2(maxEv, 1.0) * boxScreenSpaceSize); - else - majorDistVector = pos - minCurveEnd; - } - else - { - const float2 minCurveStart = float2(minEv, 0.0) * boxScreenSpaceSize; - if (minLessThanMax) - majorDistVector = sdLineDstVec(pos, minCurveStart, float2(maxEv, 0.0) * boxScreenSpaceSize); - else - majorDistVector = pos - minCurveStart; - } - - const float majorDistSq = dot(majorDistVector, majorDistVector); - if (majorDistSq < closestDistanceSquared) - closestDistanceSquared = majorDistSq; - } - - const float dist = sqrt(closestDistanceSquared); - localAlpha = 1.0f - smoothstep(0.0, globals.antiAliasingFactor, dist); - } - - LineStyle style = lineStyles[mainObj.styleIdx]; - uint32_t textureId = asuint(style.screenSpaceLineWidth); - if (textureId != InvalidTextureIdx) - { - // For Hatch fiils we sample the first mip as we don't fill the others, because they are constant in screenspace and render as expected - // If later on we decided that we can have different sizes here, we should do computations similar to FONT_GLYPH - float3 msdfSample = msdfTextures.SampleLevel(msdfSampler, float3(frac(input.position.xy / HatchFillMSDFSceenSpaceSize), float(textureId)), 0.0).xyz; - float msdf = nbl::hlsl::text::msdfDistance(msdfSample, MSDFPixelRange * HatchFillMSDFSceenSpaceSize / MSDFSize); - localAlpha *= smoothstep(+globals.antiAliasingFactor / 2.0, -globals.antiAliasingFactor / 2.0f, msdf); - } - } - else if (objType == ObjectType::FONT_GLYPH) - { - const float2 uv = input.getFontGlyphUV(); - const uint32_t textureId = input.getFontGlyphTextureId(); - - if (textureId != InvalidTextureIdx) - { - float mipLevel = msdfTextures.CalculateLevelOfDetail(msdfSampler, uv); - float3 msdfSample = msdfTextures.SampleLevel(msdfSampler, float3(uv, float(textureId)), mipLevel); - float msdf = nbl::hlsl::text::msdfDistance(msdfSample, input.getFontGlyphPxRange()); - /* - explaining "*= exp2(max(mipLevel,0.0))" - Each mip level has constant MSDFPixelRange - Which essentially makes the msdfSamples here (Harware Sampled) have different scales per mip - As we go up 1 mip level, the msdf distance should be multiplied by 2.0 - While this makes total sense for NEAREST mip sampling when mipLevel is an integer and only one mip is being sampled. - It's a bit complex when it comes to trilinear filtering (LINEAR mip sampling), but it works in practice! - - Alternatively you can think of it as doing this instead: - localAlpha = smoothstep(+globals.antiAliasingFactor / exp2(max(mipLevel,0.0)), 0.0, msdf); - Which is reducing the aa feathering as we go up the mip levels. 
- to avoid aa feathering of the MAX_MSDF_DISTANCE_VALUE to be less than aa factor and eventually color it and cause greyed out area around the main glyph - */ - msdf *= exp2(max(mipLevel,0.0)); - - LineStyle style = lineStyles[mainObj.styleIdx]; - const float screenPxRange = input.getFontGlyphPxRange() / MSDFPixelRangeHalf; - const float bolden = style.worldSpaceLineWidth * screenPxRange; // worldSpaceLineWidth is actually boldenInPixels, aliased TextStyle with LineStyle - localAlpha = smoothstep(+globals.antiAliasingFactor / 2.0f + bolden, -globals.antiAliasingFactor / 2.0f + bolden, msdf); - } - } - else if (objType == ObjectType::IMAGE) - { - const float2 uv = input.getImageUV(); - const uint32_t textureId = input.getImageTextureId(); - - if (textureId != InvalidTextureIdx) - { - float4 colorSample = textures[NonUniformResourceIndex(textureId)].Sample(textureSampler, float2(uv.x, uv.y)); - textureColor = colorSample.rgb; - localAlpha = colorSample.a; - } - } - - uint2 fragCoord = uint2(input.position.xy); - - if (localAlpha <= 0) - discard; - - const bool colorFromTexture = objType == ObjectType::IMAGE; - - // TODO[Przemek]: But make sure you're still calling this, correctly calculating alpha and texture color. - // you can add 1 main object and push via DrawResourcesFiller like we already do for other objects (this go in the mainObjects StorageBuffer) and then set the currentMainObjectIdx to 0 here - // having 1 main object temporarily means that all triangle meshes will be treated as a unified object in blending operations. - return calculateFinalColor(fragCoord, localAlpha, currentMainObjectIdx, textureColor, colorFromTexture); -} +#define FRAGMENT_SHADER_INPUT +#include "common.hlsl" +#include "dtm.hlsl" +#include +#include +#include +#include +#include +#include +#include +//#include + +// sdf of Isosceles Trapezoid y-aligned by https://iquilezles.org/articles/distfunctions2d/ +// Trapezoid centered around origin (0,0), the top edge has length r2, the bottom edge has length r1, the height of the trapezoid is he*2.0 +float sdTrapezoid(float2 p, float r1, float r2, float he) +{ + float2 k1 = float2(r2, he); + float2 k2 = float2(r2 - r1, 2.0 * he); + + p.x = abs(p.x); + float2 ca = float2(max(0.0, p.x - ((p.y < 0.0) ? r1 : r2)), abs(p.y) - he); + float2 cb = p - k1 + k2 * clamp(dot(k1 - p, k2) / dot(k2,k2), 0.0, 1.0); + + float s = (cb.x < 0.0 && ca.y < 0.0) ? 
-1.0 : 1.0; + + return s * sqrt(min(dot(ca,ca), dot(cb,cb))); +} + +// line segment sdf which returns the distance vector specialized for usage in hatch box line boundaries +float2 sdLineDstVec(float2 P, float2 A, float2 B) +{ + const float2 PA = P - A; + const float2 BA = B - A; + float h = clamp(dot(PA, BA) / dot(BA, BA), 0.0, 1.0); + return PA - BA * h; +} + +/* + XXXXXXX b XXXXXX Long Base (len = rb) + X X + X X + X X + X XXXXXXXXXXX X + X XXXX | XXXX X + XXX | XXXX + XX | XX + XX | XX + XX | XX + XX T Trapz Center XX (2) p.y = 0 after p.y = p.y - halfHeight + radius + XX | XX + X X C Circle Center X (1) p = (0,0) at circle center + X X | X + X X | X X + X X | X X + X X | X X + X XX | XX X + X XXX | XXX X +X XXXX | XXXX X +XXXXXXXXXXXXXXXXXXXXXXXXX a XXXXXXXXXXXXXXXXXXXXX Short Base (len = ra) +*/ +// p is in circle's space (the circle centered at line intersection and radius = thickness) +// a and b are points at each trapezoid base (short and long base) +// TODO[Optimization] we can probably send less info, since we only use length of b-a and the normalize vector +float miterSDF(float2 p, float thickness, float2 a, float2 b, float ra, float rb) +{ + float halfHeight = length(b - a) / 2.0; + float2 d = normalize(b - a); + float2x2 rot = float2x2(d.y, -d.x, d.x, d.y); + p = mul(rot, p); // rotate(change of basis) such that the point is now in the space where trapezoid is y-axis aligned, see (1) above + p.y = p.y - halfHeight + thickness; // see (2) above + return sdTrapezoid(p, ra, rb, halfHeight); +} + +// We need to specialize color calculation based on FragmentShaderInterlock feature availability for our transparency algorithm +// because there is no `if constexpr` in hlsl +// @params +// textureColor: color sampled from a texture +// useStyleColor: instead of writing and reading from colorStorage, use main object Idx to find the style color for the object. 
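+// Minimal sketch of the pseudoStencil packing that both specializations below rely on: the quantized alpha occupies the low
+// AlphaBits bits and the main object index the high MainObjectIdxBits bits (AlphaBits + MainObjectIdxBits == 32).
+// The two helpers are illustrative only (hypothetical names, not used by the pipeline); the real code calls glsl::bitfieldInsert/bitfieldExtract directly.
+uint examplePackPseudoStencil(in float alpha, in uint mainObjectIdx)
+{
+    const uint quantizedAlpha = uint(alpha * 255.f) & ((1u << AlphaBits) - 1u); // low AlphaBits bits
+    return quantizedAlpha | (mainObjectIdx << AlphaBits); // assumes mainObjectIdx <= MaxIndexableMainObjects
+}
+void exampleUnpackPseudoStencil(in uint packedData, out float alpha, out uint mainObjectIdx)
+{
+    alpha = float(packedData & ((1u << AlphaBits) - 1u)) / 255.f;
+    mainObjectIdx = packedData >> AlphaBits; // the index fills all remaining high bits
+}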
+template
+float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 textureColor, bool colorFromTexture);
+
+template<>
+float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 localTextureColor, bool colorFromTexture)
+{
+    uint32_t styleIdx = loadMainObject(currentMainObjectIdx).styleIdx;
+    if (!colorFromTexture)
+    {
+        float32_t4 col = loadLineStyle(styleIdx).color;
+        col.w *= localAlpha;
+        return float4(col);
+    }
+    else
+        return float4(localTextureColor, localAlpha);
+}
+template<>
+float32_t4 calculateFinalColor(const uint2 fragCoord, const float localAlpha, const uint32_t currentMainObjectIdx, float3 localTextureColor, bool colorFromTexture)
+{
+    float32_t4 color;
+    nbl::hlsl::spirv::beginInvocationInterlockEXT();
+
+    const uint32_t packedData = pseudoStencil[fragCoord];
+
+    const uint32_t localQuantizedAlpha = (uint32_t)(localAlpha * 255.f);
+    const uint32_t storedQuantizedAlpha = nbl::hlsl::glsl::bitfieldExtract(packedData,0,AlphaBits);
+    const uint32_t storedMainObjectIdx = nbl::hlsl::glsl::bitfieldExtract(packedData,AlphaBits,MainObjectIdxBits);
+    // if geomID has changed, we resolve the SDF alpha (draw using blend), else accumulate
+    const bool differentMainObject = currentMainObjectIdx != storedMainObjectIdx; // meaning current pixel's main object is different than what is already stored
+    const bool resolve = differentMainObject && storedMainObjectIdx != InvalidMainObjectIdx;
+    uint32_t toResolveStyleIdx = InvalidStyleIdx;
+
+    // load from colorStorage only if we want to resolve color from texture instead of style
+    // sampling from colorStorage needs to happen in critical section because another fragment may also want to store into it at the same time + need to happen before store
+    if (resolve)
+    {
+        toResolveStyleIdx = loadMainObject(storedMainObjectIdx).styleIdx;
+        if (toResolveStyleIdx == InvalidStyleIdx) // if style idx to resolve is invalid, then it means we should resolve from color
+            color = float32_t4(unpackR11G11B10_UNORM(colorStorage[fragCoord]), 1.0f);
+    }
+
+    // If current localAlpha is higher than what is already stored in pseudoStencil we will update the value in pseudoStencil or the color in colorStorage, this is equivalent to a programmable blending MAX operation.
+    // OR if the previous pixel has a different ID than the current one's (i.e. previous is either empty/invalid or a different mainObject), we should update our alpha and color storages.
+ if (differentMainObject || localQuantizedAlpha > storedQuantizedAlpha) + { + pseudoStencil[fragCoord] = nbl::hlsl::glsl::bitfieldInsert(localQuantizedAlpha,currentMainObjectIdx,AlphaBits,MainObjectIdxBits); + if (colorFromTexture) // writing color from texture + colorStorage[fragCoord] = packR11G11B10_UNORM(localTextureColor); + } + + nbl::hlsl::spirv::endInvocationInterlockEXT(); + + if (!resolve) + discard; + + // draw with previous geometry's style's color or stored in texture buffer :kek: + // we don't need to load the style's color in critical section because we've already retrieved the style index from the stored main obj + if (toResolveStyleIdx != InvalidStyleIdx) // if toResolveStyleIdx is valid then that means our resolved color should come from line style + { + color = loadLineStyle(toResolveStyleIdx).color; + gammaUncorrect(color.rgb); // want to output to SRGB without gamma correction + } + + color.a *= float(storedQuantizedAlpha) / 255.f; + + return color; +} + +bool isLineValid(in nbl::hlsl::shapes::Line l) +{ + bool isAnyLineComponentNaN = any(bool4(isnan(l.P0.x), isnan(l.P0.y), isnan(l.P1.x), isnan(l.P1.y))); + if (isAnyLineComponentNaN) + return false; + return true; +} + +[[vk::spvexecutionmode(spv::ExecutionModePixelInterlockOrderedEXT)]] +[shader("pixel")] +float4 fragMain(PSInput input) : SV_TARGET +{ + float localAlpha = 0.0f; + float3 textureColor = float3(0, 0, 0); // color sampled from a texture + + ObjectType objType = input.getObjType(); + const uint32_t currentMainObjectIdx = input.getMainObjectIdx(); + const MainObject mainObj = loadMainObject(currentMainObjectIdx); + float worldToScreenRatio = input.getCurrentWorldToScreenRatio(); + + if (pc.isDTMRendering) + { + DTMSettings dtmSettings = loadDTMSettings(mainObj.dtmSettingsIdx); + + float3 triangleVertices[3]; + triangleVertices[0] = input.getScreenSpaceVertexAttribs(0); + triangleVertices[1] = input.getScreenSpaceVertexAttribs(1); + triangleVertices[2] = input.getScreenSpaceVertexAttribs(2); + + const float3 baryCoord = dtm::calculateDTMTriangleBarycentrics(triangleVertices[0].xy, triangleVertices[1].xy, triangleVertices[2].xy, input.position.xy); + + float height = baryCoord.x * triangleVertices[0].z + baryCoord.y * triangleVertices[1].z + baryCoord.z * triangleVertices[2].z; + float heightDeriv = fwidth(height); + + float4 dtmColor = float4(0.0f, 0.0f, 0.0f, 0.0f); + + if (dtmSettings.drawOutlineEnabled()) // TODO: do i need 'height' paramter here? 
+ dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMOutlineColor(dtmSettings.outlineLineStyleIdx, worldToScreenRatio, triangleVertices, input.position.xy)); + if (dtmSettings.drawContourEnabled()) + { + for(uint32_t i = 0; i < dtmSettings.contourSettingsCount; ++i) // TODO: should reverse the order with blendUnder + { + LineStyle contourStyle = loadLineStyle(dtmSettings.contourSettings[i].contourLineStyleIdx); + float sdf = dtm::calculateDTMContourSDF(dtmSettings.contourSettings[i], contourStyle, worldToScreenRatio, triangleVertices, input.position.xy, height); + float4 contourColor = contourStyle.color; + contourColor.a *= 1.0f - smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, sdf); + dtmColor = dtm::blendUnder(dtmColor, contourColor); + } + } + if (dtmSettings.drawHeightShadingEnabled()) + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, triangleVertices, heightDeriv, input.position.xy, height)); + + textureColor = dtmColor.rgb / dtmColor.a; + localAlpha = dtmColor.a; + + // because final color is premultiplied by alpha + textureColor = dtmColor.rgb / dtmColor.a; + + gammaUncorrect(textureColor); // want to output to SRGB without gamma correction + return calculateFinalColor(uint2(input.position.xy), localAlpha, currentMainObjectIdx, textureColor, true); + } + else + { + // figure out local alpha with sdf + if (objType == ObjectType::LINE || objType == ObjectType::QUAD_BEZIER || objType == ObjectType::POLYLINE_CONNECTOR) + { + float distance = nbl::hlsl::numeric_limits::max; + if (objType == ObjectType::LINE) + { + const float2 start = input.getLineStart(); + const float2 end = input.getLineEnd(); + const uint32_t styleIdx = mainObj.styleIdx; + const float thickness = input.getLineThickness(); + const float phaseShift = input.getCurrentPhaseShift(); + const float stretch = input.getPatternStretch(); + + nbl::hlsl::shapes::Line lineSegment = nbl::hlsl::shapes::Line::construct(start, end); + + LineStyle style = loadLineStyle(styleIdx); + + if (!style.hasStipples() || stretch == InvalidStyleStretchValue) + { + distance = ClippedSignedDistance< nbl::hlsl::shapes::Line >::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag); + } + else + { + nbl::hlsl::shapes::Line::ArcLengthCalculator arcLenCalc = nbl::hlsl::shapes::Line::ArcLengthCalculator::construct(lineSegment); + LineStyleClipper clipper = LineStyleClipper::construct(loadLineStyle(styleIdx), lineSegment, arcLenCalc, phaseShift, stretch, worldToScreenRatio); + distance = ClippedSignedDistance, LineStyleClipper>::sdf(lineSegment, input.position.xy, thickness, style.isRoadStyleFlag, clipper); + } + } + else if (objType == ObjectType::QUAD_BEZIER) + { + nbl::hlsl::shapes::Quadratic quadratic = input.getQuadratic(); + nbl::hlsl::shapes::Quadratic::ArcLengthCalculator arcLenCalc = input.getQuadraticArcLengthCalculator(); + + const uint32_t styleIdx = mainObj.styleIdx; + const float thickness = input.getLineThickness(); + const float phaseShift = input.getCurrentPhaseShift(); + const float stretch = input.getPatternStretch(); + + LineStyle style = loadLineStyle(styleIdx); + if (!style.hasStipples() || stretch == InvalidStyleStretchValue) + { + distance = ClippedSignedDistance< nbl::hlsl::shapes::Quadratic >::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag); + } + else + { + BezierStyleClipper clipper = BezierStyleClipper::construct(loadLineStyle(styleIdx), quadratic, arcLenCalc, phaseShift, stretch, worldToScreenRatio ); + distance = 
ClippedSignedDistance, BezierStyleClipper>::sdf(quadratic, input.position.xy, thickness, style.isRoadStyleFlag, clipper); + } + } + else if (objType == ObjectType::POLYLINE_CONNECTOR) + { + const float2 P = input.position.xy - input.getPolylineConnectorCircleCenter(); + distance = miterSDF( + P, + input.getLineThickness(), + input.getPolylineConnectorTrapezoidStart(), + input.getPolylineConnectorTrapezoidEnd(), + input.getPolylineConnectorTrapezoidLongBase(), + input.getPolylineConnectorTrapezoidShortBase()); + + } + localAlpha = 1.0f - smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, distance); + } + else if (objType == ObjectType::CURVE_BOX) + { + const float minorBBoxUV = input.getMinorBBoxUV(); + const float majorBBoxUV = input.getMajorBBoxUV(); + + nbl::hlsl::math::equations::Quadratic curveMinMinor = input.getCurveMinMinor(); + nbl::hlsl::math::equations::Quadratic curveMinMajor = input.getCurveMinMajor(); + nbl::hlsl::math::equations::Quadratic curveMaxMinor = input.getCurveMaxMinor(); + nbl::hlsl::math::equations::Quadratic curveMaxMajor = input.getCurveMaxMajor(); + + // TODO(Optimization): Can we ignore this majorBBoxUV clamp and rely on the t clamp that happens next? then we can pass `PrecomputedRootFinder`s instead of computing the values per pixel. + nbl::hlsl::math::equations::Quadratic minCurveEquation = nbl::hlsl::math::equations::Quadratic::construct(curveMinMajor.a, curveMinMajor.b, curveMinMajor.c - clamp(majorBBoxUV, 0.0, 1.0)); + nbl::hlsl::math::equations::Quadratic maxCurveEquation = nbl::hlsl::math::equations::Quadratic::construct(curveMaxMajor.a, curveMaxMajor.b, curveMaxMajor.c - clamp(majorBBoxUV, 0.0, 1.0)); + + const float minT = clamp(PrecomputedRootFinder::construct(minCurveEquation).computeRoots(), 0.0, 1.0); + const float minEv = curveMinMinor.evaluate(minT); + + const float maxT = clamp(PrecomputedRootFinder::construct(maxCurveEquation).computeRoots(), 0.0, 1.0); + const float maxEv = curveMaxMinor.evaluate(maxT); + + const bool insideMajor = majorBBoxUV >= 0.0 && majorBBoxUV <= 1.0; + const bool insideMinor = minorBBoxUV >= minEv && minorBBoxUV <= maxEv; + + if (insideMinor && insideMajor) + { + localAlpha = 1.0; + } + else + { + // Find the true SDF of a hatch box boundary which is bounded by two curves, It requires knowing the distance from the current UV to the closest point on bounding curves and the limiting lines (in major direction) + // We also keep track of distance vector (minor, major) to convert to screenspace distance for anti-aliasing with screenspace aaFactor + const float InvalidT = nbl::hlsl::numeric_limits::max; + const float MAX_DISTANCE_SQUARED = nbl::hlsl::numeric_limits::max; + + const float2 boxScreenSpaceSize = input.getCurveBoxScreenSpaceSize(); + + + float closestDistanceSquared = MAX_DISTANCE_SQUARED; + const float2 pos = float2(minorBBoxUV, majorBBoxUV) * boxScreenSpaceSize; + + if (minorBBoxUV < minEv) + { + // DO SDF of Min Curve + nbl::hlsl::shapes::Quadratic minCurve = nbl::hlsl::shapes::Quadratic::construct( + float2(curveMinMinor.a, curveMinMajor.a) * boxScreenSpaceSize, + float2(curveMinMinor.b, curveMinMajor.b) * boxScreenSpaceSize, + float2(curveMinMinor.c, curveMinMajor.c) * boxScreenSpaceSize); + + nbl::hlsl::shapes::Quadratic::Candidates candidates = minCurve.getClosestCandidates(pos); + [[unroll(nbl::hlsl::shapes::Quadratic::MaxCandidates)]] + for (uint32_t i = 0; i < nbl::hlsl::shapes::Quadratic::MaxCandidates; i++) + { + candidates[i] = clamp(candidates[i], 0.0, 1.0); + const float2 distVector 
= minCurve.evaluate(candidates[i]) - pos; + const float candidateDistanceSquared = dot(distVector, distVector); + if (candidateDistanceSquared < closestDistanceSquared) + closestDistanceSquared = candidateDistanceSquared; + } + } + else if (minorBBoxUV > maxEv) + { + // Do SDF of Max Curve + nbl::hlsl::shapes::Quadratic maxCurve = nbl::hlsl::shapes::Quadratic::construct( + float2(curveMaxMinor.a, curveMaxMajor.a) * boxScreenSpaceSize, + float2(curveMaxMinor.b, curveMaxMajor.b) * boxScreenSpaceSize, + float2(curveMaxMinor.c, curveMaxMajor.c) * boxScreenSpaceSize); + nbl::hlsl::shapes::Quadratic::Candidates candidates = maxCurve.getClosestCandidates(pos); + [[unroll(nbl::hlsl::shapes::Quadratic::MaxCandidates)]] + for (uint32_t i = 0; i < nbl::hlsl::shapes::Quadratic::MaxCandidates; i++) + { + candidates[i] = clamp(candidates[i], 0.0, 1.0); + const float2 distVector = maxCurve.evaluate(candidates[i]) - pos; + const float candidateDistanceSquared = dot(distVector, distVector); + if (candidateDistanceSquared < closestDistanceSquared) + closestDistanceSquared = candidateDistanceSquared; + } + } + + if (!insideMajor) + { + const bool minLessThanMax = minEv < maxEv; + float2 majorDistVector = float2(MAX_DISTANCE_SQUARED, MAX_DISTANCE_SQUARED); + if (majorBBoxUV > 1.0) + { + const float2 minCurveEnd = float2(minEv, 1.0) * boxScreenSpaceSize; + if (minLessThanMax) + majorDistVector = sdLineDstVec(pos, minCurveEnd, float2(maxEv, 1.0) * boxScreenSpaceSize); + else + majorDistVector = pos - minCurveEnd; + } + else + { + const float2 minCurveStart = float2(minEv, 0.0) * boxScreenSpaceSize; + if (minLessThanMax) + majorDistVector = sdLineDstVec(pos, minCurveStart, float2(maxEv, 0.0) * boxScreenSpaceSize); + else + majorDistVector = pos - minCurveStart; + } + + const float majorDistSq = dot(majorDistVector, majorDistVector); + if (majorDistSq < closestDistanceSquared) + closestDistanceSquared = majorDistSq; + } + + const float dist = sqrt(closestDistanceSquared); + localAlpha = 1.0f - smoothstep(0.0, globals.antiAliasingFactor, dist); + } + + LineStyle style = loadLineStyle(mainObj.styleIdx); + uint32_t textureId = asuint(style.screenSpaceLineWidth); + if (textureId != InvalidTextureIndex) + { + // For Hatch fiils we sample the first mip as we don't fill the others, because they are constant in screenspace and render as expected + // If later on we decided that we can have different sizes here, we should do computations similar to FONT_GLYPH + float3 msdfSample = msdfTextures.SampleLevel(msdfSampler, float3(frac(input.position.xy / HatchFillMSDFSceenSpaceSize), float(textureId)), 0.0).xyz; + float msdf = nbl::hlsl::text::msdfDistance(msdfSample, MSDFPixelRange * HatchFillMSDFSceenSpaceSize / MSDFSize); + localAlpha *= 1.0f - smoothstep(-globals.antiAliasingFactor / 2.0f, globals.antiAliasingFactor / 2.0f, msdf); + } + } + else if (objType == ObjectType::FONT_GLYPH) + { + const float2 uv = input.getFontGlyphUV(); + const uint32_t textureId = input.getFontGlyphTextureId(); + + if (textureId != InvalidTextureIndex) + { + float mipLevel = msdfTextures.CalculateLevelOfDetail(msdfSampler, uv); + float3 msdfSample = msdfTextures.SampleLevel(msdfSampler, float3(uv, float(textureId)), mipLevel); + float msdf = nbl::hlsl::text::msdfDistance(msdfSample, input.getFontGlyphPxRange()); + /* + explaining "*= exp2(max(mipLevel,0.0))" + Each mip level has constant MSDFPixelRange + Which essentially makes the msdfSamples here (Harware Sampled) have different scales per mip + As we go up 1 mip level, the msdf distance 
should be multiplied by 2.0 + While this makes total sense for NEAREST mip sampling when mipLevel is an integer and only one mip is being sampled. + It's a bit complex when it comes to trilinear filtering (LINEAR mip sampling), but it works in practice! + + Alternatively you can think of it as doing this instead: + localAlpha = smoothstep(+globals.antiAliasingFactor / exp2(max(mipLevel,0.0)), 0.0, msdf); + Which is reducing the aa feathering as we go up the mip levels. + to avoid aa feathering of the MAX_MSDF_DISTANCE_VALUE to be less than aa factor and eventually color it and cause greyed out area around the main glyph + */ + msdf *= exp2(max(mipLevel,0.0)); + + LineStyle style = loadLineStyle(mainObj.styleIdx); + const float screenPxRange = input.getFontGlyphPxRange() / MSDFPixelRangeHalf; + const float bolden = style.worldSpaceLineWidth * screenPxRange; // worldSpaceLineWidth is actually boldenInPixels, aliased TextStyle with LineStyle + localAlpha = 1.0f - smoothstep(-globals.antiAliasingFactor / 2.0f + bolden, globals.antiAliasingFactor / 2.0f + bolden, msdf); + } + } + else if (objType == ObjectType::STATIC_IMAGE) + { + const float2 uv = input.getImageUV(); + const uint32_t textureId = input.getImageTextureId(); + + if (textureId != InvalidTextureIndex) + { + float4 colorSample = textures[NonUniformResourceIndex(textureId)].Sample(textureSampler, float2(uv.x, uv.y)); + textureColor = colorSample.rgb; + localAlpha = colorSample.a; + } + } + else if (objType == ObjectType::GRID_DTM) + { + DTMSettings dtmSettings = loadDTMSettings(mainObj.dtmSettingsIdx); + + if (!dtmSettings.drawContourEnabled() && !dtmSettings.drawOutlineEnabled() && !dtmSettings.drawHeightShadingEnabled()) + discard; + + float2 uv = input.getImageUV(); + const uint32_t textureId = input.getGridDTMHeightTextureID(); + + float2 gridExtents = input.getGridDTMScreenSpaceGridExtents(); + const float cellWidth = input.getGridDTMScreenSpaceCellWidth(); + // TODO: I think we can get it from the height map size if texture is valid?!, better if it comes directly from CPU side, vertex shader or something, division + round to integer is error-prone for large integer values + float2 gridDimensions = round(gridExtents / cellWidth); // texturesU32[NonUniformResourceIndex(textureId)].GetDimensions()? 
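+            // NOTE: a possible sketch of the TODO above, assuming texturesU32[] is a bindless Texture2D array of height maps
+            // and that the height map stores one texel per cell corner (so cells = texels - 1); both assumptions need verifying:
+            //   uint2 heightMapSize;
+            //   texturesU32[NonUniformResourceIndex(textureId)].GetDimensions(heightMapSize.x, heightMapSize.y);
+            //   gridDimensions = float2(heightMapSize - uint2(1u, 1u));
+            // which would sidestep the large-value division + rounding issue whenever textureId != InvalidTextureIndex.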
+ + float2 gridSpacePos = uv * gridExtents; + float2 gridSpacePosDivGridCellWidth = gridSpacePos / cellWidth; + float2 currentCellCoord; + { + currentCellCoord.x = floor(gridSpacePosDivGridCellWidth.x); + currentCellCoord.y = floor(gridSpacePosDivGridCellWidth.y); + } + + // grid consists of square cells and cells are divided into two triangles: + // depending on mode it is + // either: or: + // v2a-------v1 v0-------v2b + // | A / | | \ B | + // | / | | \ | + // | / B | | A \ | + // v0-------v2b v2a-------v1 + // + + const bool gridOnly = textureId == InvalidTextureIndex && dtmSettings.drawOutlineEnabled(); + if (gridOnly) + { + nbl::hlsl::shapes::Line outlineLineSegments[2]; + + const float halfCellWidth = cellWidth * 0.5f; + const float2 horizontalBounds = float2(0.0f, gridExtents.y); + const float2 verticalBounds = float2(0.0f, gridExtents.x); + float2 nearestLineRemainingCoords = int2((gridSpacePos + halfCellWidth) / cellWidth) * cellWidth; + // shift lines outside of the grid to a bound + nearestLineRemainingCoords.x = clamp(nearestLineRemainingCoords.x, verticalBounds.x, verticalBounds.y); + nearestLineRemainingCoords.y = clamp(nearestLineRemainingCoords.y, horizontalBounds.x, horizontalBounds.y); + + // find the nearest horizontal line + outlineLineSegments[0].P0 = float32_t2(verticalBounds.x, nearestLineRemainingCoords.y); + outlineLineSegments[0].P1 = float32_t2(verticalBounds.y, nearestLineRemainingCoords.y); + // find the nearest vertical line + outlineLineSegments[1].P0 = float32_t2(nearestLineRemainingCoords.x, horizontalBounds.x); + outlineLineSegments[1].P1 = float32_t2(nearestLineRemainingCoords.x, horizontalBounds.y); + + LineStyle outlineStyle = loadLineStyle(dtmSettings.outlineLineStyleIdx); + float sdf = dtm::calculateLineSDF(outlineStyle, worldToScreenRatio, outlineLineSegments[0], gridSpacePos, 0.0f); + sdf = min(sdf, dtm::calculateLineSDF(outlineStyle, worldToScreenRatio, outlineLineSegments[1], gridSpacePos, 0.0f)); + + float4 dtmColor = outlineStyle.color; + dtmColor.a *= 1.0f - smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, sdf); + + textureColor = dtmColor.rgb; + localAlpha = dtmColor.a; + } + else + { + // calculate localUV and figure out the 4 cells we're gonna do sdf with + float2 localUV = gridSpacePosDivGridCellWidth - currentCellCoord; // TODO: use fmod instead? + int2 roundedLocalUV = round(localUV); + float2 offset = roundedLocalUV * 2.0f - 1.0f; + + // Triangles + const uint32_t MaxTrianglesToDoSDFWith = 8u; + dtm::GridDTMTriangle triangles[MaxTrianglesToDoSDFWith]; + float interpolatedHeights[MaxTrianglesToDoSDFWith]; // these are height based on barycentric interpolation of current pixel with all the triangles above + uint32_t triangleCount = 0u; + + // We can do sdf for up to 4 maximum lines for the outlines, 2 belong to the current cell and the other 2 belong to the opposite neighbouring cell + /* Example: + | + | opposite cell + | + ------+------ + | + current cell | + | + + `+` is the current corner and we draw the 4 lines leading up to it. 
+ */ + + // curr cell horizontal, curr cell vertical, opposite cell horizontal, opposite cell vertical + bool4 linesValidity = bool4(false, false, false, false); + + [unroll] + for (int i = 0; i < 2; ++i) + { + for (int j = 0; j < 2; ++j) + { + float2 cellCoord = currentCellCoord + float2(i, j) * offset; + const bool isCellWithinRange = + cellCoord.x >= 0.0f && cellCoord.y >= 0.0f && + cellCoord.x < gridDimensions.x && cellCoord.y < gridDimensions.y; + if (isCellWithinRange) + { + dtm::GridDTMHeightMapData heightData = dtm::retrieveGridDTMCellDataFromHeightMap(gridDimensions, cellCoord, texturesU32[NonUniformResourceIndex(textureId)]); + dtm::GridDTMCell gridCellFormed = dtm::calculateCellTriangles(heightData, cellCoord, cellWidth); + if (gridCellFormed.validA) + triangles[triangleCount++] = gridCellFormed.triangleA; + if (gridCellFormed.validB) + triangles[triangleCount++] = gridCellFormed.triangleB; + + // we just need to check and set lines validity + // Formulas to get current cell's horizontal and vertical lines validity + // All this to avoid extra texel fetch to check validity and use the Gather result instead :D + // TODO: Only 0,0 and 1,1 is enough to check if cells are valid, but other checks required in case current cell is invalid (out of bounds) but it's line is valid + if (i == 0 && j == 0) + { + // current cell's line validity + linesValidity[0] = !isInvalidGridDtmHeightValue(heightData.heights[2 - (roundedLocalUV.y * 2)]) && !isInvalidGridDtmHeightValue(heightData.heights[3 - (roundedLocalUV.y * 2)]); + linesValidity[1] = !isInvalidGridDtmHeightValue(heightData.heights[roundedLocalUV.x ^ 0]) && !isInvalidGridDtmHeightValue(heightData.heights[roundedLocalUV.x ^ 3]); + } + if (i == 1 && j == 0) + { + linesValidity[1] = !isInvalidGridDtmHeightValue(heightData.heights[roundedLocalUV.x ^ 1]) && !isInvalidGridDtmHeightValue(heightData.heights[roundedLocalUV.x ^ 2]); + linesValidity[2] = !isInvalidGridDtmHeightValue(heightData.heights[2 - (roundedLocalUV.y * 2)]) && !isInvalidGridDtmHeightValue(heightData.heights[3 - (roundedLocalUV.y * 2)]);; + } + if (i == 0 && j == 1) + { + linesValidity[0] = !isInvalidGridDtmHeightValue(heightData.heights[roundedLocalUV.y * 2]) && !isInvalidGridDtmHeightValue(heightData.heights[roundedLocalUV.y * 2 + 1]); + linesValidity[3] = !isInvalidGridDtmHeightValue(heightData.heights[roundedLocalUV.x ^ 0]) && !isInvalidGridDtmHeightValue(heightData.heights[roundedLocalUV.x ^ 3]); + } + if (i == 1 && j == 1) + { + linesValidity[2] = !isInvalidGridDtmHeightValue(heightData.heights[roundedLocalUV.y * 2]) && !isInvalidGridDtmHeightValue(heightData.heights[roundedLocalUV.y * 2 + 1]); + linesValidity[3] = !isInvalidGridDtmHeightValue(heightData.heights[roundedLocalUV.x ^ 1]) && !isInvalidGridDtmHeightValue(heightData.heights[roundedLocalUV.x ^ 2]); + } + } + } + } + + const uint32_t InvalidTriangleIndex = nbl::hlsl::numeric_limits::max; + uint32_t currentTriangleIndex = InvalidTriangleIndex; + // For height shading, merge this loop with the previous one, because baryCoord all positive means point inside triangle and we can use that to figure out the triangle we want to do height shading for. 
+ for (int t = 0; t < triangleCount; ++t) + { + dtm::GridDTMTriangle tri = triangles[t]; + const float3 baryCoord = dtm::calculateDTMTriangleBarycentrics(tri.vertices[0].xy, tri.vertices[1].xy, tri.vertices[2].xy, gridSpacePos); + interpolatedHeights[t] = baryCoord.x * tri.vertices[0].z + baryCoord.y * tri.vertices[1].z + baryCoord.z * tri.vertices[2].z; + + if (currentTriangleIndex == InvalidTriangleIndex) + { + const float minValue = 0.0f - nbl::hlsl::numeric_limits::epsilon; + const float maxValue = 1.0f + nbl::hlsl::numeric_limits::epsilon; + if (all(baryCoord >= minValue) && all(baryCoord <= maxValue)) + currentTriangleIndex = t; + } + } + + float4 dtmColor = float4(0.0f, 0.0f, 0.0f, 0.0f); + if (dtmSettings.drawContourEnabled()) + { + for (int i = dtmSettings.contourSettingsCount-1u; i >= 0; --i) + { + LineStyle contourStyle = loadLineStyle(dtmSettings.contourSettings[i].contourLineStyleIdx); + float sdf = nbl::hlsl::numeric_limits::max; + for (int t = 0; t < triangleCount; ++t) + { + const dtm::GridDTMTriangle tri = triangles[t]; + const float currentInterpolatedHeight = interpolatedHeights[t]; + sdf = min(sdf, dtm::calculateDTMContourSDF(dtmSettings.contourSettings[i], contourStyle, worldToScreenRatio, tri.vertices, gridSpacePos, currentInterpolatedHeight)); + } + + float4 contourColor = contourStyle.color; contourColor.a = 0.5f; + contourColor.a *= 1.0f - smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, sdf); + dtmColor = dtm::blendUnder(dtmColor, contourColor); + } + } + + if (dtmSettings.drawOutlineEnabled()) + { + float sdf = nbl::hlsl::numeric_limits::max; + LineStyle outlineStyle = loadLineStyle(dtmSettings.outlineLineStyleIdx); + nbl::hlsl::shapes::Line lineSegment; + + // Doing SDF of outlines as if cooridnate system is centered around the nearest corner of the cell + float2 localCellSpaceOrigin = (currentCellCoord + float2(roundedLocalUV)) * cellWidth; // in local cell space, origin + float2 localGridTopLeftCorner = -localCellSpaceOrigin; // top left in local cell space: topLeft is (0, 0) implicitly + float2 localFragPos = gridSpacePos - localCellSpaceOrigin; // we compute the current fragment pos, in local cell space + + float phaseShift = 0.0f; + const bool hasStipples = outlineStyle.hasStipples(); + const float rcpPattenLenScreenSpace = outlineStyle.reciprocalStipplePatternLen * worldToScreenRatio; + // Drawing the lines that form a plus sign around the current corner: + if (linesValidity[0]) + { + // this cells horizontal line + lineSegment.P0 = float2((offset.x > 0) ? -offset.x * cellWidth : 0.0f, 0.0f); + lineSegment.P1 = float2((offset.x < 0) ? -offset.x * cellWidth : 0.0f, 0.0f); + phaseShift = fract((lineSegment.P0.x - localGridTopLeftCorner.x) * rcpPattenLenScreenSpace); + sdf = min(sdf, dtm::calculateLineSDF(outlineStyle, worldToScreenRatio, lineSegment, localFragPos, phaseShift)); + } + if (linesValidity[1]) + { + // this cells vertical line + lineSegment.P0 = float2(0.0f, (offset.y > 0) ? -offset.y * cellWidth : 0.0f); + lineSegment.P1 = float2(0.0f, (offset.y < 0) ? -offset.y * cellWidth : 0.0f); + phaseShift = fract((lineSegment.P0.y - localGridTopLeftCorner.y) * rcpPattenLenScreenSpace); + sdf = min(sdf, dtm::calculateLineSDF(outlineStyle, worldToScreenRatio, lineSegment, localFragPos, phaseShift)); + } + if (linesValidity[2]) + { + // opposite cell horizontal line + lineSegment.P0 = float2((offset.x < 0) ? offset.x * cellWidth : 0.0f, 0.0f); + lineSegment.P1 = float2((offset.x > 0) ? 
offset.x * cellWidth : 0.0f, 0.0f); + phaseShift = fract((lineSegment.P0.x - localGridTopLeftCorner.x) * rcpPattenLenScreenSpace); + sdf = min(sdf, dtm::calculateLineSDF(outlineStyle, worldToScreenRatio, lineSegment, localFragPos, phaseShift)); + } + if (linesValidity[3]) + { + // opposite cell vertical line + lineSegment.P0 = float2(0.0f, (offset.y < 0) ? offset.y * cellWidth : 0.0f); + lineSegment.P1 = float2(0.0f, (offset.y > 0) ? offset.y * cellWidth : 0.0f); + phaseShift = fract((lineSegment.P0.y - localGridTopLeftCorner.y) * rcpPattenLenScreenSpace); + sdf = min(sdf, dtm::calculateLineSDF(outlineStyle, worldToScreenRatio, lineSegment, localFragPos, phaseShift)); + } + + float4 outlineColor = outlineStyle.color; + outlineColor.a *= 1.0f - smoothstep(-globals.antiAliasingFactor, globals.antiAliasingFactor, sdf); + dtmColor = dtm::blendUnder(dtmColor, outlineColor); + } + + if (dtmSettings.drawHeightShadingEnabled()) + { + if (currentTriangleIndex != InvalidTriangleIndex) + { + dtm::GridDTMTriangle currentTriangle = triangles[currentTriangleIndex]; + float heightDeriv = fwidth(interpolatedHeights[currentTriangleIndex]); + dtmColor = dtm::blendUnder(dtmColor, dtm::calculateDTMHeightColor(dtmSettings.heightShadingSettings, currentTriangle.vertices, heightDeriv, gridSpacePos, interpolatedHeights[currentTriangleIndex])); + } + else + { + // TODO[Future]: Average color of nearby valid triangles (dtm height function should return color + polygon sdf) + } + + } + + textureColor = dtmColor.rgb / dtmColor.a; + localAlpha = dtmColor.a; + } + + } + else if (objType == ObjectType::STREAMED_IMAGE) + { + const float2 uv = input.getImageUV(); + const uint32_t textureId = input.getImageTextureId(); + + if (textureId != InvalidTextureIndex) + { + float4 colorSample = textures[NonUniformResourceIndex(textureId)].Sample(textureSampler, float2(uv.x, uv.y)); + textureColor = colorSample.rgb; + localAlpha = colorSample.a; + } + } + + + if (localAlpha <= 0) + discard; + + uint2 fragCoord = uint2(input.position.xy); + const bool colorFromTexture = objType == ObjectType::STREAMED_IMAGE || objType == ObjectType::STATIC_IMAGE || objType == ObjectType::GRID_DTM; + + return calculateFinalColor(fragCoord, localAlpha, currentMainObjectIdx, textureColor, colorFromTexture); + } +} diff --git a/62_CAD/shaders/main_pipeline/fragment_shader_debug.hlsl b/62_CAD/shaders/main_pipeline/fragment_shader_debug.hlsl index 7dba46dd0..2955d22fe 100644 --- a/62_CAD/shaders/main_pipeline/fragment_shader_debug.hlsl +++ b/62_CAD/shaders/main_pipeline/fragment_shader_debug.hlsl @@ -1,9 +1,6 @@ struct PSInputDebug { float4 position : SV_Position; - [[vk::location(0)]] float4 color : COLOR; - [[vk::location(1)]] nointerpolation float4 start_end : COLOR1; - [[vk::location(2)]] nointerpolation uint3 lineWidth_eccentricity_objType : COLOR2; }; [shader("pixel")] diff --git a/62_CAD/shaders/main_pipeline/line_style.hlsl b/62_CAD/shaders/main_pipeline/line_style.hlsl new file mode 100644 index 000000000..f50127667 --- /dev/null +++ b/62_CAD/shaders/main_pipeline/line_style.hlsl @@ -0,0 +1,297 @@ +#ifndef _CAD_EXAMPLE_LINE_STYLE_HLSL_INCLUDED_ +#define _CAD_EXAMPLE_LINE_STYLE_HLSL_INCLUDED_ + +#include +#include + +// for usage in upper_bound function +struct StyleAccessor +{ + LineStyle style; + using value_type = float; + + float operator[](const uint32_t ix) + { + return style.getStippleValue(ix); + } +}; + +template +struct StyleClipper +{ + using float_t = typename CurveType::scalar_t; + using float_t2 = typename CurveType::float_t2; + 
using float_t3 = typename CurveType::float_t3; + NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.000001; + + static StyleClipper construct( + LineStyle style, + CurveType curve, + typename CurveType::ArcLengthCalculator arcLenCalc, + float phaseShift, + float stretch, + float worldToScreenRatio) + { + StyleClipper ret = { style, curve, arcLenCalc, phaseShift, stretch, worldToScreenRatio, 0.0f, 0.0f, 0.0f, 0.0f }; + + // values for non-uniform stretching with a rigid segment + if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) + { + // rigidSegment info in old non stretched pattern + ret.rigidSegmentStart = (style.rigidSegmentIdx >= 1u) ? style.getStippleValue(style.rigidSegmentIdx - 1u) : 0.0f; + ret.rigidSegmentEnd = (style.rigidSegmentIdx < style.stipplePatternSize) ? style.getStippleValue(style.rigidSegmentIdx) : 1.0f; + ret.rigidSegmentLen = ret.rigidSegmentEnd - ret.rigidSegmentStart; + // stretch value for non rigid segments + ret.nonRigidSegmentStretchValue = (stretch - ret.rigidSegmentLen) / (1.0f - ret.rigidSegmentLen); + // rigidSegment info to new stretched pattern + ret.rigidSegmentStart *= ret.nonRigidSegmentStretchValue / stretch; // get the new normalized rigid segment start + ret.rigidSegmentLen /= stretch; // get the new rigid segment normalized len + ret.rigidSegmentEnd = ret.rigidSegmentStart + ret.rigidSegmentLen; // get the new normalized rigid segment end + } + else + { + ret.nonRigidSegmentStretchValue = stretch; + } + + return ret; + } + + // For non-uniform stretching with a rigid segment (the one segement that shouldn't stretch) the whole pattern changes + // instead of transforming each of the style.stipplePattern values (max 14 of them), we transform the normalized place in pattern + float getRealNormalizedPlaceInPattern(float normalizedPlaceInPattern) + { + if (style.rigidSegmentIdx != InvalidRigidSegmentIndex && stretch != 1.0f) + { + float ret = min(normalizedPlaceInPattern, rigidSegmentStart) / nonRigidSegmentStretchValue; // unstretch parts before rigid segment + ret += max(normalizedPlaceInPattern - rigidSegmentEnd, 0.0f) / nonRigidSegmentStretchValue; // unstretch parts after rigid segment + ret += max(min(rigidSegmentLen, normalizedPlaceInPattern - rigidSegmentStart), 0.0f); // unstretch parts inside rigid segment + ret *= stretch; + return ret; + } + else + { + return normalizedPlaceInPattern; + } + } + + float_t2 operator()(float_t t) + { + // basicaly 0.0 and 1.0 but with a guardband to discard outside the range + const float_t minT = 0.0 - 1.0; + const float_t maxT = 1.0 + 1.0; + + StyleAccessor styleAccessor = { style }; + const float_t reciprocalStretchedStipplePatternLen = style.reciprocalStipplePatternLen / stretch; + const float_t patternLenInScreenSpace = 1.0 / (worldToScreenRatio * style.reciprocalStipplePatternLen); + + const float_t arcLen = arcLenCalc.calcArcLen(t); + const float_t worldSpaceArcLen = arcLen * float_t(worldToScreenRatio); + float_t normalizedPlaceInPattern = frac(worldSpaceArcLen * reciprocalStretchedStipplePatternLen + phaseShift); + normalizedPlaceInPattern = getRealNormalizedPlaceInPattern(normalizedPlaceInPattern); + uint32_t patternIdx = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPattern); + + const float_t InvalidT = nbl::hlsl::numeric_limits::infinity; + float_t2 ret = float_t2(InvalidT, InvalidT); + + // odd patternIdx means a "no draw section" and current candidate should split into two nearest draw sections + const bool notInDrawSection = 
patternIdx & 0x1; + + // TODO[Erfan]: Disable this piece of code after clipping, and comment the reason, that the bezier start and end at 0.0 and 1.0 should be in drawable sections + float_t minDrawT = 0.0; + float_t maxDrawT = 1.0; + { + float_t normalizedPlaceInPatternBegin = frac(phaseShift); + normalizedPlaceInPatternBegin = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternBegin); + uint32_t patternIdxBegin = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternBegin); + const bool BeginInNonDrawSection = patternIdxBegin & 0x1; + + if (BeginInNonDrawSection) + { + float_t diffToRightDrawableSection = (patternIdxBegin == style.stipplePatternSize) ? 1.0 : styleAccessor[patternIdxBegin]; + diffToRightDrawableSection -= normalizedPlaceInPatternBegin; + float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * patternLenInScreenSpace * ((patternIdxBegin != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); + const float_t arcLenForT1 = 0.0 + scrSpcOffsetToArcLen1; + minDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, 0.0); + } + + // Completely in non-draw section -> clip away: + if (minDrawT >= 1.0) + return ret; + + const float_t arcLenEnd = arcLenCalc.calcArcLen(1.0); + const float_t worldSpaceArcLenEnd = arcLenEnd * float_t(worldToScreenRatio); + float_t normalizedPlaceInPatternEnd = frac(worldSpaceArcLenEnd * reciprocalStretchedStipplePatternLen + phaseShift); + normalizedPlaceInPatternEnd = getRealNormalizedPlaceInPattern(normalizedPlaceInPatternEnd); + uint32_t patternIdxEnd = nbl::hlsl::upper_bound(styleAccessor, 0, style.stipplePatternSize, normalizedPlaceInPatternEnd); + const bool EndInNonDrawSection = patternIdxEnd & 0x1; + + if (EndInNonDrawSection) + { + float_t diffToLeftDrawableSection = (patternIdxEnd == 0) ? 0.0 : styleAccessor[patternIdxEnd - 1]; + diffToLeftDrawableSection -= normalizedPlaceInPatternEnd; + float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * patternLenInScreenSpace * ((patternIdxEnd != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); + const float_t arcLenForT0 = arcLenEnd + scrSpcOffsetToArcLen0; + maxDrawT = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, 1.0); + } + } + + if (notInDrawSection) + { + float toScreenSpaceLen = patternLenInScreenSpace * ((patternIdx != style.rigidSegmentIdx) ? nonRigidSegmentStretchValue : 1.0); + + float_t diffToLeftDrawableSection = (patternIdx == 0) ? 0.0 : styleAccessor[patternIdx - 1]; + diffToLeftDrawableSection -= normalizedPlaceInPattern; + float_t scrSpcOffsetToArcLen0 = diffToLeftDrawableSection * toScreenSpaceLen; + const float_t arcLenForT0 = arcLen + scrSpcOffsetToArcLen0; + float_t t0 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT0, AccuracyThresholdT, t); + t0 = clamp(t0, minDrawT, maxDrawT); + + float_t diffToRightDrawableSection = (patternIdx == style.stipplePatternSize) ? 
1.0 : styleAccessor[patternIdx]; + diffToRightDrawableSection -= normalizedPlaceInPattern; + float_t scrSpcOffsetToArcLen1 = diffToRightDrawableSection * toScreenSpaceLen; + const float_t arcLenForT1 = arcLen + scrSpcOffsetToArcLen1; + float_t t1 = arcLenCalc.calcArcLenInverse(curve, minT, maxT, arcLenForT1, AccuracyThresholdT, t); + t1 = clamp(t1, minDrawT, maxDrawT); + + ret = float_t2(t0, t1); + } + else + { + t = clamp(t, minDrawT, maxDrawT); + ret = float_t2(t, t); + } + + return ret; + } + + LineStyle style; + CurveType curve; + typename CurveType::ArcLengthCalculator arcLenCalc; + float phaseShift; + float stretch; + float worldToScreenRatio; + // precomp value for non uniform stretching + float rigidSegmentStart; + float rigidSegmentEnd; + float rigidSegmentLen; + float nonRigidSegmentStretchValue; +}; + +typedef StyleClipper< nbl::hlsl::shapes::Quadratic > BezierStyleClipper; +typedef StyleClipper< nbl::hlsl::shapes::Line > LineStyleClipper; + +template +struct DefaultClipper +{ + using float_t2 = vector; + NBL_CONSTEXPR_STATIC_INLINE float_t AccuracyThresholdT = 0.0; + + static DefaultClipper construct() + { + DefaultClipper ret; + return ret; + } + + inline float_t2 operator()(const float_t t) + { + const float_t ret = clamp(t, 0.0, 1.0); + return float_t2(ret, ret); + } +}; + +template > +struct ClippedSignedDistance +{ + using float_t = typename CurveType::scalar_t; + using float_t2 = typename CurveType::float_t2; + using float_t3 = typename CurveType::float_t3; + + const static float_t sdf(CurveType curve, float_t2 pos, float_t thickness, bool isRoadStyle, Clipper clipper = DefaultClipper::construct()) + { + typename CurveType::Candidates candidates = curve.getClosestCandidates(pos); + + const float_t InvalidT = nbl::hlsl::numeric_limits::max; + // TODO: Fix and test, we're not working with squared distance anymore + const float_t MAX_DISTANCE_SQUARED = (thickness + 1.0f) * (thickness + 1.0f); // TODO: ' + 1' is too much? 
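+        // NOTE: with the length() calls below these candidate distances are effectively linear (screen-space pixels) despite the
+        // *Squared names, so this cutoff reads as "thickness plus ~1px of slack", presumably to leave room for the AA falloff.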
+ + bool clipped = false; + float_t closestDistanceSquared = MAX_DISTANCE_SQUARED; + float_t closestT = InvalidT; + [[unroll(CurveType::MaxCandidates)]] + for (uint32_t i = 0; i < CurveType::MaxCandidates; i++) + { + const float_t candidateDistanceSquared = length(curve.evaluate(candidates[i]) - pos); + if (candidateDistanceSquared < closestDistanceSquared) + { + float_t2 snappedTs = clipper(candidates[i]); + + if (snappedTs[0] == InvalidT) + { + continue; + } + + if (snappedTs[0] != candidates[i]) + { + // left snapped or clamped + const float_t leftSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[0]) - pos); + if (leftSnappedCandidateDistanceSquared < closestDistanceSquared) + { + clipped = true; + closestT = snappedTs[0]; + closestDistanceSquared = leftSnappedCandidateDistanceSquared; + } + + if (snappedTs[0] != snappedTs[1]) + { + // right snapped or clamped + const float_t rightSnappedCandidateDistanceSquared = length(curve.evaluate(snappedTs[1]) - pos); + if (rightSnappedCandidateDistanceSquared < closestDistanceSquared) + { + clipped = true; + closestT = snappedTs[1]; + closestDistanceSquared = rightSnappedCandidateDistanceSquared; + } + } + } + else + { + // no snapping + if (candidateDistanceSquared < closestDistanceSquared) + { + clipped = false; + closestT = candidates[i]; + closestDistanceSquared = candidateDistanceSquared; + } + } + } + } + + + float_t roundedDistance = closestDistanceSquared - thickness; + if (!isRoadStyle) + { + return roundedDistance; + } + else + { + const float_t aaWidth = globals.antiAliasingFactor; + float_t rectCappedDistance = roundedDistance; + + if (clipped) + { + float_t2 q = mul(curve.getLocalCoordinateSpace(closestT), pos - curve.evaluate(closestT)); + rectCappedDistance = capSquare(q, thickness, aaWidth); + } + + return rectCappedDistance; + } + } + + static float capSquare(float_t2 q, float_t th, float_t aaWidth) + { + float_t2 d = abs(q) - float_t2(aaWidth, th); + return length(max(d, 0.0)) + min(max(d.x, d.y), 0.0); + } +}; + +#endif \ No newline at end of file diff --git a/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl b/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl index 46c5d28e0..69bab6bde 100644 --- a/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl +++ b/62_CAD/shaders/main_pipeline/resolve_alphas.hlsl @@ -1,6 +1,5 @@ #include "common.hlsl" #include -#include template float32_t4 calculateFinalColor(const uint2 fragCoord); @@ -16,36 +15,59 @@ template<> float32_t4 calculateFinalColor(const uint2 fragCoord) { float32_t4 color; - - nbl::hlsl::spirv::beginInvocationInterlockEXT(); + nbl::hlsl::spirv::beginInvocationInterlockEXT(); + + bool resolve = false; + uint32_t toResolveStyleIdx = InvalidStyleIdx; const uint32_t packedData = pseudoStencil[fragCoord]; const uint32_t storedQuantizedAlpha = nbl::hlsl::glsl::bitfieldExtract(packedData,0,AlphaBits); const uint32_t storedMainObjectIdx = nbl::hlsl::glsl::bitfieldExtract(packedData,AlphaBits,MainObjectIdxBits); - pseudoStencil[fragCoord] = nbl::hlsl::glsl::bitfieldInsert(0, InvalidMainObjectIdx, AlphaBits, MainObjectIdxBits); - // if geomID has changed, we resolve the SDF alpha (draw using blend), else accumulate - const bool resolve = storedMainObjectIdx != InvalidMainObjectIdx; - uint32_t toResolveStyleIdx = InvalidStyleIdx; + const bool currentlyActiveMainObj = (storedMainObjectIdx == globals.currentlyActiveMainObjectIndex); + if (!currentlyActiveMainObj) + { + // Normal Scenario, this branch will always be taken if there is no overflow submit in the middle of an 
active mainObject
+            //we do the final resolve of the pixel and invalidate the pseudo-stencil
+            pseudoStencil[fragCoord] = nbl::hlsl::glsl::bitfieldInsert(0, InvalidMainObjectIdx, AlphaBits, MainObjectIdxBits);
+
+            // if geomID has changed, we resolve the SDF alpha (draw using blend), else accumulate
+            resolve = storedMainObjectIdx != InvalidMainObjectIdx;
-    // load from colorStorage only if we want to resolve color from texture instead of style
-    // sampling from colorStorage needs to happen in critical section because another fragment may also want to store into it at the same time + need to happen before store
-    if (resolve)
+            // load from colorStorage only if we want to resolve color from texture instead of style
+            // sampling from colorStorage needs to happen in critical section because another fragment may also want to store into it at the same time + need to happen before store
+            if (resolve)
+            {
+                toResolveStyleIdx = loadMainObject(storedMainObjectIdx).styleIdx;
+                if (toResolveStyleIdx == InvalidStyleIdx) // if style idx to resolve is invalid, then it means we should resolve from color
+                    color = float32_t4(unpackR11G11B10_UNORM(colorStorage[fragCoord]), 1.0f);
+            }
+        }
+        else if (globals.currentlyActiveMainObjectIndex != InvalidMainObjectIdx)
     {
-        toResolveStyleIdx = mainObjects[storedMainObjectIdx].styleIdx;
-        if (toResolveStyleIdx == InvalidStyleIdx) // if style idx to resolve is invalid, then it means we should resolve from color
-            color = float32_t4(unpackR11G11B10_UNORM(colorStorage[fragCoord]), 1.0f);
+        // Being here means there was an overflow submit in the middle of an active main object
+        // We don't want to resolve the active mainObj, because it needs to be fully resolved later when the mainObject actually finishes.
+        // We change the active main object index in our pseudo-stencil to 0u, because that will be its new index in the next submit.
+        uint32_t newMainObjectIdx = 0u;
+        pseudoStencil[fragCoord] = nbl::hlsl::glsl::bitfieldInsert(storedQuantizedAlpha, newMainObjectIdx, AlphaBits, MainObjectIdxBits);
+        resolve = false; // just to re-iterate that we don't want to resolve this.
} + nbl::hlsl::spirv::endInvocationInterlockEXT(); if (!resolve) discard; + // draw with previous geometry's style's color or stored in texture buffer :kek: // we don't need to load the style's color in critical section because we've already retrieved the style index from the stored main obj if (toResolveStyleIdx != InvalidStyleIdx) // if toResolveStyleIdx is valid then that means our resolved color should come from line style - color = lineStyles[toResolveStyleIdx].color; + { + color = loadLineStyle(toResolveStyleIdx).color; + gammaUncorrect(color.rgb); // want to output to SRGB without gamma correction + } + color.a *= float(storedQuantizedAlpha) / 255.f; return color; @@ -55,5 +77,5 @@ float32_t4 calculateFinalColor(const uint2 fragCoord) [shader("pixel")] float4 resolveAlphaMain(float4 position : SV_Position) : SV_TARGET { - return calculateFinalColor(position.xy); + return calculateFinalColor(position.xy); } diff --git a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl index bff4182f6..90394e935 100644 --- a/62_CAD/shaders/main_pipeline/vertex_shader.hlsl +++ b/62_CAD/shaders/main_pipeline/vertex_shader.hlsl @@ -5,7 +5,6 @@ #include #include #include -#include // TODO[Lucas]: Move these functions to builtin hlsl functions (Even the shadertoy obb and aabb ones) float cross2D(float2 a, float2 b) @@ -23,21 +22,43 @@ float2 QuadraticBezier(float2 p0, float2 p1, float2 p2, float t) return shapes::QuadraticBezier::construct(p0, p1, p2).evaluate(t); } -ClipProjectionData getClipProjectionData(in MainObject mainObj) +struct NDCClipProjectionData { - if (mainObj.clipProjectionAddress != InvalidClipProjectionAddress) + pfloat64_t3x3 projectionToNDC; // pre-multiplied projection in a tree + float32_t2 minClipNDC; + float32_t2 maxClipNDC; +}; + +NDCClipProjectionData getClipProjectionData(in MainObject mainObj) +{ + NDCClipProjectionData ret; + if (mainObj.customProjectionIndex != InvalidCustomProjectionIndex) { - ClipProjectionData ret; - ret.projectionToNDC = vk::RawBufferLoad(mainObj.clipProjectionAddress, 8u); - ret.minClipNDC = vk::RawBufferLoad(mainObj.clipProjectionAddress + sizeof(pfloat64_t3x3), 8u); - ret.maxClipNDC = vk::RawBufferLoad(mainObj.clipProjectionAddress + sizeof(pfloat64_t3x3) + sizeof(float32_t2), 8u); + // If projection type is worldspace projection and clip: + pfloat64_t3x3 customProjection = loadCustomProjection(mainObj.customProjectionIndex); + ret.projectionToNDC = nbl::hlsl::mul(globals.defaultProjectionToNDC, customProjection); + } + else + ret.projectionToNDC = globals.defaultProjectionToNDC; - return ret; + if (mainObj.customClipRectIndex != InvalidCustomClipRectIndex) + { + WorldClipRect worldClipRect = loadCustomClipRect(mainObj.customClipRectIndex); + + /// [NOTE]: Optimization: we avoid looking for min/max in the shader because minClip and maxClip in default worldspace are defined in such a way that minClip.y > maxClip.y so minClipNDC.y < maxClipNDC.y + ret.minClipNDC = nbl::hlsl::_static_cast(transformPointNdc(globals.defaultProjectionToNDC, worldClipRect.minClip)); + ret.maxClipNDC = nbl::hlsl::_static_cast(transformPointNdc(globals.defaultProjectionToNDC, worldClipRect.maxClip)); } else { - return globals.defaultClipProjection; + ret.minClipNDC = float2(-1.0f, -1.0f); + ret.maxClipNDC = float2(+1.0f, +1.0f); } + + if (mainObj.transformationType == TransformationType::TT_FIXED_SCREENSPACE_SIZE) + ret.projectionToNDC = nbl::hlsl::mul(ret.projectionToNDC, globals.screenToWorldScaleTransform); + + return ret; } 
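+// e.g. the intended flow (mirroring what vtxMain does further down) is roughly:
+//   NDCClipProjectionData cpd = getClipProjectionData(mainObj);
+//   float2 screenPos = transformPointScreenSpace(cpd.projectionToNDC, globals.resolution, worldPoint);
+// with cpd.minClipNDC / cpd.maxClipNDC presumably handed on for the per-object NDC clip rect.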
float2 transformPointScreenSpace(pfloat64_t3x3 transformation, uint32_t2 resolution, pfloat64_t2 point2d) @@ -47,10 +68,23 @@ float2 transformPointScreenSpace(pfloat64_t3x3 transformation, uint32_t2 resolut return _static_cast(result); } +float2 transformVectorScreenSpace(pfloat64_t3x3 transformation, uint32_t2 resolution, pfloat64_t2 vec2d) +{ + pfloat64_t2 ndc = transformVectorNdc(transformation, vec2d); + pfloat64_t2 result = (ndc) * 0.5f * _static_cast(resolution); + return _static_cast(result); +} float32_t4 transformFromSreenSpaceToNdc(float2 pos, uint32_t2 resolution) { return float32_t4((pos.xy / (float32_t2)resolution) * 2.0f - 1.0f, 0.0f, 1.0f); } +float32_t getScreenToWorldRatio(pfloat64_t3x3 transformation, uint32_t2 resolution) +{ + pfloat64_t idx_0_0 = transformation[0u].x * (resolution.x / 2.0); + pfloat64_t idx_1_0 = transformation[1u].x * (resolution.y / 2.0); + float32_t2 firstCol; firstCol.x = _static_cast(idx_0_0); firstCol.y = _static_cast(idx_1_0); + return nbl::hlsl::length(firstCol); // TODO: Do length in fp64? +} template void dilateHatch(out float2 outOffsetVec, out float2 outUV, const float2 undilatedCorner, const float2 dilateRate, const float2 ndcAxisU, const float2 ndcAxisV); @@ -85,20 +119,11 @@ void dilateHatch(out float2 outOffsetVec, out float2 outUV, const float2 // Or optionally we could dilate and stuff when we know this hatch is opaque (alpha = 1.0) } -PSInput main(uint vertexID : SV_VertexID) +[shader("vertex")] +PSInput vtxMain(uint vertexID : SV_VertexID) { - // TODO[Przemek]: Disable Everything here and do your own thing as we already discussed, but let's have the same PSInput data passed to fragment. - // your programmable pulling will use the baseVertexBufferAddress BDA address and `vertexID` to RawBufferLoad it's vertex. - // ~~Later, most likely We will require pulling all 3 vertices of the triangle, that's where you need to know which triangle you're currently on, and instead of objectID = vertexID/4 which we currently do, you will do vertexID/3 and pull all 3 of it's vertices.~~ - // Ok, brainfart, a vertex can belong to multiple triangles, I was thinking of AA but triangles share vertices, nevermind my comment above. 
- - const uint vertexIdx = vertexID & 0x3u; - const uint objectID = vertexID >> 2; - - DrawObject drawObj = drawObjects[objectID]; - - ObjectType objType = (ObjectType)(drawObj.type_subsectionIdx & 0x0000FFFF); - uint32_t subsectionIdx = drawObj.type_subsectionIdx >> 16; + NDCClipProjectionData clipProjectionData; + PSInput outV; // Default Initialize PS Input @@ -107,475 +132,635 @@ PSInput main(uint vertexID : SV_VertexID) outV.data2 = float4(0, 0, 0, 0); outV.data3 = float4(0, 0, 0, 0); outV.data4 = float4(0, 0, 0, 0); - outV.interp_data5 = float2(0, 0); - outV.setObjType(objType); - outV.setMainObjectIdx(drawObj.mainObjIndex); - - MainObject mainObj = mainObjects[drawObj.mainObjIndex]; - ClipProjectionData clipProjectionData = getClipProjectionData(mainObj); - - // We only need these for Outline type objects like lines and bezier curves - if (objType == ObjectType::LINE || objType == ObjectType::QUAD_BEZIER || objType == ObjectType::POLYLINE_CONNECTOR) - { - LineStyle lineStyle = lineStyles[mainObj.styleIdx]; - - // Width is on both sides, thickness is one one side of the curve (div by 2.0f) - const float screenSpaceLineWidth = lineStyle.screenSpaceLineWidth + _static_cast(_static_cast(lineStyle.worldSpaceLineWidth) * globals.screenToWorldRatio); - const float antiAliasedLineThickness = screenSpaceLineWidth * 0.5f + globals.antiAliasingFactor; - const float sdfLineThickness = screenSpaceLineWidth / 2.0f; - outV.setLineThickness(sdfLineThickness); - outV.setCurrentWorldToScreenRatio( - _static_cast((_static_cast(2.0f) / - (clipProjectionData.projectionToNDC[0].x * _static_cast(globals.resolution.x)))) - ); - - if (objType == ObjectType::LINE) - { - pfloat64_t2 points[2u]; - points[0u] = vk::RawBufferLoad(drawObj.geometryAddress, 8u); - points[1u] = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(LinePointInfo), 8u); - - const float phaseShift = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); - const float patternStretch = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float), 8u); - outV.setCurrentPhaseShift(phaseShift); - outV.setPatternStretch(patternStretch); + outV.interp_data5 = float4(0, 0, 0, 0); - float2 transformedPoints[2u]; - for (uint i = 0u; i < 2u; ++i) - { - transformedPoints[i] = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, points[i]); - } - - const float2 lineVector = normalize(transformedPoints[1u] - transformedPoints[0u]); - const float2 normalToLine = float2(-lineVector.y, lineVector.x); - - if (vertexIdx == 0u || vertexIdx == 1u) - { - // work in screen space coordinates because of fixed pixel size - outV.position.xy = transformedPoints[0u] - + normalToLine * (((float)vertexIdx - 0.5f) * 2.0f * antiAliasedLineThickness) - - lineVector * antiAliasedLineThickness; - } - else // if (vertexIdx == 2u || vertexIdx == 3u) - { - // work in screen space coordinates because of fixed pixel size - outV.position.xy = transformedPoints[1u] - + normalToLine * (((float)vertexIdx - 2.5f) * 2.0f * antiAliasedLineThickness) - + lineVector * antiAliasedLineThickness; - } + if (pc.isDTMRendering) + { + outV.setObjType(ObjectType::TRIANGLE_MESH); + outV.setMainObjectIdx(pc.triangleMeshMainObjectIndex); + + TriangleMeshVertex vtx = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * vertexID, 8u); - outV.setLineStart(transformedPoints[0u]); - outV.setLineEnd(transformedPoints[1u]); + MainObject mainObj = loadMainObject(pc.triangleMeshMainObjectIndex); + 
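+        // NOTE: the DTM path binds no vertex buffer; vertices are "pulled" manually via BDA by offsetting the push-constant
+        // base address with vertexID (and, below, with the first vertex index of the current triangle to fetch all 3 corners),
+        // so the draw presumably only needs to issue a vertex count.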
clipProjectionData = getClipProjectionData(mainObj); - outV.position.xy = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution).xy; - } - else if (objType == ObjectType::QUAD_BEZIER) + float screenToWorldRatio = getScreenToWorldRatio(clipProjectionData.projectionToNDC, globals.resolution); + float worldToScreenRatio = 1.0f / screenToWorldRatio; + outV.setCurrentWorldToScreenRatio(worldToScreenRatio); + + // assuming there are 3 * N vertices, number of vertices is equal to number of indices and indices are sequential starting from 0 + float2 transformedOriginalPos; + float2 transformedDilatedPos; { - pfloat64_t2 points[3u]; - points[0u] = vk::RawBufferLoad(drawObj.geometryAddress, 8u); - points[1u] = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); - points[2u] = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) * 2u, 8u); - - const float phaseShift = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) * 3u, 8u); - const float patternStretch = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) * 3u + sizeof(float), 8u); - outV.setCurrentPhaseShift(phaseShift); - outV.setPatternStretch(patternStretch); - - // transform these points into screen space and pass to fragment - float2 transformedPoints[3u]; - for (uint i = 0u; i < 3u; ++i) - { - transformedPoints[i] = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, points[i]); - } - - shapes::QuadraticBezier quadraticBezier = shapes::QuadraticBezier::construct(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u]); - shapes::Quadratic quadratic = shapes::Quadratic::constructFromBezier(quadraticBezier); - shapes::Quadratic::ArcLengthCalculator preCompData = shapes::Quadratic::ArcLengthCalculator::construct(quadratic); - - outV.setQuadratic(quadratic); - outV.setQuadraticPrecomputedArcLenData(preCompData); - - float2 Mid = (transformedPoints[0u] + transformedPoints[2u]) / 2.0f; - float Radius = length(Mid - transformedPoints[0u]) / 2.0f; - - // https://algorithmist.wordpress.com/2010/12/01/quad-bezier-curvature/ - float2 vectorAB = transformedPoints[1u] - transformedPoints[0u]; - float2 vectorAC = transformedPoints[2u] - transformedPoints[1u]; - float area = abs(vectorAB.x * vectorAC.y - vectorAB.y * vectorAC.x) * 0.5; - float MaxCurvature; - if (length(transformedPoints[1u] - lerp(transformedPoints[0u], transformedPoints[2u], 0.25f)) > Radius && length(transformedPoints[1u] - lerp(transformedPoints[0u], transformedPoints[2u], 0.75f)) > Radius) - MaxCurvature = pow(length(transformedPoints[1u] - Mid), 3) / (area * area); - else - MaxCurvature = max(area / pow(length(transformedPoints[0u] - transformedPoints[1u]), 3), area / pow(length(transformedPoints[2u] - transformedPoints[1u]), 3)); - - // We only do this adaptive thing when "MinRadiusOfOsculatingCircle = RadiusOfMaxCurvature < screenSpaceLineWidth/4" OR "MaxCurvature > 4/screenSpaceLineWidth"; - // which means there is a self intersection because of large lineWidth relative to the curvature (in screenspace) - // the reason for division by 4.0f is 1. screenSpaceLineWidth is expanded on both sides and 2. 
the fact that diameter/2=radius, - const bool noCurvature = abs(dot(normalize(vectorAB), normalize(vectorAC)) - 1.0f) < exp2(-10.0f); - if (MaxCurvature * screenSpaceLineWidth > 4.0f || noCurvature) - { - //OBB Fallback - float2 obbV0; - float2 obbV1; - float2 obbV2; - float2 obbV3; - quadraticBezier.computeOBB(antiAliasedLineThickness, obbV0, obbV1, obbV2, obbV3); - if (subsectionIdx == 0) - { - if (vertexIdx == 0u) - outV.position = float4(obbV0, 0.0, 1.0f); - else if (vertexIdx == 1u) - outV.position = float4(obbV1, 0.0, 1.0f); - else if (vertexIdx == 2u) - outV.position = float4(obbV3, 0.0, 1.0f); - else if (vertexIdx == 3u) - outV.position = float4(obbV2, 0.0, 1.0f); - } - else - outV.position = float4(0.0f, 0.0f, 0.0f, 0.0f); - } - else - { - // this optimal value is hardcoded based on tests and benchmarks of pixel shader invocation - // this is the place where we use it's tangent in the bezier to form sides the cages - const float optimalT = 0.145f; + uint32_t currentVertexWithinTriangleIndex = vertexID % 3; + uint32_t firstVertexOfCurrentTriangleIndex = vertexID - currentVertexWithinTriangleIndex; + + TriangleMeshVertex triangleVertices[3]; + triangleVertices[0] = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * firstVertexOfCurrentTriangleIndex, 8u); + triangleVertices[1] = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * (firstVertexOfCurrentTriangleIndex + 1), 8u); + triangleVertices[2] = vk::RawBufferLoad(pc.triangleMeshVerticesBaseAddress + sizeof(TriangleMeshVertex) * (firstVertexOfCurrentTriangleIndex + 2), 8u); + transformedOriginalPos = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, triangleVertices[currentVertexWithinTriangleIndex].pos); + + pfloat64_t2 triangleCentroid; + triangleCentroid.x = (triangleVertices[0].pos.x + triangleVertices[1].pos.x + triangleVertices[2].pos.x) / _static_cast(3.0f); + triangleCentroid.y = (triangleVertices[0].pos.y + triangleVertices[1].pos.y + triangleVertices[2].pos.y) / _static_cast(3.0f); + + // move triangles to local space, with centroid at (0, 0) + triangleVertices[0].pos = triangleVertices[0].pos - triangleCentroid; + triangleVertices[1].pos = triangleVertices[1].pos - triangleCentroid; + triangleVertices[2].pos = triangleVertices[2].pos - triangleCentroid; + + // TODO: calculate dialation factor + // const float dilateByPixels = 0.5 * (dtmSettings.maxScreenSpaceLineWidth + dtmSettings.maxWorldSpaceLineWidth * screenToWorldRatio) + aaFactor; + + pfloat64_t dialationFactor = _static_cast(2.0f); + pfloat64_t2 dialatedVertex = triangleVertices[currentVertexWithinTriangleIndex].pos * dialationFactor; - // Whether or not to flip the the interior cage nodes - int flip = cross2D(transformedPoints[0u] - transformedPoints[1u], transformedPoints[2u] - transformedPoints[1u]) > 0.0f ? 
-1 : 1; + dialatedVertex = dialatedVertex + triangleCentroid; - const float middleT = 0.5f; - float2 midPos = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], middleT); - float2 midTangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], middleT)); - float2 midNormal = float2(-midTangent.y, midTangent.x) * flip; + transformedDilatedPos = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, dialatedVertex); + } - /* - P1 - + + outV.position = transformFromSreenSpaceToNdc(transformedDilatedPos, globals.resolution); + const float heightAsFloat = nbl::hlsl::_static_cast(vtx.height); + outV.setScreenSpaceVertexAttribs(float3(transformedOriginalPos, heightAsFloat)); + // full screen triangle (this will destroy outline, contour line and height drawing) +#if 0 + const uint vertexIdx = vertexID % 3; + if(vertexIdx == 0) + outV.position.xy = float2(-1.0f, -1.0f); + else if (vertexIdx == 1) + outV.position.xy = float2(-1.0f, 3.0f); + else if (vertexIdx == 2) + outV.position.xy = float2(3.0f, -1.0f); +#endif + } + else + { + const uint vertexIdx = vertexID & 0x3u; + const uint objectID = vertexID >> 2; - exterior0 exterior1 - ---------------------- - / \- - -/ ---------------- \ - / -/interior0 interior1 - / / \ \- - -/ -/ \- \ - / -/ \ \- - / / \- \ - P0 + \ + P2 - */ + DrawObject drawObj = loadDrawObject(objectID); - // Internal cage points - float2 interior0; - float2 interior1; + ObjectType objType = (ObjectType)(drawObj.type_subsectionIdx & 0x0000FFFF); + uint32_t subsectionIdx = drawObj.type_subsectionIdx >> 16; + outV.setObjType(objType); + outV.setMainObjectIdx(drawObj.mainObjIndex); - float2 middleExteriorPoint = midPos - midNormal * antiAliasedLineThickness; + MainObject mainObj = loadMainObject(drawObj.mainObjIndex); + clipProjectionData = getClipProjectionData(mainObj); + + float screenToWorldRatio = getScreenToWorldRatio(clipProjectionData.projectionToNDC, globals.resolution); + float worldToScreenRatio = 1.0f / screenToWorldRatio; + outV.setCurrentWorldToScreenRatio(worldToScreenRatio); + + // We only need these for Outline type objects like lines and bezier curves + if (objType == ObjectType::LINE || objType == ObjectType::QUAD_BEZIER || objType == ObjectType::POLYLINE_CONNECTOR) + { + LineStyle lineStyle = loadLineStyle(mainObj.styleIdx); + // Width is on both sides, thickness is one one side of the curve (div by 2.0f) + const float screenSpaceLineWidth = lineStyle.screenSpaceLineWidth + lineStyle.worldSpaceLineWidth * screenToWorldRatio; + const float antiAliasedLineThickness = screenSpaceLineWidth * 0.5f + globals.antiAliasingFactor; + const float sdfLineThickness = screenSpaceLineWidth / 2.0f; + outV.setLineThickness(sdfLineThickness); - float2 leftTangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], optimalT)); - float2 leftNormal = normalize(float2(-leftTangent.y, leftTangent.x)) * flip; - float2 leftExteriorPoint = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], optimalT) - leftNormal * antiAliasedLineThickness; - float2 exterior0 = shapes::util::LineLineIntersection(middleExteriorPoint, midTangent, leftExteriorPoint, leftTangent); + if (objType == ObjectType::LINE) + { + pfloat64_t2 points[2u]; + points[0u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + points[1u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 
sizeof(LinePointInfo), 8u); - float2 rightTangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 1.0f - optimalT)); - float2 rightNormal = normalize(float2(-rightTangent.y, rightTangent.x)) * flip; - float2 rightExteriorPoint = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 1.0f - optimalT) - rightNormal * antiAliasedLineThickness; - float2 exterior1 = shapes::util::LineLineIntersection(middleExteriorPoint, midTangent, rightExteriorPoint, rightTangent); + const float phaseShift = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + const float patternStretch = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float), 8u); + outV.setCurrentPhaseShift(phaseShift); + outV.setPatternStretch(patternStretch); - // Interiors - { - float2 tangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.286f)); - float2 normal = normalize(float2(-tangent.y, tangent.x)) * flip; - interior0 = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.286) + normal * antiAliasedLineThickness; - } + float2 transformedPoints[2u]; + for (uint i = 0u; i < 2u; ++i) { - float2 tangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.714f)); - float2 normal = normalize(float2(-tangent.y, tangent.x)) * flip; - interior1 = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.714f) + normal * antiAliasedLineThickness; + transformedPoints[i] = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, points[i]); } - if (subsectionIdx == 0u) - { - float2 endPointTangent = normalize(transformedPoints[1u] - transformedPoints[0u]); - float2 endPointNormal = float2(-endPointTangent.y, endPointTangent.x) * flip; - float2 endPointExterior = transformedPoints[0u] - endPointTangent * antiAliasedLineThickness; + const float2 lineVector = normalize(transformedPoints[1u] - transformedPoints[0u]); + const float2 normalToLine = float2(-lineVector.y, lineVector.x); - if (vertexIdx == 0u) - outV.position = float4(shapes::util::LineLineIntersection(leftExteriorPoint, leftTangent, endPointExterior, endPointNormal), 0.0, 1.0f); - else if (vertexIdx == 1u) - outV.position = float4(transformedPoints[0u] + endPointNormal * antiAliasedLineThickness - endPointTangent * antiAliasedLineThickness, 0.0, 1.0f); - else if (vertexIdx == 2u) - outV.position = float4(exterior0, 0.0, 1.0f); - else if (vertexIdx == 3u) - outV.position = float4(interior0, 0.0, 1.0f); - } - else if (subsectionIdx == 1u) + if (vertexIdx == 0u || vertexIdx == 1u) { - if (vertexIdx == 0u) - outV.position = float4(exterior0, 0.0, 1.0f); - else if (vertexIdx == 1u) - outV.position = float4(interior0, 0.0, 1.0f); - else if (vertexIdx == 2u) - outV.position = float4(exterior1, 0.0, 1.0f); - else if (vertexIdx == 3u) - outV.position = float4(interior1, 0.0, 1.0f); + // work in screen space coordinates because of fixed pixel size + outV.position.xy = transformedPoints[0u] + + normalToLine * (((float)vertexIdx - 0.5f) * 2.0f * antiAliasedLineThickness) + - lineVector * antiAliasedLineThickness; } - else if (subsectionIdx == 2u) + else // if (vertexIdx == 2u || vertexIdx == 3u) { - float2 endPointTangent = normalize(transformedPoints[2u] - transformedPoints[1u]); - float2 endPointNormal = 
float2(-endPointTangent.y, endPointTangent.x) * flip; - float2 endPointExterior = transformedPoints[2u] + endPointTangent * antiAliasedLineThickness; - - if (vertexIdx == 0u) - outV.position = float4(shapes::util::LineLineIntersection(rightExteriorPoint, rightTangent, endPointExterior, endPointNormal), 0.0, 1.0f); - else if (vertexIdx == 1u) - outV.position = float4(transformedPoints[2u] + endPointNormal * antiAliasedLineThickness + endPointTangent * antiAliasedLineThickness, 0.0, 1.0f); - else if (vertexIdx == 2u) - outV.position = float4(exterior1, 0.0, 1.0f); - else if (vertexIdx == 3u) - outV.position = float4(interior1, 0.0, 1.0f); + // work in screen space coordinates because of fixed pixel size + outV.position.xy = transformedPoints[1u] + + normalToLine * (((float)vertexIdx - 2.5f) * 2.0f * antiAliasedLineThickness) + + lineVector * antiAliasedLineThickness; } - } - outV.position.xy = (outV.position.xy / globals.resolution) * 2.0f - 1.0f; - } - else if (objType == ObjectType::POLYLINE_CONNECTOR) - { - const float FLOAT_INF = numeric_limits::infinity; - const float4 INVALID_VERTEX = float4(FLOAT_INF, FLOAT_INF, FLOAT_INF, FLOAT_INF); + outV.setLineStart(transformedPoints[0u]); + outV.setLineEnd(transformedPoints[1u]); - if (lineStyle.isRoadStyleFlag) + outV.position.xy = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution).xy; + } + else if (objType == ObjectType::QUAD_BEZIER) { - const pfloat64_t2 circleCenter = vk::RawBufferLoad(drawObj.geometryAddress, 8u); - const float2 v = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); - const float cosHalfAngleBetweenNormals = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 8u); - - const float2 circleCenterScreenSpace = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, circleCenter); - outV.setPolylineConnectorCircleCenter(circleCenterScreenSpace); + pfloat64_t2 points[3u]; + points[0u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + points[1u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + points[2u] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 2u, 8u); + + const float phaseShift = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 3u, 8u); + const float patternStretch = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 3u + sizeof(float), 8u); + outV.setCurrentPhaseShift(phaseShift); + outV.setPatternStretch(patternStretch); + + // transform these points into screen space and pass to fragment + float2 transformedPoints[3u]; + for (uint i = 0u; i < 3u; ++i) + { + transformedPoints[i] = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, points[i]); + } - // Find other miter vertices - const float sinHalfAngleBetweenNormals = sqrt(1.0f - (cosHalfAngleBetweenNormals * cosHalfAngleBetweenNormals)); - const float32_t2x2 rotationMatrix = float32_t2x2(cosHalfAngleBetweenNormals, -sinHalfAngleBetweenNormals, sinHalfAngleBetweenNormals, cosHalfAngleBetweenNormals); + shapes::QuadraticBezier quadraticBezier = shapes::QuadraticBezier::construct(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u]); + shapes::Quadratic quadratic = shapes::Quadratic::constructFromBezier(quadraticBezier); + shapes::Quadratic::ArcLengthCalculator preCompData = 
shapes::Quadratic::ArcLengthCalculator::construct(quadratic); - // Pass the precomputed trapezoid values for the sdf - { - float vLen = length(v); - float2 intersectionDirection = v / vLen; + outV.setQuadratic(quadratic); + outV.setQuadraticPrecomputedArcLenData(preCompData); - float longBase = sinHalfAngleBetweenNormals; - float shortBase = max((vLen - globals.miterLimit) * cosHalfAngleBetweenNormals / sinHalfAngleBetweenNormals, 0.0); - // height of the trapezoid / triangle - float hLen = min(globals.miterLimit, vLen); + float2 Mid = (transformedPoints[0u] + transformedPoints[2u]) / 2.0f; + float Radius = length(Mid - transformedPoints[0u]) / 2.0f; - outV.setPolylineConnectorTrapezoidStart(-1.0 * intersectionDirection * sdfLineThickness); - outV.setPolylineConnectorTrapezoidEnd(intersectionDirection * hLen * sdfLineThickness); - outV.setPolylineConnectorTrapezoidLongBase(sinHalfAngleBetweenNormals * ((1.0 + vLen) / (vLen - cosHalfAngleBetweenNormals)) * sdfLineThickness); - outV.setPolylineConnectorTrapezoidShortBase(shortBase * sdfLineThickness); - } + // https://algorithmist.wordpress.com/2010/12/01/quad-bezier-curvature/ + float2 vectorAB = transformedPoints[1u] - transformedPoints[0u]; + float2 vectorAC = transformedPoints[2u] - transformedPoints[1u]; + float area = abs(vectorAB.x * vectorAC.y - vectorAB.y * vectorAC.x) * 0.5; + float MaxCurvature; + if (length(transformedPoints[1u] - lerp(transformedPoints[0u], transformedPoints[2u], 0.25f)) > Radius && length(transformedPoints[1u] - lerp(transformedPoints[0u], transformedPoints[2u], 0.75f)) > Radius) + MaxCurvature = pow(length(transformedPoints[1u] - Mid), 3) / (area * area); + else + MaxCurvature = max(area / pow(length(transformedPoints[0u] - transformedPoints[1u]), 3), area / pow(length(transformedPoints[2u] - transformedPoints[1u]), 3)); - if (vertexIdx == 0u) + // We only do this adaptive thing when "MinRadiusOfOsculatingCircle = RadiusOfMaxCurvature < screenSpaceLineWidth/4" OR "MaxCurvature > 4/screenSpaceLineWidth"; + // which means there is a self intersection because of large lineWidth relative to the curvature (in screenspace) + // the reason for division by 4.0f is 1. screenSpaceLineWidth is expanded on both sides and 2. 
the fact that diameter/2=radius, + const bool noCurvature = abs(dot(normalize(vectorAB), normalize(vectorAC)) - 1.0f) < exp2(-10.0f); + if (MaxCurvature * screenSpaceLineWidth > 4.0f || noCurvature) { - const float2 V1 = normalize(mul(v, rotationMatrix)) * antiAliasedLineThickness * 2.0f; - const float2 screenSpaceV1 = circleCenterScreenSpace + V1; - outV.position = float4(screenSpaceV1, 0.0f, 1.0f); + //OBB Fallback + float2 obbV0; + float2 obbV1; + float2 obbV2; + float2 obbV3; + quadraticBezier.computeOBB(antiAliasedLineThickness, obbV0, obbV1, obbV2, obbV3); + if (subsectionIdx == 0) + { + if (vertexIdx == 0u) + outV.position = float4(obbV0, 0.0, 1.0f); + else if (vertexIdx == 1u) + outV.position = float4(obbV1, 0.0, 1.0f); + else if (vertexIdx == 2u) + outV.position = float4(obbV3, 0.0, 1.0f); + else if (vertexIdx == 3u) + outV.position = float4(obbV2, 0.0, 1.0f); + } + else + outV.position = float4(0.0f, 0.0f, 0.0f, 0.0f); } - else if (vertexIdx == 1u) + else { - outV.position = float4(circleCenterScreenSpace, 0.0f, 1.0f); + // this optimal value is hardcoded based on tests and benchmarks of pixel shader invocation + // this is the place where we use it's tangent in the bezier to form sides the cages + const float optimalT = 0.145f; + + // Whether or not to flip the the interior cage nodes + int flip = cross2D(transformedPoints[0u] - transformedPoints[1u], transformedPoints[2u] - transformedPoints[1u]) > 0.0f ? -1 : 1; + + const float middleT = 0.5f; + float2 midPos = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], middleT); + float2 midTangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], middleT)); + float2 midNormal = float2(-midTangent.y, midTangent.x) * flip; + + /* + P1 + + + + + exterior0 exterior1 + ---------------------- + / \- + -/ ---------------- \ + / -/interior0 interior1 + / / \ \- + -/ -/ \- \ + / -/ \ \- + / / \- \ + P0 + \ + P2 + */ + + // Internal cage points + float2 interior0; + float2 interior1; + + float2 middleExteriorPoint = midPos - midNormal * antiAliasedLineThickness; + + + float2 leftTangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], optimalT)); + float2 leftNormal = normalize(float2(-leftTangent.y, leftTangent.x)) * flip; + float2 leftExteriorPoint = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], optimalT) - leftNormal * antiAliasedLineThickness; + float2 exterior0 = shapes::util::LineLineIntersection(middleExteriorPoint, midTangent, leftExteriorPoint, leftTangent); + + float2 rightTangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 1.0f - optimalT)); + float2 rightNormal = normalize(float2(-rightTangent.y, rightTangent.x)) * flip; + float2 rightExteriorPoint = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 1.0f - optimalT) - rightNormal * antiAliasedLineThickness; + float2 exterior1 = shapes::util::LineLineIntersection(middleExteriorPoint, midTangent, rightExteriorPoint, rightTangent); + + // Interiors + { + float2 tangent = normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.286f)); + float2 normal = normalize(float2(-tangent.y, tangent.x)) * flip; + interior0 = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.286) + normal * antiAliasedLineThickness; + } + { + float2 tangent = 
normalize(BezierTangent(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.714f)); + float2 normal = normalize(float2(-tangent.y, tangent.x)) * flip; + interior1 = QuadraticBezier(transformedPoints[0u], transformedPoints[1u], transformedPoints[2u], 0.714f) + normal * antiAliasedLineThickness; + } + + if (subsectionIdx == 0u) + { + float2 endPointTangent = normalize(transformedPoints[1u] - transformedPoints[0u]); + float2 endPointNormal = float2(-endPointTangent.y, endPointTangent.x) * flip; + float2 endPointExterior = transformedPoints[0u] - endPointTangent * antiAliasedLineThickness; + + if (vertexIdx == 0u) + outV.position = float4(shapes::util::LineLineIntersection(leftExteriorPoint, leftTangent, endPointExterior, endPointNormal), 0.0, 1.0f); + else if (vertexIdx == 1u) + outV.position = float4(transformedPoints[0u] + endPointNormal * antiAliasedLineThickness - endPointTangent * antiAliasedLineThickness, 0.0, 1.0f); + else if (vertexIdx == 2u) + outV.position = float4(exterior0, 0.0, 1.0f); + else if (vertexIdx == 3u) + outV.position = float4(interior0, 0.0, 1.0f); + } + else if (subsectionIdx == 1u) + { + if (vertexIdx == 0u) + outV.position = float4(exterior0, 0.0, 1.0f); + else if (vertexIdx == 1u) + outV.position = float4(interior0, 0.0, 1.0f); + else if (vertexIdx == 2u) + outV.position = float4(exterior1, 0.0, 1.0f); + else if (vertexIdx == 3u) + outV.position = float4(interior1, 0.0, 1.0f); + } + else if (subsectionIdx == 2u) + { + float2 endPointTangent = normalize(transformedPoints[2u] - transformedPoints[1u]); + float2 endPointNormal = float2(-endPointTangent.y, endPointTangent.x) * flip; + float2 endPointExterior = transformedPoints[2u] + endPointTangent * antiAliasedLineThickness; + + if (vertexIdx == 0u) + outV.position = float4(shapes::util::LineLineIntersection(rightExteriorPoint, rightTangent, endPointExterior, endPointNormal), 0.0, 1.0f); + else if (vertexIdx == 1u) + outV.position = float4(transformedPoints[2u] + endPointNormal * antiAliasedLineThickness + endPointTangent * antiAliasedLineThickness, 0.0, 1.0f); + else if (vertexIdx == 2u) + outV.position = float4(exterior1, 0.0, 1.0f); + else if (vertexIdx == 3u) + outV.position = float4(interior1, 0.0, 1.0f); + } } - else if (vertexIdx == 2u) + + outV.position.xy = (outV.position.xy / globals.resolution) * 2.0f - 1.0f; + } + else if (objType == ObjectType::POLYLINE_CONNECTOR) + { + const float FLOAT_INF = numeric_limits::infinity; + const float4 INVALID_VERTEX = float4(FLOAT_INF, FLOAT_INF, FLOAT_INF, FLOAT_INF); + + if (lineStyle.isRoadStyleFlag) { - // find intersection point vertex - float2 intersectionPoint = v * antiAliasedLineThickness * 2.0f; - intersectionPoint += circleCenterScreenSpace; - outV.position = float4(intersectionPoint, 0.0f, 1.0f); + const pfloat64_t2 circleCenter = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + const float2 v = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + const float cosHalfAngleBetweenNormals = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 8u); + + const float2 circleCenterScreenSpace = transformPointScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, circleCenter); + outV.setPolylineConnectorCircleCenter(circleCenterScreenSpace); + + // to better understand variables at play, and the circle space, see documentation of `miterSDF` in fragment shader + // length of vector from 
circle center to intersection position (normalized so that circle radius = line thickness = 1.0) + float vLen = length(v); + float2 intersectionDirection_Screenspace = normalize(transformVectorScreenSpace(clipProjectionData.projectionToNDC, globals.resolution, _static_cast(v))); + const float2 v_Screenspace = intersectionDirection_Screenspace * vLen; + + // Find other miter vertices + const float sinHalfAngleBetweenNormals = sqrt(1.0f - (cosHalfAngleBetweenNormals * cosHalfAngleBetweenNormals)); + const float32_t2x2 rotationMatrix = float32_t2x2(cosHalfAngleBetweenNormals, -sinHalfAngleBetweenNormals, sinHalfAngleBetweenNormals, cosHalfAngleBetweenNormals); + + // Pass the precomputed trapezoid values for the sdf + { + float longBase = sinHalfAngleBetweenNormals; + float shortBase = max((vLen - globals.miterLimit) * cosHalfAngleBetweenNormals / sinHalfAngleBetweenNormals, 0.0); + // height of the trapezoid / triangle + float hLen = min(globals.miterLimit, vLen); + + outV.setPolylineConnectorTrapezoidStart(-1.0 * intersectionDirection_Screenspace * sdfLineThickness); + outV.setPolylineConnectorTrapezoidEnd(intersectionDirection_Screenspace * hLen * sdfLineThickness); + outV.setPolylineConnectorTrapezoidLongBase(sinHalfAngleBetweenNormals * ((1.0 + vLen) / (vLen - cosHalfAngleBetweenNormals)) * sdfLineThickness); + outV.setPolylineConnectorTrapezoidShortBase(shortBase * sdfLineThickness); + } + + if (vertexIdx == 0u) + { + // multiplying the other way to rotate by -theta + const float2 V1 = normalize(mul(v_Screenspace, rotationMatrix)) * antiAliasedLineThickness * 2.0f; + const float2 screenSpaceV1 = circleCenterScreenSpace + V1; + outV.position = float4(screenSpaceV1, 0.0f, 1.0f); + } + else if (vertexIdx == 1u) + { + outV.position = float4(circleCenterScreenSpace, 0.0f, 1.0f); + } + else if (vertexIdx == 2u) + { + // find intersection point vertex + float2 intersectionPoint = v_Screenspace * antiAliasedLineThickness * 2.0f; + intersectionPoint += circleCenterScreenSpace; + outV.position = float4(intersectionPoint, 0.0f, 1.0f); + } + else if (vertexIdx == 3u) + { + const float2 V2 = normalize(mul(rotationMatrix, v_Screenspace)) * antiAliasedLineThickness * 2.0f; + const float2 screenSpaceV2 = circleCenterScreenSpace + V2; + outV.position = float4(screenSpaceV2, 0.0f, 1.0f); + } + + outV.position.xy = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution).xy; } - else if (vertexIdx == 3u) + else { - const float2 V2 = normalize(mul(rotationMatrix, v)) * antiAliasedLineThickness * 2.0f; - const float2 screenSpaceV2 = circleCenterScreenSpace + V2; - outV.position = float4(screenSpaceV2, 0.0f, 1.0f); + outV.position = INVALID_VERTEX; } - - outV.position.xy = transformFromSreenSpaceToNdc(outV.position.xy, globals.resolution).xy; - } - else - { - outV.position = INVALID_VERTEX; } } - } - else if (objType == ObjectType::CURVE_BOX) - { - CurveBox curveBox; - curveBox.aabbMin = vk::RawBufferLoad(drawObj.geometryAddress, 8u); - curveBox.aabbMax = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); - - for (uint32_t i = 0; i < 3; i ++) + else if (objType == ObjectType::CURVE_BOX) { - curveBox.curveMin[i] = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) * 2 + sizeof(float32_t2) * i, 4u); - curveBox.curveMax[i] = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) * 2 + sizeof(float32_t2) * (3 + i), 4u); - } + CurveBox curveBox; + curveBox.aabbMin = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + 
curveBox.aabbMax = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + + for (uint32_t i = 0; i < 3; i ++) + { + curveBox.curveMin[i] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 2 + sizeof(float32_t2) * i, 4u); + curveBox.curveMax[i] = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) * 2 + sizeof(float32_t2) * (3 + i), 4u); + } - pfloat64_t2 aabbMaxXMinY; - aabbMaxXMinY.x = curveBox.aabbMax.x; - aabbMaxXMinY.y = curveBox.aabbMin.y; + pfloat64_t2 aabbMaxXMinY; + aabbMaxXMinY.x = curveBox.aabbMax.x; + aabbMaxXMinY.y = curveBox.aabbMin.y; - pfloat64_t2 aabbMinXMaxY; - aabbMinXMaxY.x = curveBox.aabbMin.x; - aabbMinXMaxY.y = curveBox.aabbMax.y; + pfloat64_t2 aabbMinXMaxY; + aabbMinXMaxY.x = curveBox.aabbMin.x; + aabbMinXMaxY.y = curveBox.aabbMax.y; - const float2 ndcAxisU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, aabbMaxXMinY - curveBox.aabbMin)); - const float2 ndcAxisV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, aabbMinXMaxY - curveBox.aabbMin)); + const float2 ndcAxisU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, aabbMaxXMinY - curveBox.aabbMin)); + const float2 ndcAxisV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, aabbMinXMaxY - curveBox.aabbMin)); - const float2 screenSpaceAabbExtents = float2(length(ndcAxisU * float2(globals.resolution)) / 2.0, length(ndcAxisV * float2(globals.resolution)) / 2.0); + const float2 screenSpaceAabbExtents = float2(length(ndcAxisU * float2(globals.resolution)) / 2.0, length(ndcAxisV * float2(globals.resolution)) / 2.0); - // we could use something like this to compute screen space change over minor/major change and avoid ddx(minor), ddy(major) in frag shader (the code below doesn't account for rotation) - outV.setCurveBoxScreenSpaceSize(float2(screenSpaceAabbExtents)); + // we could use something like this to compute screen space change over minor/major change and avoid ddx(minor), ddy(major) in frag shader (the code below doesn't account for rotation) + outV.setCurveBoxScreenSpaceSize(float2(screenSpaceAabbExtents)); - const float2 undilatedCorner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); - const pfloat64_t2 undilatedCornerF64 = _static_cast(undilatedCorner); + const float2 undilatedCorner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); + const pfloat64_t2 undilatedCornerF64 = _static_cast(undilatedCorner); - // We don't dilate on AMD (= no fragShaderInterlock) - const float pixelsToIncreaseOnEachSide = globals.antiAliasingFactor + 1.0; - const float2 dilateRate = pixelsToIncreaseOnEachSide / screenSpaceAabbExtents; // float sufficient to hold the dilate rect? - float2 dilateVec; - float2 dilatedUV; - dilateHatch(dilateVec, dilatedUV, undilatedCorner, dilateRate, ndcAxisU, ndcAxisV); + // We don't dilate on AMD (= no fragShaderInterlock) + const float pixelsToIncreaseOnEachSide = globals.antiAliasingFactor + 1.0; + const float2 dilateRate = pixelsToIncreaseOnEachSide / screenSpaceAabbExtents; // float sufficient to hold the dilate rect? 
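// [editor's note] Worked example, not part of the patch: with globals.antiAliasingFactor = 1.0 the
// box is grown by pixelsToIncreaseOnEachSide = 2 pixels on every side, so a curve box covering
// 100 x 50 pixels on screen gets dilateRate = (2/100, 2/50) = (0.02, 0.04). dilateHatch() then uses
// that rate together with ndcAxisU/ndcAxisV to output the NDC offset for this corner (dilateVec,
// added to the transformed corner position below) and the matching dilated UV (dilatedUV, fed to
// the minor/major bounding-box UV outputs).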
+ float2 dilateVec; + float2 dilatedUV; + dilateHatch(dilateVec, dilatedUV, undilatedCorner, dilateRate, ndcAxisU, ndcAxisV); - // doing interpolation this way to ensure correct endpoints and 0 and 1, we can alternatively use branches to set current corner based on vertexIdx - const pfloat64_t2 currentCorner = curveBox.aabbMin * (_static_cast(float2(1.0f, 1.0f)) - undilatedCornerF64) + - curveBox.aabbMax * undilatedCornerF64; + // doing interpolation this way to ensure correct endpoints and 0 and 1, we can alternatively use branches to set current corner based on vertexIdx + const pfloat64_t2 currentCorner = curveBox.aabbMin * (_static_cast(float2(1.0f, 1.0f)) - undilatedCornerF64) + + curveBox.aabbMax * undilatedCornerF64; - const float2 coord = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, currentCorner) + _static_cast(dilateVec)); + const float2 coord = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, currentCorner) + _static_cast(dilateVec)); - outV.position = float4(coord, 0.f, 1.f); + outV.position = float4(coord, 0.f, 1.f); - const uint major = (uint)SelectedMajorAxis; - const uint minor = 1-major; - - // A, B & C get converted from unorm to [0, 1] - // A & B get converted from [0,1] to [-2, 2] - shapes::Quadratic curveMin = shapes::Quadratic::construct( - curveBox.curveMin[0], curveBox.curveMin[1], curveBox.curveMin[2]); - shapes::Quadratic curveMax = shapes::Quadratic::construct( - curveBox.curveMax[0], curveBox.curveMax[1], curveBox.curveMax[2]); - - outV.setMinorBBoxUV(dilatedUV[minor]); - outV.setMajorBBoxUV(dilatedUV[major]); - - outV.setCurveMinMinor(math::equations::Quadratic::construct( - curveMin.A[minor], - curveMin.B[minor], - curveMin.C[minor])); - outV.setCurveMinMajor(math::equations::Quadratic::construct( - curveMin.A[major], - curveMin.B[major], - curveMin.C[major])); - - outV.setCurveMaxMinor(math::equations::Quadratic::construct( - curveMax.A[minor], - curveMax.B[minor], - curveMax.C[minor])); - outV.setCurveMaxMajor(math::equations::Quadratic::construct( - curveMax.A[major], - curveMax.B[major], - curveMax.C[major])); - - //math::equations::Quadratic curveMinRootFinding = math::equations::Quadratic::construct( - // curveMin.A[major], - // curveMin.B[major], - // curveMin.C[major] - maxCorner[major]); - //math::equations::Quadratic curveMaxRootFinding = math::equations::Quadratic::construct( - // curveMax.A[major], - // curveMax.B[major], - // curveMax.C[major] - maxCorner[major]); - //outV.setMinCurvePrecomputedRootFinders(PrecomputedRootFinder::construct(curveMinRootFinding)); - //outV.setMaxCurvePrecomputedRootFinders(PrecomputedRootFinder::construct(curveMaxRootFinding)); - } - else if (objType == ObjectType::FONT_GLYPH) - { - LineStyle lineStyle = lineStyles[mainObj.styleIdx]; - const float italicTiltSlope = lineStyle.screenSpaceLineWidth; // aliased text style member with line style + const uint major = (uint)SelectedMajorAxis; + const uint minor = 1-major; + + // A, B & C get converted from unorm to [0, 1] + // A & B get converted from [0,1] to [-2, 2] + shapes::Quadratic curveMin = shapes::Quadratic::construct( + curveBox.curveMin[0], curveBox.curveMin[1], curveBox.curveMin[2]); + shapes::Quadratic curveMax = shapes::Quadratic::construct( + curveBox.curveMax[0], curveBox.curveMax[1], curveBox.curveMax[2]); + + outV.setMinorBBoxUV(dilatedUV[minor]); + outV.setMajorBBoxUV(dilatedUV[major]); + + outV.setCurveMinMinor(math::equations::Quadratic::construct( + curveMin.A[minor], + curveMin.B[minor], + curveMin.C[minor])); + 
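// [editor's note, not part of the patch] The four setters around this point split each bounding
// curve's quadratic coefficients into a "major" and a "minor" component (major = SelectedMajorAxis,
// minor = 1 - major). The commented-out PrecomputedRootFinder code below suggests the intended use:
// solve the major-axis quadratic for the curve parameter t at the pixel's major coordinate, then
// evaluate the minor-axis quadratics of curveMin/curveMax at that t to get the hatch boundaries the
// fragment shader tests against.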
outV.setCurveMinMajor(math::equations::Quadratic::construct( + curveMin.A[major], + curveMin.B[major], + curveMin.C[major])); + + outV.setCurveMaxMinor(math::equations::Quadratic::construct( + curveMax.A[minor], + curveMax.B[minor], + curveMax.C[minor])); + outV.setCurveMaxMajor(math::equations::Quadratic::construct( + curveMax.A[major], + curveMax.B[major], + curveMax.C[major])); + + //math::equations::Quadratic curveMinRootFinding = math::equations::Quadratic::construct( + // curveMin.A[major], + // curveMin.B[major], + // curveMin.C[major] - maxCorner[major]); + //math::equations::Quadratic curveMaxRootFinding = math::equations::Quadratic::construct( + // curveMax.A[major], + // curveMax.B[major], + // curveMax.C[major] - maxCorner[major]); + //outV.setMinCurvePrecomputedRootFinders(PrecomputedRootFinder::construct(curveMinRootFinding)); + //outV.setMaxCurvePrecomputedRootFinders(PrecomputedRootFinder::construct(curveMaxRootFinding)); + } + else if (objType == ObjectType::FONT_GLYPH) + { + LineStyle lineStyle = loadLineStyle(mainObj.styleIdx); + const float italicTiltSlope = lineStyle.screenSpaceLineWidth; // aliased text style member with line style - GlyphInfo glyphInfo; - glyphInfo.topLeft = vk::RawBufferLoad(drawObj.geometryAddress, 8u); - glyphInfo.dirU = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); - glyphInfo.aspectRatio = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); - glyphInfo.minUV_textureID_packed = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); - - float32_t2 minUV = glyphInfo.getMinUV(); - uint16_t textureID = glyphInfo.getTextureID(); - - const float32_t2 dirV = float32_t2(glyphInfo.dirU.y, -glyphInfo.dirU.x) * glyphInfo.aspectRatio; - const float2 screenTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, glyphInfo.topLeft)); - const float2 screenDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(glyphInfo.dirU))); - const float2 screenDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); - - const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); // corners of square from (0, 0) to (1, 1) - const float2 undilatedCornerNDC = corner * 2.0 - 1.0; // corners of square from (-1, -1) to (1, 1) + GlyphInfo glyphInfo; + glyphInfo.topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + glyphInfo.dirU = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); + glyphInfo.aspectRatio = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); + glyphInfo.minUV_textureID_packed = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); + + float32_t2 minUV = glyphInfo.getMinUV(); + uint16_t textureID = glyphInfo.getTextureID(); + + const float32_t2 dirV = float32_t2(glyphInfo.dirU.y, -glyphInfo.dirU.x) * glyphInfo.aspectRatio; + const float2 screenTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, glyphInfo.topLeft)); + const float2 screenDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(glyphInfo.dirU))); + const float2 screenDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); + + const float2 corner = 
float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); // corners of square from (0, 0) to (1, 1) + const float2 undilatedCornerNDC = corner * 2.0 - 1.0; // corners of square from (-1, -1) to (1, 1) - const float2 screenSpaceAabbExtents = float2(length(screenDirU * float2(globals.resolution)) / 2.0, length(screenDirV * float2(globals.resolution)) / 2.0); - const float pixelsToIncreaseOnEachSide = globals.antiAliasingFactor + 1.0; - const float2 dilateRate = (pixelsToIncreaseOnEachSide / screenSpaceAabbExtents); + const float2 screenSpaceAabbExtents = float2(length(screenDirU * float2(globals.resolution)) / 2.0, length(screenDirV * float2(globals.resolution)) / 2.0); + const float pixelsToIncreaseOnEachSide = globals.antiAliasingFactor + 1.0; + const float2 dilateRate = (pixelsToIncreaseOnEachSide / screenSpaceAabbExtents); - const float2 vx = screenDirU * dilateRate.x; - const float2 vy = screenDirV * dilateRate.y; - const float2 offsetVec = vx * undilatedCornerNDC.x + vy * undilatedCornerNDC.y; - float2 coord = screenTopLeft + corner.x * screenDirU + corner.y * screenDirV + offsetVec; + const float2 vx = screenDirU * dilateRate.x; + const float2 vy = screenDirV * dilateRate.y; + const float2 offsetVec = vx * undilatedCornerNDC.x + vy * undilatedCornerNDC.y; + float2 coord = screenTopLeft + corner.x * screenDirU + corner.y * screenDirV + offsetVec; - if (corner.y == 0 && italicTiltSlope > 0.0f) - coord += normalize(screenDirU) * length(screenDirV) * italicTiltSlope * float(globals.resolution.y) / float(globals.resolution.x); + if (corner.y == 0 && italicTiltSlope > 0.0f) + coord += normalize(screenDirU) * length(screenDirV) * italicTiltSlope * float(globals.resolution.y) / float(globals.resolution.x); - // If aspect ratio of the dimensions and glyph inside the texture are the same then screenPxRangeX === screenPxRangeY - // but if the glyph box is stretched in any way then we won't get correct msdf - // in that case we need to take the max(screenPxRangeX, screenPxRangeY) to avoid blur due to underexaggerated distances - // We compute screenPxRange using the ratio of our screenspace extent to the texel space our glyph takes inside the texture - // Our glyph is centered inside the texture, so `maxUV = 1.0 - minUV` and `glyphTexelSize = (1.0-2.0*minUV) * MSDFSize - const float screenPxRangeX = screenSpaceAabbExtents.x / ((1.0 - 2.0 * minUV.x)); // division by MSDFSize happens after max - const float screenPxRangeY = screenSpaceAabbExtents.y / ((1.0 - 2.0 * minUV.y)); // division by MSDFSize happens after max - outV.setFontGlyphPxRange((max(max(screenPxRangeX, screenPxRangeY), 1.0) * MSDFPixelRangeHalf) / MSDFSize); // we premultuply by MSDFPixelRange/2.0, to avoid doing it in frag shader - - // In order to keep the shape scale constant with any dilation values: - // We compute the new dilated minUV that gets us minUV when interpolated on the previous undilated top left - const float2 topLeftInterpolationValue = (dilateRate/(1.0+2.0*dilateRate)); - const float2 dilatedMinUV = (topLeftInterpolationValue - minUV) / (2.0 * topLeftInterpolationValue - 1.0); - const float2 dilatedMaxUV = float2(1.0, 1.0) - dilatedMinUV; + // If aspect ratio of the dimensions and glyph inside the texture are the same then screenPxRangeX === screenPxRangeY + // but if the glyph box is stretched in any way then we won't get correct msdf + // in that case we need to take the max(screenPxRangeX, screenPxRangeY) to avoid blur due to underexaggerated distances + // We compute screenPxRange using the ratio of our screenspace 
extent to the texel space our glyph takes inside the texture + // Our glyph is centered inside the texture, so `maxUV = 1.0 - minUV` and `glyphTexelSize = (1.0-2.0*minUV) * MSDFSize + const float screenPxRangeX = screenSpaceAabbExtents.x / ((1.0 - 2.0 * minUV.x)); // division by MSDFSize happens after max + const float screenPxRangeY = screenSpaceAabbExtents.y / ((1.0 - 2.0 * minUV.y)); // division by MSDFSize happens after max + outV.setFontGlyphPxRange((max(max(screenPxRangeX, screenPxRangeY), 1.0) * MSDFPixelRangeHalf) / MSDFSize); // we premultuply by MSDFPixelRange/2.0, to avoid doing it in frag shader + + // In order to keep the shape scale constant with any dilation values: + // We compute the new dilated minUV that gets us minUV when interpolated on the previous undilated top left + const float2 topLeftInterpolationValue = (dilateRate/(1.0+2.0*dilateRate)); + const float2 dilatedMinUV = (topLeftInterpolationValue - minUV) / (2.0 * topLeftInterpolationValue - 1.0); + const float2 dilatedMaxUV = float2(1.0, 1.0) - dilatedMinUV; - const float2 uv = dilatedMinUV + corner * (dilatedMaxUV - dilatedMinUV); + const float2 uv = dilatedMinUV + corner * (dilatedMaxUV - dilatedMinUV); - outV.position = float4(coord, 0.f, 1.f); - outV.setFontGlyphUV(uv); - outV.setFontGlyphTextureId(textureID); - } - else if (objType == ObjectType::IMAGE) - { - pfloat64_t2 topLeft = vk::RawBufferLoad(drawObj.geometryAddress, 8u); - float32_t2 dirU = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); - float32_t aspectRatio = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); - uint32_t textureID = vk::RawBufferLoad(drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); - - const float32_t2 dirV = float32_t2(dirU.y, -dirU.x) * aspectRatio; - const float2 ndcTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, topLeft)); - const float2 ndcDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirU))); - const float2 ndcDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); - - float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); - float2 uv = corner; // non-dilated + outV.position = float4(coord, 0.f, 1.f); + outV.setFontGlyphUV(uv); + outV.setFontGlyphTextureId(textureID); + } + else if (objType == ObjectType::STATIC_IMAGE) + { + pfloat64_t2 topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + float32_t2 dirU = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); + float32_t aspectRatio = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); + uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); + + // TODO[DEVSH]: make sure it's documented properly that for topLeft+dirV+aspectRatio to work it's computing dirU like below (they need to be careful with transformations when y increases when you go down in screen + const float32_t2 dirV = float32_t2(dirU.y, -dirU.x) * aspectRatio; + const float2 ndcTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, topLeft)); + const float2 ndcDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirU))); + const float2 ndcDirV = 
_static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); + + float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); + float2 uv = corner; // non-dilated - float2 ndcCorner = ndcTopLeft + corner.x * ndcDirU + corner.y * ndcDirV; + float2 ndcCorner = ndcTopLeft + corner.x * ndcDirU + corner.y * ndcDirV; - outV.position = float4(ndcCorner, 0.f, 1.f); - outV.setImageUV(uv); - outV.setImageTextureId(textureID); - } + outV.position = float4(ndcCorner, 0.f, 1.f); + outV.setImageUV(uv); + outV.setImageTextureId(textureID); + } + else if (objType == ObjectType::GRID_DTM) + { + pfloat64_t2 topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + const pfloat64_t2 worldSpaceExtents = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 8u); + uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2), 8u); + float gridCellWidth = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2) + sizeof(uint32_t), 8u); + float thicknessOfTheThickestLine = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + 2 * sizeof(pfloat64_t2) + sizeof(uint32_t) + sizeof(float), 8u); + + // TODO: remove + // test large dilation + //thicknessOfTheThickestLine += 200.0f; + + const float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); + + outV.setGridDTMHeightTextureID(textureID); + outV.setGridDTMScreenSpaceCellWidth(gridCellWidth * screenToWorldRatio); + outV.setGridDTMScreenSpaceGridExtents(_static_cast(worldSpaceExtents) * screenToWorldRatio); + + static const float SquareRootOfTwo = 1.4142135f; + const pfloat64_t dilationFactor = _static_cast(SquareRootOfTwo * thicknessOfTheThickestLine); + pfloat64_t2 dilationVector; + dilationVector.x = dilationFactor; + dilationVector.y = dilationFactor; + + const pfloat64_t dilationFactorTimesTwo = dilationFactor * 2.0f; + pfloat64_t2 dilationFactorTimesTwoVector; + dilationFactorTimesTwoVector.x = dilationFactorTimesTwo; + dilationFactorTimesTwoVector.y = dilationFactorTimesTwo; + const pfloat64_t2 dilatedGridExtents = worldSpaceExtents + dilationFactorTimesTwoVector; + const float2 uvScale = _static_cast(worldSpaceExtents) / _static_cast(dilatedGridExtents); + float2 uvOffset = _static_cast(dilationVector) / _static_cast(dilatedGridExtents); + uvOffset /= uvScale; + + if (corner.x == 0.0f && corner.y == 0.0f) + { + dilationVector.x = ieee754::flipSign(dilationVector.x); + uvOffset.x = -uvOffset.x; + uvOffset.y = -uvOffset.y; + } + else if (corner.x == 0.0f && corner.y == 1.0f) + { + dilationVector.x = ieee754::flipSign(dilationVector.x); + dilationVector.y = ieee754::flipSign(dilationVector.y); + uvOffset.x = -uvOffset.x; + } + else if (corner.x == 1.0f && corner.y == 1.0f) + { + dilationVector.y = ieee754::flipSign(dilationVector.y); + } + else if (corner.x == 1.0f && corner.y == 0.0f) + { + uvOffset.y = -uvOffset.y; + } + + const float2 uv = corner + uvOffset; + outV.setImageUV(uv); + pfloat64_t2 worldSpaceExtentsYAxisFlipped; + worldSpaceExtentsYAxisFlipped.x = worldSpaceExtents.x; + worldSpaceExtentsYAxisFlipped.y = ieee754::flipSign(worldSpaceExtents.y); + const pfloat64_t2 vtxPos = topLeft + worldSpaceExtentsYAxisFlipped * _static_cast(corner); + const pfloat64_t2 dilatedVtxPos = vtxPos + dilationVector; -// Make the cage fullscreen for testing: + float2 ndcVtxPos = 
_static_cast(transformPointNdc(clipProjectionData.projectionToNDC, dilatedVtxPos)); + outV.position = float4(ndcVtxPos, 0.0f, 1.0f); + } + else if (objType == ObjectType::STREAMED_IMAGE) + { + pfloat64_t2 topLeft = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress, 8u); + float32_t2 dirU = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2), 4u); + float32_t aspectRatio = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2), 4u); + uint32_t textureID = vk::RawBufferLoad(globals.pointers.geometryBuffer + drawObj.geometryAddress + sizeof(pfloat64_t2) + sizeof(float2) + sizeof(float), 4u); + + const float32_t2 dirV = float32_t2(dirU.y, -dirU.x) * aspectRatio; + const float2 ndcTopLeft = _static_cast(transformPointNdc(clipProjectionData.projectionToNDC, topLeft)); + const float2 ndcDirU = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirU))); + const float2 ndcDirV = _static_cast(transformVectorNdc(clipProjectionData.projectionToNDC, _static_cast(dirV))); + + float2 corner = float2(bool2(vertexIdx & 0x1u, vertexIdx >> 1)); + float2 uv = corner; // non-dilated + + float2 ndcCorner = ndcTopLeft + corner.x * ndcDirU + corner.y * ndcDirV; + + outV.position = float4(ndcCorner, 0.f, 1.f); + outV.setImageUV(uv); + outV.setImageTextureId(textureID); + } + + // Make the cage fullscreen for testing: #if 0 - // disabled for object of POLYLINE_CONNECTOR type, since miters would cover whole screen - if(objType != ObjectType::POLYLINE_CONNECTOR) - { if (vertexIdx == 0u) outV.position = float4(-1, -1, 0, 1); else if (vertexIdx == 1u) @@ -584,9 +769,8 @@ PSInput main(uint vertexID : SV_VertexID) outV.position = float4(+1, -1, 0, 1); else if (vertexIdx == 3u) outV.position = float4(+1, +1, 0, 1); - } #endif - + } outV.clip = float4(outV.position.x - clipProjectionData.minClipNDC.x, outV.position.y - clipProjectionData.minClipNDC.y, clipProjectionData.maxClipNDC.x - outV.position.x, clipProjectionData.maxClipNDC.y - outV.position.y); return outV; } diff --git a/62_CAD/shaders/runtimeDeviceConfigCaps.hlsl b/62_CAD/shaders/runtimeDeviceConfigCaps.hlsl new file mode 100644 index 000000000..96647c0e7 --- /dev/null +++ b/62_CAD/shaders/runtimeDeviceConfigCaps.hlsl @@ -0,0 +1,6 @@ +#ifndef _RUNTIME_DEVICE_CONFIG_CAPS_HLSL_INCLUDED_ +#define _RUNTIME_DEVICE_CONFIG_CAPS_HLSL_INCLUDED_ + +#include +using DeviceConfigCaps = nbl::hlsl::jit::device_capabilities; +#endif // _RUNTIME_DEVICE_CONFIG_CAPS_HLSL_INCLUDED_ diff --git a/64_EmulatedFloatTest/main.cpp b/64_EmulatedFloatTest/main.cpp index a9ff5fde6..3fc635e87 100644 --- a/64_EmulatedFloatTest/main.cpp +++ b/64_EmulatedFloatTest/main.cpp @@ -1,35 +1,38 @@ // Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h + + +#include "nbl/examples/examples.hpp" + #include #include #include #include #include -#include "nbl/application_templates/MonoDeviceApplication.hpp" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" - #include "app_resources/common.hlsl" #include "app_resources/benchmark/common.hlsl" #include "nbl/builtin/hlsl/ieee754.hlsl" #include + using namespace nbl::core; using namespace nbl::hlsl; using namespace nbl::system; using namespace nbl::asset; using namespace nbl::video; using namespace nbl::application_templates; +using namespace nbl::examples; constexpr bool DoTests = true; constexpr bool DoBenchmark = true; -class CompatibilityTest final : public MonoDeviceApplication, public MonoAssetManagerAndBuiltinResourceApplication +class CompatibilityTest final : public MonoDeviceApplication, public BuiltinResourcesApplication { using device_base_t = MonoDeviceApplication; - using asset_base_t = MonoAssetManagerAndBuiltinResourceApplication; + using asset_base_t = BuiltinResourcesApplication; public: CompatibilityTest(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {} @@ -255,7 +258,7 @@ class CompatibilityTest final : public MonoDeviceApplication, public MonoAssetMa // Load shaders, set up pipeline { - smart_refctd_ptr shader; + smart_refctd_ptr shader; { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = base.m_logger.get(); @@ -271,13 +274,13 @@ class CompatibilityTest final : public MonoDeviceApplication, public MonoAssetMa // It would be super weird if loading a shader from a file produced more than 1 asset assert(assets.size() == 1); - smart_refctd_ptr source = IAsset::castDown(assets[0]); + smart_refctd_ptr source = IAsset::castDown(assets[0]); auto* compilerSet = base.m_assetMgr->getCompilerSet(); nbl::asset::IShaderCompiler::SCompilerOptions options = {}; - options.stage = source->getStage(); - options.targetSpirvVersion = base.m_device->getPhysicalDevice()->getLimits().spirvVersion; + options.stage = ESS_COMPUTE; + options.preprocessorOptions.targetSpirvVersion = base.m_device->getPhysicalDevice()->getLimits().spirvVersion; options.spirvOptimizer = nullptr; options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); @@ -286,9 +289,7 @@ class CompatibilityTest final : public MonoDeviceApplication, public MonoAssetMa auto spirv = compilerSet->compileToSPIRV(source.get(), options); - ILogicalDevice::SShaderCreationParameters params{}; - params.cpushader = spirv.get(); - shader = base.m_device->createShader(params); + shader = base.m_device->compileShader({spirv.get()}); } if (!shader) @@ -923,7 +924,7 @@ class CompatibilityTest final : public MonoDeviceApplication, public MonoAssetMa // Load shaders, set up pipeline { - smart_refctd_ptr shader; + smart_refctd_ptr shader; { IAssetLoader::SAssetLoadParams lp = {}; lp.logger = base.m_logger.get(); @@ -939,13 +940,13 @@ class CompatibilityTest final : public MonoDeviceApplication, public MonoAssetMa // It would be super weird if loading a shader from a file produced more than 1 asset assert(assets.size() == 1); - smart_refctd_ptr source = IAsset::castDown(assets[0]); + smart_refctd_ptr source = IAsset::castDown(assets[0]); auto* compilerSet = base.m_assetMgr->getCompilerSet(); 
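// [editor's note, not part of the patch] This block repeats the compile flow of the first pipeline
// above: the stage is now set explicitly (options.stage = ESS_COMPUTE) instead of being queried
// from the loaded source, the target SPIR-V version lives under
// options.preprocessorOptions.targetSpirvVersion, and the resulting SPIR-V blob is turned into a
// GPU shader via device->compileShader({spirv.get()}) rather than the old
// ILogicalDevice::SShaderCreationParameters + createShader() path.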
IShaderCompiler::SCompilerOptions options = {}; - options.stage = source->getStage(); - options.targetSpirvVersion = base.m_device->getPhysicalDevice()->getLimits().spirvVersion; + options.stage = ESS_COMPUTE; + options.preprocessorOptions.targetSpirvVersion = base.m_device->getPhysicalDevice()->getLimits().spirvVersion; options.spirvOptimizer = nullptr; options.debugInfoFlags |= IShaderCompiler::E_DEBUG_INFO_FLAGS::EDIF_SOURCE_BIT; options.preprocessorOptions.sourceIdentifier = source->getFilepathHint(); @@ -954,9 +955,7 @@ class CompatibilityTest final : public MonoDeviceApplication, public MonoAssetMa auto spirv = compilerSet->compileToSPIRV(source.get(), options); - ILogicalDevice::SShaderCreationParameters params{}; - params.cpushader = spirv.get(); - shader = base.m_device->createShader(params); + shader = base.m_device->compileShader({spirv.get()}); } if (!shader) diff --git a/67_RayQueryGeometry/app_resources/common.hlsl b/67_RayQueryGeometry/app_resources/common.hlsl index ecc811e3f..68a353adc 100644 --- a/67_RayQueryGeometry/app_resources/common.hlsl +++ b/67_RayQueryGeometry/app_resources/common.hlsl @@ -5,15 +5,21 @@ NBL_CONSTEXPR uint32_t WorkgroupSize = 16; +enum NormalType : uint32_t +{ + NT_R8G8B8A8_SNORM, + NT_R32G32B32_SFLOAT, +}; + // we need bitfield support in NBL_HLSL_DECLARE_STRUCT it seems struct SGeomInfo { uint64_t vertexBufferAddress; uint64_t indexBufferAddress; + uint64_t normalBufferAddress; - uint32_t vertexStride : 29; - uint32_t indexType : 2; // 16 bit, 32 bit or none - uint32_t smoothNormals : 1; // flat for cube, rectangle, disk + uint32_t normalType : 1; + uint32_t indexType : 1; // 16 bit, 32 bit }; struct SPushConstants @@ -27,20 +33,4 @@ struct SPushConstants float32_t2 offsetNDC; }; -#ifdef __HLSL_VERSION -enum ObjectType : uint32_t // matches c++ -{ - OT_CUBE = 0, - OT_SPHERE, - OT_CYLINDER, - OT_RECTANGLE, - OT_DISK, - OT_ARROW, - OT_CONE, - OT_ICOSPHERE, - - OT_COUNT -}; -#endif - #endif // RQG_COMMON_HLSL diff --git a/67_RayQueryGeometry/app_resources/render.comp.hlsl b/67_RayQueryGeometry/app_resources/render.comp.hlsl index e3d78f385..6bfde98e5 100644 --- a/67_RayQueryGeometry/app_resources/render.comp.hlsl +++ b/67_RayQueryGeometry/app_resources/render.comp.hlsl @@ -6,6 +6,7 @@ #include "nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl" #include "nbl/builtin/hlsl/bda/__ptr.hlsl" + using namespace nbl::hlsl; [[vk::push_constant]] SPushConstants pc; @@ -13,6 +14,7 @@ using namespace nbl::hlsl; [[vk::binding(0, 0)]] RaytracingAccelerationStructure topLevelAS; [[vk::binding(1, 0)]] RWTexture2D outImage; +[[vk::constant_id(0)]] const float shader_variant = 1.0; float3 unpackNormals3x10(uint32_t v) { @@ -23,69 +25,64 @@ float3 unpackNormals3x10(uint32_t v) return clamp(float3(pn) / 511.0, -1.0, 1.0); } -float3 calculateSmoothNormals(int instID, int primID, SGeomInfo geom, float2 bary) +float3 calculateNormals(int primID, SGeomInfo geom, float2 bary) { const uint indexType = geom.indexType; - const uint vertexStride = geom.vertexStride; + const uint normalType = geom.normalType; const uint64_t vertexBufferAddress = geom.vertexBufferAddress; const uint64_t indexBufferAddress = geom.indexBufferAddress; + const uint64_t normalBufferAddress = geom.normalBufferAddress; uint32_t3 indices; - switch (indexType) + if (indexBufferAddress == 0) { - case 0: // EIT_16BIT - indices = uint32_t3((nbl::hlsl::bda::__ptr::create(indexBufferAddress)+primID).deref().load()); - break; - case 1: // EIT_32BIT - indices = 
uint32_t3((nbl::hlsl::bda::__ptr::create(indexBufferAddress)+primID).deref().load()); - break; - default: // EIT_NONE + indices[0] = primID * 3; + indices[1] = indices[0] + 1; + indices[2] = indices[0] + 2; + } + else { + switch (indexType) { - indices[0] = primID * 3; - indices[1] = indices[0] + 1; - indices[2] = indices[0] + 2; + case 0: // EIT_16BIT + indices = uint32_t3((nbl::hlsl::bda::__ptr::create(indexBufferAddress)+primID).deref().load()); + break; + case 1: // EIT_32BIT + indices = uint32_t3((nbl::hlsl::bda::__ptr::create(indexBufferAddress)+primID).deref().load()); + break; } } + if (normalBufferAddress == 0) + { + float3 v0 = vk::RawBufferLoad(vertexBufferAddress + indices[0] * 12); + float3 v1 = vk::RawBufferLoad(vertexBufferAddress + indices[1] * 12); + float3 v2 = vk::RawBufferLoad(vertexBufferAddress + indices[2] * 12); + + return normalize(cross(v2 - v0, v1 - v0)); + } + float3 n0, n1, n2; - switch (instID) + switch (normalType) { - case OT_CUBE: + case NT_R8G8B8A8_SNORM: { - // TODO: document why the alignment is 2 here and nowhere else? isnt the `vertexStride` aligned to more than 2 anyway? - uint32_t v0 = vk::RawBufferLoad(vertexBufferAddress + indices[0] * vertexStride, 2u); - uint32_t v1 = vk::RawBufferLoad(vertexBufferAddress + indices[1] * vertexStride, 2u); - uint32_t v2 = vk::RawBufferLoad(vertexBufferAddress + indices[2] * vertexStride, 2u); + uint32_t v0 = vk::RawBufferLoad(normalBufferAddress + indices[0] * 4); + uint32_t v1 = vk::RawBufferLoad(normalBufferAddress + indices[1] * 4); + uint32_t v2 = vk::RawBufferLoad(normalBufferAddress + indices[2] * 4); n0 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v0).xyz); n1 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v1).xyz); n2 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v2).xyz); } break; - case OT_SPHERE: - case OT_CYLINDER: - case OT_ARROW: - case OT_CONE: + case NT_R32G32B32_SFLOAT: { - uint32_t v0 = vk::RawBufferLoad(vertexBufferAddress + indices[0] * vertexStride); - uint32_t v1 = vk::RawBufferLoad(vertexBufferAddress + indices[1] * vertexStride); - uint32_t v2 = vk::RawBufferLoad(vertexBufferAddress + indices[2] * vertexStride); - - n0 = normalize(unpackNormals3x10(v0)); - n1 = normalize(unpackNormals3x10(v1)); - n2 = normalize(unpackNormals3x10(v2)); + n0 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[0] * 12)); + n1 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[1] * 12)); + n2 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[2] * 12)); } break; - case OT_RECTANGLE: - case OT_DISK: - case OT_ICOSPHERE: - default: - { - n0 = normalize(vk::RawBufferLoad(vertexBufferAddress + indices[0] * vertexStride)); - n1 = normalize(vk::RawBufferLoad(vertexBufferAddress + indices[1] * vertexStride)); - n2 = normalize(vk::RawBufferLoad(vertexBufferAddress + indices[2] * vertexStride)); - } } float3 barycentrics = float3(0.0, bary); @@ -95,6 +92,7 @@ float3 calculateSmoothNormals(int instID, int primID, SGeomInfo geom, float2 bar } [numthreads(WorkgroupSize, WorkgroupSize, 1)] +[shader("compute")] void main(uint32_t3 threadID : SV_DispatchThreadID) { uint2 coords = threadID.xy; @@ -121,31 +119,16 @@ void main(uint32_t3 threadID : SV_DispatchThreadID) if (spirv::rayQueryGetIntersectionTypeKHR(query, true) == spv::RayQueryCommittedIntersectionTypeRayQueryCommittedIntersectionTriangleKHR) { - const int instID = spirv::rayQueryGetIntersectionInstanceIdKHR(query, true); + const int instanceCustomIndex = spirv::rayQueryGetIntersectionInstanceCustomIndexKHR(query, true); + const int 
geometryIndex = spirv::rayQueryGetIntersectionGeometryIndexKHR(query, true); const int primID = spirv::rayQueryGetIntersectionPrimitiveIndexKHR(query, true); // TODO: candidate for `bda::__ptr` - const SGeomInfo geom = vk::RawBufferLoad(pc.geometryInfoBuffer + instID * sizeof(SGeomInfo)); - + const SGeomInfo geom = vk::RawBufferLoad(pc.geometryInfoBuffer + (instanceCustomIndex + geometryIndex) * sizeof(SGeomInfo), 8); + float3 normals; - if (jit::device_capabilities::rayTracingPositionFetch) - { - if (geom.smoothNormals) - { - float2 barycentrics = spirv::rayQueryGetIntersectionBarycentricsKHR(query, true); - normals = calculateSmoothNormals(instID, primID, geom, barycentrics); - } - else - { - float3 pos[3] = spirv::rayQueryGetIntersectionTriangleVertexPositionsKHR(query, true); - normals = cross(pos[1] - pos[0], pos[2] - pos[0]); - } - } - else - { - float2 barycentrics = spirv::rayQueryGetIntersectionBarycentricsKHR(query, true); - normals = calculateSmoothNormals(instID, primID, geom, barycentrics); - } + float2 barycentrics = spirv::rayQueryGetIntersectionBarycentricsKHR(query, true); + normals = calculateNormals(primID, geom, barycentrics); normals = normalize(normals) * 0.5 + 0.5; color = float4(normals, 1.0); diff --git a/67_RayQueryGeometry/include/common.hpp b/67_RayQueryGeometry/include/common.hpp index 0595c7203..84b0a3dcf 100644 --- a/67_RayQueryGeometry/include/common.hpp +++ b/67_RayQueryGeometry/include/common.hpp @@ -1,95 +1,34 @@ -#ifndef __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ -#define __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ +#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ -#include -#include "nbl/asset/utils/CGeometryCreator.h" -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" - -#include "SimpleWindowedApplication.hpp" - -#include "InputSystem.hpp" -#include "CEventCallback.hpp" - -#include "CCamera.hpp" - -#include -#include +#include "nbl/examples/examples.hpp" using namespace nbl; -using namespace core; -using namespace hlsl; -using namespace system; -using namespace asset; -using namespace ui; -using namespace video; -using namespace scene; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::application_templates; +using namespace nbl::examples; #include "app_resources/common.hlsl" namespace nbl::scene { -enum ObjectType : uint8_t -{ - OT_CUBE, - OT_SPHERE, - OT_CYLINDER, - OT_RECTANGLE, - OT_DISK, - OT_ARROW, - OT_CONE, - OT_ICOSPHERE, - - OT_COUNT, - OT_UNKNOWN = std::numeric_limits::max() -}; - -struct ObjectMeta -{ - ObjectType type = OT_UNKNOWN; - std::string_view name = "Unknown"; -}; - -struct ObjectDrawHookCpu -{ - nbl::core::matrix3x4SIMD model; - nbl::asset::SBasicViewParameters viewParameters; - ObjectMeta meta; -}; - -enum GeometryShader -{ - GP_BASIC = 0, - GP_CONE, - GP_ICO, - - GP_COUNT -}; +using PolygonGeometryData = core::smart_refctd_ptr; +using GeometryCollectionData = core::smart_refctd_ptr; +using GeometryData = std::variant; struct ReferenceObjectCpu { - ObjectMeta meta; - GeometryShader shadersType; - nbl::asset::CGeometryCreator::return_type data; + core::matrix3x4SIMD transform; + GeometryData data; + uint32_t instanceID; }; -struct ReferenceObjectGpu -{ - struct Bindings - { - nbl::asset::SBufferBinding vertex, index; - }; - - ObjectMeta meta; - Bindings bindings; - uint32_t vertexStride; - nbl::asset::E_INDEX_TYPE 
indexType = nbl::asset::E_INDEX_TYPE::EIT_UNKNOWN; - uint32_t indexCount = {}; - - const bool useIndex() const - { - return bindings.index.buffer && (indexType != E_INDEX_TYPE::EIT_UNKNOWN); - } -}; } -#endif // __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ \ No newline at end of file + +#endif // _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ \ No newline at end of file diff --git a/67_RayQueryGeometry/main.cpp b/67_RayQueryGeometry/main.cpp index dab137cbd..0d36ca368 100644 --- a/67_RayQueryGeometry/main.cpp +++ b/67_RayQueryGeometry/main.cpp @@ -1,13 +1,12 @@ // Copyright (C) 2018-2024 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h - #include "common.hpp" -class RayQueryGeometryApp final : public examples::SimpleWindowedApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication +class RayQueryGeometryApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { - using device_base_t = examples::SimpleWindowedApplication; - using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; using clock_t = std::chrono::steady_clock; constexpr static inline uint32_t WIN_W = 1280, WIN_H = 720; @@ -122,15 +121,11 @@ class RayQueryGeometryApp final : public examples::SimpleWindowedApplication, pu return logFail("Could not create HDR Image"); auto assetManager = make_smart_refctd_ptr(smart_refctd_ptr(system)); - auto* geometryCreator = assetManager->getGeometryCreator(); auto cQueue = getComputeQueue(); - // create geometry objects - if (!createGeometries(gQueue, geometryCreator)) - return logFail("Could not create geometries from geometry creator"); - // create blas/tlas + renderDs = //#define TRY_BUILD_FOR_NGFX // Validation errors on the fake Acquire-Presents, TODO fix #ifdef TRY_BUILD_FOR_NGFX // Nsight is special and can't do debugger delay so you can debug your CPU stuff during a capture @@ -142,11 +137,12 @@ class RayQueryGeometryApp final : public examples::SimpleWindowedApplication, pu std::this_thread::yield(); } // Nsight is special and can't capture anything not on the queue that performs the swapchain acquire/release - if (!createAccelerationStructures(gQueue)) + createAccelerationStructureDS(gQueue); #else - if (!createAccelerationStructures(cQueue)) + createAccelerationStructureDS(cQueue); #endif - return logFail("Could not create acceleration structures"); + if (!renderDs) + return logFail("Could not create acceleration structures and descriptor set"); // create pipelines { @@ -164,67 +160,38 @@ class RayQueryGeometryApp final : public examples::SimpleWindowedApplication, pu const auto assets = bundle.getContents(); assert(assets.size() == 1); - smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); - shaderSrc->setShaderStage(IShader::E_SHADER_STAGE::ESS_COMPUTE); - auto shader = m_device->createShader(shaderSrc.get()); + smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); + auto shader = m_device->compileShader({shaderSrc.get()}); if (!shader) return logFail("Failed to create shader!"); - // descriptors - IGPUDescriptorSetLayout::SBinding bindings[] = { - { - .binding = 0, - .type = asset::IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1, - }, - { - 
.binding = 1, - .type = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, - .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_COMPUTE, - .count = 1, - } - }; - auto descriptorSetLayout = m_device->createDescriptorSetLayout(bindings); - - const std::array dsLayoutPtrs = { descriptorSetLayout.get() }; - renderPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, std::span(dsLayoutPtrs.begin(), dsLayoutPtrs.end())); - if (!renderPool) - return logFail("Could not create descriptor pool"); - renderDs = renderPool->createDescriptorSet(descriptorSetLayout); - if (!renderDs) - return logFail("Could not create descriptor set"); - SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_COMPUTE, .offset = 0u, .size = sizeof(SPushConstants)}; - auto pipelineLayout = m_device->createPipelineLayout({ &pcRange, 1 }, smart_refctd_ptr(descriptorSetLayout), nullptr, nullptr, nullptr); + auto pipelineLayout = m_device->createPipelineLayout({ &pcRange, 1 }, smart_refctd_ptr(renderDs->getLayout()), nullptr, nullptr, nullptr); IGPUComputePipeline::SCreationParams params = {}; params.layout = pipelineLayout.get(); params.shader.shader = shader.get(); + params.shader.entryPoint = "main"; if (!m_device->createComputePipelines(nullptr, { ¶ms, 1 }, &renderPipeline)) return logFail("Failed to create compute pipeline"); } // write descriptors - IGPUDescriptorSet::SDescriptorInfo infos[2]; - infos[0].desc = gpuTlas; - infos[1].desc = m_device->createImageView({ - .flags = IGPUImageView::ECF_NONE, - .subUsages = IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT, - .image = outHDRImage, - .viewType = IGPUImageView::E_TYPE::ET_2D, - .format = asset::EF_R16G16B16A16_SFLOAT - }); - if (!infos[1].desc) - return logFail("Failed to create image view"); - infos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; - IGPUDescriptorSet::SWriteDescriptorSet writes[3] = { - {.dstSet = renderDs.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = &infos[0]}, - {.dstSet = renderDs.get(), .binding = 1, .arrayElement = 0, .count = 1, .info = &infos[1]} - }; - m_device->updateDescriptorSets(std::span(writes, 2), {}); + { + IGPUDescriptorSet::SDescriptorInfo info = {}; + info.desc = m_device->createImageView({ + .flags = IGPUImageView::ECF_NONE, + .subUsages = IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT, + .image = outHDRImage, + .viewType = IGPUImageView::E_TYPE::ET_2D, + .format = asset::EF_R16G16B16A16_SFLOAT + }); + if (!info.desc) + return logFail("Failed to create image view"); + info.info.image.imageLayout = IImage::LAYOUT::GENERAL; + const IGPUDescriptorSet::SWriteDescriptorSet write = {.dstSet=renderDs.get(), .binding=1, .arrayElement=0, .count=1, .info=&info}; + m_device->updateDescriptorSets({&write,1}, {}); + } // camera { @@ -281,7 +248,6 @@ class RayQueryGeometryApp final : public examples::SimpleWindowedApplication, pu static bool first = true; if (first) { - m_api->startCapture(); first = false; } @@ -291,11 +257,9 @@ class RayQueryGeometryApp final : public examples::SimpleWindowedApplication, pu cmdbuf->beginDebugMarker("RayQueryGeometryApp Frame"); { camera.beginInputProcessing(nextPresentationTimestamp); - mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); mouseProcess(events); }, m_logger.get()); + mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); }, m_logger.get()); 
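Similarly, the pipeline-creation change above (layout taken from the converter-created descriptor set, plus the now-explicit entry point) assembles into roughly the sketch below. It is an illustration rather than part of the patch; member names are the ones used in the surrounding hunks, and the template argument on the descriptor-set-layout smart pointer is an assumption since the patch text omits it.

    // Sketch of the "after" compute pipeline setup; assumes the RayQueryGeometryApp members shown
    // above (m_device, renderDs, renderPipeline), the `shader` created just before, and SPushConstants.
    const asset::SPushConstantRange pcRange = {
        .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_COMPUTE,
        .offset = 0u,
        .size = sizeof(SPushConstants)
    };
    // reuse the layout of the set the asset converter already wrote the TLAS into (binding 0),
    // instead of hand-rolling a separate layout, pool and set as the removed code did
    auto pipelineLayout = m_device->createPipelineLayout(
        {&pcRange, 1},
        core::smart_refctd_ptr<const video::IGPUDescriptorSetLayout>(renderDs->getLayout()),
        nullptr, nullptr, nullptr);

    video::IGPUComputePipeline::SCreationParams params = {};
    params.layout = pipelineLayout.get();
    params.shader.shader = shader.get();
    params.shader.entryPoint = "main"; // the entry point is now named explicitly
    if (!m_device->createComputePipelines(nullptr, {&params, 1}, &renderPipeline))
        return logFail("Failed to create compute pipeline");
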
keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void { camera.keyboardProcess(events); }, m_logger.get()); camera.endInputProcessing(nextPresentationTimestamp); - - const auto type = static_cast(gcIndex); } const auto viewMatrix = camera.getViewMatrix(); @@ -315,11 +279,11 @@ class RayQueryGeometryApp final : public examples::SimpleWindowedApplication, pu { IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[1]; imageBarriers[0].barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, - .srcAccessMask = ACCESS_FLAGS::NONE, - .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS } }; imageBarriers[0].image = outHDRImage.get(); @@ -355,11 +319,11 @@ class RayQueryGeometryApp final : public examples::SimpleWindowedApplication, pu { IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[2]; imageBarriers[0].barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, - .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, - .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, - .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT } }; imageBarriers[0].image = outHDRImage.get(); @@ -374,11 +338,11 @@ class RayQueryGeometryApp final : public examples::SimpleWindowedApplication, pu imageBarriers[0].newLayout = IImage::LAYOUT::TRANSFER_SRC_OPTIMAL; imageBarriers[1].barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, - .srcAccessMask = ACCESS_FLAGS::NONE, - .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, - .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .dstAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT } }; imageBarriers[1].image = m_surface->getSwapchainResources()->getImage(m_currentImageAcquire.imageIndex); @@ -420,11 +384,11 @@ class RayQueryGeometryApp final : public examples::SimpleWindowedApplication, pu { IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[1]; imageBarriers[0].barrier = { - .dep = { - .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, - .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, - .dstStageMask = PIPELINE_STAGE_FLAGS::NONE, - .dstAccessMask = ACCESS_FLAGS::NONE + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::BLIT_BIT, + .srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = PIPELINE_STAGE_FLAGS::NONE, + .dstAccessMask = ACCESS_FLAGS::NONE } }; imageBarriers[0].image = m_surface->getSwapchainResources()->getImage(m_currentImageAcquire.imageIndex); @@ -520,249 +484,276 @@ class RayQueryGeometryApp final : public examples::SimpleWindowedApplication, pu return (dim + size - 1) / size; } - smart_refctd_ptr createBuffer(IGPUBuffer::SCreationParams& params) - { - smart_refctd_ptr buffer; - buffer = m_device->createBuffer(std::move(params)); - auto bufReqs = buffer->getMemoryReqs(); - bufReqs.memoryTypeBits &= m_physicalDevice->getDeviceLocalMemoryTypeBits(); - 
m_device->allocate(bufReqs, buffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); - - return buffer; - } - - smart_refctd_ptr getSingleUseCommandBufferAndBegin(smart_refctd_ptr pool) + smart_refctd_ptr createAccelerationStructureDS(video::CThreadSafeQueueAdapter* queue) { - smart_refctd_ptr cmdbuf; - if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, 1u, &cmdbuf)) - return nullptr; + using namespace nbl::scene; - cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); - cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + // triangles geometries + auto gc = make_smart_refctd_ptr(); - return cmdbuf; - } + auto transform_i = 0; + auto nextTransform = [&transform_i]() + { + core::matrix3x4SIMD transform; + transform.setTranslation(nbl::core::vectorSIMDf(5.f * transform_i, 0, 0, 0)); + transform_i++; + return transform; + }; - void cmdbufSubmitAndWait(smart_refctd_ptr cmdbuf, CThreadSafeQueueAdapter* queue, uint64_t startValue) - { - cmdbuf->end(); + std::vector cpuObjects; + cpuObjects.push_back(ReferenceObjectCpu{ .transform = nextTransform(), .data = gc->createArrow() }); + cpuObjects.push_back(ReferenceObjectCpu{ .transform = nextTransform(), .data = CPolygonGeometryManipulator::createTriangleListIndexing(gc->createDisk(1.0f, 12).get()) }); + cpuObjects.push_back(ReferenceObjectCpu{ .transform = nextTransform(), .data = gc->createCube({1.f, 1.f, 1.f})}); + cpuObjects.push_back(ReferenceObjectCpu{ .transform = nextTransform(), .data = gc->createSphere(2, 16, 16)}); + cpuObjects.push_back(ReferenceObjectCpu{ .transform = nextTransform(), .data = gc->createCylinder(2, 2, 20)}); + cpuObjects.push_back(ReferenceObjectCpu{ .transform = nextTransform(), .data = gc->createRectangle({1.5, 3})}); + cpuObjects.push_back(ReferenceObjectCpu{ .transform = nextTransform(), .data = gc->createCone(2, 3, 10)}); + cpuObjects.push_back(ReferenceObjectCpu{ .transform = nextTransform(), .data = gc->createIcoSphere(1, 3, true)}); + + const auto geometryCount = [&cpuObjects] + { + size_t count = 0; + for (auto& cpuObject: cpuObjects) + { + const auto data = cpuObject.data; + cpuObject.instanceID = count; + if (std::holds_alternative(data)) + { + count += 1; + } else if (std::holds_alternative(data)) + { + const auto colData = std::get(data); + count += colData->getGeometries()->size(); + } + } + return count; + }(); + + auto geomInfoBuffer = ICPUBuffer::create({ geometryCount * sizeof(SGeomInfo) }); - uint64_t finishedValue = startValue + 1; + SGeomInfo* geomInfos = reinterpret_cast(geomInfoBuffer->getPointer()); - // submit builds + // get ICPUBuffers into ICPUBottomLevelAccelerationStructures + std::vector> cpuBlas(cpuObjects.size()); + for (uint32_t blas_i = 0; blas_i < cpuBlas.size(); blas_i++) { - auto completed = m_device->createSemaphore(startValue); - - std::array signals; + auto& blas = cpuBlas[blas_i]; + blas = make_smart_refctd_ptr(); + if (std::holds_alternative(cpuObjects[blas_i].data)) { - auto& signal = signals.front(); - signal.value = finishedValue; - signal.stageMask = bitflag(PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS); - signal.semaphore = completed.get(); - } + const auto data = std::get(cpuObjects[blas_i].data); - const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[1] = { { - .cmdbuf = cmdbuf.get() - } }; + auto triangles = make_refctd_dynamic_array>>(1u); + auto primitiveCounts = make_refctd_dynamic_array>(1u); - const IQueue::SSubmitInfo infos[] = - { - { - .waitSemaphores = {}, - .commandBuffers = commandBuffers, - 
.signalSemaphores = signals - } - }; + auto& tri = triangles->front(); - if (queue->submit(infos) != IQueue::RESULT::SUCCESS) - { - m_logger->log("Failed to submit geometry transfer upload operations!", ILogger::ELL_ERROR); - return; - } + auto& primCount = primitiveCounts->front(); + primCount = data->getPrimitiveCount(); - const ISemaphore::SWaitInfo info[] = - { { - .semaphore = completed.get(), - .value = finishedValue - } }; + tri = data->exportForBLAS(); - m_device->blockForSemaphores(info); - } - } + blas->setGeometries(std::move(triangles), std::move(primitiveCounts)); - bool createGeometries(video::CThreadSafeQueueAdapter* queue, const IGeometryCreator* gc) - { - auto pool = m_device->createCommandPool(queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - if (!pool) - return logFail("Couldn't create Command Pool for geometry creation!"); - - std::array objectsCpu; - objectsCpu[OT_CUBE] = ReferenceObjectCpu{ .meta = {.type = OT_CUBE, .name = "Cube Mesh" }, .shadersType = GP_BASIC, .data = gc->createCubeMesh(nbl::core::vector3df(1.f, 1.f, 1.f)) }; - objectsCpu[OT_SPHERE] = ReferenceObjectCpu{ .meta = {.type = OT_SPHERE, .name = "Sphere Mesh" }, .shadersType = GP_BASIC, .data = gc->createSphereMesh(2, 16, 16) }; - objectsCpu[OT_CYLINDER] = ReferenceObjectCpu{ .meta = {.type = OT_CYLINDER, .name = "Cylinder Mesh" }, .shadersType = GP_BASIC, .data = gc->createCylinderMesh(2, 2, 20) }; - objectsCpu[OT_RECTANGLE] = ReferenceObjectCpu{ .meta = {.type = OT_RECTANGLE, .name = "Rectangle Mesh" }, .shadersType = GP_BASIC, .data = gc->createRectangleMesh(nbl::core::vector2df_SIMD(1.5, 3)) }; - objectsCpu[OT_DISK] = ReferenceObjectCpu{ .meta = {.type = OT_DISK, .name = "Disk Mesh" }, .shadersType = GP_BASIC, .data = gc->createDiskMesh(2, 30) }; - objectsCpu[OT_ARROW] = ReferenceObjectCpu{ .meta = {.type = OT_ARROW, .name = "Arrow Mesh" }, .shadersType = GP_BASIC, .data = gc->createArrowMesh() }; - objectsCpu[OT_CONE] = ReferenceObjectCpu{ .meta = {.type = OT_CONE, .name = "Cone Mesh" }, .shadersType = GP_CONE, .data = gc->createConeMesh(2, 3, 10) }; - objectsCpu[OT_ICOSPHERE] = ReferenceObjectCpu{ .meta = {.type = OT_ICOSPHERE, .name = "Icosphere Mesh" }, .shadersType = GP_ICO, .data = gc->createIcoSphere(1, 3, true) }; - - struct ScratchVIBindings - { - nbl::asset::SBufferBinding vertex, index; - }; - std::array scratchBuffers; - //std::array geomInfos; - auto geomInfoBuffer = ICPUBuffer::create({ OT_COUNT * sizeof(SGeomInfo) }); - - SGeomInfo* geomInfos = reinterpret_cast(geomInfoBuffer->getPointer()); - const uint32_t byteOffsets[OT_COUNT] = { 18, 24, 24, 20, 20, 24, 16, 12 }; // based on normals data position - const uint32_t smoothNormals[OT_COUNT] = { 0, 1, 1, 0, 0, 1, 1, 1 }; - - for (uint32_t i = 0; i < objectsCpu.size(); i++) - { - const auto& geom = objectsCpu[i]; - auto& obj = objectsGpu[i]; - auto& scratchObj = scratchBuffers[i]; + + } else if (std::holds_alternative(cpuObjects[blas_i].data)) + { + + const auto data = std::get(cpuObjects[blas_i].data); - obj.meta.name = geom.meta.name; - obj.meta.type = geom.meta.type; + const auto& geometries = *data->getGeometries(); + const auto geometryCount = geometries.size(); - obj.indexCount = geom.data.indexCount; - obj.indexType = geom.data.indexType; - obj.vertexStride = geom.data.inputParams.bindings[0].stride; + auto triangles = make_refctd_dynamic_array>>(geometryCount); + auto primitiveCounts = make_refctd_dynamic_array>(geometryCount); - geomInfos[i].indexType = obj.indexType; - 
geomInfos[i].vertexStride = obj.vertexStride; - geomInfos[i].smoothNormals = smoothNormals[i]; + for (auto geometry_i = 0u; geometry_i < geometryCount; geometry_i++) + { + const auto& geometry = geometries[geometry_i]; + const auto* polyGeo = static_cast(geometry.geometry.get()); + primitiveCounts->operator[](geometry_i) = polyGeo->getPrimitiveCount(); + auto& triangle = triangles->operator[](geometry_i); + triangle = polyGeo->exportForBLAS(); + if (geometry.hasTransform()) + triangle.transform = geometry.transform; + } - auto vBuffer = smart_refctd_ptr(geom.data.bindings[0].buffer); // no offset - auto vUsage = bitflag(IGPUBuffer::EUF_STORAGE_BUFFER_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | - IGPUBuffer::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - obj.bindings.vertex.offset = 0u; + blas->setGeometries(std::move(triangles), std::move(primitiveCounts)); - auto iBuffer = smart_refctd_ptr(geom.data.indexBuffer.buffer); // no offset - auto iUsage = bitflag(IGPUBuffer::EUF_STORAGE_BUFFER_BIT) | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | - IGPUBuffer::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - obj.bindings.index.offset = 0u; + } + auto blasFlags = bitflag(IGPUBottomLevelAccelerationStructure::BUILD_FLAGS::PREFER_FAST_TRACE_BIT) | IGPUBottomLevelAccelerationStructure::BUILD_FLAGS::ALLOW_COMPACTION_BIT; + if (m_physicalDevice->getProperties().limits.rayTracingPositionFetch) + blasFlags |= IGPUBottomLevelAccelerationStructure::BUILD_FLAGS::ALLOW_DATA_ACCESS; - vBuffer->addUsageFlags(vUsage); - vBuffer->setContentHash(vBuffer->computeContentHash()); - scratchObj.vertex = { .offset = 0, .buffer = vBuffer }; + blas->setBuildFlags(blasFlags); + blas->setContentHash(blas->computeContentHash()); + } - if (geom.data.indexType != EIT_UNKNOWN) - if (iBuffer) - { - iBuffer->addUsageFlags(iUsage); - iBuffer->setContentHash(iBuffer->computeContentHash()); - } - scratchObj.index = { .offset = 0, .buffer = iBuffer }; + // get ICPUBottomLevelAccelerationStructure into ICPUTopLevelAccelerationStructure + auto geomInstances = make_refctd_dynamic_array>(cpuObjects.size()); + { + uint32_t i = 0; + for (auto instance = geomInstances->begin(); instance != geomInstances->end(); instance++, i++) + { + ICPUTopLevelAccelerationStructure::StaticInstance inst; + inst.base.blas = cpuBlas[i]; + inst.base.flags = static_cast(IGPUTopLevelAccelerationStructure::INSTANCE_FLAGS::TRIANGLE_FACING_CULL_DISABLE_BIT); + inst.base.instanceCustomIndex = cpuObjects[i].instanceID; + inst.base.instanceShaderBindingTableRecordOffset = 0; + inst.base.mask = 0xFF; + inst.transform = cpuObjects[i].transform; + instance->instance = inst; + } } - auto cmdbuf = getSingleUseCommandBufferAndBegin(pool); - cmdbuf->beginDebugMarker("Build geometry vertex and index buffers"); + auto cpuTlas = make_smart_refctd_ptr(); + cpuTlas->setInstances(std::move(geomInstances)); + cpuTlas->setBuildFlags(IGPUTopLevelAccelerationStructure::BUILD_FLAGS::PREFER_FAST_TRACE_BIT); + + // descriptor set and layout + ICPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0, + .type = asset::IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE, + .createFlags = IDescriptorSetLayoutBase::SBindingBase::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1, + }, + { + .binding = 1, + .type = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + 
.createFlags = IDescriptorSetLayoutBase::SBindingBase::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_COMPUTE, + .count = 1, + } + }; + auto descriptorSet = core::make_smart_refctd_ptr(core::make_smart_refctd_ptr(bindings)); + descriptorSet->getDescriptorInfos(IDescriptorSetLayoutBase::CBindingRedirect::binding_number_t{0},IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE).front().desc = cpuTlas; +//#define TEST_REBAR_FALLBACK + // convert with asset converter smart_refctd_ptr converter = CAssetConverter::create({ .device = m_device.get(), .optimizer = {} }); - CAssetConverter::SInputs inputs = {}; - inputs.logger = m_logger.get(); - - std::array tmpBuffers; + struct MyInputs : CAssetConverter::SInputs { - for (uint32_t i = 0; i < objectsCpu.size(); i++) +#ifndef TEST_REBAR_FALLBACK + inline uint32_t constrainMemoryTypeBits(const size_t groupCopyID, const IAsset* canonicalAsset, const blake3_hash_t& contentHash, const IDeviceMemoryBacked* memoryBacked) const override { - tmpBuffers[2 * i + 0] = scratchBuffers[i].vertex.buffer.get(); - tmpBuffers[2 * i + 1] = scratchBuffers[i].index.buffer.get(); + assert(memoryBacked); + return memoryBacked->getObjectType()!=IDeviceMemoryBacked::EOT_BUFFER ? (~0u):rebarMemoryTypes; } +#endif + uint32_t rebarMemoryTypes; + } inputs = {}; + inputs.logger = m_logger.get(); + inputs.rebarMemoryTypes = m_physicalDevice->getDirectVRAMAccessMemoryTypeBits(); +#ifndef TEST_REBAR_FALLBACK + struct MyAllocator final : public IDeviceMemoryAllocator + { + ILogicalDevice* getDeviceForAllocations() const override {return device;} - std::get>(inputs.assets) = tmpBuffers; - } + SAllocation allocate(const SAllocateInfo& info) override + { + auto retval = device->allocate(info); + // map what is mappable by default so ReBAR checks succeed + if (retval.isValid() && retval.memory->isMappable()) + retval.memory->map({.offset=0,.length=info.size}); + return retval; + } - auto reservation = converter->reserve(inputs); + ILogicalDevice* device; + } myalloc; + myalloc.device = m_device.get(); + inputs.allocator = &myalloc; +#endif + + CAssetConverter::patch_t tlasPatch = {}; + tlasPatch.compactAfterBuild = true; + std::vector> tmpBLASPatches(cpuObjects.size()); + std::vector tmpGeometries; + tmpGeometries.reserve(geometryCount); + std::vector> tmpGeometryPatches; + tmpGeometryPatches.reserve(geometryCount); { - auto prepass = [&](const auto & references) -> bool + tmpBLASPatches.front().compactAfterBuild = true; + std::fill(tmpBLASPatches.begin(),tmpBLASPatches.end(),tmpBLASPatches.front()); + // + for (uint32_t i = 0; i < cpuObjects.size(); i++) { - auto objects = reservation.getGPUObjects(); - uint32_t counter = {}; - for (auto& object : objects) + const auto data = cpuObjects[i].data; + if (std::holds_alternative(data)) { - auto gpu = object.value; - auto* reference = references[counter]; - - if (reference) + const auto polygonData = std::get(data); + tmpGeometries.push_back(polygonData.get()); + tmpGeometryPatches.push_back({}); + tmpGeometryPatches.back().indexBufferUsages = IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT; + } else if (std::holds_alternative(data)) + { + const auto collectionData = std::get(data); + for (const auto& geometryRef : *collectionData->getGeometries()) { - if (!gpu) - { - m_logger->log("Failed to convert a CPU object to GPU!", ILogger::ELL_ERROR); - return false; - } + auto* polyGeo = static_cast(geometryRef.geometry.get()); + tmpGeometries.push_back(polyGeo); + tmpGeometryPatches.push_back({}); + 
tmpGeometryPatches.back().indexBufferUsages = IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT; } - counter++; } - return true; - }; - - prepass.template operator() < ICPUBuffer > (tmpBuffers); + } + assert(tmpGeometries.size() == geometryCount); + assert(tmpGeometryPatches.size() == geometryCount); + + std::get>(inputs.assets) = {&descriptorSet.get(),1}; + std::get>(inputs.assets) = {&cpuTlas.get(),1}; + std::get>(inputs.patches) = {&tlasPatch,1}; + std::get>(inputs.assets) = {&cpuBlas.data()->get(),cpuBlas.size()}; + std::get>(inputs.patches) = tmpBLASPatches; + std::get>(inputs.assets) = tmpGeometries; + std::get>(inputs.patches) = tmpGeometryPatches; } - // not sure if need this (probably not, originally for transition img view) - auto semaphore = m_device->createSemaphore(0u); - - std::array cmdbufs = {}; - cmdbufs.front().cmdbuf = cmdbuf.get(); + auto reservation = converter->reserve(inputs); + constexpr auto XferBufferCount = 2; + std::array,XferBufferCount> xferBufs = {}; + std::array xferBufInfos = {}; + { + auto pool = m_device->createCommandPool(getTransferUpQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT | IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,xferBufs); + xferBufs.front()->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + for (auto i=0; icreateSemaphore(0u); + xferSema->setObjectDebugName("Transfer Semaphore"); SIntendedSubmitInfo transfer = {}; - transfer.queue = queue; - transfer.scratchCommandBuffers = cmdbufs; + transfer.queue = getTransferUpQueue(); + transfer.scratchCommandBuffers = xferBufInfos; transfer.scratchSemaphore = { - .semaphore = semaphore.get(), + .semaphore = xferSema.get(), .value = 0u, .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS }; - // convert - { - CAssetConverter::SConvertParams params = {}; - params.utilities = m_utils.get(); - params.transfer = &transfer; - - auto future = reservation.convert(params); - if (future.copy() != IQueue::RESULT::SUCCESS) - { - m_logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); - return false; - } - - // assign gpu objects to output - auto&& buffers = reservation.getGPUObjects(); - for (uint32_t i = 0; i < objectsCpu.size(); i++) - { - auto& obj = objectsGpu[i]; - obj.bindings.vertex = { .offset = 0, .buffer = buffers[2 * i + 0].value }; - obj.bindings.index = { .offset = 0, .buffer = buffers[2 * i + 1].value }; - - geomInfos[i].vertexBufferAddress = obj.bindings.vertex.buffer->getDeviceAddress() + byteOffsets[i]; - geomInfos[i].indexBufferAddress = obj.useIndex() ? 
obj.bindings.index.buffer->getDeviceAddress() : geomInfos[i].vertexBufferAddress; - } - } - + + constexpr auto CompBufferCount = 2; + std::array,CompBufferCount> compBufs = {}; + std::array compBufInfos = {}; { - IGPUBuffer::SCreationParams params; - params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - params.size = OT_COUNT * sizeof(SGeomInfo); - m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{.queue = queue}, std::move(params), geomInfos).move_into(geometryInfoBuffer); + auto pool = m_device->createCommandPool(getComputeQueue()->getFamilyIndex(),IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT|IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,compBufs); + compBufs.front()->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + for (auto i=0; i queryPool = m_device->createQueryPool(std::move(qParams)); - - auto pool = m_device->createCommandPool(queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT | IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); - if (!pool) - return logFail("Couldn't create Command Pool for blas/tlas creation!"); - - m_api->startCapture(); + auto compSema = m_device->createSemaphore(0u); + compSema->setObjectDebugName("Compute Semaphore"); + SIntendedSubmitInfo compute = {}; + compute.queue = getComputeQueue(); + compute.scratchCommandBuffers = compBufInfos; + compute.scratchSemaphore = { + .semaphore = compSema.get(), + .value = 0u, + .stageMask = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT|PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT + }; + // convert #ifdef TRY_BUILD_FOR_NGFX // NSight is "debugger-challenged" it can't capture anything not happenning "during a frame", so we need to trick it m_currentImageAcquire = m_surface->acquireNextImage(); { @@ -775,273 +766,187 @@ class RayQueryGeometryApp final : public examples::SimpleWindowedApplication, pu } m_currentImageAcquire = m_surface->acquireNextImage(); #endif - size_t totalScratchSize = 0; - const auto scratchOffsetAlignment = m_device->getPhysicalDevice()->getLimits().minAccelerationStructureScratchOffsetAlignment; - - // build bottom level ASes + m_api->startCapture(); + auto gQueue = getGraphicsQueue(); { - IGPUBottomLevelAccelerationStructure::DeviceBuildInfo blasBuildInfos[OT_COUNT]; - uint32_t primitiveCounts[OT_COUNT]; - IGPUBottomLevelAccelerationStructure::Triangles triangles[OT_COUNT]; - uint32_t scratchSizes[OT_COUNT]; - - for (uint32_t i = 0; i < objectsGpu.size(); i++) + smart_refctd_ptr scratchAlloc; { - const auto& obj = objectsGpu[i]; - - const uint32_t vertexStride = obj.vertexStride; - const uint32_t numVertices = obj.bindings.vertex.buffer->getSize() / vertexStride; - if (obj.useIndex()) - primitiveCounts[i] = obj.indexCount / 3; - else - primitiveCounts[i] = numVertices / 3; - - triangles[i].vertexData[0] = obj.bindings.vertex; - triangles[i].indexData = obj.useIndex() ? 
obj.bindings.index : obj.bindings.vertex; - triangles[i].maxVertex = numVertices - 1; - triangles[i].vertexStride = vertexStride; - triangles[i].vertexFormat = EF_R32G32B32_SFLOAT; - triangles[i].indexType = obj.indexType; - triangles[i].geometryFlags = IGPUBottomLevelAccelerationStructure::GEOMETRY_FLAGS::OPAQUE_BIT; - - auto blasFlags = bitflag(IGPUBottomLevelAccelerationStructure::BUILD_FLAGS::PREFER_FAST_TRACE_BIT) | IGPUBottomLevelAccelerationStructure::BUILD_FLAGS::ALLOW_COMPACTION_BIT; - if (m_physicalDevice->getProperties().limits.rayTracingPositionFetch) - blasFlags |= IGPUBottomLevelAccelerationStructure::BUILD_FLAGS::ALLOW_DATA_ACCESS_KHR; - - blasBuildInfos[i].buildFlags = blasFlags; - blasBuildInfos[i].geometryCount = 1; // only 1 geometry object per blas - blasBuildInfos[i].srcAS = nullptr; - blasBuildInfos[i].dstAS = nullptr; - blasBuildInfos[i].triangles = &triangles[i]; - blasBuildInfos[i].scratch = {}; - - ILogicalDevice::AccelerationStructureBuildSizes buildSizes; - { - const uint32_t maxPrimCount[1] = { primitiveCounts[i] }; - buildSizes = m_device->getAccelerationStructureBuildSizes(blasFlags, false, std::span{&triangles[i], 1}, maxPrimCount); - if (!buildSizes) - return logFail("Failed to get BLAS build sizes"); - } - - scratchSizes[i] = buildSizes.buildScratchSize; - totalScratchSize = core::alignUp(totalScratchSize, scratchOffsetAlignment); - totalScratchSize += buildSizes.buildScratchSize; - - { - IGPUBuffer::SCreationParams params; - params.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT; - params.size = buildSizes.accelerationStructureSize; - smart_refctd_ptr asBuffer = createBuffer(params); - - IGPUBottomLevelAccelerationStructure::SCreationParams blasParams; - blasParams.bufferRange.buffer = asBuffer; - blasParams.bufferRange.offset = 0u; - blasParams.bufferRange.size = buildSizes.accelerationStructureSize; - blasParams.flags = IGPUBottomLevelAccelerationStructure::SCreationParams::FLAGS::NONE; - gpuBlas[i] = m_device->createBottomLevelAccelerationStructure(std::move(blasParams)); - if (!gpuBlas[i]) - return logFail("Could not create BLAS"); - } - } - - auto cmdbufBlas = getSingleUseCommandBufferAndBegin(pool); - cmdbufBlas->beginDebugMarker("Build BLAS"); + constexpr auto MaxAlignment = 256; + constexpr auto MinAllocationSize = 1024; + const auto scratchSize = core::alignUp(reservation.getMinASBuildScratchSize(false),MaxAlignment); + + + IGPUBuffer::SCreationParams creationParams = {}; + creationParams.size = scratchSize; + creationParams.usage = IGPUBuffer::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT|IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT|IGPUBuffer::EUF_STORAGE_BUFFER_BIT; +#ifdef TEST_REBAR_FALLBACK + creationParams.usage |= IGPUBuffer::EUF_TRANSFER_DST_BIT; + core::unordered_set sharingSet = {compute.queue->getFamilyIndex(),transfer.queue->getFamilyIndex()}; + core::vector sharingIndices(sharingSet.begin(),sharingSet.end()); + if (sharingIndices.size()>1) + creationParams.queueFamilyIndexCount = sharingIndices.size(); + creationParams.queueFamilyIndices = sharingIndices.data(); +#endif + auto scratchBuffer = m_device->createBuffer(std::move(creationParams)); - cmdbufBlas->resetQueryPool(queryPool.get(), 0, objectsGpu.size()); + auto reqs = scratchBuffer->getMemoryReqs(); +#ifndef TEST_REBAR_FALLBACK + reqs.memoryTypeBits &= m_physicalDevice->getDirectVRAMAccessMemoryTypeBits(); +#endif + auto allocation = 
m_device->allocate(reqs,scratchBuffer.get(),IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); +#ifndef TEST_REBAR_FALLBACK + allocation.memory->map({.offset=0,.length=reqs.size}); +#endif - smart_refctd_ptr scratchBuffer; - { - IGPUBuffer::SCreationParams params; - params.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_STORAGE_BUFFER_BIT; - params.size = totalScratchSize; - scratchBuffer = createBuffer(params); + scratchAlloc = make_smart_refctd_ptr( + SBufferRange{0ull,scratchSize,std::move(scratchBuffer)}, + core::allocator(),MaxAlignment,MinAllocationSize + ); } - uint32_t queryCount = 0; - IGPUBottomLevelAccelerationStructure::BuildRangeInfo buildRangeInfos[OT_COUNT]; - IGPUBottomLevelAccelerationStructure::BuildRangeInfo* pRangeInfos[OT_COUNT]; - for (uint32_t i = 0; i < objectsGpu.size(); i++) + struct MyParams final : CAssetConverter::SConvertParams { - blasBuildInfos[i].dstAS = gpuBlas[i].get(); - blasBuildInfos[i].scratch.buffer = scratchBuffer; - if (i == 0) + inline uint32_t getFinalOwnerQueueFamily(const IGPUBuffer* buffer, const core::blake3_hash_t& createdFrom) override { - blasBuildInfos[i].scratch.offset = 0u; + return finalUser; } - else + inline uint32_t getFinalOwnerQueueFamily(const IGPUAccelerationStructure* image, const core::blake3_hash_t& createdFrom) override { - const auto unalignedOffset = blasBuildInfos[i - 1].scratch.offset + scratchSizes[i - 1]; - blasBuildInfos[i].scratch.offset = core::alignUp(unalignedOffset, scratchOffsetAlignment); + return finalUser; } - buildRangeInfos[i].primitiveCount = primitiveCounts[i]; - buildRangeInfos[i].primitiveByteOffset = 0u; - buildRangeInfos[i].firstVertex = 0u; - buildRangeInfos[i].transformByteOffset = 0u; - - pRangeInfos[i] = &buildRangeInfos[i]; - } - - if (!cmdbufBlas->buildAccelerationStructures({ blasBuildInfos, OT_COUNT }, pRangeInfos)) - return logFail("Failed to build BLAS"); + uint8_t finalUser; + } params = {}; + params.utilities = m_utils.get(); + params.transfer = &transfer; + params.compute = &compute; + params.scratchForDeviceASBuild = scratchAlloc.get(); + params.finalUser = gQueue->getFamilyIndex(); + auto future = reservation.convert(params); + if (future.copy() != IQueue::RESULT::SUCCESS) { - SMemoryBarrier memBarrier; - memBarrier.srcStageMask = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT; - memBarrier.srcAccessMask = ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT; - memBarrier.dstStageMask = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT; - memBarrier.dstAccessMask = ACCESS_FLAGS::ACCELERATION_STRUCTURE_READ_BIT; - cmdbufBlas->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .memBarriers = {&memBarrier, 1} }); + m_logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); + return {}; } - const IGPUAccelerationStructure* ases[OT_COUNT]; - for (uint32_t i = 0; i < objectsGpu.size(); i++) - ases[i] = gpuBlas[i].get(); - if (!cmdbufBlas->writeAccelerationStructureProperties({ ases, OT_COUNT }, IQueryPool::ACCELERATION_STRUCTURE_COMPACTED_SIZE, - queryPool.get(), queryCount++)) - return logFail("Failed to write acceleration structure properties!"); - - cmdbufBlas->endDebugMarker(); - cmdbufSubmitAndWait(cmdbufBlas, queue, 39); - } + auto&& tlases = reservation.getGPUObjects(); + m_gpuTlas = tlases[0].value; - auto cmdbufCompact = getSingleUseCommandBufferAndBegin(pool); - cmdbufCompact->beginDebugMarker("Compact BLAS"); - - // compact blas - { - std::array asSizes{ 0 }; - if (!m_device->getQueryPoolResults(queryPool.get(), 0, objectsGpu.size(), 
asSizes.data(), sizeof(size_t), IQueryPool::WAIT_BIT)) - return logFail("Could not get query pool results for AS sizes"); + auto&& gpuPolygonGeometries = reservation.getGPUObjects(); + m_gpuPolygons.resize(gpuPolygonGeometries.size()); - std::array, OT_COUNT> cleanupBlas; - for (uint32_t i = 0; i < objectsGpu.size(); i++) + // assign gpu objects to output + for (uint32_t i = 0; i < gpuPolygonGeometries.size(); i++) { - cleanupBlas[i] = gpuBlas[i]; - { - IGPUBuffer::SCreationParams params; - params.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT; - params.size = asSizes[i]; - smart_refctd_ptr asBuffer = createBuffer(params); - - IGPUBottomLevelAccelerationStructure::SCreationParams blasParams; - blasParams.bufferRange.buffer = asBuffer; - blasParams.bufferRange.offset = 0u; - blasParams.bufferRange.size = asSizes[i]; - blasParams.flags = IGPUBottomLevelAccelerationStructure::SCreationParams::FLAGS::NONE; - gpuBlas[i] = m_device->createBottomLevelAccelerationStructure(std::move(blasParams)); - if (!gpuBlas[i]) - return logFail("Could not create compacted BLAS"); - } - - IGPUBottomLevelAccelerationStructure::CopyInfo copyInfo; - copyInfo.src = cleanupBlas[i].get(); - copyInfo.dst = gpuBlas[i].get(); - copyInfo.mode = IGPUBottomLevelAccelerationStructure::COPY_MODE::COMPACT; - if (!cmdbufCompact->copyAccelerationStructure(copyInfo)) - return logFail("Failed to copy AS to compact"); + const auto& gpuPolygon = gpuPolygonGeometries[i].value; + const auto gpuTriangles = gpuPolygon->exportForBLAS(); + + const auto& vertexBufferBinding = gpuTriangles.vertexData[0]; + const uint64_t vertexBufferAddress = vertexBufferBinding.buffer->getDeviceAddress() + vertexBufferBinding.offset; + + const auto& normalView = gpuPolygon->getNormalView(); + const uint64_t normalBufferAddress = normalView ? normalView.src.buffer->getDeviceAddress() + normalView.src.offset : 0; + + auto normalType = NT_R32G32B32_SFLOAT; + if (normalView && normalView.composed.format == EF_R8G8B8A8_SNORM) + normalType = NT_R8G8B8A8_SNORM; + + const auto& indexBufferBinding = gpuTriangles.indexData; + auto& geomInfo = geomInfos[i]; + geomInfo = { + .vertexBufferAddress = vertexBufferAddress, + .indexBufferAddress = indexBufferBinding.buffer ? 
indexBufferBinding.buffer->getDeviceAddress() + indexBufferBinding.offset : vertexBufferAddress, + .normalBufferAddress = normalBufferAddress, + .normalType = normalType, + .indexType = gpuTriangles.indexType, + }; + + m_gpuPolygons[i] = gpuPolygon; } } - cmdbufCompact->endDebugMarker(); - cmdbufSubmitAndWait(cmdbufCompact, queue, 40); - - auto cmdbufTlas = getSingleUseCommandBufferAndBegin(pool); - cmdbufTlas->beginDebugMarker("Build TLAS"); - - // build top level AS + // { - const uint32_t instancesCount = objectsGpu.size(); - IGPUTopLevelAccelerationStructure::DeviceStaticInstance instances[OT_COUNT]; - for (uint32_t i = 0; i < instancesCount; i++) - { - core::matrix3x4SIMD transform; - transform.setTranslation(nbl::core::vectorSIMDf(5.f * i, 0, 0, 0)); - instances[i].base.blas.deviceAddress = gpuBlas[i]->getReferenceForDeviceOperations().deviceAddress; - instances[i].base.mask = 0xFF; - instances[i].base.instanceCustomIndex = i; - instances[i].base.instanceShaderBindingTableRecordOffset = 0; - instances[i].base.flags = static_cast(IGPUTopLevelAccelerationStructure::INSTANCE_FLAGS::TRIANGLE_FACING_CULL_DISABLE_BIT); - instances[i].transform = transform; - } - - { - size_t bufSize = instancesCount * sizeof(IGPUTopLevelAccelerationStructure::DeviceStaticInstance); - IGPUBuffer::SCreationParams params; - params.usage = bitflag(IGPUBuffer::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT) | IGPUBuffer::EUF_STORAGE_BUFFER_BIT | - IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; - params.size = bufSize; - instancesBuffer = createBuffer(params); - - SBufferRange range = { .offset = 0u, .size = bufSize, .buffer = instancesBuffer }; - cmdbufTlas->updateBuffer(range, instances); - } - - // make sure instances upload complete first - { - SMemoryBarrier memBarrier; - memBarrier.srcStageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS; - memBarrier.srcAccessMask = ACCESS_FLAGS::TRANSFER_WRITE_BIT; - memBarrier.dstStageMask = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT; - memBarrier.dstAccessMask = ACCESS_FLAGS::ACCELERATION_STRUCTURE_WRITE_BIT; - cmdbufTlas->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .memBarriers = {&memBarrier, 1} }); - } - - auto tlasFlags = bitflag(IGPUTopLevelAccelerationStructure::BUILD_FLAGS::PREFER_FAST_TRACE_BIT); - - IGPUTopLevelAccelerationStructure::DeviceBuildInfo tlasBuildInfo; - tlasBuildInfo.buildFlags = tlasFlags; - tlasBuildInfo.srcAS = nullptr; - tlasBuildInfo.dstAS = nullptr; - tlasBuildInfo.instanceData.buffer = instancesBuffer; - tlasBuildInfo.instanceData.offset = 0u; - tlasBuildInfo.scratch = {}; - - auto buildSizes = m_device->getAccelerationStructureBuildSizes(tlasFlags, false, instancesCount); - if (!buildSizes) - return logFail("Failed to get TLAS build sizes"); + IGPUBuffer::SCreationParams params; + params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + params.size = geometryCount * sizeof(SGeomInfo); + m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = gQueue }, std::move(params), geomInfos).move_into(geometryInfoBuffer); + } + // acquire ownership + { + smart_refctd_ptr cmdbuf; { - IGPUBuffer::SCreationParams params; - params.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_ACCELERATION_STRUCTURE_STORAGE_BIT; - params.size = buildSizes.accelerationStructureSize; - smart_refctd_ptr asBuffer = createBuffer(params); - - 
IGPUTopLevelAccelerationStructure::SCreationParams tlasParams; - tlasParams.bufferRange.buffer = asBuffer; - tlasParams.bufferRange.offset = 0u; - tlasParams.bufferRange.size = buildSizes.accelerationStructureSize; - tlasParams.flags = IGPUTopLevelAccelerationStructure::SCreationParams::FLAGS::NONE; - gpuTlas = m_device->createTopLevelAccelerationStructure(std::move(tlasParams)); - if (!gpuTlas) - return logFail("Could not create TLAS"); + const auto gQFI = gQueue->getFamilyIndex(); + m_device->createCommandPool(gQFI,IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT)->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,{&cmdbuf,1}); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + { + core::vector> bufBarriers; + auto acquireBufferRange = [&bufBarriers](const uint8_t otherQueueFamilyIndex, const SBufferRange& bufferRange) + { + bufBarriers.push_back({ + .barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::NONE, + .srcAccessMask = ACCESS_FLAGS::NONE, + .dstStageMask = PIPELINE_STAGE_FLAGS::COMPUTE_SHADER_BIT, + // we don't care what exactly, uncomplex our code + .dstAccessMask = ACCESS_FLAGS::SHADER_READ_BITS + }, + .ownershipOp = IGPUCommandBuffer::SOwnershipTransferBarrier::OWNERSHIP_OP::ACQUIRE, + .otherQueueFamilyIndex = otherQueueFamilyIndex + }, + .range = bufferRange + }); + }; +#ifdef TEST_REBAR_FALLBACK + if (const auto otherQueueFamilyIndex=transfer.queue->getFamilyIndex(); gQFI!=otherQueueFamilyIndex) + for (const auto& buffer : reservation.getGPUObjects()) + { + const auto& buff = buffer.value; + if (buff) + acquireBufferRange(otherQueueFamilyIndex,{.offset=0,.size=buff->getSize(),.buffer=buff}); + } +#endif + if (const auto otherQueueFamilyIndex=compute.queue->getFamilyIndex(); gQFI!=otherQueueFamilyIndex) + { + auto acquireAS = [&acquireBufferRange,otherQueueFamilyIndex](const IGPUAccelerationStructure* as) + { + acquireBufferRange(otherQueueFamilyIndex,as->getCreationParams().bufferRange); + }; + for (const auto& blas : reservation.getGPUObjects()) + acquireAS(blas.value.get()); + acquireAS(reservation.getGPUObjects().front().value.get()); + } + if (!bufBarriers.empty()) + cmdbuf->pipelineBarrier(asset::E_DEPENDENCY_FLAGS::EDF_NONE,{.memBarriers={},.bufBarriers=bufBarriers}); + } + cmdbuf->end(); } - - smart_refctd_ptr scratchBuffer; + if (!cmdbuf->empty()) { - IGPUBuffer::SCreationParams params; - params.usage = bitflag(IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT) | IGPUBuffer::EUF_STORAGE_BUFFER_BIT; - params.size = buildSizes.buildScratchSize; - scratchBuffer = createBuffer(params); + const IQueue::SSubmitInfo::SCommandBufferInfo cmdbufInfo = { + .cmdbuf = cmdbuf.get() + }; + const IQueue::SSubmitInfo::SSemaphoreInfo signal = { + .semaphore = compute.scratchSemaphore.semaphore, + .value = compute.getFutureScratchSemaphore().value, + .stageMask = asset::PIPELINE_STAGE_FLAGS::ALL_COMMANDS_BITS + }; + auto wait = signal; + wait.value--; + const IQueue::SSubmitInfo info = { + .waitSemaphores = {&wait,1}, // we already waited with the host on the AS build + .commandBuffers = {&cmdbufInfo,1}, + .signalSemaphores = {&signal,1} + }; + if (const auto retval=gQueue->submit({&info,1}); retval!=IQueue::RESULT::SUCCESS) + m_logger->log("Failed to transfer ownership with code %d!",system::ILogger::ELL_ERROR,retval); } - - tlasBuildInfo.dstAS = gpuTlas.get(); - tlasBuildInfo.scratch.buffer = scratchBuffer; - tlasBuildInfo.scratch.offset = 0u; - - IGPUTopLevelAccelerationStructure::BuildRangeInfo buildRangeInfo[1u]; - buildRangeInfo[0].instanceCount = 
instancesCount; - buildRangeInfo[0].instanceByteOffset = 0u; - IGPUTopLevelAccelerationStructure::BuildRangeInfo* pRangeInfos; - pRangeInfos = &buildRangeInfo[0]; - - if (!cmdbufTlas->buildAccelerationStructures({ &tlasBuildInfo, 1 }, pRangeInfos)) - return logFail("Failed to build TLAS"); } - - cmdbufTlas->endDebugMarker(); - cmdbufSubmitAndWait(cmdbufTlas, queue, 45); - +#undef TEST_REBAR_FALLBACK + #ifdef TRY_BUILD_FOR_NGFX { const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = { { @@ -1054,7 +959,7 @@ class RayQueryGeometryApp final : public examples::SimpleWindowedApplication, pu #endif m_api->endCapture(); - return true; + return reservation.getGPUObjects().front().value; } @@ -1072,31 +977,14 @@ class RayQueryGeometryApp final : public examples::SimpleWindowedApplication, pu Camera camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); video::CDumbPresentationOracle oracle; - std::array objectsGpu; - - std::array, OT_COUNT> gpuBlas; - smart_refctd_ptr gpuTlas; - smart_refctd_ptr instancesBuffer; - smart_refctd_ptr geometryInfoBuffer; smart_refctd_ptr outHDRImage; + core::vector> m_gpuPolygons; + smart_refctd_ptr m_gpuTlas; smart_refctd_ptr renderPipeline; smart_refctd_ptr renderDs; - smart_refctd_ptr renderPool; - - uint16_t gcIndex = {}; - void mouseProcess(const nbl::ui::IMouseEventChannel::range_t& events) - { - for (auto eventIt = events.begin(); eventIt != events.end(); eventIt++) - { - auto ev = *eventIt; - - if (ev.type == nbl::ui::SMouseEvent::EET_SCROLL) - gcIndex = std::clamp(int16_t(gcIndex) + int16_t(core::sign(ev.scrollEvent.verticalScroll)), int64_t(0), int64_t(OT_COUNT - (uint8_t)1u)); - } - } }; NBL_MAIN_FUNC(RayQueryGeometryApp) \ No newline at end of file diff --git a/68_JpegLoading/main.cpp b/68_JpegLoading/main.cpp index 5ef9b637d..663b40759 100644 --- a/68_JpegLoading/main.cpp +++ b/68_JpegLoading/main.cpp @@ -1,22 +1,26 @@ // Copyright (C) 2018-2024 - DevSH GrapMonoAssetManagerAndBuiltinResourceApplicationhics Programming Sp. z O.O. // This file is part of the "Nabla Engine". 
// For conditions of distribution and use, see copyright notice in nabla.h -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" + + +#include "nbl/examples/examples.hpp" #include #include "nlohmann/json.hpp" #include "argparse/argparse.hpp" + using json = nlohmann::json; using namespace nbl; -using namespace core; -using namespace hlsl; -using namespace system; -using namespace asset; -using namespace ui; -using namespace video; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; class ThreadPool { @@ -76,11 +80,11 @@ using task_t = std::function; std::atomic m_shouldStop = false; }; -class JpegLoaderApp final : public application_templates::MonoAssetManagerAndBuiltinResourceApplication +class JpegLoaderApp final : public BuiltinResourcesApplication { using clock_t = std::chrono::steady_clock; using clock_resolution_t = std::chrono::milliseconds; - using base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + using base_t = BuiltinResourcesApplication; public: using base_t::base_t; diff --git a/70_FLIPFluids/app_resources/compute/advectParticles.comp.hlsl b/70_FLIPFluids/app_resources/compute/advectParticles.comp.hlsl index 2d329ac85..64e94f262 100644 --- a/70_FLIPFluids/app_resources/compute/advectParticles.comp.hlsl +++ b/70_FLIPFluids/app_resources/compute/advectParticles.comp.hlsl @@ -26,6 +26,7 @@ using namespace nbl::hlsl; // TODO: delta time push constant? (but then for CI need a commandline `-fixed-timestep=MS` and `-frames=N` option too) [numthreads(WorkgroupSize, 1, 1)] +[shader("compute")] void main(uint32_t3 ID : SV_DispatchThreadID) { uint32_t pid = ID.x; diff --git a/70_FLIPFluids/app_resources/compute/applyBodyForces.comp.hlsl b/70_FLIPFluids/app_resources/compute/applyBodyForces.comp.hlsl index 8ffc5e821..b2c1e0b3f 100644 --- a/70_FLIPFluids/app_resources/compute/applyBodyForces.comp.hlsl +++ b/70_FLIPFluids/app_resources/compute/applyBodyForces.comp.hlsl @@ -14,6 +14,7 @@ cbuffer GridData // TODO: can this kernel be fused with any preceeding/succeeding it? 
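The recurring base-class rename in these files (MonoAssetManagerAndBuiltinResourceApplication becoming BuiltinResourcesApplication, mixed in next to a windowed or mono-device base) always follows the same skeleton; a bare-bones sketch is given below for orientation. Only the class, alias and constructor shapes visible in the patch are taken as given; the onAppInitialized signature and the base-chaining calls are assumptions about how these application templates are normally combined, not something this patch shows.

    // Skeleton of the mixin pattern the ported examples use (illustration only, not part of the patch).
    // Assumes the using-namespace declarations of the surrounding files.
    class SomeExampleApp final : public application_templates::MonoDeviceApplication,
                                 public nbl::examples::BuiltinResourcesApplication
    {
            using device_base_t = application_templates::MonoDeviceApplication;
            using asset_base_t = nbl::examples::BuiltinResourcesApplication;

        public:
            SomeExampleApp(const path& _localInputCWD, const path& _localOutputCWD,
                           const path& _sharedInputCWD, const path& _sharedOutputCWD)
                : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) {}

            bool onAppInitialized(core::smart_refctd_ptr<system::ISystem>&& system) override
            {
                // each mixin initializes its own subsystems (logical device, asset manager + builtin resources)
                if (!device_base_t::onAppInitialized(core::smart_refctd_ptr(system)))
                    return false;
                if (!asset_base_t::onAppInitialized(std::move(system)))
                    return false;
                // example-specific setup goes here
                return true;
            }
    };
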
[numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void main(uint32_t3 ID : SV_DispatchThreadID) { // only gravity for now diff --git a/70_FLIPFluids/app_resources/compute/diffusion.comp.hlsl b/70_FLIPFluids/app_resources/compute/diffusion.comp.hlsl index 43a57ed38..e53c91d2d 100644 --- a/70_FLIPFluids/app_resources/compute/diffusion.comp.hlsl +++ b/70_FLIPFluids/app_resources/compute/diffusion.comp.hlsl @@ -34,6 +34,7 @@ groupshared uint16_t3 sAxisCellMat[14][14][14]; // TODO: `uint16_t` per axis is groupshared float16_t3 sDiffusion[14][14][14]; [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void setAxisCellMaterial(uint32_t3 ID : SV_DispatchThreadID) { int3 cellIdx = ID; diff --git a/70_FLIPFluids/app_resources/compute/genParticleVertices.comp.hlsl b/70_FLIPFluids/app_resources/compute/genParticleVertices.comp.hlsl index b66db1ca2..4c4a76690 100644 --- a/70_FLIPFluids/app_resources/compute/genParticleVertices.comp.hlsl +++ b/70_FLIPFluids/app_resources/compute/genParticleVertices.comp.hlsl @@ -57,6 +57,7 @@ static const float2 quadUVs[4] = { using namespace nbl::hlsl; [numthreads(WorkgroupSize, 1, 1)] +[shader("compute")] void main(uint32_t3 ID : SV_DispatchThreadID) { uint32_t pid = ID.x; diff --git a/70_FLIPFluids/app_resources/compute/particlesInit.comp.hlsl b/70_FLIPFluids/app_resources/compute/particlesInit.comp.hlsl index 173929b10..27bf4366f 100644 --- a/70_FLIPFluids/app_resources/compute/particlesInit.comp.hlsl +++ b/70_FLIPFluids/app_resources/compute/particlesInit.comp.hlsl @@ -17,6 +17,7 @@ cbuffer GridData }; [numthreads(WorkgroupSize, 1, 1)] +[shader("compute")] void main(uint32_t3 ID : SV_DispatchThreadID) { uint32_t pid = ID.x; diff --git a/70_FLIPFluids/app_resources/compute/prepareCellUpdate.comp.hlsl b/70_FLIPFluids/app_resources/compute/prepareCellUpdate.comp.hlsl index fe82fe946..157da5bb8 100644 --- a/70_FLIPFluids/app_resources/compute/prepareCellUpdate.comp.hlsl +++ b/70_FLIPFluids/app_resources/compute/prepareCellUpdate.comp.hlsl @@ -42,6 +42,7 @@ float getWeight(float3 pPos, float3 cPos, float invSpacing) } [numthreads(WorkgroupSize, 1, 1)] +[shader("compute")] void main(uint32_t3 ID : SV_DispatchThreadID) { uint pid = ID.x; diff --git a/70_FLIPFluids/app_resources/compute/pressureSolver.comp.hlsl b/70_FLIPFluids/app_resources/compute/pressureSolver.comp.hlsl index 668b15c31..b5db995c5 100644 --- a/70_FLIPFluids/app_resources/compute/pressureSolver.comp.hlsl +++ b/70_FLIPFluids/app_resources/compute/pressureSolver.comp.hlsl @@ -36,6 +36,7 @@ groupshared float sDivergence[14][14][14]; groupshared float sPressure[14][14][14]; [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void calculateNegativeDivergence(uint32_t3 ID : SV_DispatchThreadID) { int3 cellIdx = ID; diff --git a/70_FLIPFluids/app_resources/compute/updateFluidCells.comp.hlsl b/70_FLIPFluids/app_resources/compute/updateFluidCells.comp.hlsl index 9d7fabd52..62ddfd822 100644 --- a/70_FLIPFluids/app_resources/compute/updateFluidCells.comp.hlsl +++ b/70_FLIPFluids/app_resources/compute/updateFluidCells.comp.hlsl @@ -40,6 +40,7 @@ void updateFluidCells(uint32_t3 ID : SV_DispatchThreadID) } [numthreads(WorkgroupGridDim, WorkgroupGridDim, WorkgroupGridDim)] +[shader("compute")] void updateNeighborFluidCells(uint32_t3 ID : SV_DispatchThreadID) { int3 cIdx = ID; diff --git a/70_FLIPFluids/app_resources/fluidParticles.fragment.hlsl b/70_FLIPFluids/app_resources/fluidParticles.fragment.hlsl index 
e556ce8ed..cac1bfa4a 100644 --- a/70_FLIPFluids/app_resources/fluidParticles.fragment.hlsl +++ b/70_FLIPFluids/app_resources/fluidParticles.fragment.hlsl @@ -9,6 +9,7 @@ cbuffer CameraData // TODO: BDA instead of UBO, one less thing in DSLayout SMVPParams camParams; }; +[shader("pixel")] float4 main(PSInput input, out float depthTest : SV_DEPTHGREATEREQUAL) : SV_TARGET { float3 N; diff --git a/70_FLIPFluids/app_resources/fluidParticles.vertex.hlsl b/70_FLIPFluids/app_resources/fluidParticles.vertex.hlsl index 4708083c6..89d37eb6f 100644 --- a/70_FLIPFluids/app_resources/fluidParticles.vertex.hlsl +++ b/70_FLIPFluids/app_resources/fluidParticles.vertex.hlsl @@ -14,6 +14,7 @@ struct SPushConstants #include "nbl/builtin/hlsl/bda/__ptr.hlsl" using namespace nbl::hlsl; +[shader("vertex")] PSInput main(uint vertexID : SV_VertexID) { PSInput output; diff --git a/70_FLIPFluids/main.cpp b/70_FLIPFluids/main.cpp index 93e753b68..899d00ba4 100644 --- a/70_FLIPFluids/main.cpp +++ b/70_FLIPFluids/main.cpp @@ -1,28 +1,27 @@ -#include +// Copyright (C) 2024-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h -#include "nbl/application_templates/MonoAssetManagerAndBuiltinResourceApplication.hpp" -#include "SimpleWindowedApplication.hpp" -#include "InputSystem.hpp" -#include "CCamera.hpp" -#include "glm/glm/glm.hpp" -#include -#include +#include "nbl/examples/examples.hpp" +// TODO: why is it not in nabla.h ? +#include "nbl/asset/metadata/CHLSLMetadata.h" -using namespace nbl::hlsl; using namespace nbl; -using namespace core; -using namespace hlsl; -using namespace system; -using namespace asset; -using namespace ui; -using namespace video; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::examples; #include "app_resources/common.hlsl" #include "app_resources/gridUtils.hlsl" #include "app_resources/render_common.hlsl" #include "app_resources/descriptor_bindings.hlsl" + enum SimPresets { CENTER_DROP, @@ -165,10 +164,10 @@ class CEventCallback : public ISimpleManagedSurface::ICallback nbl::system::logger_opt_smart_ptr m_logger = nullptr; }; -class FLIPFluidsApp final : public examples::SimpleWindowedApplication, public application_templates::MonoAssetManagerAndBuiltinResourceApplication +class FLIPFluidsApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication { - using device_base_t = examples::SimpleWindowedApplication; - using asset_base_t = application_templates::MonoAssetManagerAndBuiltinResourceApplication; + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; using clock_t = std::chrono::steady_clock; constexpr static inline uint32_t WIN_WIDTH = 1280, WIN_HEIGHT = 720; @@ -1401,7 +1400,7 @@ class FLIPFluidsApp final : public examples::SimpleWindowedApplication, public a numParticles = m_gridData.particleInitSize.x * m_gridData.particleInitSize.y * m_gridData.particleInitSize.z * particlesPerCell; } - smart_refctd_ptr compileShader(const std::string& filePath, const std::string& entryPoint = "main") + smart_refctd_ptr compileShader(const std::string& filePath, const std::string& entryPoint = "main") { IAssetLoader::SAssetLoadParams lparams = {}; lparams.logger = m_logger.get(); @@ -1415,17 +1414,19 @@ class FLIPFluidsApp final : public examples::SimpleWindowedApplication, public a 
const auto assets = bundle.getContents(); assert(assets.size() == 1); - smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); + smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); + const auto hlslMetadata = static_cast(bundle.getMetadata()); + const auto shaderStage = hlslMetadata->shaderStages->front(); - smart_refctd_ptr shader = shaderSrc; + smart_refctd_ptr shader = shaderSrc; if (entryPoint != "main") { auto compiler = make_smart_refctd_ptr(smart_refctd_ptr(m_system)); CHLSLCompiler::SOptions options = {}; - options.stage = shaderSrc->getStage(); + options.stage = shaderStage; if (!(options.stage == IShader::E_SHADER_STAGE::ESS_COMPUTE || options.stage == IShader::E_SHADER_STAGE::ESS_FRAGMENT)) options.stage = IShader::E_SHADER_STAGE::ESS_VERTEX; - options.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; + options.preprocessorOptions.targetSpirvVersion = m_device->getPhysicalDevice()->getLimits().spirvVersion; options.spirvOptimizer = nullptr; #ifndef _NBL_DEBUG ISPIRVOptimizer::E_OPTIMIZER_PASS optPasses = ISPIRVOptimizer::EOP_STRIP_DEBUG_INFO; @@ -1443,7 +1444,7 @@ class FLIPFluidsApp final : public examples::SimpleWindowedApplication, public a shader = compiler->compileToSPIRV((const char*)shaderSrc->getContent()->getPointer(), options); } - return m_device->createShader(shader.get()); + return m_device->compileShader({ shader.get() }); } // TODO: there's a method in IUtilities for this @@ -1562,7 +1563,7 @@ class FLIPFluidsApp final : public examples::SimpleWindowedApplication, public a // init shaders and pipeline - auto compileShader = [&](const std::string& filePath, IShader::E_SHADER_STAGE stage) -> smart_refctd_ptr + auto compileShader = [&](const std::string& filePath) -> smart_refctd_ptr { IAssetLoader::SAssetLoadParams lparams = {}; lparams.logger = m_logger.get(); @@ -1576,15 +1577,14 @@ class FLIPFluidsApp final : public examples::SimpleWindowedApplication, public a const auto assets = bundle.getContents(); assert(assets.size() == 1); - smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); - shaderSrc->setShaderStage(stage); + smart_refctd_ptr shaderSrc = IAsset::castDown(assets[0]); if (!shaderSrc) return nullptr; - return m_device->createShader(shaderSrc.get()); + return m_device->compileShader({ shaderSrc.get() }); }; - auto vs = compileShader("app_resources/fluidParticles.vertex.hlsl", IShader::E_SHADER_STAGE::ESS_VERTEX); - auto fs = compileShader("app_resources/fluidParticles.fragment.hlsl", IShader::E_SHADER_STAGE::ESS_FRAGMENT); + auto vs = compileShader("app_resources/fluidParticles.vertex.hlsl"); + auto fs = compileShader("app_resources/fluidParticles.fragment.hlsl"); smart_refctd_ptr descriptorSetLayout1; { @@ -1629,11 +1629,6 @@ class FLIPFluidsApp final : public examples::SimpleWindowedApplication, public a blendParams.blendParams[0u].colorWriteMask = (1u << 0u) | (1u << 1u) | (1u << 2u) | (1u << 3u); { - IGPUShader::SSpecInfo specInfo[3] = { - {.shader = vs.get()}, - {.shader = fs.get()}, - }; - const asset::SPushConstantRange pcRange = { .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX, .offset = 0, .size = sizeof(uint64_t) }; const auto pipelineLayout = m_device->createPipelineLayout({ &pcRange , 1 }, nullptr, smart_refctd_ptr(descriptorSetLayout1), nullptr, nullptr); @@ -1643,7 +1638,8 @@ class FLIPFluidsApp final : public examples::SimpleWindowedApplication, public a IGPUGraphicsPipeline::SCreationParams params[1] = {}; params[0].layout = pipelineLayout.get(); - params[0].shaders = specInfo; + 
params[0].vertexShader = { .shader = vs.get(), .entryPoint = "main", }; + params[0].fragmentShader = { .shader = fs.get(), .entryPoint = "main", }; params[0].cached = { .vertexInput = { }, diff --git a/71_RayTracingPipeline/CMakeLists.txt b/71_RayTracingPipeline/CMakeLists.txt new file mode 100644 index 000000000..07b0fd396 --- /dev/null +++ b/71_RayTracingPipeline/CMakeLists.txt @@ -0,0 +1,37 @@ +include(common RESULT_VARIABLE RES) +if(NOT RES) + message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") +endif() + +if(NBL_BUILD_IMGUI) + set(NBL_INCLUDE_SERACH_DIRECTORIES + "${CMAKE_CURRENT_SOURCE_DIR}/include" + ) + + list(APPEND NBL_LIBRARIES + imtestengine + "${NBL_EXT_IMGUI_UI_LIB}" + ) + + nbl_create_executable_project("" "" "${NBL_INCLUDE_SERACH_DIRECTORIES}" "${NBL_LIBRARIES}" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") + + if(NBL_EMBED_BUILTIN_RESOURCES) + set(_BR_TARGET_ ${EXECUTABLE_NAME}_builtinResourceData) + set(RESOURCE_DIR "app_resources") + + get_filename_component(_SEARCH_DIRECTORIES_ "${CMAKE_CURRENT_SOURCE_DIR}" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/src" ABSOLUTE) + get_filename_component(_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/include" ABSOLUTE) + + file(GLOB_RECURSE BUILTIN_RESOURCE_FILES RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}" "${CMAKE_CURRENT_SOURCE_DIR}/${RESOURCE_DIR}/*") + foreach(RES_FILE ${BUILTIN_RESOURCE_FILES}) + LIST_BUILTIN_RESOURCE(RESOURCES_TO_EMBED "${RES_FILE}") + endforeach() + + ADD_CUSTOM_BUILTIN_RESOURCES(${_BR_TARGET_} RESOURCES_TO_EMBED "${_SEARCH_DIRECTORIES_}" "${RESOURCE_DIR}" "nbl::this_example::builtin" "${_OUTPUT_DIRECTORY_HEADER_}" "${_OUTPUT_DIRECTORY_SOURCE_}") + + LINK_BUILTIN_RESOURCES_TO_TARGET(${EXECUTABLE_NAME} ${_BR_TARGET_}) + endif() +endif() + + diff --git a/71_RayTracingPipeline/Readme.md b/71_RayTracingPipeline/Readme.md new file mode 100644 index 000000000..4317be9c3 --- /dev/null +++ b/71_RayTracingPipeline/Readme.md @@ -0,0 +1,11 @@ +# Vulkan Ray Tracing Pipeline Demo +![finalResult](docs/Images/final_result.png) + +The scene is rendered using two rays. The first ray (the primary ray) is shot from the camera in the ray generation shader, and the second ray (the occlusion ray) is shot from the closest-hit shader. +To test the intersection shader, the acceleration structures consist of two types of geometry: the cubes are stored as triangle geometries, while the spheres are stored as procedural geometries.
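A minimal sketch of how these two ray types and two geometry types index into the hit-group region of the shader binding table shown further below; the enum values mirror E_GEOM_TYPE / E_RAY_TYPE from app_resources/common.hlsl and the index math matches the getHitGroupIndex() helper in main.cpp, both introduced later in this patch:

    #include <cstdint>
    // local copies of the patch's enums, only so the sketch compiles standalone
    enum E_GEOM_TYPE : uint16_t { EGT_TRIANGLES, EGT_PROCEDURAL, EGT_COUNT };
    enum E_RAY_TYPE : uint16_t { ERT_PRIMARY, ERT_OCCLUSION, ERT_COUNT };
    // hit groups are laid out geometry-major: all ray types of one geometry type are adjacent
    constexpr uint32_t getHitGroupIndex(const E_GEOM_TYPE geomType, const E_RAY_TYPE rayType)
    {
        return geomType * ERT_COUNT + rayType;
    }
    static_assert(getHitGroupIndex(EGT_TRIANGLES, ERT_PRIMARY) == 0);
    static_assert(getHitGroupIndex(EGT_TRIANGLES, ERT_OCCLUSION) == 1);
    static_assert(getHitGroupIndex(EGT_PROCEDURAL, ERT_PRIMARY) == 2);
    static_assert(getHitGroupIndex(EGT_PROCEDURAL, ERT_OCCLUSION) == 3);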
+To test callable shader, we calculate lighting information of different type in its own callable shader + +## Shader Table Layout +![shaderBindingTable](docs/Images/shader_binding_table.png) + + diff --git a/71_RayTracingPipeline/app_resources/common.hlsl b/71_RayTracingPipeline/app_resources/common.hlsl new file mode 100644 index 000000000..fd719b239 --- /dev/null +++ b/71_RayTracingPipeline/app_resources/common.hlsl @@ -0,0 +1,319 @@ +#ifndef RQG_COMMON_HLSL +#define RQG_COMMON_HLSL + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" +#include "nbl/builtin/hlsl/cpp_compat/basic.h" +#include "nbl/builtin/hlsl/random/pcg.hlsl" + +NBL_CONSTEXPR uint32_t WorkgroupSize = 16; +NBL_CONSTEXPR uint32_t MAX_UNORM_10 = 1023; +NBL_CONSTEXPR uint32_t MAX_UNORM_22 = 4194303; + +inline uint32_t packUnorm10(float32_t v) +{ + return trunc(v * float32_t(MAX_UNORM_10) + 0.5f); +} + +inline float32_t unpackUnorm10(uint32_t packed) +{ + return float32_t(packed & 0x3ff) * (1.0f / float32_t(MAX_UNORM_10)); +} + +inline uint32_t packUnorm22(float32_t v) +{ + const float maxValue = float32_t(MAX_UNORM_22); + return trunc(v * maxValue + 0.5f); +} + +inline float32_t unpackUnorm22(uint32_t packed) +{ + const float maxValue = float32_t(MAX_UNORM_22); + return float32_t(packed & 0x3fffff) * (1.0f / maxValue); +} + +inline uint32_t packUnorm3x10(float32_t3 v) +{ + return (packUnorm10(v.z) << 20 | (packUnorm10(v.y) << 10 | packUnorm10(v.x))); +} + +inline float32_t3 unpackUnorm3x10(uint32_t packed) +{ + return float32_t3(unpackUnorm10(packed), unpackUnorm10(packed >> 10), unpackUnorm10(packed >> 20)); +} + +struct Material +{ + float32_t3 ambient; + float32_t3 diffuse; + float32_t3 specular; + float32_t shininess; + float32_t alpha; + + bool isTransparent() NBL_CONST_MEMBER_FUNC + { + return alpha < 1.0; + } + + bool alphaTest(const float32_t xi) NBL_CONST_MEMBER_FUNC + { + return xi > alpha; + } +}; + +struct MaterialPacked +{ + uint32_t ambient; + uint32_t diffuse; + uint32_t specular; + uint32_t shininess: 22; + uint32_t alpha : 10; + + bool isTransparent() NBL_CONST_MEMBER_FUNC + { + return alpha != MAX_UNORM_10; + } + + bool alphaTest(const uint32_t xi) NBL_CONST_MEMBER_FUNC + { + return (xi>>22) > alpha; + } +}; + +struct SProceduralGeomInfo +{ + MaterialPacked material; + float32_t3 center; + float32_t radius; +}; + +enum NormalType : uint32_t +{ + NT_R8G8B8A8_SNORM, + NT_R32G32B32_SFLOAT, +}; + +struct STriangleGeomInfo +{ + MaterialPacked material; + uint64_t vertexBufferAddress; + uint64_t indexBufferAddress; + uint64_t normalBufferAddress; + + uint32_t normalType : 1; + uint32_t indexType : 1; // 16 bit, 32 bit + +}; + +enum E_GEOM_TYPE : uint16_t +{ + EGT_TRIANGLES, + EGT_PROCEDURAL, + EGT_COUNT +}; + +enum E_RAY_TYPE : uint16_t +{ + ERT_PRIMARY, // Ray shoot from camera + ERT_OCCLUSION, + ERT_COUNT +}; + +enum E_MISS_TYPE : uint16_t +{ + EMT_PRIMARY, + EMT_OCCLUSION, + EMT_COUNT +}; + +enum E_LIGHT_TYPE : uint16_t +{ + ELT_DIRECTIONAL, + ELT_POINT, + ELT_SPOT, + ELT_COUNT +}; + +struct Light +{ + float32_t3 direction; + float32_t3 position; + float32_t outerCutoff; + uint16_t type; + + +#ifndef __HLSL_VERSION + bool operator==(const Light&) const = default; +#endif + +}; + +static const float LightIntensity = 100.0f; + +struct SPushConstants +{ + uint64_t proceduralGeomInfoBuffer; + uint64_t triangleGeomInfoBuffer; + + float32_t3 camPos; + uint32_t frameCounter; + float32_t4x4 invMVP; + + Light light; +}; + + +struct RayLight +{ + float32_t3 inHitPosition; + float32_t outLightDistance; + float32_t3 
outLightDir; + float32_t outIntensity; +}; + +#ifdef __HLSL_VERSION + +struct [raypayload] OcclusionPayload +{ + // TODO: will this break DXC? Tbh should come from push constant or some autoexposure feedback + // NBL_CONSTEXPR_STATIC_INLINE float32_t MinAttenuation = 1.f/1024.f; + + float32_t attenuation : read(caller,anyhit,miss) : write(caller,anyhit,miss); +}; + +struct MaterialId +{ + const static uint32_t PROCEDURAL_FLAG = (1 << 31); + const static uint32_t PROCEDURAL_MASK = ~PROCEDURAL_FLAG; + + uint32_t data; + + static MaterialId createProcedural(uint32_t index) + { + MaterialId id; + id.data = index | PROCEDURAL_FLAG; + return id; + } + + static MaterialId createTriangle(uint32_t index) + { + MaterialId id; + id.data = index; + return id; + } + + uint32_t getMaterialIndex() + { + return data & PROCEDURAL_MASK; + } + + bool isHitProceduralGeom() + { + return data & PROCEDURAL_FLAG; + } +}; + +struct [raypayload] PrimaryPayload +{ + using generator_t = nbl::hlsl::random::Pcg; + + float32_t3 worldNormal : read(caller) : write(closesthit); + float32_t rayDistance : read(caller) : write(closesthit,miss); + generator_t pcg : read(anyhit) : write(caller,anyhit); + MaterialId materialId : read(caller) : write(closesthit); + +}; + +struct ProceduralHitAttribute +{ + float32_t3 center; +}; + +enum ObjectType : uint32_t // matches c++ +{ + OT_CUBE = 0, + OT_SPHERE, + OT_CYLINDER, + OT_RECTANGLE, + OT_DISK, + OT_ARROW, + OT_CONE, + OT_ICOSPHERE, + + OT_COUNT +}; + +float32_t3 computeDiffuse(Material mat, float32_t3 light_dir, float32_t3 normal) +{ + float32_t dotNL = max(dot(normal, light_dir), 0.0); + float32_t3 c = mat.diffuse * dotNL; + return c; +} + +float32_t3 computeSpecular(Material mat, float32_t3 view_dir, + float32_t3 light_dir, float32_t3 normal) +{ + const float32_t kPi = 3.14159265; + const float32_t kShininess = max(mat.shininess, 4.0); + + // Specular + const float32_t kEnergyConservation = (2.0 + kShininess) / (2.0 * kPi); + float32_t3 V = normalize(-view_dir); + float32_t3 R = reflect(-light_dir, normal); + float32_t specular = kEnergyConservation * pow(max(dot(V, R), 0.0), kShininess); + + return float32_t3(mat.specular * specular); +} + +float3 unpackNormals3x10(uint32_t v) +{ + // host side changes float32_t3 to EF_A2B10G10R10_SNORM_PACK32 + // follows unpacking scheme from https://github.com/KhronosGroup/SPIRV-Cross/blob/main/reference/shaders-hlsl/frag/unorm-snorm-packing.frag + int signedValue = int(v); + int3 pn = int3(signedValue << 22, signedValue << 12, signedValue << 2) >> 22; + return clamp(float3(pn) / 511.0, -1.0, 1.0); +} + +#endif + +namespace nbl +{ +namespace hlsl +{ +namespace impl +{ + +template<> +struct static_cast_helper +{ + static inline Material cast(MaterialPacked packed) + { + Material material; + material.ambient = unpackUnorm3x10(packed.ambient); + material.diffuse = unpackUnorm3x10(packed.diffuse); + material.specular = unpackUnorm3x10(packed.specular); + material.shininess = unpackUnorm22(packed.shininess); + material.alpha = unpackUnorm10(packed.alpha); + return material; + } +}; + +template<> +struct static_cast_helper +{ + static inline MaterialPacked cast(Material material) + { + MaterialPacked packed; + packed.ambient = packUnorm3x10(material.ambient); + packed.diffuse = packUnorm3x10(material.diffuse); + packed.specular = packUnorm3x10(material.specular); + packed.shininess = packUnorm22(material.shininess); + packed.alpha = packUnorm10(material.alpha); + return packed; + } +}; + +} +} +} + +#endif // RQG_COMMON_HLSL diff --git 
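The material packing above (3x10-bit unorm colours, 22-bit shininess, 10-bit alpha) is shared between HLSL and C++ through the cpp_compat headers. Below is a minimal host-side round trip, assuming the header is compiled as C++ the same way include/common.hpp in this patch does and that the example's include directories are set up by its CMake:

    // mirrors how include/common.hpp in this patch pulls the shared header into C++
    #include "nbl/examples/examples.hpp"
    using namespace nbl::hlsl;
    #include "app_resources/common.hlsl"

    #include <cassert>
    #include <cmath>

    int main()
    {
        const float32_t3 diffuse = float32_t3(0.8f, 0.2f, 0.2f);
        const uint32_t packed = packUnorm3x10(diffuse);      // 3 x 10 bits in one uint32_t
        const float32_t3 unpacked = unpackUnorm3x10(packed);

        // each channel quantizes to 1/1023 steps, so the round trip is off by at most half a step
        for (int i = 0; i < 3; ++i)
            assert(std::abs(unpacked[i] - diffuse[i]) <= 0.5f / 1023.f);
        return 0;
    }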
a/71_RayTracingPipeline/app_resources/light_directional.rcall.hlsl b/71_RayTracingPipeline/app_resources/light_directional.rcall.hlsl new file mode 100644 index 000000000..1eb18be34 --- /dev/null +++ b/71_RayTracingPipeline/app_resources/light_directional.rcall.hlsl @@ -0,0 +1,11 @@ +#include "common.hlsl" + +[[vk::push_constant]] SPushConstants pc; + +[shader("callable")] +void main(inout RayLight cLight) +{ + cLight.outLightDir = normalize(-pc.light.direction); + cLight.outIntensity = 1; + cLight.outLightDistance = 10000000; +} diff --git a/71_RayTracingPipeline/app_resources/light_point.rcall.hlsl b/71_RayTracingPipeline/app_resources/light_point.rcall.hlsl new file mode 100644 index 000000000..2265a98e7 --- /dev/null +++ b/71_RayTracingPipeline/app_resources/light_point.rcall.hlsl @@ -0,0 +1,13 @@ +#include "common.hlsl" + +[[vk::push_constant]] SPushConstants pc; + +[shader("callable")] +void main(inout RayLight cLight) +{ + float32_t3 lDir = pc.light.position - cLight.inHitPosition; + float lightDistance = length(lDir); + cLight.outIntensity = LightIntensity / (lightDistance * lightDistance); + cLight.outLightDir = normalize(lDir); + cLight.outLightDistance = lightDistance; +} \ No newline at end of file diff --git a/71_RayTracingPipeline/app_resources/light_spot.rcall.hlsl b/71_RayTracingPipeline/app_resources/light_spot.rcall.hlsl new file mode 100644 index 000000000..f298e4643 --- /dev/null +++ b/71_RayTracingPipeline/app_resources/light_spot.rcall.hlsl @@ -0,0 +1,16 @@ +#include "common.hlsl" + +[[vk::push_constant]] SPushConstants pc; + +[shader("callable")] +void main(inout RayLight cLight) +{ + float32_t3 lDir = pc.light.position - cLight.inHitPosition; + cLight.outLightDistance = length(lDir); + cLight.outIntensity = LightIntensity / (cLight.outLightDistance * cLight.outLightDistance); + cLight.outLightDir = normalize(lDir); + float theta = dot(cLight.outLightDir, normalize(-pc.light.direction)); + float epsilon = 1.f - pc.light.outerCutoff; + float spotIntensity = clamp((theta - pc.light.outerCutoff) / epsilon, 0.0, 1.0); + cLight.outIntensity *= spotIntensity; +} diff --git a/71_RayTracingPipeline/app_resources/present.frag.hlsl b/71_RayTracingPipeline/app_resources/present.frag.hlsl new file mode 100644 index 000000000..00ab6e31d --- /dev/null +++ b/71_RayTracingPipeline/app_resources/present.frag.hlsl @@ -0,0 +1,19 @@ +// Copyright (C) 2024-2024 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h + +#pragma wave shader_stage(fragment) + +// vertex shader is provided by the fullScreenTriangle extension +#include +using namespace nbl::hlsl; +using namespace ext::FullScreenTriangle; + +// binding 0 set 0 +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] Texture2D texture; +[[vk::combinedImageSampler]] [[vk::binding(0, 0)]] SamplerState samplerState; + +[[vk::location(0)]] float32_t4 main(SVertexAttributes vxAttr) : SV_Target0 +{ + return float32_t4(texture.Sample(samplerState, vxAttr.uv).rgb, 1.0f); +} diff --git a/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl new file mode 100644 index 000000000..956ad5fe6 --- /dev/null +++ b/71_RayTracingPipeline/app_resources/raytrace.rahit.hlsl @@ -0,0 +1,19 @@ +#include "common.hlsl" + +#include "nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl" + +using namespace nbl::hlsl; + +[[vk::push_constant]] SPushConstants pc; + +[shader("anyhit")] +void main(inout PrimaryPayload payload, in BuiltInTriangleIntersectionAttributes attribs) +{ + const int instID = spirv::InstanceCustomIndexKHR; + const STriangleGeomInfo geom = vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo)); + + const uint32_t bitpattern = payload.pcg(); + // Cannot use spirv::ignoreIntersectionKHR and spirv::terminateRayKHR due to https://github.com/microsoft/DirectXShaderCompiler/issues/7279 + if (geom.material.alphaTest(bitpattern)) + IgnoreHit(); +} diff --git a/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl new file mode 100644 index 000000000..0a8bc5ec8 --- /dev/null +++ b/71_RayTracingPipeline/app_resources/raytrace.rchit.hlsl @@ -0,0 +1,93 @@ +#include "common.hlsl" + +#include "nbl/builtin/hlsl/spirv_intrinsics/core.hlsl" +#include "nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl" +#include "nbl/builtin/hlsl/bda/__ptr.hlsl" + +using namespace nbl::hlsl; + +[[vk::push_constant]] SPushConstants pc; + +float3 calculateNormals(int primID, STriangleGeomInfo geom, float2 bary) +{ + const uint indexType = geom.indexType; + const uint normalType = geom.normalType; + + const uint64_t vertexBufferAddress = geom.vertexBufferAddress; + const uint64_t indexBufferAddress = geom.indexBufferAddress; + const uint64_t normalBufferAddress = geom.normalBufferAddress; + + uint32_t3 indices; + if (indexBufferAddress == 0) + { + indices[0] = primID * 3; + indices[1] = indices[0] + 1; + indices[2] = indices[0] + 2; + } + else { + switch (indexType) + { + case 0: // EIT_16BIT + indices = uint32_t3((nbl::hlsl::bda::__ptr::create(indexBufferAddress)+primID).deref().load()); + break; + case 1: // EIT_32BIT + indices = uint32_t3((nbl::hlsl::bda::__ptr::create(indexBufferAddress)+primID).deref().load()); + break; + } + } + + if (normalBufferAddress == 0) + { + float3 v0 = vk::RawBufferLoad(vertexBufferAddress + indices[0] * 12); + float3 v1 = vk::RawBufferLoad(vertexBufferAddress + indices[1] * 12); + float3 v2 = vk::RawBufferLoad(vertexBufferAddress + indices[2] * 12); + + return normalize(cross(v2 - v0, v1 - v0)); + } + + float3 n0, n1, n2; + switch (normalType) + { + case NT_R8G8B8A8_SNORM: + { + uint32_t v0 = vk::RawBufferLoad(normalBufferAddress + indices[0] * 4); + uint32_t v1 = vk::RawBufferLoad(normalBufferAddress + indices[1] * 4); + uint32_t v2 = vk::RawBufferLoad(normalBufferAddress + indices[2] * 4); + + n0 = 
normalize(nbl::hlsl::spirv::unpackSnorm4x8(v0).xyz); + n1 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v1).xyz); + n2 = normalize(nbl::hlsl::spirv::unpackSnorm4x8(v2).xyz); + } + break; + case NT_R32G32B32_SFLOAT: + { + n0 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[0] * 12)); + n1 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[1] * 12)); + n2 = normalize(vk::RawBufferLoad(normalBufferAddress + indices[2] * 12)); + } + break; + } + + float3 barycentrics = float3(0.0, bary); + barycentrics.x = 1.0 - barycentrics.y - barycentrics.z; + + return barycentrics.x * n0 + barycentrics.y * n1 + barycentrics.z * n2; +} + + +[shader("closesthit")] +void main(inout PrimaryPayload payload, in BuiltInTriangleIntersectionAttributes attribs) +{ + const int primID = spirv::PrimitiveId; + const int instanceCustomIndex = spirv::InstanceCustomIndexKHR; + const int geometryIndex = spirv::RayGeometryIndexKHR; + const STriangleGeomInfo geom = vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + (instanceCustomIndex + geometryIndex) * sizeof(STriangleGeomInfo)); + const float32_t3 vertexNormal = calculateNormals(primID, geom, attribs.barycentrics); + const float32_t3 worldNormal = normalize(mul(vertexNormal, transpose(spirv::WorldToObjectKHR)).xyz); + + payload.materialId = MaterialId::createTriangle(instanceCustomIndex); + + payload.worldNormal = worldNormal; + payload.rayDistance = spirv::RayTmaxKHR; + +} \ No newline at end of file diff --git a/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl new file mode 100644 index 000000000..43d16f161 --- /dev/null +++ b/71_RayTracingPipeline/app_resources/raytrace.rgen.hlsl @@ -0,0 +1,139 @@ +#include "common.hlsl" + +#include "nbl/builtin/hlsl/jit/device_capabilities.hlsl" +#include "nbl/builtin/hlsl/random/xoroshiro.hlsl" + +#include "nbl/builtin/hlsl/glsl_compat/core.hlsl" +#include "nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl" + +static const int32_t s_sampleCount = 10; +static const float32_t3 s_clearColor = float32_t3(0.3, 0.3, 0.8); + +using namespace nbl::hlsl; + +[[vk::push_constant]] SPushConstants pc; + +[[vk::binding(0, 0)]] RaytracingAccelerationStructure topLevelAS; + +[[vk::binding(1, 0)]] RWTexture2D colorImage; + +float32_t nextRandomUnorm(inout nbl::hlsl::Xoroshiro64StarStar rnd) +{ + return float32_t(rnd()) / float32_t(0xFFFFFFFF); +} + +[shader("raygeneration")] +void main() +{ + const uint32_t3 launchID = spirv::LaunchIdKHR; + const uint32_t3 launchSize = spirv::LaunchSizeKHR; + const uint32_t2 coords = launchID.xy; + + const uint32_t seed1 = nbl::hlsl::random::Pcg::create(pc.frameCounter)(); + const uint32_t seed2 = nbl::hlsl::random::Pcg::create(launchID.y * launchSize.x + launchID.x)(); + nbl::hlsl::Xoroshiro64StarStar rnd = nbl::hlsl::Xoroshiro64StarStar::construct(uint32_t2(seed1, seed2)); + + float32_t3 hitValues = float32_t3(0, 0, 0); + for (uint32_t sample_i = 0; sample_i < s_sampleCount; sample_i++) + { + const float32_t r1 = nextRandomUnorm(rnd); + const float32_t r2 = nextRandomUnorm(rnd); + const float32_t2 subpixelJitter = pc.frameCounter == 0 ? 
float32_t2(0.5f, 0.5f) : float32_t2(r1, r2); + + const float32_t2 pixelCenter = float32_t2(coords) + subpixelJitter; + const float32_t2 inUV = pixelCenter / float32_t2(launchSize.xy); + + const float32_t2 d = inUV * 2.0 - 1.0; + const float32_t4 tmp = mul(pc.invMVP, float32_t4(d.x, d.y, 1, 1)); + const float32_t3 targetPos = tmp.xyz / tmp.w; + + const float32_t3 camDirection = normalize(targetPos - pc.camPos); + + RayDesc rayDesc; + rayDesc.Origin = pc.camPos; + rayDesc.Direction = camDirection; + rayDesc.TMin = 0.01; + rayDesc.TMax = 10000.0; + + [[vk::ext_storage_class(spv::StorageClassRayPayloadKHR)]] + PrimaryPayload payload; + payload.pcg = PrimaryPayload::generator_t::create(rnd()); + spirv::traceRayKHR(topLevelAS, spv::RayFlagsMaskNone, 0xff, ERT_PRIMARY, 0, EMT_PRIMARY, rayDesc.Origin, rayDesc.TMin, rayDesc.Direction, rayDesc.TMax, payload); + // TraceRay(topLevelAS, RAY_FLAG_NONE, 0xff, ERT_PRIMARY, 0, EMT_PRIMARY, rayDesc, payload); + + const float32_t rayDistance = payload.rayDistance; + if (rayDistance < 0) + { + hitValues += s_clearColor; + continue; + } + + const float32_t3 worldPosition = pc.camPos + (camDirection * rayDistance); + + // make sure to call with least live state + [[vk::ext_storage_class(spv::StorageClassCallableDataKHR)]] + RayLight cLight; + cLight.inHitPosition = worldPosition; + spirv::executeCallable(pc.light.type, cLight); + + const float32_t3 worldNormal = payload.worldNormal; + + Material material; + MaterialId materialId = payload.materialId; + // we use negative index to indicate that this is a procedural geometry + if (materialId.isHitProceduralGeom()) + { + const MaterialPacked materialPacked = vk::RawBufferLoad(pc.proceduralGeomInfoBuffer + materialId.getMaterialIndex() * sizeof(SProceduralGeomInfo)); + material = nbl::hlsl::_static_cast(materialPacked); + } + else + { + const MaterialPacked materialPacked = vk::RawBufferLoad(pc.triangleGeomInfoBuffer + materialId.getMaterialIndex() * sizeof(STriangleGeomInfo)); + material = nbl::hlsl::_static_cast(materialPacked); + } + + float32_t attenuation = 1; + + if (dot(worldNormal, cLight.outLightDir) > 0) + { + RayDesc rayDesc; + rayDesc.Origin = worldPosition; + rayDesc.Direction = cLight.outLightDir; + rayDesc.TMin = 0.01; + rayDesc.TMax = cLight.outLightDistance; + + [[vk::ext_storage_class(spv::StorageClassRayPayloadKHR)]] + OcclusionPayload occlusionPayload; + // negative means its a hit, the miss shader will flip it back around to positive + occlusionPayload.attenuation = -1.f; + // abuse of miss shader to mean "not hit shader" solves us having to call closest hit shaders + uint32_t shadowRayFlags = spv::RayFlagsTerminateOnFirstHitKHRMask | spv::RayFlagsSkipClosestHitShaderKHRMask; + spirv::traceRayKHR(topLevelAS, shadowRayFlags, 0xFF, ERT_OCCLUSION, 0, EMT_OCCLUSION, rayDesc.Origin, rayDesc.TMin, rayDesc.Direction, rayDesc.TMax, occlusionPayload); + + // uint32_t shadowRayFlags = RAY_FLAG_ACCEPT_FIRST_HIT_AND_END_SEARCH | RAY_FLAG_SKIP_CLOSEST_HIT_SHADER; + // TraceRay(topLevelAS, shadowRayFlags, 0xFF, ERT_OCCLUSION, 0, EMT_OCCLUSION, rayDesc, occlusionPayload); + + attenuation = occlusionPayload.attenuation; + if (occlusionPayload.attenuation > 1.f/1024.f) + { + const float32_t3 diffuse = computeDiffuse(material, cLight.outLightDir, worldNormal); + const float32_t3 specular = computeSpecular(material, camDirection, cLight.outLightDir, worldNormal); + hitValues += (cLight.outIntensity * attenuation * (diffuse + specular)); + } + } + hitValues += material.ambient; + } + + const float32_t3 hitValue = 
hitValues / s_sampleCount; + + if (pc.frameCounter > 0) + { + float32_t a = 1.0f / float32_t(pc.frameCounter + 1); + float32_t3 oldColor = colorImage[coords].xyz; + colorImage[coords] = float32_t4(lerp(oldColor, hitValue, a), 1.0f); + } + else + { + colorImage[coords] = float32_t4(hitValue, 1.0f); + } +} diff --git a/71_RayTracingPipeline/app_resources/raytrace.rint.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rint.hlsl new file mode 100644 index 000000000..72f9beffd --- /dev/null +++ b/71_RayTracingPipeline/app_resources/raytrace.rint.hlsl @@ -0,0 +1,53 @@ +#include "common.hlsl" + +#include "nbl/builtin/hlsl/spirv_intrinsics/core.hlsl" +#include "nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl" + +using namespace nbl::hlsl; + +[[vk::push_constant]] SPushConstants pc; + +struct Ray +{ + float32_t3 origin; + float32_t3 direction; +}; + +// Ray-Sphere intersection +// http://viclw17.github.io/2018/07/16/raytracing-ray-sphere-intersection/ +float32_t hitSphere(SProceduralGeomInfo s, Ray r) +{ + float32_t3 oc = r.origin - s.center; + float32_t a = dot(r.direction, r.direction); + float32_t b = 2.0 * dot(oc, r.direction); + float32_t c = dot(oc, oc) - s.radius * s.radius; + float32_t discriminant = b * b - 4 * a * c; + + // return whatever, if the discriminant is negative, it will produce a NaN, and NaN will compare false + return (-b - sqrt(discriminant)) / (2.0 * a); +} + +[shader("intersection")] +void main() +{ + Ray ray; + ray.origin = spirv::WorldRayOriginKHR; + ray.direction = spirv::WorldRayDirectionKHR; + + const int primID = spirv::PrimitiveId; + + // Sphere data + SProceduralGeomInfo sphere = vk::RawBufferLoad(pc.proceduralGeomInfoBuffer + primID * sizeof(SProceduralGeomInfo)); + + const float32_t tHit = hitSphere(sphere, ray); + + [[vk::ext_storage_class(spv::StorageClassHitAttributeKHR)]] + ProceduralHitAttribute hitAttrib; + + // Report hit point + if (tHit > 0) + { + hitAttrib.center = sphere.center; + spirv::reportIntersectionKHR(tHit, 0); + } +} \ No newline at end of file diff --git a/71_RayTracingPipeline/app_resources/raytrace.rmiss.hlsl b/71_RayTracingPipeline/app_resources/raytrace.rmiss.hlsl new file mode 100644 index 000000000..5ccfed470 --- /dev/null +++ b/71_RayTracingPipeline/app_resources/raytrace.rmiss.hlsl @@ -0,0 +1,7 @@ +#include "common.hlsl" + +[shader("miss")] +void main(inout PrimaryPayload payload) +{ + payload.rayDistance = -1; +} diff --git a/71_RayTracingPipeline/app_resources/raytrace_procedural.rchit.hlsl b/71_RayTracingPipeline/app_resources/raytrace_procedural.rchit.hlsl new file mode 100644 index 000000000..6c2dc9903 --- /dev/null +++ b/71_RayTracingPipeline/app_resources/raytrace_procedural.rchit.hlsl @@ -0,0 +1,20 @@ +#include "common.hlsl" + +#include "nbl/builtin/hlsl/spirv_intrinsics/core.hlsl" +#include "nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl" +using namespace nbl::hlsl; + +[[vk::push_constant]] SPushConstants pc; + +[shader("closesthit")] +void main(inout PrimaryPayload payload, in ProceduralHitAttribute attrib) +{ + const float32_t3 worldPosition = spirv::WorldRayOriginKHR + spirv::WorldRayDirectionKHR * spirv::RayTmaxKHR; + const float32_t3 worldNormal = normalize(worldPosition - attrib.center); + + payload.materialId = MaterialId::createProcedural(spirv::PrimitiveId); // we use negative value to indicate that this is procedural + + payload.worldNormal = worldNormal; + payload.rayDistance = spirv::RayTmaxKHR; + +} \ No newline at end of file diff --git a/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl 
b/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl new file mode 100644 index 000000000..e41551512 --- /dev/null +++ b/71_RayTracingPipeline/app_resources/raytrace_shadow.rahit.hlsl @@ -0,0 +1,26 @@ +#include "common.hlsl" +#include "nbl/builtin/hlsl/spirv_intrinsics/raytracing.hlsl" +#include "nbl/builtin/hlsl/spirv_intrinsics/core.hlsl" + +using namespace nbl::hlsl; + +[[vk::push_constant]] SPushConstants pc; + +[shader("anyhit")] +void main(inout OcclusionPayload payload, in BuiltInTriangleIntersectionAttributes attribs) +{ + const int instID = spirv::InstanceCustomIndexKHR; + const STriangleGeomInfo geom = vk::RawBufferLoad < STriangleGeomInfo > (pc.triangleGeomInfoBuffer + instID * sizeof(STriangleGeomInfo)); + const Material material = nbl::hlsl::_static_cast(geom.material); + + const float attenuation = (1.f-material.alpha) * payload.attenuation; + // DXC codegens weird things in the presence of termination instructions + payload.attenuation = attenuation; + + + // Cannot use spirv::ignoreIntersectionKHR and spirv::terminateRayKHR due to https://github.com/microsoft/DirectXShaderCompiler/issues/7279 + // arbitrary constant, whatever you want the smallest attenuation to be. Remember that until the miss shader runs, the attenuation is negative + if (attenuation > -1.f/1024.f) + AcceptHitAndEndSearch(); + IgnoreHit(); +} diff --git a/71_RayTracingPipeline/app_resources/raytrace_shadow.rmiss.hlsl b/71_RayTracingPipeline/app_resources/raytrace_shadow.rmiss.hlsl new file mode 100644 index 000000000..441a1b42a --- /dev/null +++ b/71_RayTracingPipeline/app_resources/raytrace_shadow.rmiss.hlsl @@ -0,0 +1,8 @@ +#include "common.hlsl" + +[shader("miss")] +void main(inout OcclusionPayload payload) +{ + // make positive + payload.attenuation = -payload.attenuation; +} diff --git a/71_RayTracingPipeline/docs/Images/final_result.png b/71_RayTracingPipeline/docs/Images/final_result.png new file mode 100644 index 000000000..af1f2b9b8 Binary files /dev/null and b/71_RayTracingPipeline/docs/Images/final_result.png differ diff --git a/71_RayTracingPipeline/docs/Images/shader_binding_table.png b/71_RayTracingPipeline/docs/Images/shader_binding_table.png new file mode 100644 index 000000000..b146adeec Binary files /dev/null and b/71_RayTracingPipeline/docs/Images/shader_binding_table.png differ diff --git a/71_RayTracingPipeline/include/common.hpp b/71_RayTracingPipeline/include/common.hpp new file mode 100644 index 000000000..6727c879c --- /dev/null +++ b/71_RayTracingPipeline/include/common.hpp @@ -0,0 +1,35 @@ +#ifndef _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ +#define _NBL_THIS_EXAMPLE_COMMON_H_INCLUDED_ + +#include "nbl/examples/examples.hpp" + +using namespace nbl; +using namespace nbl::core; +using namespace nbl::hlsl; +using namespace nbl::system; +using namespace nbl::asset; +using namespace nbl::ui; +using namespace nbl::video; +using namespace nbl::application_templates; +using namespace nbl::examples; + +#include "nbl/ui/ICursorControl.h" +#include "nbl/ext/ImGui/ImGui.h" +#include "imgui/imgui_internal.h" + +#include "app_resources/common.hlsl" + +namespace nbl::scene +{ + +struct ReferenceObjectCpu +{ + core::smart_refctd_ptr data; + Material material; + core::matrix3x4SIMD transform; + +}; + +} + +#endif // __NBL_THIS_EXAMPLE_COMMON_H_INCLUDED__ diff --git a/71_RayTracingPipeline/main.cpp b/71_RayTracingPipeline/main.cpp new file mode 100644 index 000000000..59b610f4b --- /dev/null +++ b/71_RayTracingPipeline/main.cpp @@ -0,0 +1,1577 @@ +// Copyright (C) 2018-2024 - DevSH Graphics Programming Sp.
z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#include "common.hpp" + +#include "nbl/ext/FullScreenTriangle/FullScreenTriangle.h" +#include "nbl/builtin/hlsl/indirect_commands.hlsl" + +#include "nbl/examples/common/BuiltinResourcesApplication.hpp" + + +class RaytracingPipelineApp final : public SimpleWindowedApplication, public BuiltinResourcesApplication +{ + using device_base_t = SimpleWindowedApplication; + using asset_base_t = BuiltinResourcesApplication; + using clock_t = std::chrono::steady_clock; + + constexpr static inline uint32_t WIN_W = 1280, WIN_H = 720; + constexpr static inline uint32_t MaxFramesInFlight = 3u; + constexpr static inline uint8_t MaxUITextureCount = 1u; + constexpr static inline uint32_t NumberOfProceduralGeometries = 5; + + static constexpr const char* s_lightTypeNames[E_LIGHT_TYPE::ELT_COUNT] = { + "Directional", + "Point", + "Spot" + }; + + struct ShaderBindingTable + { + SBufferRange raygenGroupRange; + SBufferRange hitGroupsRange; + uint32_t hitGroupsStride; + SBufferRange missGroupsRange; + uint32_t missGroupsStride; + SBufferRange callableGroupsRange; + uint32_t callableGroupsStride; + }; + + +public: + inline RaytracingPipelineApp(const path& _localInputCWD, const path& _localOutputCWD, const path& _sharedInputCWD, const path& _sharedOutputCWD) + : IApplicationFramework(_localInputCWD, _localOutputCWD, _sharedInputCWD, _sharedOutputCWD) + { + } + + inline SPhysicalDeviceFeatures getRequiredDeviceFeatures() const override + { + auto retval = device_base_t::getRequiredDeviceFeatures(); + retval.rayTracingPipeline = true; + retval.accelerationStructure = true; + retval.rayQuery = true; + return retval; + } + + inline SPhysicalDeviceFeatures getPreferredDeviceFeatures() const override + { + auto retval = device_base_t::getPreferredDeviceFeatures(); + retval.accelerationStructureHostCommands = true; + return retval; + } + + inline core::vector getSurfaces() const override + { + if (!m_surface) + { + { + auto windowCallback = core::make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem), smart_refctd_ptr(m_logger)); + IWindow::SCreationParams params = {}; + params.callback = core::make_smart_refctd_ptr(); + params.width = WIN_W; + params.height = WIN_H; + params.x = 32; + params.y = 32; + params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; + params.windowCaption = "RaytracingPipelineApp"; + params.callback = windowCallback; + const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); + } + + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); + const_cast&>(m_surface) = CSimpleResizeSurface::create(std::move(surface)); + } + + if (m_surface) + return { {m_surface->getSurface()/*,EQF_NONE*/} }; + + return {}; + } + + // so that we can use the same queue for asset converter and rendering + inline core::vector getQueueRequirements() const override + { + auto reqs = device_base_t::getQueueRequirements(); + reqs.front().requiredFlags |= IQueue::FAMILY_FLAGS::COMPUTE_BIT; + return reqs; + } + + inline bool onAppInitialized(smart_refctd_ptr&& system) override + { + m_inputSystem = make_smart_refctd_ptr(logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); + + if (!device_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + if (!asset_base_t::onAppInitialized(smart_refctd_ptr(system))) + return false; + + smart_refctd_ptr shaderReadCache = nullptr; + smart_refctd_ptr 
shaderWriteCache = core::make_smart_refctd_ptr(); + auto shaderCachePath = localOutputCWD / "main_pipeline_shader_cache.bin"; + + { + core::smart_refctd_ptr shaderReadCacheFile; + { + system::ISystem::future_t> future; + m_system->createFile(future, shaderCachePath.c_str(), system::IFile::ECF_READ); + if (future.wait()) + { + future.acquire().move_into(shaderReadCacheFile); + if (shaderReadCacheFile) + { + const size_t size = shaderReadCacheFile->getSize(); + if (size > 0ull) + { + std::vector contents(size); + system::IFile::success_t succ; + shaderReadCacheFile->read(succ, contents.data(), 0, size); + if (succ) + shaderReadCache = IShaderCompiler::CCache::deserialize(contents); + } + } + } + else + m_logger->log("Failed Openning Shader Cache File.", ILogger::ELL_ERROR); + } + + } + + // Load Custom Shader + auto loadCompileAndCreateShader = [&](const std::string& relPath) -> smart_refctd_ptr + { + IAssetLoader::SAssetLoadParams lp = {}; + lp.logger = m_logger.get(); + lp.workingDirectory = ""; // virtual root + auto assetBundle = m_assetMgr->getAsset(relPath, lp); + const auto assets = assetBundle.getContents(); + if (assets.empty()) + return nullptr; + + // lets go straight from ICPUSpecializedShader to IGPUSpecializedShader + auto sourceRaw = IAsset::castDown(assets[0]); + if (!sourceRaw) + return nullptr; + + return m_device->compileShader({ sourceRaw.get(), nullptr, shaderReadCache.get(), shaderWriteCache.get() }); + }; + + // load shaders + const auto raygenShader = loadCompileAndCreateShader("app_resources/raytrace.rgen.hlsl"); + const auto closestHitShader = loadCompileAndCreateShader("app_resources/raytrace.rchit.hlsl"); + const auto proceduralClosestHitShader = loadCompileAndCreateShader("app_resources/raytrace_procedural.rchit.hlsl"); + const auto intersectionHitShader = loadCompileAndCreateShader("app_resources/raytrace.rint.hlsl"); + const auto anyHitShaderColorPayload = loadCompileAndCreateShader("app_resources/raytrace.rahit.hlsl"); + const auto anyHitShaderShadowPayload = loadCompileAndCreateShader("app_resources/raytrace_shadow.rahit.hlsl"); + const auto missShader = loadCompileAndCreateShader("app_resources/raytrace.rmiss.hlsl"); + const auto missShadowShader = loadCompileAndCreateShader("app_resources/raytrace_shadow.rmiss.hlsl"); + const auto directionalLightCallShader = loadCompileAndCreateShader("app_resources/light_directional.rcall.hlsl"); + const auto pointLightCallShader = loadCompileAndCreateShader("app_resources/light_point.rcall.hlsl"); + const auto spotLightCallShader = loadCompileAndCreateShader("app_resources/light_spot.rcall.hlsl"); + const auto fragmentShader = loadCompileAndCreateShader("app_resources/present.frag.hlsl"); + + core::smart_refctd_ptr shaderWriteCacheFile; + { + system::ISystem::future_t> future; + m_system->deleteFile(shaderCachePath); // temp solution instead of trimming, to make sure we won't have corrupted json + m_system->createFile(future, shaderCachePath.c_str(), system::IFile::ECF_WRITE); + if (future.wait()) + { + future.acquire().move_into(shaderWriteCacheFile); + if (shaderWriteCacheFile) + { + auto serializedCache = shaderWriteCache->serialize(); + if (shaderWriteCacheFile) + { + system::IFile::success_t succ; + shaderWriteCacheFile->write(succ, serializedCache->getPointer(), 0, serializedCache->getSize()); + if (!succ) + m_logger->log("Failed Writing To Shader Cache File.", ILogger::ELL_ERROR); + } + } + else + m_logger->log("Failed Creating Shader Cache File.", ILogger::ELL_ERROR); + } + else + m_logger->log("Failed Creating 
Shader Cache File.", ILogger::ELL_ERROR); + } + + m_semaphore = m_device->createSemaphore(m_realFrameIx); + if (!m_semaphore) + return logFail("Failed to Create a Semaphore!"); + + auto gQueue = getGraphicsQueue(); + + // Create renderpass and init surface + nbl::video::IGPURenderpass* renderpass; + { + ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; + if (!swapchainParams.deduceFormat(m_physicalDevice)) + return logFail("Could not choose a Surface Format for the Swapchain!"); + + const static IGPURenderpass::SCreationParams::SSubpassDependency dependencies[] = + { + { + .srcSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .dstSubpass = 0, + .memoryBarrier = + { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COPY_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::TRANSFER_WRITE_BIT, + .dstStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + { + .srcSubpass = 0, + .dstSubpass = IGPURenderpass::SCreationParams::SSubpassDependency::External, + .memoryBarrier = + { + .srcStageMask = asset::PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .srcAccessMask = asset::ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }, + IGPURenderpass::SCreationParams::DependenciesEnd + }; + + auto scResources = std::make_unique(m_device.get(), swapchainParams.surfaceFormat.format, dependencies); + renderpass = scResources->getRenderpass(); + + if (!renderpass) + return logFail("Failed to create Renderpass!"); + + if (!m_surface || !m_surface->init(gQueue, std::move(scResources), swapchainParams.sharedParams)) + return logFail("Could not create Window & Surface or initialize the Surface!"); + } + + auto pool = m_device->createCommandPool(gQueue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + + m_converter = CAssetConverter::create({ .device = m_device.get(), .optimizer = {} }); + + for (auto i = 0u; i < MaxFramesInFlight; i++) + { + if (!pool) + return logFail("Couldn't create Command Pool!"); + if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { m_cmdBufs.data() + i, 1 })) + return logFail("Couldn't create Command Buffer!"); + } + + m_winMgr->setWindowSize(m_window.get(), WIN_W, WIN_H); + m_surface->recreateSwapchain(); + + + // create output images + m_hdrImage = m_device->createImage({ + { + .type = IGPUImage::ET_2D, + .samples = ICPUImage::ESCF_1_BIT, + .format = EF_R16G16B16A16_SFLOAT, + .extent = {WIN_W, WIN_H, 1}, + .mipLevels = 1, + .arrayLayers = 1, + .flags = IImage::ECF_NONE, + .usage = bitflag(IImage::EUF_STORAGE_BIT) | IImage::EUF_TRANSFER_SRC_BIT | IImage::EUF_SAMPLED_BIT + } + }); + + if (!m_hdrImage || !m_device->allocate(m_hdrImage->getMemoryReqs(), m_hdrImage.get()).isValid()) + return logFail("Could not create HDR Image"); + + m_hdrImageView = m_device->createImageView({ + .flags = IGPUImageView::ECF_NONE, + .subUsages = IGPUImage::E_USAGE_FLAGS::EUF_STORAGE_BIT | IGPUImage::E_USAGE_FLAGS::EUF_SAMPLED_BIT, + .image = m_hdrImage, + .viewType = IGPUImageView::E_TYPE::ET_2D, + .format = asset::EF_R16G16B16A16_SFLOAT + }); + + + + // ray trace pipeline and descriptor set layout setup + { + const auto bindings = std::array{ + ICPUDescriptorSetLayout::SBinding{ + .binding = 0, + .type = asset::IDescriptor::E_TYPE::ET_ACCELERATION_STRUCTURE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_RAYGEN, + 
.count = 1, + }, + { + .binding = 1, + .type = asset::IDescriptor::E_TYPE::ET_STORAGE_IMAGE, + .createFlags = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = asset::IShader::E_SHADER_STAGE::ESS_RAYGEN, + .count = 1, + } + }; + auto cpuDescriptorSetLayout = core::make_smart_refctd_ptr(bindings); + + const SPushConstantRange pcRange = { + .stageFlags = IShader::E_SHADER_STAGE::ESS_ALL_RAY_TRACING, + .offset = 0u, + .size = sizeof(SPushConstants), + }; + const auto cpuPipelineLayout = core::make_smart_refctd_ptr(std::span({ pcRange }), std::move(cpuDescriptorSetLayout), nullptr, nullptr, nullptr); + + const auto pipeline = ICPURayTracingPipeline::create(cpuPipelineLayout.get()); + pipeline->getCachedCreationParams() = { + .flags = IGPURayTracingPipeline::SCreationParams::FLAGS::NO_NULL_INTERSECTION_SHADERS, + .maxRecursionDepth = 1, + .dynamicStackSize = true, + }; + + pipeline->getSpecInfos(ESS_RAYGEN)[0] = { + .shader = raygenShader, + .entryPoint = "main", + }; + + pipeline->getSpecInfoVector(ESS_MISS)->resize(EMT_COUNT); + const auto missGroups = pipeline->getSpecInfos(ESS_MISS); + missGroups[EMT_PRIMARY] = { .shader = missShader, .entryPoint = "main" }; + missGroups[EMT_OCCLUSION] = { .shader = missShadowShader, .entryPoint = "main" }; + + auto getHitGroupIndex = [](E_GEOM_TYPE geomType, E_RAY_TYPE rayType) + { + return geomType * ERT_COUNT + rayType; + }; + + const auto hitGroupCount = ERT_COUNT * EGT_COUNT; + pipeline->getSpecInfoVector(ESS_CLOSEST_HIT)->resize(hitGroupCount); + pipeline->getSpecInfoVector(ESS_ANY_HIT)->resize(hitGroupCount); + pipeline->getSpecInfoVector(ESS_INTERSECTION)->resize(hitGroupCount); + + const auto closestHitSpecs = pipeline->getSpecInfos(ESS_CLOSEST_HIT); + const auto anyHitSpecs = pipeline->getSpecInfos(ESS_ANY_HIT); + const auto intersectionSpecs = pipeline->getSpecInfos(ESS_INTERSECTION); + + closestHitSpecs[getHitGroupIndex(EGT_TRIANGLES, ERT_PRIMARY)] = { .shader = closestHitShader, .entryPoint = "main" }; + anyHitSpecs[getHitGroupIndex(EGT_TRIANGLES, ERT_PRIMARY)] = {.shader = anyHitShaderColorPayload, .entryPoint = "main"}; + + anyHitSpecs[getHitGroupIndex(EGT_TRIANGLES, ERT_OCCLUSION)] = { .shader = anyHitShaderShadowPayload, .entryPoint = "main" }; + + closestHitSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_PRIMARY)] = { .shader = proceduralClosestHitShader, .entryPoint = "main" }; + anyHitSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_PRIMARY)] = { .shader = anyHitShaderColorPayload, .entryPoint = "main" }; + intersectionSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_PRIMARY)] = { .shader = intersectionHitShader, .entryPoint = "main" }; + + anyHitSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_OCCLUSION)] = {.shader = anyHitShaderShadowPayload, .entryPoint = "main" }; + intersectionSpecs[getHitGroupIndex(EGT_PROCEDURAL, ERT_OCCLUSION)] = { .shader = intersectionHitShader, .entryPoint = "main" }; + + pipeline->getSpecInfoVector(ESS_CALLABLE)->resize(ELT_COUNT); + const auto callableGroups = pipeline->getSpecInfos(ESS_CALLABLE); + callableGroups[ELT_DIRECTIONAL] = { .shader = directionalLightCallShader, .entryPoint = "main" }; + callableGroups[ELT_POINT] = { .shader = pointLightCallShader, .entryPoint = "main" }; + callableGroups[ELT_SPOT] = { .shader = spotLightCallShader, .entryPoint = "main" }; + + smart_refctd_ptr converter = CAssetConverter::create({ .device = m_device.get(), .optimizer = {} }); + CAssetConverter::SInputs inputs = {}; + inputs.logger = m_logger.get(); + + const std::array cpuPipelines = { pipeline.get() }; + 
std::get>(inputs.assets) = cpuPipelines; + + CAssetConverter::SConvertParams params = {}; + params.utilities = m_utils.get(); + + auto reservation = converter->reserve(inputs); + auto future = reservation.convert(params); + if (future.copy() != IQueue::RESULT::SUCCESS) + { + m_logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); + return false; + } + + // assign gpu objects to output + auto&& pipelines = reservation.getGPUObjects(); + m_rayTracingPipeline = pipelines[0].value; + const auto* gpuDsLayout = m_rayTracingPipeline->getLayout()->getDescriptorSetLayouts()[0]; + + const std::array dsLayoutPtrs = { gpuDsLayout }; + m_rayTracingDsPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT, std::span(dsLayoutPtrs.begin(), dsLayoutPtrs.end())); + m_rayTracingDs = m_rayTracingDsPool->createDescriptorSet(core::smart_refctd_ptr(gpuDsLayout)); + + calculateRayTracingStackSize(m_rayTracingPipeline); + + if (!createShaderBindingTable(m_rayTracingPipeline)) + return logFail("Could not create shader binding table"); + + } + + auto assetManager = make_smart_refctd_ptr(smart_refctd_ptr(system)); + + if (!createIndirectBuffer()) + return logFail("Could not create indirect buffer"); + + if (!createAccelerationStructuresFromGeometry()) + return logFail("Could not create acceleration structures from geometry creator"); + + ISampler::SParams samplerParams = { + .AnisotropicFilter = 0 + }; + auto defaultSampler = m_device->createSampler(samplerParams); + + { + const IGPUDescriptorSetLayout::SBinding bindings[] = { + { + .binding = 0u, + .type = nbl::asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER, + .createFlags = ICPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS::ECF_NONE, + .stageFlags = IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = 1u, + .immutableSamplers = &defaultSampler + } + }; + auto gpuPresentDescriptorSetLayout = m_device->createDescriptorSetLayout(bindings); + const video::IGPUDescriptorSetLayout* const layouts[] = { gpuPresentDescriptorSetLayout.get() }; + const uint32_t setCounts[] = { 1u }; + m_presentDsPool = m_device->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); + m_presentDs = m_presentDsPool->createDescriptorSet(gpuPresentDescriptorSetLayout); + + auto scRes = static_cast(m_surface->getSwapchainResources()); + ext::FullScreenTriangle::ProtoPipeline fsTriProtoPPln(m_assetMgr.get(), m_device.get(), m_logger.get()); + if (!fsTriProtoPPln) + return logFail("Failed to create Full Screen Triangle protopipeline or load its vertex shader!"); + + const IGPUPipelineBase::SShaderSpecInfo fragSpec = { + .shader = fragmentShader.get(), + .entryPoint = "main", + }; + + auto presentLayout = m_device->createPipelineLayout( + {}, + core::smart_refctd_ptr(gpuPresentDescriptorSetLayout), + nullptr, + nullptr, + nullptr + ); + m_presentPipeline = fsTriProtoPPln.createPipeline(fragSpec, presentLayout.get(), scRes->getRenderpass()); + if (!m_presentPipeline) + return logFail("Could not create Graphics Pipeline!"); + } + + // write descriptors + IGPUDescriptorSet::SDescriptorInfo infos[3]; + infos[0].desc = m_gpuTlas; + + infos[1].desc = m_hdrImageView; + if (!infos[1].desc) + return logFail("Failed to create image view"); + infos[1].info.image.imageLayout = IImage::LAYOUT::GENERAL; + + infos[2].desc = m_hdrImageView; + infos[2].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + IGPUDescriptorSet::SWriteDescriptorSet writes[] = { + {.dstSet = m_rayTracingDs.get(), .binding = 0, 
.arrayElement = 0, .count = 1, .info = &infos[0]}, + {.dstSet = m_rayTracingDs.get(), .binding = 1, .arrayElement = 0, .count = 1, .info = &infos[1]}, + {.dstSet = m_presentDs.get(), .binding = 0, .arrayElement = 0, .count = 1, .info = &infos[2] }, + }; + m_device->updateDescriptorSets(std::span(writes), {}); + + // gui descriptor setup + { + using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; + { + IGPUSampler::SParams params; + params.AnisotropicFilter = 1u; + params.TextureWrapU = ETC_REPEAT; + params.TextureWrapV = ETC_REPEAT; + params.TextureWrapW = ETC_REPEAT; + + m_ui.samplers.gui = m_device->createSampler(params); + m_ui.samplers.gui->setObjectDebugName("Nabla IMGUI UI Sampler"); + } + + std::array, 69u> immutableSamplers; + for (auto& it : immutableSamplers) + it = smart_refctd_ptr(m_ui.samplers.scene); + + immutableSamplers[nbl::ext::imgui::UI::FontAtlasTexId] = smart_refctd_ptr(m_ui.samplers.gui); + + nbl::ext::imgui::UI::SCreationParameters params; + + params.resources.texturesInfo = { .setIx = 0u, .bindingIx = 0u }; + params.resources.samplersInfo = { .setIx = 0u, .bindingIx = 1u }; + params.assetManager = m_assetMgr; + params.pipelineCache = nullptr; + params.pipelineLayout = nbl::ext::imgui::UI::createDefaultPipelineLayout(m_utils->getLogicalDevice(), params.resources.texturesInfo, params.resources.samplersInfo, MaxUITextureCount); + params.renderpass = smart_refctd_ptr(renderpass); + params.streamingBuffer = nullptr; + params.subpassIx = 0u; + params.transfer = getGraphicsQueue(); + params.utilities = m_utils; + { + m_ui.manager = ext::imgui::UI::create(std::move(params)); + + // note that we use default layout provided by our extension, but you are free to create your own by filling nbl::ext::imgui::UI::S_CREATION_PARAMETERS::resources + const auto* descriptorSetLayout = m_ui.manager->getPipeline()->getLayout()->getDescriptorSetLayout(0u); + const auto& params = m_ui.manager->getCreationParameters(); + + IDescriptorPool::SCreateInfo descriptorPoolInfo = {}; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLER)] = (uint32_t)nbl::ext::imgui::UI::DefaultSamplerIx::COUNT; + descriptorPoolInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_SAMPLED_IMAGE)] = MaxUITextureCount; + descriptorPoolInfo.maxSets = 1u; + descriptorPoolInfo.flags = IDescriptorPool::E_CREATE_FLAGS::ECF_UPDATE_AFTER_BIND_BIT; + + m_guiDescriptorSetPool = m_device->createDescriptorPool(std::move(descriptorPoolInfo)); + assert(m_guiDescriptorSetPool); + + m_guiDescriptorSetPool->createDescriptorSets(1u, &descriptorSetLayout, &m_ui.descriptorSet); + assert(m_ui.descriptorSet); + } + } + + m_ui.manager->registerListener( + [this]() -> void { + ImGuiIO& io = ImGui::GetIO(); + + m_camera.setProjectionMatrix([&]() + { + static matrix4SIMD projection; + + projection = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH( + core::radians(m_cameraSetting.fov), + io.DisplaySize.x / io.DisplaySize.y, + m_cameraSetting.zNear, + m_cameraSetting.zFar); + + return projection; + }()); + + ImGui::SetNextWindowPos(ImVec2(1024, 100), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(256, 256), ImGuiCond_Appearing); + + // create a window and insert the inspector + ImGui::SetNextWindowPos(ImVec2(10, 10), ImGuiCond_Appearing); + ImGui::SetNextWindowSize(ImVec2(320, 340), ImGuiCond_Appearing); + ImGui::Begin("Controls"); + + ImGui::SameLine(); + + ImGui::Text("Camera"); + + ImGui::SliderFloat("Move speed", &m_cameraSetting.moveSpeed, 0.1f, 10.f); + 
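The spot-light sliders in the listener code that follows edit the outer cutoff in degrees but store it as a cosine (`m_light.outerCutoff = cos(hlsl::radians(...))`). A small sketch of why that representation is convenient; the helper names here are made up for illustration:

#include <cmath>

// Store cos(outerAngle) once on the CPU side...
inline float storedSpotCutoff(float outerAngleDegrees)
{
    return std::cos(outerAngleDegrees * 3.14159265358979323846f / 180.f);
}

// ...so the shading code only needs a dot product and a comparison per ray, never an acos:
// a larger cosine means a smaller angle to the spot axis.
inline bool insideSpotCone(float cosAngleToSpotAxis, float storedCutoff)
{
    return cosAngleToSpotAxis >= storedCutoff;
}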
ImGui::SliderFloat("Rotate speed", &m_cameraSetting.rotateSpeed, 0.1f, 10.f); + ImGui::SliderFloat("Fov", &m_cameraSetting.fov, 20.f, 150.f); + ImGui::SliderFloat("zNear", &m_cameraSetting.zNear, 0.1f, 100.f); + ImGui::SliderFloat("zFar", &m_cameraSetting.zFar, 110.f, 10000.f); + Light m_oldLight = m_light; + int light_type = m_light.type; + ImGui::ListBox("LightType", &light_type, s_lightTypeNames, ELT_COUNT); + m_light.type = static_cast(light_type); + if (m_light.type == ELT_DIRECTIONAL) + { + ImGui::SliderFloat3("Light Direction", &m_light.direction.x, -1.f, 1.f); + } + else if (m_light.type == ELT_POINT) + { + ImGui::SliderFloat3("Light Position", &m_light.position.x, -20.f, 20.f); + } + else if (m_light.type == ELT_SPOT) + { + ImGui::SliderFloat3("Light Direction", &m_light.direction.x, -1.f, 1.f); + ImGui::SliderFloat3("Light Position", &m_light.position.x, -20.f, 20.f); + + float32_t dOuterCutoff = hlsl::degrees(acos(m_light.outerCutoff)); + if (ImGui::SliderFloat("Light Outer Cutoff", &dOuterCutoff, 0.0f, 45.0f)) + { + m_light.outerCutoff = cos(hlsl::radians(dOuterCutoff)); + } + } + ImGui::Checkbox("Use Indirect Command", &m_useIndirectCommand); + if (m_light != m_oldLight) + { + m_frameAccumulationCounter = 0; + } + + ImGui::Text("X: %f Y: %f", io.MousePos.x, io.MousePos.y); + + ImGui::End(); + } + ); + + // Set Camera + { + core::vectorSIMDf cameraPosition(0, 5, -10); + matrix4SIMD proj = matrix4SIMD::buildProjectionMatrixPerspectiveFovRH( + core::radians(60.0f), + WIN_W / WIN_H, + 0.01f, + 500.0f + ); + m_camera = Camera(cameraPosition, core::vectorSIMDf(0, 0, 0), proj); + } + + m_winMgr->setWindowSize(m_window.get(), WIN_W, WIN_H); + m_surface->recreateSwapchain(); + m_winMgr->show(m_window.get()); + m_oracle.reportBeginFrameRecord(); + m_camera.mapKeysToWASD(); + + return true; + } + + bool updateGUIDescriptorSet() + { + // texture atlas, note we don't create info & write pair for the font sampler because UI extension's is immutable and baked into DS layout + static std::array descriptorInfo; + static IGPUDescriptorSet::SWriteDescriptorSet writes[MaxUITextureCount]; + + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].info.image.imageLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + descriptorInfo[nbl::ext::imgui::UI::FontAtlasTexId].desc = smart_refctd_ptr(m_ui.manager->getFontAtlasView()); + + for (uint32_t i = 0; i < descriptorInfo.size(); ++i) + { + writes[i].dstSet = m_ui.descriptorSet.get(); + writes[i].binding = 0u; + writes[i].arrayElement = i; + writes[i].count = 1u; + } + writes[nbl::ext::imgui::UI::FontAtlasTexId].info = descriptorInfo.data() + nbl::ext::imgui::UI::FontAtlasTexId; + + return m_device->updateDescriptorSets(writes, {}); + } + + inline void workLoopBody() override + { + // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. + const uint32_t framesInFlight = core::min(MaxFramesInFlight, m_surface->getMaxAcquiresInFlight()); + // We block for semaphores for 2 reasons here: + // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight] + // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. 
[MaxAcquiresInFlight] + if (m_realFrameIx >= framesInFlight) + { + const ISemaphore::SWaitInfo cbDonePending[] = + { + { + .semaphore = m_semaphore.get(), + .value = m_realFrameIx + 1 - framesInFlight + } + }; + if (m_device->blockForSemaphores(cbDonePending) != ISemaphore::WAIT_RESULT::SUCCESS) + return; + } + const auto resourceIx = m_realFrameIx % MaxFramesInFlight; + + m_api->startCapture(); + + update(); + + auto queue = getGraphicsQueue(); + auto cmdbuf = m_cmdBufs[resourceIx].get(); + + if (!keepRunning()) + return; + + cmdbuf->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); + cmdbuf->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + cmdbuf->beginDebugMarker("RaytracingPipelineApp Frame"); + + const auto viewMatrix = m_camera.getViewMatrix(); + const auto projectionMatrix = m_camera.getProjectionMatrix(); + const auto viewProjectionMatrix = m_camera.getConcatenatedMatrix(); + + core::matrix3x4SIMD modelMatrix; + modelMatrix.setTranslation(nbl::core::vectorSIMDf(0, 0, 0, 0)); + modelMatrix.setRotation(quaternion(0, 0, 0)); + + core::matrix4SIMD modelViewProjectionMatrix = core::concatenateBFollowedByA(viewProjectionMatrix, modelMatrix); + if (m_cachedModelViewProjectionMatrix != modelViewProjectionMatrix) + { + m_frameAccumulationCounter = 0; + m_cachedModelViewProjectionMatrix = modelViewProjectionMatrix; + } + core::matrix4SIMD invModelViewProjectionMatrix; + modelViewProjectionMatrix.getInverseTransform(invModelViewProjectionMatrix); + + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[1]; + imageBarriers[0].barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, // previous frame read from fragment shader + .srcAccessMask = ACCESS_FLAGS::SHADER_READ_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT, + .dstAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS + } + }; + imageBarriers[0].image = m_hdrImage.get(); + imageBarriers[0].subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }; + imageBarriers[0].oldLayout = m_frameAccumulationCounter == 0 ?
IImage::LAYOUT::UNDEFINED : IImage::LAYOUT::READ_ONLY_OPTIMAL; + imageBarriers[0].newLayout = IImage::LAYOUT::GENERAL; + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imageBarriers }); + } + + // Trace Rays Pass + { + SPushConstants pc; + pc.light = m_light; + pc.proceduralGeomInfoBuffer = m_proceduralGeomInfoBuffer->getDeviceAddress(); + pc.triangleGeomInfoBuffer = m_triangleGeomInfoBuffer->getDeviceAddress(); + pc.frameCounter = m_frameAccumulationCounter; + const core::vector3df camPos = m_camera.getPosition().getAsVector3df(); + pc.camPos = { camPos.X, camPos.Y, camPos.Z }; + memcpy(&pc.invMVP, invModelViewProjectionMatrix.pointer(), sizeof(pc.invMVP)); + + cmdbuf->bindRayTracingPipeline(m_rayTracingPipeline.get()); + cmdbuf->setRayTracingPipelineStackSize(m_rayTracingStackSize); + cmdbuf->pushConstants(m_rayTracingPipeline->getLayout(), IShader::E_SHADER_STAGE::ESS_ALL_RAY_TRACING, 0, sizeof(SPushConstants), &pc); + cmdbuf->bindDescriptorSets(EPBP_RAY_TRACING, m_rayTracingPipeline->getLayout(), 0, 1, &m_rayTracingDs.get()); + if (m_useIndirectCommand) + { + cmdbuf->traceRaysIndirect( + SBufferBinding{ + .offset = 0, + .buffer = m_indirectBuffer, + }); + } + else + { + cmdbuf->traceRays( + m_shaderBindingTable.raygenGroupRange, + m_shaderBindingTable.missGroupsRange, m_shaderBindingTable.missGroupsStride, + m_shaderBindingTable.hitGroupsRange, m_shaderBindingTable.hitGroupsStride, + m_shaderBindingTable.callableGroupsRange, m_shaderBindingTable.callableGroupsStride, + WIN_W, WIN_H, 1); + } + } + + // pipeline barrier + { + IGPUCommandBuffer::SPipelineBarrierDependencyInfo::image_barrier_t imageBarriers[1]; + imageBarriers[0].barrier = { + .dep = { + .srcStageMask = PIPELINE_STAGE_FLAGS::RAY_TRACING_SHADER_BIT, + .srcAccessMask = ACCESS_FLAGS::SHADER_WRITE_BITS, + .dstStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, + .dstAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT + } + }; + imageBarriers[0].image = m_hdrImage.get(); + imageBarriers[0].subresourceRange = { + .aspectMask = IImage::EAF_COLOR_BIT, + .baseMipLevel = 0u, + .levelCount = 1u, + .baseArrayLayer = 0u, + .layerCount = 1u + }; + imageBarriers[0].oldLayout = IImage::LAYOUT::GENERAL; + imageBarriers[0].newLayout = IImage::LAYOUT::READ_ONLY_OPTIMAL; + + cmdbuf->pipelineBarrier(E_DEPENDENCY_FLAGS::EDF_NONE, { .imgBarriers = imageBarriers }); + } + + { + asset::SViewport viewport; + { + viewport.minDepth = 1.f; + viewport.maxDepth = 0.f; + viewport.x = 0u; + viewport.y = 0u; + viewport.width = WIN_W; + viewport.height = WIN_H; + } + cmdbuf->setViewport(0u, 1u, &viewport); + + + VkRect2D defaultScisors[] = { {.offset = {(int32_t)viewport.x, (int32_t)viewport.y}, .extent = {(uint32_t)viewport.width, (uint32_t)viewport.height}} }; + cmdbuf->setScissor(defaultScisors); + + auto scRes = static_cast(m_surface->getSwapchainResources()); + const VkRect2D currentRenderArea = + { + .offset = {0,0}, + .extent = {m_window->getWidth(),m_window->getHeight()} + }; + const IGPUCommandBuffer::SClearColorValue clearColor = { .float32 = {0.f,0.f,0.f,1.f} }; + const IGPUCommandBuffer::SRenderpassBeginInfo info = + { + .framebuffer = scRes->getFramebuffer(m_currentImageAcquire.imageIndex), + .colorClearValues = &clearColor, + .depthStencilClearValues = nullptr, + .renderArea = currentRenderArea + }; + nbl::video::ISemaphore::SWaitInfo waitInfo = { .semaphore = m_semaphore.get(), .value = m_realFrameIx + 1u }; + + cmdbuf->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); + + 
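The `traceRays` call recorded above consumes the four SBT ranges and strides assembled later in `createShaderBindingTable()`. For orientation, they correspond one-to-one to the `VkStridedDeviceAddressRegionKHR` regions that raw Vulkan's `vkCmdTraceRaysKHR` takes; a rough sketch of that mapping (the helper below is hypothetical, not Nabla API):

#include <vulkan/vulkan.h>

// Hypothetical helper: turn one offset/size/stride triple (like the ranges built in
// createShaderBindingTable()) into the region struct vkCmdTraceRaysKHR expects.
inline VkStridedDeviceAddressRegionKHR makeSbtRegion(VkDeviceAddress sbtBufferBaseAddress, VkDeviceSize offset, VkDeviceSize size, VkDeviceSize stride)
{
    VkStridedDeviceAddressRegionKHR region = {};
    region.deviceAddress = sbtBufferBaseAddress + offset; // regions address straight into the SBT buffer
    region.stride = stride;                               // distance between consecutive shader group records
    region.size = size;                                   // total bytes covered by this region
    return region;
}
// Note: for the raygen region the Vulkan spec additionally requires size == stride, i.e. exactly one record.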
cmdbuf->bindGraphicsPipeline(m_presentPipeline.get()); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, m_presentPipeline->getLayout(), 0, 1u, &m_presentDs.get()); + ext::FullScreenTriangle::recordDrawCall(cmdbuf); + + const auto uiParams = m_ui.manager->getCreationParameters(); + auto* uiPipeline = m_ui.manager->getPipeline(); + cmdbuf->bindGraphicsPipeline(uiPipeline); + cmdbuf->bindDescriptorSets(EPBP_GRAPHICS, uiPipeline->getLayout(), uiParams.resources.texturesInfo.setIx, 1u, &m_ui.descriptorSet.get()); + m_ui.manager->render(cmdbuf, waitInfo); + + cmdbuf->endRenderPass(); + + } + + cmdbuf->endDebugMarker(); + cmdbuf->end(); + + { + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = + { + { + .semaphore = m_semaphore.get(), + .value = ++m_realFrameIx, + .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS + } + }; + { + { + const IQueue::SSubmitInfo::SCommandBufferInfo commandBuffers[] = + { + {.cmdbuf = cmdbuf } + }; + + const IQueue::SSubmitInfo::SSemaphoreInfo acquired[] = + { + { + .semaphore = m_currentImageAcquire.semaphore, + .value = m_currentImageAcquire.acquireCount, + .stageMask = PIPELINE_STAGE_FLAGS::NONE + } + }; + const IQueue::SSubmitInfo infos[] = + { + { + .waitSemaphores = acquired, + .commandBuffers = commandBuffers, + .signalSemaphores = rendered + } + }; + + updateGUIDescriptorSet(); + + if (queue->submit(infos) != IQueue::RESULT::SUCCESS) + m_realFrameIx--; + } + } + + m_window->setCaption("[Nabla Engine] Ray Tracing Pipeline"); + m_surface->present(m_currentImageAcquire.imageIndex, rendered); + } + m_api->endCapture(); + m_frameAccumulationCounter++; + } + + inline void update() + { + m_camera.setMoveSpeed(m_cameraSetting.moveSpeed); + m_camera.setRotateSpeed(m_cameraSetting.rotateSpeed); + + static std::chrono::microseconds previousEventTimestamp{}; + + m_inputSystem->getDefaultMouse(&m_mouse); + m_inputSystem->getDefaultKeyboard(&m_keyboard); + + auto updatePresentationTimestamp = [&]() + { + m_currentImageAcquire = m_surface->acquireNextImage(); + + m_oracle.reportEndFrameRecord(); + const auto timestamp = m_oracle.getNextPresentationTimeStamp(); + m_oracle.reportBeginFrameRecord(); + + return timestamp; + }; + + const auto nextPresentationTimestamp = updatePresentationTimestamp(); + + struct + { + std::vector mouse{}; + std::vector keyboard{}; + } capturedEvents; + + m_camera.beginInputProcessing(nextPresentationTimestamp); + { + const auto& io = ImGui::GetIO(); + m_mouse.consumeEvents([&](const IMouseEventChannel::range_t& events) -> void + { + if (!io.WantCaptureMouse) + m_camera.mouseProcess(events); // don't capture the events, only let camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + capturedEvents.mouse.emplace_back(e); + + } + }, m_logger.get()); + + m_keyboard.consumeEvents([&](const IKeyboardEventChannel::range_t& events) -> void + { + if (!io.WantCaptureKeyboard) + m_camera.keyboardProcess(events); // don't capture the events, only let camera handle them with its impl + + for (const auto& e : events) // here capture + { + if (e.timeStamp < previousEventTimestamp) + continue; + + previousEventTimestamp = e.timeStamp; + capturedEvents.keyboard.emplace_back(e); + } + }, m_logger.get()); + + } + m_camera.endInputProcessing(nextPresentationTimestamp); + + const core::SRange mouseEvents(capturedEvents.mouse.data(), capturedEvents.mouse.data() + capturedEvents.mouse.size()); + const core::SRange 
keyboardEvents(capturedEvents.keyboard.data(), capturedEvents.keyboard.data() + capturedEvents.keyboard.size()); + const auto cursorPosition = m_window->getCursorControl()->getPosition(); + const auto mousePosition = float32_t2(cursorPosition.x, cursorPosition.y) - float32_t2(m_window->getX(), m_window->getY()); + + const ext::imgui::UI::SUpdateParameters params = + { + .mousePosition = mousePosition, + .displaySize = { m_window->getWidth(), m_window->getHeight() }, + .mouseEvents = mouseEvents, + .keyboardEvents = keyboardEvents + }; + + m_ui.manager->update(params); + } + + inline bool keepRunning() override + { + if (m_surface->irrecoverable()) + return false; + + return true; + } + + inline bool onAppTerminated() override + { + return device_base_t::onAppTerminated(); + } + +private: + uint32_t getWorkgroupCount(uint32_t dim, uint32_t size) + { + return (dim + size - 1) / size; + } + + bool createIndirectBuffer() + { + const auto getBufferRangeAddress = [](const SBufferRange& range) + { + return range.buffer->getDeviceAddress() + range.offset; + }; + const auto command = TraceRaysIndirectCommand_t{ + .raygenShaderRecordAddress = getBufferRangeAddress(m_shaderBindingTable.raygenGroupRange), + .raygenShaderRecordSize = m_shaderBindingTable.raygenGroupRange.size, + .missShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.missGroupsRange), + .missShaderBindingTableSize = m_shaderBindingTable.missGroupsRange.size, + .missShaderBindingTableStride = m_shaderBindingTable.missGroupsStride, + .hitShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.hitGroupsRange), + .hitShaderBindingTableSize = m_shaderBindingTable.hitGroupsRange.size, + .hitShaderBindingTableStride = m_shaderBindingTable.hitGroupsStride, + .callableShaderBindingTableAddress = getBufferRangeAddress(m_shaderBindingTable.callableGroupsRange), + .callableShaderBindingTableSize = m_shaderBindingTable.callableGroupsRange.size, + .callableShaderBindingTableStride = m_shaderBindingTable.callableGroupsStride, + .width = WIN_W, + .height = WIN_H, + .depth = 1, + }; + IGPUBuffer::SCreationParams params; + params.usage = IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INDIRECT_BUFFER_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + params.size = sizeof(TraceRaysIndirectCommand_t); + m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = getGraphicsQueue() }, std::move(params), &command).move_into(m_indirectBuffer); + return true; + } + + void calculateRayTracingStackSize(const smart_refctd_ptr& pipeline) + { + const auto raygenStackSize = pipeline->getRaygenStackSize(); + auto getMaxSize = [&](auto ranges, auto valProj) -> uint16_t + { + auto maxValue = 0; + for (const auto& val : ranges) + { + maxValue = std::max(maxValue, std::invoke(valProj, val)); + } + return maxValue; + }; + + const auto closestHitStackMax = getMaxSize(pipeline->getHitStackSizes(), &IGPURayTracingPipeline::SHitGroupStackSize::closestHit); + const auto anyHitStackMax = getMaxSize(pipeline->getHitStackSizes(), &IGPURayTracingPipeline::SHitGroupStackSize::anyHit); + const auto intersectionStackMax = getMaxSize(pipeline->getHitStackSizes(), &IGPURayTracingPipeline::SHitGroupStackSize::intersection); + const auto missStackMax = getMaxSize(pipeline->getMissStackSizes(), std::identity{}); + const auto callableStackMax = getMaxSize(pipeline->getCallableStackSizes(), std::identity{}); + auto firstDepthStackSizeMax = std::max(closestHitStackMax, missStackMax); + firstDepthStackSizeMax = 
std::max(firstDepthStackSizeMax, intersectionStackMax + anyHitStackMax); + m_rayTracingStackSize = raygenStackSize + std::max(firstDepthStackSizeMax, callableStackMax); + } + + bool createShaderBindingTable(const smart_refctd_ptr& pipeline) + { + const auto& limits = m_device->getPhysicalDevice()->getLimits(); + const auto handleSize = SPhysicalDeviceLimits::ShaderGroupHandleSize; + const auto handleSizeAligned = nbl::core::alignUp(handleSize, limits.shaderGroupHandleAlignment); + + auto& raygenRange = m_shaderBindingTable.raygenGroupRange; + + auto& hitRange = m_shaderBindingTable.hitGroupsRange; + const auto hitHandles = pipeline->getHitHandles(); + + auto& missRange = m_shaderBindingTable.missGroupsRange; + const auto missHandles = pipeline->getMissHandles(); + + auto& callableRange = m_shaderBindingTable.callableGroupsRange; + const auto callableHandles = pipeline->getCallableHandles(); + + raygenRange = { + .offset = 0, + .size = core::alignUp(handleSizeAligned, limits.shaderGroupBaseAlignment) + }; + + missRange = { + .offset = raygenRange.size, + .size = core::alignUp(missHandles.size() * handleSizeAligned, limits.shaderGroupBaseAlignment), + }; + m_shaderBindingTable.missGroupsStride = handleSizeAligned; + + hitRange = { + .offset = missRange.offset + missRange.size, + .size = core::alignUp(hitHandles.size() * handleSizeAligned, limits.shaderGroupBaseAlignment), + }; + m_shaderBindingTable.hitGroupsStride = handleSizeAligned; + + callableRange = { + .offset = hitRange.offset + hitRange.size, + .size = core::alignUp(callableHandles.size() * handleSizeAligned, limits.shaderGroupBaseAlignment), + }; + m_shaderBindingTable.callableGroupsStride = handleSizeAligned; + + const auto bufferSize = raygenRange.size + missRange.size + hitRange.size + callableRange.size; + + ICPUBuffer::SCreationParams cpuBufferParams; + cpuBufferParams.size = bufferSize; + auto cpuBuffer = ICPUBuffer::create(std::move(cpuBufferParams)); + uint8_t* pData = reinterpret_cast(cpuBuffer->getPointer()); + + // copy raygen region + memcpy(pData, &pipeline->getRaygen(), handleSize); + + // copy miss region + uint8_t* pMissData = pData + missRange.offset; + for (const auto& handle : missHandles) + { + memcpy(pMissData, &handle, handleSize); + pMissData += m_shaderBindingTable.missGroupsStride; + } + + // copy hit region + uint8_t* pHitData = pData + hitRange.offset; + for (const auto& handle : hitHandles) + { + memcpy(pHitData, &handle, handleSize); + pHitData += m_shaderBindingTable.hitGroupsStride; + } + + // copy callable region + uint8_t* pCallableData = pData + callableRange.offset; + for (const auto& handle : callableHandles) + { + memcpy(pCallableData, &handle, handleSize); + pCallableData += m_shaderBindingTable.callableGroupsStride; + } + + { + IGPUBuffer::SCreationParams params; + params.usage = IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT | IGPUBuffer::EUF_SHADER_BINDING_TABLE_BIT; + params.size = bufferSize; + m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = getGraphicsQueue() }, std::move(params), pData).move_into(raygenRange.buffer); + missRange.buffer = core::smart_refctd_ptr(raygenRange.buffer); + hitRange.buffer = core::smart_refctd_ptr(raygenRange.buffer); + callableRange.buffer = core::smart_refctd_ptr(raygenRange.buffer); + } + + return true; + } + + bool createAccelerationStructuresFromGeometry() + { + auto queue = getGraphicsQueue(); + // get geometries into ICPUBuffers + auto pool = 
m_device->createCommandPool(queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); + if (!pool) + return logFail("Couldn't create Command Pool for geometry creation!"); + + const auto defaultMaterial = Material{ + .ambient = {0.2, 0.1, 0.1}, + .diffuse = {0.8, 0.3, 0.3}, + .specular = {0.8, 0.8, 0.8}, + .shininess = 1.0f, + .alpha = 1.0f, + }; + + auto getTranslationMatrix = [](float32_t x, float32_t y, float32_t z) + { + core::matrix3x4SIMD transform; + transform.setTranslation(nbl::core::vectorSIMDf(x, y, z, 0)); + return transform; + }; + + core::matrix3x4SIMD planeTransform; + planeTransform.setRotation(quaternion::fromAngleAxis(core::radians(-90.0f), vector3df_SIMD{ 1, 0, 0 })); + + // triangles geometries + auto geometryCreator = make_smart_refctd_ptr(); + + const auto cpuObjects = std::array{ + scene::ReferenceObjectCpu { + .data = geometryCreator->createRectangle({10, 10}), + .material = defaultMaterial, + .transform = planeTransform, + }, + scene::ReferenceObjectCpu { + .data = geometryCreator->createCube({1, 1, 1}), + .material = defaultMaterial, + .transform = getTranslationMatrix(0, 0.5f, 0), + }, + scene::ReferenceObjectCpu { + .data = geometryCreator->createCube({1.5, 1.5, 1.5}), + .material = Material{ + .ambient = {0.1, 0.1, 0.2}, + .diffuse = {0.2, 0.2, 0.8}, + .specular = {0.8, 0.8, 0.8}, + .shininess = 1.0f, + .alpha = 1.0f, + }, + .transform = getTranslationMatrix(-5.0f, 1.0f, 0), + }, + scene::ReferenceObjectCpu { + .data = geometryCreator->createCube({1.5, 1.5, 1.5}), + .material = Material{ + .ambient = {0.1, 0.2, 0.1}, + .diffuse = {0.2, 0.8, 0.2}, + .specular = {0.8, 0.8, 0.8}, + .shininess = 1.0f, + .alpha = 0.2, + }, + .transform = getTranslationMatrix(5.0f, 1.0f, 0), + }, + }; + + // procedural geometries + using Aabb = IGPUBottomLevelAccelerationStructure::AABB_t; + + smart_refctd_ptr cpuProcBuffer; + { + ICPUBuffer::SCreationParams params; + params.size = NumberOfProceduralGeometries * sizeof(Aabb); + cpuProcBuffer = ICPUBuffer::create(std::move(params)); + } + + core::vector proceduralGeoms; + proceduralGeoms.reserve(NumberOfProceduralGeometries); + auto proceduralGeometries = reinterpret_cast(cpuProcBuffer->getPointer()); + for (int32_t i = 0; i < NumberOfProceduralGeometries; i++) + { + const auto middle_i = NumberOfProceduralGeometries / 2.0; + SProceduralGeomInfo sphere = { + .material = hlsl::_static_cast(Material{ + .ambient = {0.1, 0.05 * i, 0.1}, + .diffuse = {0.3, 0.2 * i, 0.3}, + .specular = {0.8, 0.8, 0.8}, + .shininess = 1.0f, + }), + .center = float32_t3((i - middle_i) * 4.0, 2, 5.0), + .radius = 1, + }; + + proceduralGeoms.push_back(sphere); + const auto sphereMin = sphere.center - sphere.radius; + const auto sphereMax = sphere.center + sphere.radius; + proceduralGeometries[i] = { + vector3d(sphereMin.x, sphereMin.y, sphereMin.z), + vector3d(sphereMax.x, sphereMax.y, sphereMax.z) + }; + } + + { + IGPUBuffer::SCreationParams params; + params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + params.size = proceduralGeoms.size() * sizeof(SProceduralGeomInfo); + m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = queue }, std::move(params), proceduralGeoms.data()).move_into(m_proceduralGeomInfoBuffer); + } + + // get ICPUBuffers into ICPUBLAS + // TODO use one BLAS and multiple triangles/aabbs in one + const auto blasCount = std::size(cpuObjects) + 1; + const auto proceduralBlasIdx 
= std::size(cpuObjects); + + std::array, std::size(cpuObjects)+1u> cpuBlasList; + for (uint32_t i = 0; i < blasCount; i++) + { + auto& blas = cpuBlasList[i]; + blas = make_smart_refctd_ptr(); + + if (i == proceduralBlasIdx) + { + auto aabbs = make_refctd_dynamic_array>>(1u); + auto primitiveCounts = make_refctd_dynamic_array>(1u); + + auto& aabb = aabbs->front(); + auto& primCount = primitiveCounts->front(); + + primCount = NumberOfProceduralGeometries; + aabb.data = { .offset = 0, .buffer = cpuProcBuffer }; + aabb.stride = sizeof(IGPUBottomLevelAccelerationStructure::AABB_t); + aabb.geometryFlags = IGPUBottomLevelAccelerationStructure::GEOMETRY_FLAGS::OPAQUE_BIT; // only allow opaque for now + + blas->setGeometries(std::move(aabbs), std::move(primitiveCounts)); + } + else + { + auto triangles = make_refctd_dynamic_array>>(1u); + auto primitiveCounts = make_refctd_dynamic_array>(1u); + + auto& tri = triangles->front(); + + auto& primCount = primitiveCounts->front(); + primCount = cpuObjects[i].data->getPrimitiveCount(); + + tri = cpuObjects[i].data->exportForBLAS(); + tri.geometryFlags = cpuObjects[i].material.isTransparent() ? + IGPUBottomLevelAccelerationStructure::GEOMETRY_FLAGS::NO_DUPLICATE_ANY_HIT_INVOCATION_BIT : + IGPUBottomLevelAccelerationStructure::GEOMETRY_FLAGS::OPAQUE_BIT; + + blas->setGeometries(std::move(triangles), std::move(primitiveCounts)); + } + + auto blasFlags = bitflag(IGPUBottomLevelAccelerationStructure::BUILD_FLAGS::PREFER_FAST_TRACE_BIT) | IGPUBottomLevelAccelerationStructure::BUILD_FLAGS::ALLOW_COMPACTION_BIT; + if (i == proceduralBlasIdx) + blasFlags |= IGPUBottomLevelAccelerationStructure::BUILD_FLAGS::GEOMETRY_TYPE_IS_AABB_BIT; + + blas->setBuildFlags(blasFlags); + blas->setContentHash(blas->computeContentHash()); + } + + auto geomInfoBuffer = ICPUBuffer::create({ std::size(cpuObjects) * sizeof(STriangleGeomInfo) }); + STriangleGeomInfo* geomInfos = reinterpret_cast(geomInfoBuffer->getPointer()); + + // get ICPUBLAS into ICPUTLAS + auto geomInstances = make_refctd_dynamic_array>(blasCount); + { + uint32_t i = 0; + for (auto instance = geomInstances->begin(); instance != geomInstances->end(); instance++, i++) + { + const auto isProceduralInstance = i == proceduralBlasIdx; + ICPUTopLevelAccelerationStructure::StaticInstance inst; + inst.base.blas = cpuBlasList[i]; + inst.base.flags = static_cast(IGPUTopLevelAccelerationStructure::INSTANCE_FLAGS::TRIANGLE_FACING_CULL_DISABLE_BIT); + inst.base.instanceCustomIndex = i; + inst.base.instanceShaderBindingTableRecordOffset = isProceduralInstance ? 2 : 0; + inst.base.mask = 0xFF; + inst.transform = isProceduralInstance ? matrix3x4SIMD() : cpuObjects[i].transform; + + instance->instance = inst; + } + } + + auto cpuTlas = make_smart_refctd_ptr(); + cpuTlas->setInstances(std::move(geomInstances)); + cpuTlas->setBuildFlags(IGPUTopLevelAccelerationStructure::BUILD_FLAGS::PREFER_FAST_TRACE_BIT); + + // convert with asset converter + smart_refctd_ptr converter = CAssetConverter::create({ .device = m_device.get(), .optimizer = {} }); + struct MyInputs : CAssetConverter::SInputs + { + // For the GPU Buffers to be directly writeable and so that we don't need a Transfer Queue submit at all + inline uint32_t constrainMemoryTypeBits(const size_t groupCopyID, const IAsset* canonicalAsset, const blake3_hash_t& contentHash, const IDeviceMemoryBacked* memoryBacked) const override + { + assert(memoryBacked); + return memoryBacked->getObjectType() != IDeviceMemoryBacked::EOT_BUFFER ? 
(~0u) : rebarMemoryTypes; + } + + uint32_t rebarMemoryTypes; + } inputs = {}; + inputs.logger = m_logger.get(); + inputs.rebarMemoryTypes = m_physicalDevice->getDirectVRAMAccessMemoryTypeBits(); + // the allocator needs to be overriden to hand out memory ranges which have already been mapped so that the ReBAR fast-path can kick in + // (multiple buffers can be bound to same memory, but memory can only be mapped once at one place, so Asset Converter can't do it) + struct MyAllocator final : public IDeviceMemoryAllocator + { + ILogicalDevice* getDeviceForAllocations() const override { return device; } + + SAllocation allocate(const SAllocateInfo& info) override + { + auto retval = device->allocate(info); + // map what is mappable by default so ReBAR checks succeed + if (retval.isValid() && retval.memory->isMappable()) + retval.memory->map({ .offset = 0,.length = info.size }); + return retval; + } + + ILogicalDevice* device; + } myalloc; + myalloc.device = m_device.get(); + inputs.allocator = &myalloc; + + std::array tmpTlas; + std::array tmpBuffers; + std::array tmpGeometries; + std::array, std::size(cpuObjects)> tmpGeometryPatches; + { + tmpTlas[0] = cpuTlas.get(); + tmpBuffers[0] = cpuProcBuffer.get(); + for (uint32_t i = 0; i < cpuObjects.size(); i++) + { + tmpGeometries[i] = cpuObjects[i].data.get(); + tmpGeometryPatches[i].indexBufferUsages= IGPUBuffer::E_USAGE_FLAGS::EUF_SHADER_DEVICE_ADDRESS_BIT; + } + + std::get>(inputs.assets) = tmpTlas; + std::get>(inputs.assets) = tmpBuffers; + std::get>(inputs.assets) = tmpGeometries; + std::get>(inputs.patches) = tmpGeometryPatches; + } + + auto reservation = converter->reserve(inputs); + { + auto prepass = [&](const auto & references) -> bool + { + auto objects = reservation.getGPUObjects(); + uint32_t counter = {}; + for (auto& object : objects) + { + auto gpu = object.value; + auto* reference = references[counter]; + + if (reference) + { + if (!gpu) + { + m_logger->log("Failed to convert a CPU object to GPU!", ILogger::ELL_ERROR); + return false; + } + } + counter++; + } + return true; + }; + + prepass.template operator() < ICPUTopLevelAccelerationStructure > (tmpTlas); + prepass.template operator() < ICPUBuffer > (tmpBuffers); + prepass.template operator() < ICPUPolygonGeometry > (tmpGeometries); + } + + constexpr auto CompBufferCount = 2; + std::array, CompBufferCount> compBufs = {}; + std::array compBufInfos = {}; + { + auto pool = m_device->createCommandPool(queue->getFamilyIndex(), IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT | IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, compBufs); + compBufs.front()->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + for (auto i = 0; i < CompBufferCount; i++) + compBufInfos[i].cmdbuf = compBufs[i].get(); + } + auto compSema = m_device->createSemaphore(0u); + SIntendedSubmitInfo compute = {}; + compute.queue = queue; + compute.scratchCommandBuffers = compBufInfos; + compute.scratchSemaphore = { + .semaphore = compSema.get(), + .value = 0u, + .stageMask = PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_BUILD_BIT | PIPELINE_STAGE_FLAGS::ACCELERATION_STRUCTURE_COPY_BIT + }; + // convert + { + smart_refctd_ptr scratchAlloc; + { + constexpr auto MaxAlignment = 256; + constexpr auto MinAllocationSize = 1024; + const auto scratchSize = core::alignUp(reservation.getMaxASBuildScratchSize(false), MaxAlignment); + + + IGPUBuffer::SCreationParams creationParams = {}; + creationParams.size = scratchSize; + creationParams.usage = 
IGPUBuffer::EUF_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT | IGPUBuffer::EUF_STORAGE_BUFFER_BIT; + auto scratchBuffer = m_device->createBuffer(std::move(creationParams)); + + auto reqs = scratchBuffer->getMemoryReqs(); + reqs.memoryTypeBits &= m_physicalDevice->getDirectVRAMAccessMemoryTypeBits(); + + auto allocation = m_device->allocate(reqs, scratchBuffer.get(), IDeviceMemoryAllocation::EMAF_DEVICE_ADDRESS_BIT); + allocation.memory->map({ .offset = 0,.length = reqs.size }); + + scratchAlloc = make_smart_refctd_ptr( + SBufferRange{0ull, scratchSize, std::move(scratchBuffer)}, + core::allocator(), MaxAlignment, MinAllocationSize + ); + } + + struct MyParams final : CAssetConverter::SConvertParams + { + inline uint32_t getFinalOwnerQueueFamily(const IGPUBuffer* buffer, const core::blake3_hash_t& createdFrom) override + { + return finalUser; + } + inline uint32_t getFinalOwnerQueueFamily(const IGPUAccelerationStructure* image, const core::blake3_hash_t& createdFrom) override + { + return finalUser; + } + + uint8_t finalUser; + } params = {}; + params.utilities = m_utils.get(); + params.compute = &compute; + params.scratchForDeviceASBuild = scratchAlloc.get(); + params.finalUser = queue->getFamilyIndex(); + + auto future = reservation.convert(params); + if (future.copy() != IQueue::RESULT::SUCCESS) + { + m_logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); + return false; + } + // 2 submits, BLAS build, TLAS build, DO NOT ADD COMPACTIONS IN THIS EXAMPLE! + if (compute.getFutureScratchSemaphore().value>3) + m_logger->log("Overflow submitted on Compute Queue despite using ReBAR (no transfer submits or usage of staging buffer) and providing a AS Build Scratch Buffer of correctly queried max size!",system::ILogger::ELL_ERROR); + + // assign gpu objects to output + auto&& tlases = reservation.getGPUObjects(); + m_gpuTlas = tlases[0].value; + auto&& buffers = reservation.getGPUObjects(); + m_proceduralAabbBuffer = buffers[0].value; + + auto&& gpuPolygonGeometries = reservation.getGPUObjects(); + m_gpuPolygons.resize(gpuPolygonGeometries.size()); + + for (uint32_t i = 0; i < gpuPolygonGeometries.size(); i++) + { + const auto& cpuObject = cpuObjects[i]; + const auto& gpuPolygon = gpuPolygonGeometries[i].value; + const auto gpuTriangles = gpuPolygon->exportForBLAS(); + + const auto& vertexBufferBinding = gpuTriangles.vertexData[0]; + const uint64_t vertexBufferAddress = vertexBufferBinding.buffer->getDeviceAddress() + vertexBufferBinding.offset; + + const auto& normalView = gpuPolygon->getNormalView(); + const uint64_t normalBufferAddress = normalView ? normalView.src.buffer->getDeviceAddress() + normalView.src.offset : 0; + auto normalType = NT_R32G32B32_SFLOAT; + if (normalView && normalView.composed.format == EF_R8G8B8A8_SNORM) + normalType = NT_R8G8B8A8_SNORM; + + const auto& indexBufferBinding = gpuTriangles.indexData; + auto& geomInfo = geomInfos[i]; + geomInfo = { + .material = hlsl::_static_cast(cpuObject.material), + .vertexBufferAddress = vertexBufferAddress, + .indexBufferAddress = indexBufferBinding.buffer ? 
indexBufferBinding.buffer->getDeviceAddress() + indexBufferBinding.offset : vertexBufferAddress, + .normalBufferAddress = normalBufferAddress, + .normalType = normalType, + .indexType = gpuTriangles.indexType, + }; + + m_gpuPolygons[i] = gpuPolygon; + } + } + + { + IGPUBuffer::SCreationParams params; + params.usage = IGPUBuffer::EUF_STORAGE_BUFFER_BIT | IGPUBuffer::EUF_TRANSFER_DST_BIT | IGPUBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF | IGPUBuffer::EUF_SHADER_DEVICE_ADDRESS_BIT; + params.size = geomInfoBuffer->getSize(); + m_utils->createFilledDeviceLocalBufferOnDedMem(SIntendedSubmitInfo{ .queue = queue }, std::move(params), geomInfos).move_into(m_triangleGeomInfoBuffer); + } + + return true; + } + + smart_refctd_ptr m_window; + smart_refctd_ptr> m_surface; + smart_refctd_ptr m_semaphore; + uint64_t m_realFrameIx = 0; + uint32_t m_frameAccumulationCounter = 0; + std::array, MaxFramesInFlight> m_cmdBufs; + ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + + core::smart_refctd_ptr m_inputSystem; + InputSystem::ChannelReader m_mouse; + InputSystem::ChannelReader m_keyboard; + + struct CameraSetting + { + float fov = 60.f; + float zNear = 0.1f; + float zFar = 10000.f; + float moveSpeed = 1.f; + float rotateSpeed = 1.f; + float viewWidth = 10.f; + float camYAngle = 165.f / 180.f * 3.14159f; + float camXAngle = 32.f / 180.f * 3.14159f; + + } m_cameraSetting; + Camera m_camera = Camera(core::vectorSIMDf(0, 0, 0), core::vectorSIMDf(0, 0, 0), core::matrix4SIMD()); + + Light m_light = { + .direction = {-1.0f, -1.0f, -0.4f}, + .position = {10.0f, 15.0f, 8.0f}, + .outerCutoff = 0.866025404f, // {cos(radians(30.0f))}, + .type = ELT_DIRECTIONAL + }; + + video::CDumbPresentationOracle m_oracle; + + struct C_UI + { + nbl::core::smart_refctd_ptr manager; + + struct + { + core::smart_refctd_ptr gui, scene; + } samplers; + + core::smart_refctd_ptr descriptorSet; + } m_ui; + core::smart_refctd_ptr m_guiDescriptorSetPool; + + core::vector m_gpuIntersectionSpheres; + uint32_t m_intersectionHitGroupIdx; + + core::vector> m_gpuPolygons; + smart_refctd_ptr m_gpuTlas; + smart_refctd_ptr m_instanceBuffer; + + smart_refctd_ptr m_triangleGeomInfoBuffer; + smart_refctd_ptr m_proceduralGeomInfoBuffer; + smart_refctd_ptr m_proceduralAabbBuffer; + smart_refctd_ptr m_indirectBuffer; + + smart_refctd_ptr m_hdrImage; + smart_refctd_ptr m_hdrImageView; + + smart_refctd_ptr m_rayTracingDsPool; + smart_refctd_ptr m_rayTracingDs; + smart_refctd_ptr m_rayTracingPipeline; + uint64_t m_rayTracingStackSize; + ShaderBindingTable m_shaderBindingTable; + + smart_refctd_ptr m_presentDs; + smart_refctd_ptr m_presentDsPool; + smart_refctd_ptr m_presentPipeline; + + smart_refctd_ptr m_converter; + + + core::matrix4SIMD m_cachedModelViewProjectionMatrix; + bool m_useIndirectCommand = false; + +}; +NBL_MAIN_FUNC(RaytracingPipelineApp) \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt index fb03f95a4..7e1d613f5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,100 +2,116 @@ # This file is part of the "Nabla Engine". 
# For conditions of distribution and use, see copyright notice in nabla.h -function(NBL_HOOK_COMMON_API NBL_EXCLUDE_TARGETS_LIST) - if(NOT TARGET nblCommonAPI) - message(FATAL_ERROR "nblCommonAPI not defined!") - endif() - - NBL_GET_ALL_TARGETS(NBL_TARGETS) - - foreach(NBL_TARGET IN LISTS NBL_TARGETS) - # TODO: exclude builtin targets created by examples as well - doesn't impact anything at all now - if(NOT ${NBL_TARGET} IN_LIST NBL_EXCLUDE_TARGETS_LIST) - - target_include_directories(${NBL_TARGET} PRIVATE $) - target_link_libraries(${NBL_TARGET} PRIVATE nblCommonAPI) - endif() - endforeach() -endfunction() - -# PCH & CommonAPI library for Nabla framework examples -add_subdirectory(common EXCLUDE_FROM_ALL) - if(NBL_BUILD_EXAMPLES) + project(NablaExamples) + if(NBL_BUILD_ANDROID) nbl_android_create_media_storage_apk() endif() + #! Common api library & precompiled headers for Nabla framework examples + add_subdirectory(common EXCLUDE_FROM_ALL) + + #! use "EXCLUDE_FROM_ALL" to exclude an example from the NablaExamples project + #[[ + useful if we don't want the example to be tested by CI but still want + the example's project to be generated + + https://cmake.org/cmake/help/latest/prop_tgt/EXCLUDE_FROM_ALL.html + ]] + # showcase the use of `nbl::core`,`nbl::system` and `nbl::asset` - add_subdirectory(01_HelloCoreSystemAsset EXCLUDE_FROM_ALL) + add_subdirectory(01_HelloCoreSystemAsset) # showcase the use of `system::IApplicationFramework` and `nbl::video` - add_subdirectory(02_HelloCompute EXCLUDE_FROM_ALL) + add_subdirectory(02_HelloCompute) # showcase physical device selection, resource embedding and the use of identical headers in HLSL and C++ - add_subdirectory(03_DeviceSelectionAndSharedSources EXCLUDE_FROM_ALL) + add_subdirectory(03_DeviceSelectionAndSharedSources) # showcase the creation of windows and polling for input - add_subdirectory(04_HelloUI EXCLUDE_FROM_ALL) + add_subdirectory(04_HelloUI) # showcase the semi-advanced use of Nabla's Streaming Buffers and BDA - add_subdirectory(05_StreamingAndBufferDeviceAddressApp EXCLUDE_FROM_ALL) + add_subdirectory(05_StreamingAndBufferDeviceAddressApp) # showcase the use of a graphics queue - add_subdirectory(06_HelloGraphicsQueue EXCLUDE_FROM_ALL) + add_subdirectory(06_HelloGraphicsQueue) # showcase the set-up of multiple queues - add_subdirectory(07_StagingAndMultipleQueues EXCLUDE_FROM_ALL) + add_subdirectory(07_StagingAndMultipleQueues) # showcase the set-up of a swapchain and picking of a matching device - add_subdirectory(08_HelloSwapchain EXCLUDE_FROM_ALL) - add_subdirectory(09_GeometryCreator EXCLUDE_FROM_ALL) - # demonstrate the counting sort utility - add_subdirectory(10_CountingSort EXCLUDE_FROM_ALL) + add_subdirectory(08_HelloSwapchain) + add_subdirectory(09_GeometryCreator) + # demonstrate the counting sort utility + add_subdirectory(10_CountingSort) # showcase use of FFT for post-FX Bloom effect - add_subdirectory(11_FFT EXCLUDE_FROM_ALL) - + add_subdirectory(11_FFT) + # + add_subdirectory(12_MeshLoaders) + # + #add_subdirectory(13_MaterialCompiler EXCLUDE_FROM_ALL) # Waiting for a refactor - #add_subdirectory(27_PLYSTLDemo EXCLUDE_FROM_ALL) - #add_subdirectory(29_SpecializationConstants EXCLUDE_FROM_ALL) - #add_subdirectory(33_Draw3DLine EXCLUDE_FROM_ALL) + #add_subdirectory(27_PLYSTLDemo) + #add_subdirectory(33_Draw3DLine) # Unit Test Examples - add_subdirectory(20_AllocatorTest EXCLUDE_FROM_ALL) - add_subdirectory(21_LRUCacheUnitTest EXCLUDE_FROM_ALL) - add_subdirectory(22_CppCompat EXCLUDE_FROM_ALL) - 
add_subdirectory(23_ArithmeticUnitTest EXCLUDE_FROM_ALL) - add_subdirectory(24_ColorSpaceTest EXCLUDE_FROM_ALL) + add_subdirectory(20_AllocatorTest) + add_subdirectory(21_LRUCacheUnitTest) + add_subdirectory(22_CppCompat) + add_subdirectory(23_Arithmetic2UnitTest) + add_subdirectory(24_ColorSpaceTest) add_subdirectory(25_FilterTest EXCLUDE_FROM_ALL) - add_subdirectory(26_Blur EXCLUDE_FROM_ALL) - add_subdirectory(27_MPMCScheduler EXCLUDE_FROM_ALL) - add_subdirectory(28_FFTBloom EXCLUDE_FROM_ALL) - # add_subdirectory(36_CUDAInterop EXCLUDE_FROM_ALL) + add_subdirectory(26_Blur) + add_subdirectory(27_MPMCScheduler) + add_subdirectory(28_FFTBloom) + add_subdirectory(29_Arithmetic2Bench) + # add_subdirectory(36_CUDAInterop) # Showcase compute pathtracing - add_subdirectory(30_ComputeShaderPathTracer EXCLUDE_FROM_ALL) + add_subdirectory(30_ComputeShaderPathTracer) - add_subdirectory(38_EXRSplit EXCLUDE_FROM_ALL) + add_subdirectory(31_HLSLPathTracer EXCLUDE_FROM_ALL) + + add_subdirectory(38_EXRSplit) # if (NBL_BUILD_MITSUBA_LOADER AND NBL_BUILD_OPTIX) - # add_subdirectory(39_DenoiserTonemapper EXCLUDE_FROM_ALL) + # add_subdirectory(39_DenoiserTonemapper) # endif() - add_subdirectory(42_FragmentShaderPathTracer EXCLUDE_FROM_ALL) - #add_subdirectory(43_SumAndCDFFilters EXCLUDE_FROM_ALL) - #add_subdirectory(45_BRDFEvalTest EXCLUDE_FROM_ALL) - #add_subdirectory(46_SamplingValidation EXCLUDE_FROM_ALL) + #add_subdirectory(43_SumAndCDFFilters) add_subdirectory(47_DerivMapTest EXCLUDE_FROM_ALL) - add_subdirectory(53_ComputeShaders EXCLUDE_FROM_ALL) add_subdirectory(54_Transformations EXCLUDE_FROM_ALL) add_subdirectory(55_RGB18E7S3 EXCLUDE_FROM_ALL) - add_subdirectory(56_RayQuery EXCLUDE_FROM_ALL) - add_subdirectory(60_ClusteredRendering EXCLUDE_FROM_ALL) - add_subdirectory(61_UI EXCLUDE_FROM_ALL) - add_subdirectory(62_CAD EXCLUDE_FROM_ALL) + add_subdirectory(61_UI) + add_subdirectory(62_CAD EXCLUDE_FROM_ALL) # TODO: Erfan, Przemek, Francisco and co. 
need to resurrect this add_subdirectory(62_SchusslerTest EXCLUDE_FROM_ALL) - add_subdirectory(64_EmulatedFloatTest EXCLUDE_FROM_ALL) + add_subdirectory(64_EmulatedFloatTest) add_subdirectory(0_ImportanceSamplingEnvMaps EXCLUDE_FROM_ALL) #TODO: integrate back into 42 add_subdirectory(66_HLSLBxDFTests EXCLUDE_FROM_ALL) - add_subdirectory(67_RayQueryGeometry EXCLUDE_FROM_ALL) - add_subdirectory(68_JpegLoading EXCLUDE_FROM_ALL) - - add_subdirectory(70_FLIPFluids EXCLUDE_FROM_ALL) + add_subdirectory(67_RayQueryGeometry) + add_subdirectory(68_JpegLoading) + + add_subdirectory(70_FLIPFluids) + add_subdirectory(71_RayTracingPipeline) + + # add new examples *before* NBL_GET_ALL_TARGETS invocation, it gathers recursively all targets created so far in this subdirectory + NBL_GET_ALL_TARGETS(TARGETS) + + # we want to loop only over the examples so we exclude examples' interface libraries created in common subdirectory + list(REMOVE_ITEM TARGETS ${NBL_EXAMPLES_API_TARGET} ${NBL_EXAMPLES_API_LIBRARIES}) + + # we link common example api library and force examples to reuse its PCH + foreach(T IN LISTS TARGETS) + get_target_property(TYPE ${T} TYPE) + if(NOT ${TYPE} MATCHES INTERFACE) + target_link_libraries(${T} PUBLIC ${NBL_EXAMPLES_API_TARGET}) + target_include_directories(${T} PUBLIC $) + set_target_properties(${T} PROPERTIES DISABLE_PRECOMPILE_HEADERS OFF) + target_precompile_headers(${T} REUSE_FROM "${NBL_EXAMPLES_API_TARGET}") + + if(NBL_EMBED_BUILTIN_RESOURCES) + LINK_BUILTIN_RESOURCES_TO_TARGET(${T} NblExtExamplesAPIBuiltinsSource) + LINK_BUILTIN_RESOURCES_TO_TARGET(${T} NblExtExamplesAPIBuiltinsInclude) + LINK_BUILTIN_RESOURCES_TO_TARGET(${T} NblExtExamplesAPIBuiltinsBuild) + endif() + endif() + endforeach() - NBL_HOOK_COMMON_API("${NBL_COMMON_API_TARGETS}") -endif() + NBL_ADJUST_FOLDERS(examples) +endif() \ No newline at end of file diff --git a/common/CMakeLists.txt b/common/CMakeLists.txt index d9073f273..b3e57da6f 100644 --- a/common/CMakeLists.txt +++ b/common/CMakeLists.txt @@ -1,28 +1,109 @@ -########################################### -# TODO: the way it should work is following (remove the comment once all done!) -# - one top PCH which includes -> currently not done -# - sources used only within examples splitted into "common libraries" (optional -> with options to toggle if include them to build tree), each common library should reuse the above top PCH -# - examples_tests CMake loop over example targets and hook the interface library with NBL_HOOK_COMMON_API [done] -# - each common library should declare ONLY interface and never expose source definition into headers nor any 3rdparty stuff! -## +#! 
Examples API proxy library +#[[ + We create the Nabla Examples API as a static library extension; this + allows all examples to reuse a single precompiled header (PCH) + instead of generating their own -# interface libraries don't have build rules (except custom commands however it doesn't matter here) but properties -add_library(nblCommonAPI INTERFACE) -set(NBL_COMMON_API_INCLUDE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include") -target_include_directories(nblCommonAPI INTERFACE "${NBL_COMMON_API_INCLUDE_DIRECTORY}") + The PCH includes Nabla.h + example common interface headers and takes + around 1 GB per configuration, so sharing it avoids significant disk space waste +]] -add_subdirectory(src EXCLUDE_FROM_ALL) +nbl_create_ext_library_project(ExamplesAPI "" "${CMAKE_CURRENT_SOURCE_DIR}/src/nbl/examples/pch.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/include" "" "") -########## <- -# TODO: disable this CommonPCH thing! + DEPRICATED! -# TODO: move asset converer into separate library +set_target_properties(${LIB_NAME} PROPERTIES DISABLE_PRECOMPILE_HEADERS OFF) +target_precompile_headers(${LIB_NAME} PUBLIC "${CMAKE_CURRENT_SOURCE_DIR}/include/nbl/examples/PCH.hpp") -nbl_create_ext_library_project(CommonAPI "" "${CMAKE_CURRENT_SOURCE_DIR}/src/empty.cpp" "" "" "") -set(NBL_EXECUTABLE_COMMON_API_TARGET "${LIB_NAME}" CACHE INTERNAL "") +set(COMMON_INCLUDE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/include") -add_subdirectory(CommonPCH EXCLUDE_FROM_ALL) +function(INTERFACE_TO_BUILTINS TARGET) + #[[ + even though the builtin target is a static library it's still valid to reuse the + common PCH to boost its build speed and avoid preprocessing all of Nabla again (**) + ]] + set_target_properties(${TARGET} PROPERTIES DISABLE_PRECOMPILE_HEADERS OFF) + target_precompile_headers(${TARGET} REUSE_FROM "${LIB_NAME}") -#target_precompile_headers("${NBL_EXECUTABLE_COMMON_API_TARGET}" REUSE_FROM "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") -########## <- + target_include_directories(${TARGET} PUBLIC "${COMMON_INCLUDE_DIRECTORY}") + target_link_libraries(${TARGET} INTERFACE ${LIB_NAME}) -set(NBL_COMMON_API_TARGETS nblCommonAPI ${NBL_COMMON_API_TARGETS} ${NBL_EXECUTABLE_COMMON_API_TARGET} PARENT_SCOPE) +endfunction() + +function(REGISTER_COMMON_BUILTINS) + cmake_parse_arguments(EX "" "TARGET;BIND;NAMESPACE" "GLOB_RGX" ${ARGN}) + get_filename_component(MOUNT_POINT "${CMAKE_CURRENT_SOURCE_DIR}/${EX_BIND}" ABSOLUTE) + list(TRANSFORM EX_GLOB_RGX PREPEND "${MOUNT_POINT}/") + file(GLOB_RECURSE KEYS RELATIVE "${MOUNT_POINT}" CONFIGURE_DEPENDS ${EX_GLOB_RGX}) + + NBL_CREATE_RESOURCE_ARCHIVE( + TARGET ${EX_TARGET} + BIND "${MOUNT_POINT}" + BUILTINS ${KEYS} + NAMESPACE ${EX_NAMESPACE} + ) + INTERFACE_TO_BUILTINS(${EX_TARGET}) +endfunction() + +#! common example API builtins as static library targets linked to each example +if(NBL_EMBED_BUILTIN_RESOURCES) + REGISTER_COMMON_BUILTINS( + TARGET NblExtExamplesAPIBuiltinsSource + BIND src/nbl/examples + NAMESPACE nbl::builtin::examples::src + GLOB_RGX *.hlsl *.txt + ) + + REGISTER_COMMON_BUILTINS( + TARGET NblExtExamplesAPIBuiltinsInclude + BIND include/nbl/examples + NAMESPACE nbl::builtin::examples::include + GLOB_RGX *.hpp *.h *.hlsl *.txt + ) +endif() + +#!
Examples API common libraries +#[[ + The rule is to avoid creating additional libraries as part of the examples' common + interface in order to prevent generating another precompiled header (PCH) and wasting disk space + + If you have new utilities that could be shared across examples then try to implement them as header only + and include them in the PCH, or in `examples.h` *if you cannot* (open the header to see details) + + but if you have a good reason to create a library because you cannot make it header only + AND you *can REUSE* the examples' PCH then go ahead anyway and put it under `src/nbl/examples`, + otherwise keep it header only - a good example would be to use our embedded-whatever-you-want tool + which does create a library but can reuse the examples' PCH (see NblExtExamplesAPIBuiltinsSource + and NblExtExamplesAPIBuiltinsInclude targets) +]] + +add_subdirectory("src/nbl/examples" EXCLUDE_FROM_ALL) +target_link_libraries(${LIB_NAME} PUBLIC NblExtExamplesAPISPIRV) +if(NBL_EMBED_BUILTIN_RESOURCES) + INTERFACE_TO_BUILTINS(NblExtExamplesAPIBuiltinsBuild) + + #[[ + we have the SPIRV keys include file in the examples' PCH which then gets REUSE(d) by common archives (**) in built-in mode, + to not confuse the compiler we need to ensure we inherit interface properties (include directories needed) for all targets + which share the PCH, also note it doesn't really link any library, the target we inherit properties from is INTERFACE + ]] + target_link_libraries(NblExtExamplesAPIBuiltinsSource PUBLIC NblExtExamplesAPISPIRV) + target_link_libraries(NblExtExamplesAPIBuiltinsInclude PUBLIC NblExtExamplesAPISPIRV) + target_link_libraries(NblExtExamplesAPIBuiltinsBuild PUBLIC NblExtExamplesAPISPIRV) +endif() + +NBL_GET_ALL_TARGETS(TARGETS) +list(REMOVE_ITEM TARGETS ${LIB_NAME}) + +# the Examples API proxy library CMake target name +#[[ + this one gets linked to each executable automatically with its interface libraries +]] +set(NBL_EXAMPLES_API_TARGET ${LIB_NAME} PARENT_SCOPE) + +#!
names of CMake targets created in src/nbl/examples +#[[ + if your example wants to use anything from src/nbl/examples + then you must target_link_libraries() the lib you want as we + don't link all those libraries to each executable automatically +]] +set(NBL_EXAMPLES_API_LIBRARIES ${TARGETS} PARENT_SCOPE) + +NBL_ADJUST_FOLDERS(common) \ No newline at end of file diff --git a/common/CommonAPI.h b/common/CommonAPI.h deleted file mode 100644 index aca8c0741..000000000 --- a/common/CommonAPI.h +++ /dev/null @@ -1,111 +0,0 @@ -#ifndef __NBL_COMMON_API_H_INCLUDED__ -#define __NBL_COMMON_API_H_INCLUDED__ - -#include - -#include "MonoSystemMonoLoggerApplication.hpp" - -#include "nbl/ui/CGraphicalApplicationAndroid.h" -#include "nbl/ui/CWindowManagerAndroid.h" - -// TODO: see TODO below -// TODO: make these include themselves via `nabla.h` - -#include "nbl/video/utilities/SPhysicalDeviceFilter.h" - -#if 0 -class CommonAPI -{ - CommonAPI() = delete; -public: - class CommonAPIEventCallback : public nbl::ui::IWindow::IEventCallback - { - public: - CommonAPIEventCallback(nbl::core::smart_refctd_ptr&& inputSystem, nbl::system::logger_opt_smart_ptr&& logger) : m_inputSystem(std::move(inputSystem)), m_logger(std::move(logger)), m_gotWindowClosedMsg(false){} - CommonAPIEventCallback() {} - bool isWindowOpen() const {return !m_gotWindowClosedMsg;} - void setLogger(nbl::system::logger_opt_smart_ptr& logger) - { - m_logger = logger; - } - void setInputSystem(nbl::core::smart_refctd_ptr&& inputSystem) - { - m_inputSystem = std::move(inputSystem); - } - private: - - bool onWindowClosed_impl() override - { - m_logger.log("Window closed"); - m_gotWindowClosedMsg = true; - return true; - } - - void onMouseConnected_impl(nbl::core::smart_refctd_ptr&& mch) override - { - m_logger.log("A mouse %p has been connected", nbl::system::ILogger::ELL_INFO, mch.get()); - m_inputSystem.get()->add(m_inputSystem.get()->m_mouse,std::move(mch)); - } - void onMouseDisconnected_impl(nbl::ui::IMouseEventChannel* mch) override - { - m_logger.log("A mouse %p has been disconnected", nbl::system::ILogger::ELL_INFO, mch); - m_inputSystem.get()->remove(m_inputSystem.get()->m_mouse,mch); - } - void onKeyboardConnected_impl(nbl::core::smart_refctd_ptr&& kbch) override - { - m_logger.log("A keyboard %p has been connected", nbl::system::ILogger::ELL_INFO, kbch.get()); - m_inputSystem.get()->add(m_inputSystem.get()->m_keyboard,std::move(kbch)); - } - void onKeyboardDisconnected_impl(nbl::ui::IKeyboardEventChannel* kbch) override - { - m_logger.log("A keyboard %p has been disconnected", nbl::system::ILogger::ELL_INFO, kbch); - m_inputSystem.get()->remove(m_inputSystem.get()->m_keyboard,kbch); - } - - private: - nbl::core::smart_refctd_ptr m_inputSystem = nullptr; - nbl::system::logger_opt_smart_ptr m_logger = nullptr; - bool m_gotWindowClosedMsg; - }; - - // old code from init - { - // ... - - result.inputSystem = nbl::core::make_smart_refctd_ptr(system::logger_opt_smart_ptr(nbl::core::smart_refctd_ptr(result.logger))); - result.assetManager = nbl::core::make_smart_refctd_ptr(nbl::core::smart_refctd_ptr(result.system), nbl::core::smart_refctd_ptr(result.compilerSet)); // we should let user choose it? 
- - if (!headlessCompute) - { - params.windowCb->setInputSystem(nbl::core::smart_refctd_ptr(result.inputSystem)); - if (!params.window) - { - #ifdef _NBL_PLATFORM_WINDOWS_ - result.windowManager = ui::IWindowManagerWin32::create(); // on the Windows path - #elif defined(_NBL_PLATFORM_LINUX_) - result.windowManager = nbl::core::make_smart_refctd_ptr(); // on the Android path - #else - #error "Unsupported platform" - #endif - - nbl::ui::IWindow::SCreationParams windowsCreationParams; - windowsCreationParams.width = params.windowWidth; - windowsCreationParams.height = params.windowHeight; - windowsCreationParams.x = 64u; - windowsCreationParams.y = 64u; - windowsCreationParams.flags = nbl::ui::IWindow::ECF_RESIZABLE; - windowsCreationParams.windowCaption = params.appName.data(); - windowsCreationParams.callback = params.windowCb; - - params.window = result.windowManager->createWindow(std::move(windowsCreationParams)); - } - params.windowCb = nbl::core::smart_refctd_ptr((CommonAPIEventCallback*) params.window->getEventCallback()); - } - - // ... - } -}; - -#endif - -#endif diff --git a/common/CommonPCH/CMakeLists.txt b/common/CommonPCH/CMakeLists.txt deleted file mode 100644 index 5e62f885f..000000000 --- a/common/CommonPCH/CMakeLists.txt +++ /dev/null @@ -1,15 +0,0 @@ -include(common RESULT_VARIABLE RES) -if(NOT RES) - message(FATAL_ERROR "common.cmake not found. Should be in '${NBL_ROOT_PATH}/cmake' directory") -endif() - -nbl_create_executable_project("" "" "" "" "") - -set(NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET "${EXECUTABLE_NAME}" CACHE INTERNAL "") -get_target_property(NBL_NABLA_TARGET_SOURCE_DIR Nabla SOURCE_DIR) -set_target_properties("${EXECUTABLE_NAME}" PROPERTIES DISABLE_PRECOMPILE_HEADERS OFF) -target_precompile_headers("${EXECUTABLE_NAME}" PUBLIC - "${CMAKE_CURRENT_SOURCE_DIR}/PCH.hpp" # Common PCH for examples - "${NBL_NABLA_TARGET_SOURCE_DIR}/pch.h" # Nabla's PCH -) -unset(NBL_NABLA_TARGET_SOURCE_DIR) \ No newline at end of file diff --git a/common/CommonPCH/PCH.hpp b/common/CommonPCH/PCH.hpp deleted file mode 100644 index 5b9d6a433..000000000 --- a/common/CommonPCH/PCH.hpp +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright (C) 2018-2022 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h -#ifndef _EXAMPLES_COMMON_PCH_HPP_ -#define _EXAMPLES_COMMON_PCH_HPP_ - -#include - -#include -#include -#include - -#endif // _EXAMPLES_COMMON_PCH_HPP_ \ No newline at end of file diff --git a/common/CommonPCH/main.cpp b/common/CommonPCH/main.cpp deleted file mode 100644 index c19ee3c45..000000000 --- a/common/CommonPCH/main.cpp +++ /dev/null @@ -1,9 +0,0 @@ -// Copyright (C) 2018-2022 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -int main(int argc, char** argv) -{ - return 0; -} - diff --git a/common/include/CEventCallback.hpp b/common/include/CEventCallback.hpp deleted file mode 100644 index 2d4e36932..000000000 --- a/common/include/CEventCallback.hpp +++ /dev/null @@ -1,49 +0,0 @@ -#ifndef __NBL_C_EVENT_CALLBACK_HPP_INCLUDED__ -#define __NBL_C_EVENT_CALLBACK_HPP_INCLUDED__ - -#include "nbl/video/utilities/CSimpleResizeSurface.h" -#include "InputSystem.hpp" - -class CEventCallback : public nbl::video::ISimpleManagedSurface::ICallback -{ -public: - CEventCallback(nbl::core::smart_refctd_ptr&& m_inputSystem, nbl::system::logger_opt_smart_ptr&& logger) : m_inputSystem(std::move(m_inputSystem)), m_logger(std::move(logger)) {} - CEventCallback() {} - - void setLogger(nbl::system::logger_opt_smart_ptr& logger) - { - m_logger = logger; - } - void setInputSystem(nbl::core::smart_refctd_ptr&& m_inputSystem) - { - m_inputSystem = std::move(m_inputSystem); - } -private: - - void onMouseConnected_impl(nbl::core::smart_refctd_ptr&& mch) override - { - m_logger.log("A mouse %p has been connected", nbl::system::ILogger::ELL_INFO, mch.get()); - m_inputSystem.get()->add(m_inputSystem.get()->m_mouse, std::move(mch)); - } - void onMouseDisconnected_impl(nbl::ui::IMouseEventChannel* mch) override - { - m_logger.log("A mouse %p has been disconnected", nbl::system::ILogger::ELL_INFO, mch); - m_inputSystem.get()->remove(m_inputSystem.get()->m_mouse, mch); - } - void onKeyboardConnected_impl(nbl::core::smart_refctd_ptr&& kbch) override - { - m_logger.log("A keyboard %p has been connected", nbl::system::ILogger::ELL_INFO, kbch.get()); - m_inputSystem.get()->add(m_inputSystem.get()->m_keyboard, std::move(kbch)); - } - void onKeyboardDisconnected_impl(nbl::ui::IKeyboardEventChannel* kbch) override - { - m_logger.log("A keyboard %p has been disconnected", nbl::system::ILogger::ELL_INFO, kbch); - m_inputSystem.get()->remove(m_inputSystem.get()->m_keyboard, kbch); - } - -private: - nbl::core::smart_refctd_ptr m_inputSystem = nullptr; - nbl::system::logger_opt_smart_ptr m_logger = nullptr; -}; - -#endif // __NBL_C_EVENT_CALLBACK_HPP_INCLUDED__ \ No newline at end of file diff --git a/common/include/CGeomtryCreatorScene.hpp b/common/include/CGeomtryCreatorScene.hpp deleted file mode 100644 index 0d9bc6edd..000000000 --- a/common/include/CGeomtryCreatorScene.hpp +++ /dev/null @@ -1,1346 +0,0 @@ -#ifndef _NBL_GEOMETRY_CREATOR_SCENE_H_INCLUDED_ -#define _NBL_GEOMETRY_CREATOR_SCENE_H_INCLUDED_ - -#include - -#include "nbl/asset/utils/CGeometryCreator.h" -#include "SBasicViewParameters.hlsl" -#include "geometry/creator/spirv/builtin/CArchive.h" -#include "geometry/creator/spirv/builtin/builtinResources.h" - -namespace nbl::scene::geometrycreator -{ - -enum ObjectType : uint8_t -{ - OT_CUBE, - OT_SPHERE, - OT_CYLINDER, - OT_RECTANGLE, - OT_DISK, - OT_ARROW, - OT_CONE, - OT_ICOSPHERE, - - OT_COUNT, - OT_UNKNOWN = std::numeric_limits::max() -}; - -struct ObjectMeta -{ - ObjectType type = OT_UNKNOWN; - std::string_view name = "Unknown"; -}; - -constexpr static inline struct ClearValues -{ - nbl::video::IGPUCommandBuffer::SClearColorValue color = { .float32 = {0.f,0.f,0.f,1.f} }; - nbl::video::IGPUCommandBuffer::SClearDepthStencilValue depth = { .depth = 0.f }; -} clear; - -#define TYPES_IMPL_BOILERPLATE(WithConverter) struct Types \ -{ \ - using descriptor_set_layout_t = std::conditional_t; \ - using pipeline_layout_t = std::conditional_t; \ - using renderpass_t = 
std::conditional_t; \ - using image_view_t = std::conditional_t; \ - using image_t = std::conditional_t; \ - using buffer_t = std::conditional_t; \ - using shader_t = std::conditional_t; \ - using graphics_pipeline_t = std::conditional_t; \ - using descriptor_set = std::conditional_t; \ -} - -template -struct ResourcesBundleBase -{ - TYPES_IMPL_BOILERPLATE(withAssetConverter); - - struct ReferenceObject - { - struct Bindings - { - nbl::asset::SBufferBinding vertex, index; - }; - - nbl::core::smart_refctd_ptr pipeline = nullptr; - - Bindings bindings; - nbl::asset::E_INDEX_TYPE indexType = nbl::asset::E_INDEX_TYPE::EIT_UNKNOWN; - uint32_t indexCount = {}; - }; - - using ReferenceDrawHook = std::pair; - - nbl::core::smart_refctd_ptr renderpass; - std::array objects; - nbl::asset::SBufferBinding ubo; - - struct - { - nbl::core::smart_refctd_ptr color, depth; - } attachments; - - nbl::core::smart_refctd_ptr descriptorSet; -}; - -struct ResourcesBundle : public ResourcesBundleBase -{ - using base_t = ResourcesBundleBase; -}; - -#define EXPOSE_NABLA_NAMESPACES() using namespace nbl; \ -using namespace core; \ -using namespace asset; \ -using namespace video; \ -using namespace scene; \ -using namespace system - -template -class ResourceBuilder -{ -public: - TYPES_IMPL_BOILERPLATE(withAssetConverter); - - using this_t = ResourceBuilder; - - ResourceBuilder(nbl::video::IUtilities* const _utilities, nbl::video::IGPUCommandBuffer* const _commandBuffer, nbl::system::ILogger* const _logger, const nbl::asset::IGeometryCreator* const _geometryCreator) - : utilities(_utilities), commandBuffer(_commandBuffer), logger(_logger), geometries(_geometryCreator) - { - assert(utilities); - assert(logger); - } - - /* - if (withAssetConverter) then - -> .build cpu objects - else - -> .build gpu objects & record any resource update upload transfers into command buffer - */ - - inline bool build() - { - EXPOSE_NABLA_NAMESPACES(); - - if constexpr (!withAssetConverter) - { - commandBuffer->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); - commandBuffer->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - commandBuffer->beginDebugMarker("Resources builder's buffers upload [manual]"); - } - - using functor_t = std::function; - - auto work = std::to_array - ({ - functor_t(std::bind(&this_t::createDescriptorSetLayout, this)), - functor_t(std::bind(&this_t::createPipelineLayout, this)), - functor_t(std::bind(&this_t::createRenderpass, this)), - functor_t(std::bind(&this_t::createFramebufferAttachments, this)), - functor_t(std::bind(&this_t::createShaders, this)), - functor_t(std::bind(&this_t::createGeometries, this)), - functor_t(std::bind(&this_t::createViewParametersUboBuffer, this)), - functor_t(std::bind(&this_t::createDescriptorSet, this)) - }); - - for (auto& task : work) - if (!task()) - return false; - - if constexpr (!withAssetConverter) - commandBuffer->end(); - - return true; - } - - /* - if (withAssetConverter) then - -> .convert cpu objects to gpu & update gpu buffers - else - -> update gpu buffers - */ - - inline bool finalize(ResourcesBundle& output, nbl::video::CThreadSafeQueueAdapter* transferCapableQueue) - { - EXPOSE_NABLA_NAMESPACES(); - - // TODO: use multiple command buffers - std::array commandBuffers = {}; - { - commandBuffers.front().cmdbuf = commandBuffer; - } - - if constexpr (withAssetConverter) - { - // note that asset converter records basic transfer uploads itself, we only begin the recording with ONE_TIME_SUBMIT_BIT - 
commandBuffer->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); - commandBuffer->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - commandBuffer->beginDebugMarker("Resources builder's buffers upload [asset converter]"); - - // asset converter - scratch at this point has ready to convert cpu resources - smart_refctd_ptr converter = CAssetConverter::create({ .device = utilities->getLogicalDevice(),.optimizer = {} }); - CAssetConverter::SInputs inputs = {}; - inputs.logger = logger; - - struct ProxyCpuHooks - { - using object_size_t = std::tuple_size; - - std::array renderpass; - std::array pipelines; - std::array buffers; - std::array attachments; - std::array descriptorSet; - } hooks; - - enum AttachmentIx - { - AI_COLOR = 0u, - AI_DEPTH = 1u, - - AI_COUNT - }; - - // gather CPU assets into span memory views - { - hooks.renderpass.front() = scratch.renderpass.get(); - for (uint32_t i = 0u; i < hooks.pipelines.size(); ++i) - { - auto& [reference, meta] = scratch.objects[static_cast(i)]; - hooks.pipelines[i] = reference.pipeline.get(); - - // [[ [vertex, index] [vertex, index] [vertex, index] ... [ubo] ]] - hooks.buffers[2u * i + 0u] = reference.bindings.vertex.buffer.get(); - hooks.buffers[2u * i + 1u] = reference.bindings.index.buffer.get(); - } - hooks.buffers.back() = scratch.ubo.buffer.get(); - hooks.attachments[AI_COLOR] = scratch.attachments.color.get(); - hooks.attachments[AI_DEPTH] = scratch.attachments.depth.get(); - hooks.descriptorSet.front() = scratch.descriptorSet.get(); - } - - // assign the CPU hooks to converter's inputs - { - std::get>(inputs.assets) = hooks.renderpass; - std::get>(inputs.assets) = hooks.pipelines; - std::get>(inputs.assets) = hooks.buffers; - // std::get>(inputs.assets) = hooks.attachments; // NOTE: THIS IS NOT IMPLEMENTED YET IN CONVERTER! - std::get>(inputs.assets) = hooks.descriptorSet; - } - - // reserve and create the GPU object handles - auto reservation = converter->reserve(inputs); - { - auto prepass = [&](const auto& references) -> bool - { - // retrieve the reserved handles - auto objects = reservation.getGPUObjects(); - - uint32_t counter = {}; - for (auto& object : objects) - { - // anything that fails to be reserved is a nullptr in the span of GPU Objects - auto gpu = object.value; - auto* reference = references[counter]; - - if (reference) - { - // validate - if (!gpu) // throw errors only if corresponding cpu hook was VALID (eg. 
we may have nullptr for some index buffers in the span for converter but it's OK, I'm too lazy to filter them before passing to the converter inputs and don't want to deal with dynamic alloc) - { - logger->log("Failed to convert a CPU object to GPU!", ILogger::ELL_ERROR); - return false; - } - } - - ++counter; - } - - return true; - }; - - prepass.template operator() < ICPURenderpass > (hooks.renderpass); - prepass.template operator() < ICPUGraphicsPipeline > (hooks.pipelines); - prepass.template operator() < ICPUBuffer > (hooks.buffers); - // validate.template operator() < ICPUImageView > (hooks.attachments); - prepass.template operator() < ICPUDescriptorSet > (hooks.descriptorSet); - } - - auto semaphore = utilities->getLogicalDevice()->createSemaphore(0u); - - // TODO: compute submit as well for the images' mipmaps - SIntendedSubmitInfo transfer = {}; - transfer.queue = transferCapableQueue; - transfer.scratchCommandBuffers = commandBuffers; - transfer.scratchSemaphore = { - .semaphore = semaphore.get(), - .value = 0u, - .stageMask = PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS - }; - // issue the convert call - { - CAssetConverter::SConvertParams params = {}; - params.utilities = utilities; - params.transfer = &transfer; - - // basically it records all data uploads and submits them right away - auto future = reservation.convert(params); - if (future.copy()!=IQueue::RESULT::SUCCESS) - { - logger->log("Failed to await submission feature!", ILogger::ELL_ERROR); - return false; - } - - // assign gpu objects to output - auto& base = static_cast(output); - { - auto&& [renderpass, pipelines, buffers, descriptorSet] = std::make_tuple(reservation.getGPUObjects().front().value, reservation.getGPUObjects(), reservation.getGPUObjects(), reservation.getGPUObjects().front().value); - { - base.renderpass = renderpass; - for (uint32_t i = 0u; i < pipelines.size(); ++i) - { - const auto type = static_cast(i); - const auto& [rcpu, rmeta] = scratch.objects[type]; - auto& [gpu, meta] = base.objects[type]; - - gpu.pipeline = pipelines[i].value; - // [[ [vertex, index] [vertex, index] [vertex, index] ... [ubo] ]] - gpu.bindings.vertex = {.offset = 0u, .buffer = buffers[2u * i + 0u].value}; - gpu.bindings.index = {.offset = 0u, .buffer = buffers[2u * i + 1u].value}; - - gpu.indexCount = rcpu.indexCount; - gpu.indexType = rcpu.indexType; - meta.name = rmeta.name; - meta.type = rmeta.type; - } - base.ubo = {.offset = 0u, .buffer = buffers.back().value}; - base.descriptorSet = descriptorSet; - - /* - // base.attachments.color = attachments[AI_COLOR].value; - // base.attachments.depth = attachments[AI_DEPTH].value; - - note conversion of image views is not yet supported by the asset converter - - it's complicated, we have to kinda temporary ignore DRY a bit here to not break the design which is correct - - TEMPORARY: we patch attachments by allocating them ourselves here given cpu instances & parameters - TODO: remove following code once asset converter works with image views & update stuff - */ - - for (uint32_t i = 0u; i < AI_COUNT; ++i) - { - const auto* reference = hooks.attachments[i]; - auto& out = (i == AI_COLOR ? 
base.attachments.color : base.attachments.depth); - - const auto& viewParams = reference->getCreationParameters(); - const auto& imageParams = viewParams.image->getCreationParameters(); - - auto image = utilities->getLogicalDevice()->createImage - ( - IGPUImage::SCreationParams - ({ - .type = imageParams.type, - .samples = imageParams.samples, - .format = imageParams.format, - .extent = imageParams.extent, - .mipLevels = imageParams.mipLevels, - .arrayLayers = imageParams.arrayLayers, - .usage = imageParams.usage - }) - ); - - if (!image) - { - logger->log("Could not create image!", ILogger::ELL_ERROR); - return false; - } - - bool IS_DEPTH = isDepthOrStencilFormat(imageParams.format); - std::string_view DEBUG_NAME = IS_DEPTH ? "UI Scene Depth Attachment Image" : "UI Scene Color Attachment Image"; - image->setObjectDebugName(DEBUG_NAME.data()); - - if (!utilities->getLogicalDevice()->allocate(image->getMemoryReqs(), image.get()).isValid()) - { - logger->log("Could not allocate memory for an image!", ILogger::ELL_ERROR); - return false; - } - - out = utilities->getLogicalDevice()->createImageView - ( - IGPUImageView::SCreationParams - ({ - .flags = viewParams.flags, - .subUsages = viewParams.subUsages, - .image = std::move(image), - .viewType = viewParams.viewType, - .format = viewParams.format, - .subresourceRange = viewParams.subresourceRange - }) - ); - - if (!out) - { - logger->log("Could not create image view!", ILogger::ELL_ERROR); - return false; - } - } - - logger->log("Image View attachments have been allocated by hand after the asset converter's successful submit because it doesn't support converting them yet!", ILogger::ELL_WARNING); - } - } - } - else - { - auto completed = utilities->getLogicalDevice()->createSemaphore(0u); - - std::array signals; - { - auto& signal = signals.front(); - signal.value = 1; - signal.stageMask = bitflag(PIPELINE_STAGE_FLAGS::ALL_TRANSFER_BITS); - signal.semaphore = completed.get(); - } - - const IQueue::SSubmitInfo infos [] = - { - { - .waitSemaphores = {}, - .commandBuffers = commandBuffers, // note that here our command buffer is already recorded!
- .signalSemaphores = signals - } - }; - - if (transferCapableQueue->submit(infos) != IQueue::RESULT::SUCCESS) - { - logger->log("Failed to submit transfer upload operations!", ILogger::ELL_ERROR); - return false; - } - - const ISemaphore::SWaitInfo info [] = - { { - .semaphore = completed.get(), - .value = 1 - } }; - - utilities->getLogicalDevice()->blockForSemaphores(info); - - static_cast(output) = static_cast(scratch); // scratch has all ready to use allocated gpu resources with uploaded memory so now just assign resources to base output - } - - // write the descriptor set - { - // descriptor write ubo - IGPUDescriptorSet::SWriteDescriptorSet write; - write.dstSet = output.descriptorSet.get(); - write.binding = 0; - write.arrayElement = 0u; - write.count = 1u; - - IGPUDescriptorSet::SDescriptorInfo info; - { - info.desc = smart_refctd_ptr(output.ubo.buffer); - info.info.buffer.offset = output.ubo.offset; - info.info.buffer.size = output.ubo.buffer->getSize(); - } - - write.info = &info; - - if(!utilities->getLogicalDevice()->updateDescriptorSets(1u, &write, 0u, nullptr)) - { - logger->log("Could not write descriptor set!", ILogger::ELL_ERROR); - return false; - } - } - - return true; - } - -private: - bool createDescriptorSetLayout() - { - EXPOSE_NABLA_NAMESPACES(); - - typename Types::descriptor_set_layout_t::SBinding bindings[] = - { - { - .binding = 0u, - .type = IDescriptor::E_TYPE::ET_UNIFORM_BUFFER, - .createFlags = Types::descriptor_set_layout_t::SBinding::E_CREATE_FLAGS::ECF_NONE, - .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX | IShader::E_SHADER_STAGE::ESS_FRAGMENT, - .count = 1u, - } - }; - - if constexpr (withAssetConverter) - scratch.descriptorSetLayout = make_smart_refctd_ptr(bindings); - else - scratch.descriptorSetLayout = utilities->getLogicalDevice()->createDescriptorSetLayout(bindings); - - if (!scratch.descriptorSetLayout) - { - logger->log("Could not create descriptor set layout!", ILogger::ELL_ERROR); - return false; - } - - return true; - } - - bool createDescriptorSet() - { - EXPOSE_NABLA_NAMESPACES(); - - if constexpr (withAssetConverter) - scratch.descriptorSet = make_smart_refctd_ptr(smart_refctd_ptr(scratch.descriptorSetLayout)); - else - { - const IGPUDescriptorSetLayout* const layouts[] = { scratch.descriptorSetLayout.get()}; - const uint32_t setCounts[] = { 1u }; - - // note the descriptor set keeps a back smart pointer to its pool, so we don't need to keep the pool alive explicitly - auto pool = utilities->getLogicalDevice()->createDescriptorPoolForDSLayouts(IDescriptorPool::E_CREATE_FLAGS::ECF_NONE, layouts, setCounts); - - if (!pool) - { - logger->log("Could not create Descriptor Pool!", ILogger::ELL_ERROR); - return false; - } - - pool->createDescriptorSets(layouts, &scratch.descriptorSet); - } - - if (!scratch.descriptorSet) - { - logger->log("Could not create Descriptor Set!", ILogger::ELL_ERROR); - return false; - } - - return true; - } - - bool createPipelineLayout() - { - EXPOSE_NABLA_NAMESPACES(); - - const std::span range = {}; - - if constexpr (withAssetConverter) - scratch.pipelineLayout = make_smart_refctd_ptr(range, nullptr, smart_refctd_ptr(scratch.descriptorSetLayout), nullptr, nullptr); - else - scratch.pipelineLayout = utilities->getLogicalDevice()->createPipelineLayout(range, nullptr, smart_refctd_ptr(scratch.descriptorSetLayout), nullptr, nullptr); - - if (!scratch.pipelineLayout) - { - logger->log("Could not create pipeline layout!", ILogger::ELL_ERROR); - return false; - } - - return true; - } - - bool createRenderpass() - { - EXPOSE_NABLA_NAMESPACES(); -
- static constexpr Types::renderpass_t::SCreationParams::SColorAttachmentDescription colorAttachments[] = - { - { - { - { - .format = ColorFboAttachmentFormat, - .samples = Samples, - .mayAlias = false - }, - /* .loadOp = */ Types::renderpass_t::LOAD_OP::CLEAR, - /* .storeOp = */ Types::renderpass_t::STORE_OP::STORE, - /* .initialLayout = */ Types::image_t::LAYOUT::UNDEFINED, - /* .finalLayout = */ Types::image_t::LAYOUT::READ_ONLY_OPTIMAL - } - }, - Types::renderpass_t::SCreationParams::ColorAttachmentsEnd - }; - - static constexpr Types::renderpass_t::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = - { - { - { - { - .format = DepthFboAttachmentFormat, - .samples = Samples, - .mayAlias = false - }, - /* .loadOp = */ {Types::renderpass_t::LOAD_OP::CLEAR}, - /* .storeOp = */ {Types::renderpass_t::STORE_OP::STORE}, - /* .initialLayout = */ {Types::image_t::LAYOUT::UNDEFINED}, - /* .finalLayout = */ {Types::image_t::LAYOUT::ATTACHMENT_OPTIMAL} - } - }, - Types::renderpass_t::SCreationParams::DepthStencilAttachmentsEnd - }; - - typename Types::renderpass_t::SCreationParams::SSubpassDescription subpasses[] = - { - {}, - Types::renderpass_t::SCreationParams::SubpassesEnd - }; - - subpasses[0].depthStencilAttachment.render = { .attachmentIndex = 0u,.layout = Types::image_t::LAYOUT::ATTACHMENT_OPTIMAL }; - subpasses[0].colorAttachments[0] = { .render = {.attachmentIndex = 0u, .layout = Types::image_t::LAYOUT::ATTACHMENT_OPTIMAL } }; - - static constexpr Types::renderpass_t::SCreationParams::SSubpassDependency dependencies[] = - { - // wipe-transition of Color to ATTACHMENT_OPTIMAL - { - .srcSubpass = Types::renderpass_t::SCreationParams::SSubpassDependency::External, - .dstSubpass = 0, - .memoryBarrier = - { - // - .srcStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, - // only write ops, reads can't be made available - .srcAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT, - // destination needs to wait as early as possible - .dstStageMask = PIPELINE_STAGE_FLAGS::EARLY_FRAGMENT_TESTS_BIT | PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - // because of depth test needing a read and a write - .dstAccessMask = ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | ACCESS_FLAGS::DEPTH_STENCIL_ATTACHMENT_READ_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_READ_BIT | ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT - } - // leave view offsets and flags default - }, - // color from ATTACHMENT_OPTIMAL to PRESENT_SRC - { - .srcSubpass = 0, - .dstSubpass = Types::renderpass_t::SCreationParams::SSubpassDependency::External, - .memoryBarrier = - { - // last place where the depth can get modified - .srcStageMask = PIPELINE_STAGE_FLAGS::COLOR_ATTACHMENT_OUTPUT_BIT, - // only write ops, reads can't be made available - .srcAccessMask = ACCESS_FLAGS::COLOR_ATTACHMENT_WRITE_BIT, - // - .dstStageMask = PIPELINE_STAGE_FLAGS::FRAGMENT_SHADER_BIT, - // - .dstAccessMask = ACCESS_FLAGS::SAMPLED_READ_BIT - // - } - // leave view offsets and flags default - }, - Types::renderpass_t::SCreationParams::DependenciesEnd - }; - - typename Types::renderpass_t::SCreationParams params = {}; - params.colorAttachments = colorAttachments; - params.depthStencilAttachments = depthAttachments; - params.subpasses = subpasses; - params.dependencies = dependencies; - - if constexpr (withAssetConverter) - scratch.renderpass = ICPURenderpass::create(params); - else - scratch.renderpass = utilities->getLogicalDevice()->createRenderpass(params); - - if (!scratch.renderpass) - { - logger->log("Could not create render pass!", 
ILogger::ELL_ERROR); - return false; - } - - return true; - } - - bool createFramebufferAttachments() - { - EXPOSE_NABLA_NAMESPACES(); - - auto createImageView = [&](smart_refctd_ptr& outView) -> smart_refctd_ptr - { - constexpr bool IS_DEPTH = isDepthOrStencilFormat(); - constexpr auto USAGE = [](const bool isDepth) - { - bitflag usage = Types::image_t::EUF_RENDER_ATTACHMENT_BIT; - - if (!isDepth) - usage |= Types::image_t::EUF_SAMPLED_BIT; - - return usage; - }(IS_DEPTH); - constexpr auto ASPECT = IS_DEPTH ? IImage::E_ASPECT_FLAGS::EAF_DEPTH_BIT : IImage::E_ASPECT_FLAGS::EAF_COLOR_BIT; - constexpr std::string_view DEBUG_NAME = IS_DEPTH ? "UI Scene Depth Attachment Image" : "UI Scene Color Attachment Image"; - { - smart_refctd_ptr image; - { - auto params = typename Types::image_t::SCreationParams( - { - .type = Types::image_t::ET_2D, - .samples = Samples, - .format = format, - .extent = { FramebufferW, FramebufferH, 1u }, - .mipLevels = 1u, - .arrayLayers = 1u, - .usage = USAGE - }); - - if constexpr (withAssetConverter) - image = ICPUImage::create(params); - else - image = utilities->getLogicalDevice()->createImage(std::move(params)); - } - - if (!image) - { - logger->log("Could not create image!", ILogger::ELL_ERROR); - return nullptr; - } - - if constexpr (withAssetConverter) - { - auto dummyBuffer = ICPUBuffer::create({ FramebufferW * FramebufferH * getTexelOrBlockBytesize() }); - dummyBuffer->setContentHash(dummyBuffer->computeContentHash()); - - auto regions = make_refctd_dynamic_array>(1u); - auto& region = regions->front(); - - region.imageSubresource = { .aspectMask = ASPECT, .mipLevel = 0u, .baseArrayLayer = 0u, .layerCount = 0u }; - region.bufferOffset = 0u; - region.bufferRowLength = IImageAssetHandlerBase::calcPitchInBlocks(FramebufferW, getTexelOrBlockBytesize()); - region.bufferImageHeight = 0u; - region.imageOffset = { 0u, 0u, 0u }; - region.imageExtent = { FramebufferW, FramebufferH, 1u }; - - if (!image->setBufferAndRegions(std::move(dummyBuffer), regions)) - { - logger->log("Could not set image's regions!", ILogger::ELL_ERROR); - return nullptr; - } - image->setContentHash(image->computeContentHash()); - } - else - { - image->setObjectDebugName(DEBUG_NAME.data()); - - if (!utilities->getLogicalDevice()->allocate(image->getMemoryReqs(), image.get()).isValid()) - { - logger->log("Could not allocate memory for an image!", ILogger::ELL_ERROR); - return nullptr; - } - } - - auto params = typename Types::image_view_t::SCreationParams - ({ - .flags = Types::image_view_t::ECF_NONE, - .subUsages = USAGE, - .image = std::move(image), - .viewType = Types::image_view_t::ET_2D, - .format = format, - .subresourceRange = { .aspectMask = ASPECT, .baseMipLevel = 0u, .levelCount = 1u, .baseArrayLayer = 0u, .layerCount = 1u } - }); - - if constexpr (withAssetConverter) - outView = make_smart_refctd_ptr(std::move(params)); - else - outView = utilities->getLogicalDevice()->createImageView(std::move(params)); - - if (!outView) - { - logger->log("Could not create image view!", ILogger::ELL_ERROR); - return nullptr; - } - - return smart_refctd_ptr(outView); - } - }; - - const bool allocated = createImageView.template operator() < ColorFboAttachmentFormat > (scratch.attachments.color) && createImageView.template operator() < DepthFboAttachmentFormat > (scratch.attachments.depth); - - if (!allocated) - { - logger->log("Could not allocate frame buffer's attachments!", ILogger::ELL_ERROR); - return false; - } - - return true; - } - - bool createShaders() - { - EXPOSE_NABLA_NAMESPACES(); - - auto 
createShader = [&](IShader::E_SHADER_STAGE stage, smart_refctd_ptr& outShader) -> smart_refctd_ptr - { - // TODO: use SPIRV loader & our ::system ns to get those cpu shaders, do not create myself (shit I forgot it exists) - - const SBuiltinFile& in = ::geometry::creator::spirv::builtin::get_resource(); - const auto buffer = ICPUBuffer::create({ { in.size }, (void*)in.contents, core::getNullMemoryResource() }, adopt_memory); - auto shader = make_smart_refctd_ptr(smart_refctd_ptr(buffer), stage, IShader::E_CONTENT_TYPE::ECT_SPIRV, ""); // must create cpu instance regardless underlying type - - if constexpr (withAssetConverter) - { - buffer->setContentHash(buffer->computeContentHash()); - outShader = std::move(shader); - } - else - outShader = utilities->getLogicalDevice()->createShader(shader.get()); - - return outShader; - }; - - typename ResourcesBundleScratch::Shaders& basic = scratch.shaders[GeometriesCpu::GP_BASIC]; - createShader.template operator() < NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("geometryCreator/spirv/gc.basic.vertex.spv") > (IShader::E_SHADER_STAGE::ESS_VERTEX, basic.vertex); - createShader.template operator() < NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("geometryCreator/spirv/gc.basic.fragment.spv") > (IShader::E_SHADER_STAGE::ESS_FRAGMENT, basic.fragment); - - typename ResourcesBundleScratch::Shaders& cone = scratch.shaders[GeometriesCpu::GP_CONE]; - createShader.template operator() < NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("geometryCreator/spirv/gc.cone.vertex.spv") > (IShader::E_SHADER_STAGE::ESS_VERTEX, cone.vertex); - createShader.template operator() < NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("geometryCreator/spirv/gc.basic.fragment.spv") > (IShader::E_SHADER_STAGE::ESS_FRAGMENT, cone.fragment); // note we reuse fragment from basic! - - typename ResourcesBundleScratch::Shaders& ico = scratch.shaders[GeometriesCpu::GP_ICO]; - createShader.template operator() < NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("geometryCreator/spirv/gc.ico.vertex.spv") > (IShader::E_SHADER_STAGE::ESS_VERTEX, ico.vertex); - createShader.template operator() < NBL_CORE_UNIQUE_STRING_LITERAL_TYPE("geometryCreator/spirv/gc.basic.fragment.spv") > (IShader::E_SHADER_STAGE::ESS_FRAGMENT, ico.fragment); // note we reuse fragment from basic! 
- - for (const auto& it : scratch.shaders) - { - if (!it.vertex || !it.fragment) - { - logger->log("Could not create shaders!", ILogger::ELL_ERROR); - return false; - } - } - - return true; - } - - bool createGeometries() - { - EXPOSE_NABLA_NAMESPACES(); - - for (uint32_t i = 0; i < geometries.objects.size(); ++i) - { - const auto& inGeometry = geometries.objects[i]; - auto& [obj, meta] = scratch.objects[i]; - - bool status = true; - - meta.name = inGeometry.meta.name; - meta.type = inGeometry.meta.type; - - struct - { - SBlendParams blend; - SRasterizationParams rasterization; - typename Types::graphics_pipeline_t::SCreationParams pipeline; - } params; - - { - params.blend.logicOp = ELO_NO_OP; - - auto& b = params.blend.blendParams[0]; - b.srcColorFactor = EBF_SRC_ALPHA; - b.dstColorFactor = EBF_ONE_MINUS_SRC_ALPHA; - b.colorBlendOp = EBO_ADD; - b.srcAlphaFactor = EBF_SRC_ALPHA; - b.dstAlphaFactor = EBF_SRC_ALPHA; - b.alphaBlendOp = EBO_ADD; - b.colorWriteMask = (1u << 0u) | (1u << 1u) | (1u << 2u) | (1u << 3u); - } - - params.rasterization.faceCullingMode = EFCM_NONE; - { - const typename Types::shader_t::SSpecInfo info [] = - { - {.entryPoint = "VSMain", .shader = scratch.shaders[inGeometry.shadersType].vertex.get() }, - {.entryPoint = "PSMain", .shader = scratch.shaders[inGeometry.shadersType].fragment.get() } - }; - - params.pipeline.layout = scratch.pipelineLayout.get(); - params.pipeline.shaders = info; - params.pipeline.renderpass = scratch.renderpass.get(); - params.pipeline.cached = { .vertexInput = inGeometry.data.inputParams, .primitiveAssembly = inGeometry.data.assemblyParams, .rasterization = params.rasterization, .blend = params.blend, .subpassIx = 0u }; - - obj.indexCount = inGeometry.data.indexCount; - obj.indexType = inGeometry.data.indexType; - - // TODO: cache pipeline & try lookup for existing one first maybe - - // similar issue like with shaders again, in this case gpu contructor allows for extra cache parameters + there is no constructor you can use to fire make_smart_refctd_ptr yourself for cpu - if constexpr (withAssetConverter) - obj.pipeline = ICPUGraphicsPipeline::create(params.pipeline); - else - { - const std::array info = { { params.pipeline } }; - utilities->getLogicalDevice()->createGraphicsPipelines(nullptr, info, &obj.pipeline); - } - - if (!obj.pipeline) - { - logger->log("Could not create graphics pipeline for [%s] object!", ILogger::ELL_ERROR, meta.name.data()); - status = false; - } - - // object buffers - auto createVIBuffers = [&]() -> bool - { - using ibuffer_t = ::nbl::asset::IBuffer; // seems to be ambigous, both asset & core namespaces has IBuffer - - // note: similar issue like with shaders, this time with cpu-gpu constructors differing in arguments - auto vBuffer = smart_refctd_ptr(inGeometry.data.bindings[0].buffer); // no offset - constexpr static auto VERTEX_USAGE = bitflag(ibuffer_t::EUF_VERTEX_BUFFER_BIT) | ibuffer_t::EUF_TRANSFER_DST_BIT | ibuffer_t::EUF_INLINE_UPDATE_VIA_CMDBUF; - obj.bindings.vertex.offset = 0u; - - auto iBuffer = smart_refctd_ptr(inGeometry.data.indexBuffer.buffer); // no offset - constexpr static auto INDEX_USAGE = bitflag(ibuffer_t::EUF_INDEX_BUFFER_BIT) | ibuffer_t::EUF_VERTEX_BUFFER_BIT | ibuffer_t::EUF_TRANSFER_DST_BIT | ibuffer_t::EUF_INLINE_UPDATE_VIA_CMDBUF; - obj.bindings.index.offset = 0u; - - if constexpr (withAssetConverter) - { - if (!vBuffer) - return false; - - vBuffer->addUsageFlags(VERTEX_USAGE); - vBuffer->setContentHash(vBuffer->computeContentHash()); - obj.bindings.vertex = { .offset = 0u, .buffer 
= vBuffer }; - - if (inGeometry.data.indexType != EIT_UNKNOWN) - if (iBuffer) - { - iBuffer->addUsageFlags(INDEX_USAGE); - iBuffer->setContentHash(iBuffer->computeContentHash()); - } - else - return false; - - obj.bindings.index = { .offset = 0u, .buffer = iBuffer }; - } - else - { - auto vertexBuffer = utilities->getLogicalDevice()->createBuffer(IGPUBuffer::SCreationParams({ .size = vBuffer->getSize(), .usage = VERTEX_USAGE })); - auto indexBuffer = iBuffer ? utilities->getLogicalDevice()->createBuffer(IGPUBuffer::SCreationParams({ .size = iBuffer->getSize(), .usage = INDEX_USAGE })) : nullptr; - - if (!vertexBuffer) - return false; - - if (inGeometry.data.indexType != EIT_UNKNOWN) - if (!indexBuffer) - return false; - - const auto mask = utilities->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits(); - for (auto it : { vertexBuffer , indexBuffer }) - { - if (it) - { - auto reqs = it->getMemoryReqs(); - reqs.memoryTypeBits &= mask; - - utilities->getLogicalDevice()->allocate(reqs, it.get()); - } - } - - // record transfer uploads - obj.bindings.vertex = { .offset = 0u, .buffer = std::move(vertexBuffer) }; - { - const SBufferRange range = { .offset = obj.bindings.vertex.offset, .size = obj.bindings.vertex.buffer->getSize(), .buffer = obj.bindings.vertex.buffer }; - if (!commandBuffer->updateBuffer(range, vBuffer->getPointer())) - { - logger->log("Could not record vertex buffer transfer upload for [%s] object!", ILogger::ELL_ERROR, meta.name.data()); - status = false; - } - } - obj.bindings.index = { .offset = 0u, .buffer = std::move(indexBuffer) }; - { - if (iBuffer) - { - const SBufferRange range = { .offset = obj.bindings.index.offset, .size = obj.bindings.index.buffer->getSize(), .buffer = obj.bindings.index.buffer }; - - if (!commandBuffer->updateBuffer(range, iBuffer->getPointer())) - { - logger->log("Could not record index buffer transfer upload for [%s] object!", ILogger::ELL_ERROR, meta.name.data()); - status = false; - } - } - } - } - - return true; - }; - - if (!createVIBuffers()) - { - logger->log("Could not create buffers for [%s] object!", ILogger::ELL_ERROR, meta.name.data()); - status = false; - } - - if (!status) - { - logger->log("[%s] object will not be created!", ILogger::ELL_ERROR, meta.name.data()); - - obj.bindings.vertex = {}; - obj.bindings.index = {}; - obj.indexCount = 0u; - obj.indexType = E_INDEX_TYPE::EIT_UNKNOWN; - obj.pipeline = nullptr; - - continue; - } - } - } - - return true; - } - - bool createViewParametersUboBuffer() - { - EXPOSE_NABLA_NAMESPACES(); - - using ibuffer_t = ::nbl::asset::IBuffer; // seems to be ambigous, both asset & core namespaces has IBuffer - constexpr static auto UboUsage = bitflag(ibuffer_t::EUF_UNIFORM_BUFFER_BIT) | ibuffer_t::EUF_TRANSFER_DST_BIT | ibuffer_t::EUF_INLINE_UPDATE_VIA_CMDBUF; - - if constexpr (withAssetConverter) - { - auto uboBuffer = ICPUBuffer::create({ sizeof(SBasicViewParameters) }); - uboBuffer->addUsageFlags(UboUsage); - uboBuffer->setContentHash(uboBuffer->computeContentHash()); - scratch.ubo = { .offset = 0u, .buffer = std::move(uboBuffer) }; - } - else - { - const auto mask = utilities->getLogicalDevice()->getPhysicalDevice()->getUpStreamingMemoryTypeBits(); - auto uboBuffer = utilities->getLogicalDevice()->createBuffer(IGPUBuffer::SCreationParams({ .size = sizeof(SBasicViewParameters), .usage = UboUsage })); - - if (!uboBuffer) - return false; - - for (auto it : { uboBuffer }) - { - IDeviceMemoryBacked::SDeviceMemoryRequirements reqs = it->getMemoryReqs(); - reqs.memoryTypeBits &= mask; 
- - utilities->getLogicalDevice()->allocate(reqs, it.get()); - } - - scratch.ubo = { .offset = 0u, .buffer = std::move(uboBuffer) }; - } - - return true; - } - - struct GeometriesCpu - { - enum GeometryShader - { - GP_BASIC = 0, - GP_CONE, - GP_ICO, - - GP_COUNT - }; - - struct ReferenceObjectCpu - { - ObjectMeta meta; - GeometryShader shadersType; - nbl::asset::CGeometryCreator::return_type data; - }; - - GeometriesCpu(const nbl::asset::IGeometryCreator* _gc) - : gc(_gc), - objects - ({ - ReferenceObjectCpu {.meta = {.type = OT_CUBE, .name = "Cube Mesh" }, .shadersType = GP_BASIC, .data = gc->createCubeMesh(nbl::core::vector3df(1.f, 1.f, 1.f)) }, - ReferenceObjectCpu {.meta = {.type = OT_SPHERE, .name = "Sphere Mesh" }, .shadersType = GP_BASIC, .data = gc->createSphereMesh(2, 16, 16) }, - ReferenceObjectCpu {.meta = {.type = OT_CYLINDER, .name = "Cylinder Mesh" }, .shadersType = GP_BASIC, .data = gc->createCylinderMesh(2, 2, 20) }, - ReferenceObjectCpu {.meta = {.type = OT_RECTANGLE, .name = "Rectangle Mesh" }, .shadersType = GP_BASIC, .data = gc->createRectangleMesh(nbl::core::vector2df_SIMD(1.5, 3)) }, - ReferenceObjectCpu {.meta = {.type = OT_DISK, .name = "Disk Mesh" }, .shadersType = GP_BASIC, .data = gc->createDiskMesh(2, 30) }, - ReferenceObjectCpu {.meta = {.type = OT_ARROW, .name = "Arrow Mesh" }, .shadersType = GP_BASIC, .data = gc->createArrowMesh() }, - ReferenceObjectCpu {.meta = {.type = OT_CONE, .name = "Cone Mesh" }, .shadersType = GP_CONE, .data = gc->createConeMesh(2, 3, 10) }, - ReferenceObjectCpu {.meta = {.type = OT_ICOSPHERE, .name = "Icosphere Mesh" }, .shadersType = GP_ICO, .data = gc->createIcoSphere(1, 3, true) } - }) - { - gc = nullptr; // one shot - } - - private: - const nbl::asset::IGeometryCreator* gc; - - public: - const std::array objects; - }; - - using resources_bundle_base_t = ResourcesBundleBase; - - struct ResourcesBundleScratch : public resources_bundle_base_t - { - using Types = resources_bundle_base_t::Types; - - ResourcesBundleScratch() - : resources_bundle_base_t() {} - - struct Shaders - { - nbl::core::smart_refctd_ptr vertex = nullptr, fragment = nullptr; - }; - - nbl::core::smart_refctd_ptr descriptorSetLayout; - nbl::core::smart_refctd_ptr pipelineLayout; - std::array shaders; - }; - - // TODO: we could make those params templated with default values like below - static constexpr auto FramebufferW = 1280u, FramebufferH = 720u; - static constexpr auto ColorFboAttachmentFormat = nbl::asset::EF_R8G8B8A8_SRGB, DepthFboAttachmentFormat = nbl::asset::EF_D16_UNORM; - static constexpr auto Samples = nbl::video::IGPUImage::ESCF_1_BIT; - - ResourcesBundleScratch scratch; - - nbl::video::IUtilities* const utilities; - nbl::video::IGPUCommandBuffer* const commandBuffer; - nbl::system::ILogger* const logger; - GeometriesCpu geometries; -}; - -#undef TYPES_IMPL_BOILERPLATE - -struct ObjectDrawHookCpu -{ - nbl::core::matrix3x4SIMD model; - nbl::asset::SBasicViewParameters viewParameters; - ObjectMeta meta; -}; - -/* - Rendering to offline framebuffer which we don't present, color - scene attachment texture we use for second UI renderpass - sampling it & rendering into desired GUI area. - - The scene can be created from simple geometry - using our Geometry Creator class.
-*/ - -class CScene final : public nbl::core::IReferenceCounted -{ -public: - ObjectDrawHookCpu object; // TODO: this could be a vector (to not complicate the example I leave it single object), we would need a better system for drawing then to make only 1 max 2 indirect draw calls (indexed and not indexed objects) - - struct - { - const uint32_t startedValue = 0, finishedValue = 0x45; - nbl::core::smart_refctd_ptr progress; - } semaphore; - - struct CreateResourcesDirectlyWithDevice { using Builder = ResourceBuilder; }; - struct CreateResourcesWithAssetConverter { using Builder = ResourceBuilder; }; - - ~CScene() {} - - static inline nbl::core::smart_refctd_ptr createCommandBuffer(nbl::video::ILogicalDevice* const device, nbl::system::ILogger* const logger, const uint32_t familyIx) - { - EXPOSE_NABLA_NAMESPACES(); - auto pool = device->createCommandPool(familyIx, IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT); - - if (!pool) - { - logger->log("Couldn't create Command Pool!", ILogger::ELL_ERROR); - return nullptr; - } - - nbl::core::smart_refctd_ptr cmd; - - if (!pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY, { &cmd , 1 })) - { - logger->log("Couldn't create Command Buffer!", ILogger::ELL_ERROR); - return nullptr; - } - - return cmd; - } - - template - static auto create(Args&&... args) -> decltype(auto) - { - EXPOSE_NABLA_NAMESPACES(); - - /* - user should call the constructor's args without last argument explicitly, this is a trick to make constructor templated, - eg.create(smart_refctd_ptr(device), smart_refctd_ptr(logger), queuePointer, geometryPointer) - */ - - auto* scene = new CScene(std::forward(args)..., CreateWith {}); - smart_refctd_ptr smart(scene, dont_grab); - - return smart; - } - - inline void begin() - { - EXPOSE_NABLA_NAMESPACES(); - - m_commandBuffer->reset(IGPUCommandBuffer::RESET_FLAGS::RELEASE_RESOURCES_BIT); - m_commandBuffer->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); - m_commandBuffer->beginDebugMarker("UISampleApp Offline Scene Frame"); - - semaphore.progress = m_utilities->getLogicalDevice()->createSemaphore(semaphore.startedValue); - } - - inline void record() - { - EXPOSE_NABLA_NAMESPACES(); - - const struct - { - const uint32_t width, height; - } fbo = { .width = m_frameBuffer->getCreationParameters().width, .height = m_frameBuffer->getCreationParameters().height }; - - SViewport viewport; - { - viewport.minDepth = 1.f; - viewport.maxDepth = 0.f; - viewport.x = 0u; - viewport.y = 0u; - viewport.width = fbo.width; - viewport.height = fbo.height; - } - - m_commandBuffer->setViewport(0u, 1u, &viewport); - - VkRect2D scissor = {}; - scissor.offset = { 0, 0 }; - scissor.extent = { fbo.width, fbo.height }; - m_commandBuffer->setScissor(0u, 1u, &scissor); - - const VkRect2D renderArea = - { - .offset = { 0,0 }, - .extent = { fbo.width, fbo.height } - }; - - const IGPUCommandBuffer::SRenderpassBeginInfo info = - { - .framebuffer = m_frameBuffer.get(), - .colorClearValues = &clear.color, - .depthStencilClearValues = &clear.depth, - .renderArea = renderArea - }; - - m_commandBuffer->beginRenderPass(info, IGPUCommandBuffer::SUBPASS_CONTENTS::INLINE); - - const auto& [hook, meta] = resources.objects[object.meta.type]; - auto* rawPipeline = hook.pipeline.get(); - - SBufferBinding vertex = hook.bindings.vertex, index = hook.bindings.index; - - m_commandBuffer->bindGraphicsPipeline(rawPipeline); - m_commandBuffer->bindDescriptorSets(EPBP_GRAPHICS, rawPipeline->getLayout(), 1, 1, &resources.descriptorSet.get()); - 
m_commandBuffer->bindVertexBuffers(0, 1, &vertex); - - if (index.buffer && hook.indexType != EIT_UNKNOWN) - { - m_commandBuffer->bindIndexBuffer(index, hook.indexType); - m_commandBuffer->drawIndexed(hook.indexCount, 1, 0, 0, 0); - } - else - m_commandBuffer->draw(hook.indexCount, 1, 0, 0); - - m_commandBuffer->endRenderPass(); - } - - inline void end() - { - m_commandBuffer->end(); - } - - inline bool submit() - { - EXPOSE_NABLA_NAMESPACES(); - - const IQueue::SSubmitInfo::SCommandBufferInfo buffers[] = - { - { .cmdbuf = m_commandBuffer.get() } - }; - - const IQueue::SSubmitInfo::SSemaphoreInfo signals[] = { {.semaphore = semaphore.progress.get(),.value = semaphore.finishedValue,.stageMask = PIPELINE_STAGE_FLAGS::FRAMEBUFFER_SPACE_BITS} }; - - const IQueue::SSubmitInfo infos[] = - { - { - .waitSemaphores = {}, - .commandBuffers = buffers, - .signalSemaphores = signals - } - }; - - return queue->submit(infos) == IQueue::RESULT::SUCCESS; - } - - // note: must be updated outside render pass - inline void update() - { - EXPOSE_NABLA_NAMESPACES(); - - SBufferRange range; - range.buffer = smart_refctd_ptr(resources.ubo.buffer); - range.size = resources.ubo.buffer->getSize(); - - m_commandBuffer->updateBuffer(range, &object.viewParameters); - } - - inline decltype(auto) getResources() - { - return (resources); // note: do not remove "()" - it makes the return type lvalue reference instead of copy - } - -private: - template // TODO: enforce constraints, only those 2 above are valid - CScene(nbl::core::smart_refctd_ptr _utilities, nbl::core::smart_refctd_ptr _logger, nbl::video::CThreadSafeQueueAdapter* _graphicsQueue, const nbl::asset::IGeometryCreator* _geometryCreator, CreateWith createWith = {}) - : m_utilities(nbl::core::smart_refctd_ptr(_utilities)), m_logger(nbl::core::smart_refctd_ptr(_logger)), queue(_graphicsQueue) - { - EXPOSE_NABLA_NAMESPACES(); - using Builder = typename CreateWith::Builder; - - m_commandBuffer = createCommandBuffer(m_utilities->getLogicalDevice(), m_utilities->getLogger(), queue->getFamilyIndex()); - Builder builder(m_utilities.get(), m_commandBuffer.get(), m_logger.get(), _geometryCreator); - - // gpu resources - if (builder.build()) - { - if (!builder.finalize(resources, queue)) - m_logger->log("Could not finalize resource objects to gpu objects!", ILogger::ELL_ERROR); - } - else - m_logger->log("Could not build resource objects!", ILogger::ELL_ERROR); - - // frame buffer - { - const auto extent = resources.attachments.color->getCreationParameters().image->getCreationParameters().extent; - - IGPUFramebuffer::SCreationParams params = - { - { - .renderpass = smart_refctd_ptr(resources.renderpass), - .depthStencilAttachments = &resources.attachments.depth.get(), - .colorAttachments = &resources.attachments.color.get(), - .width = extent.width, - .height = extent.height, - .layers = 1u - } - }; - - m_frameBuffer = m_utilities->getLogicalDevice()->createFramebuffer(std::move(params)); - - if (!m_frameBuffer) - { - m_logger->log("Could not create frame buffer!", ILogger::ELL_ERROR); - return; - } - } - } - - nbl::core::smart_refctd_ptr m_utilities; - nbl::core::smart_refctd_ptr m_logger; - - nbl::video::CThreadSafeQueueAdapter* queue; - nbl::core::smart_refctd_ptr m_commandBuffer; - - nbl::core::smart_refctd_ptr m_frameBuffer; - - ResourcesBundle resources; -}; - -} // nbl::scene::geometrycreator - -#endif // _NBL_GEOMETRY_CREATOR_SCENE_H_INCLUDED_ \ No newline at end of file diff --git a/common/include/SBasicViewParameters.hlsl b/common/include/SBasicViewParameters.hlsl 
deleted file mode 100644 index 0d0990186..000000000 --- a/common/include/SBasicViewParameters.hlsl +++ /dev/null @@ -1,17 +0,0 @@ -#ifndef _S_BASIC_VIEW_PARAMETERS_COMMON_HLSL_ -#define _S_BASIC_VIEW_PARAMETERS_COMMON_HLSL_ - -#ifdef __HLSL_VERSION -struct SBasicViewParameters //! matches CPU version size & alignment (160, 4) -{ - float4x4 MVP; - float3x4 MV; - float3x3 normalMat; -}; -#endif // _S_BASIC_VIEW_PARAMETERS_COMMON_HLSL_ - -#endif - -/* - do not remove this text, WAVE is so bad that you can get errors if no proper ending xD -*/ \ No newline at end of file diff --git a/common/include/nbl/examples/PCH.hpp b/common/include/nbl/examples/PCH.hpp new file mode 100644 index 000000000..a20984464 --- /dev/null +++ b/common/include/nbl/examples/PCH.hpp @@ -0,0 +1,29 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXAMPLES_PCH_HPP_ +#define _NBL_EXAMPLES_PCH_HPP_ + +//! Precompiled header (PCH) for Nabla Examples +/* + NOTE: currently our whole public and private interface is broken + and private headers leak to public includes +*/ + +//! Nabla declarations +#include "nabla.h" + +//! Common example interface headers +#include "nbl/examples/common/build/spirv/keys.hpp" +#include "nbl/examples/common/SimpleWindowedApplication.hpp" +#include "nbl/examples/common/MonoWindowApplication.hpp" +#include "nbl/examples/common/InputSystem.hpp" +#include "nbl/examples/common/CEventCallback.hpp" + +#include "nbl/examples/cameras/CCamera.hpp" + +#include "nbl/examples/geometry/CGeometryCreatorScene.hpp" +#include "nbl/examples/geometry/CSimpleDebugRenderer.hpp" + + +#endif // _NBL_EXAMPLES_COMMON_PCH_HPP_ \ No newline at end of file diff --git a/common/include/CCamera.hpp b/common/include/nbl/examples/cameras/CCamera.hpp similarity index 99% rename from common/include/CCamera.hpp rename to common/include/nbl/examples/cameras/CCamera.hpp index 1b0fe9c0f..3b3cd38d8 100644 --- a/common/include/CCamera.hpp +++ b/common/include/nbl/examples/cameras/CCamera.hpp @@ -1,16 +1,18 @@ // Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. // This file is part of the "Nabla Engine". // For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_COMMON_CAMERA_IMPL_ +#define _NBL_COMMON_CAMERA_IMPL_ -#ifndef _CAMERA_IMPL_ -#define _CAMERA_IMPL_ #include + #include #include #include #include + class Camera { public: @@ -322,5 +324,4 @@ class Camera std::chrono::microseconds nextPresentationTimeStamp, lastVirtualUpTimeStamp; }; - -#endif // _CAMERA_IMPL_ \ No newline at end of file +#endif \ No newline at end of file diff --git a/common/include/nbl/examples/common/BuiltinResourcesApplication.hpp b/common/include/nbl/examples/common/BuiltinResourcesApplication.hpp new file mode 100644 index 000000000..19a5482a0 --- /dev/null +++ b/common/include/nbl/examples/common/BuiltinResourcesApplication.hpp @@ -0,0 +1,83 @@ +// Copyright (C) 2023-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". 
+// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXAMPLES_BUILTIN_RESOURCE_APPLICATION_HPP_INCLUDED_ +#define _NBL_EXAMPLES_BUILTIN_RESOURCE_APPLICATION_HPP_INCLUDED_ + +// we need a system, logger and an asset manager +#include "nbl/application_templates/MonoAssetManagerApplication.hpp" + +#ifdef NBL_EMBED_BUILTIN_RESOURCES + #include "nbl/builtin/examples/include/CArchive.h" + #include "nbl/builtin/examples/src/CArchive.h" + #include "nbl/builtin/examples/build/CArchive.h" + #if __has_include("nbl/this_example/builtin/CArchive.h") + #include "nbl/this_example/builtin/CArchive.h" + #endif + #if __has_include("nbl/this_example/builtin/build/CArchive.h") + #include "nbl/this_example/builtin/build/CArchive.h" + #endif +#endif + +namespace nbl::examples +{ + +// Virtual Inheritance because apps might end up doing diamond inheritance +class BuiltinResourcesApplication : public virtual application_templates::MonoAssetManagerApplication +{ + using base_t = MonoAssetManagerApplication; + + public: + using base_t::base_t; + + protected: + // need this one for skipping passing all args into ApplicationFramework + BuiltinResourcesApplication() = default; + + virtual bool onAppInitialized(core::smart_refctd_ptr&& system) override + { + if (!base_t::onAppInitialized(std::move(system))) + return false; + + using namespace core; + + smart_refctd_ptr examplesHeaderArch,examplesSourceArch,examplesBuildArch,thisExampleArch, thisExampleBuildArch; + #ifdef NBL_EMBED_BUILTIN_RESOURCES + examplesHeaderArch = core::make_smart_refctd_ptr(smart_refctd_ptr(m_logger)); + examplesSourceArch = core::make_smart_refctd_ptr(smart_refctd_ptr(m_logger)); + examplesBuildArch = core::make_smart_refctd_ptr(smart_refctd_ptr(m_logger)); + + #ifdef _NBL_THIS_EXAMPLE_BUILTIN_C_ARCHIVE_H_ + thisExampleArch = make_smart_refctd_ptr(smart_refctd_ptr(m_logger)); + #endif + + #ifdef _NBL_THIS_EXAMPLE_BUILTIN_BUILD_C_ARCHIVE_H_ + thisExampleBuildArch = make_smart_refctd_ptr(smart_refctd_ptr(m_logger)); + #endif + + #else + examplesHeaderArch = make_smart_refctd_ptr(localInputCWD/"../common/include/nbl/examples",smart_refctd_ptr(m_logger),m_system.get()); + examplesSourceArch = make_smart_refctd_ptr(localInputCWD/"../common/src/nbl/examples",smart_refctd_ptr(m_logger),m_system.get()); + examplesBuildArch = make_smart_refctd_ptr(NBL_EXAMPLES_BUILD_MOUNT_POINT, smart_refctd_ptr(m_logger), m_system.get()); + thisExampleArch = make_smart_refctd_ptr(localInputCWD/"app_resources",smart_refctd_ptr(m_logger),m_system.get()); + #ifdef NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT + thisExampleBuildArch = make_smart_refctd_ptr(NBL_THIS_EXAMPLE_BUILD_MOUNT_POINT, smart_refctd_ptr(m_logger), m_system.get()); + #endif + #endif + // yes all 3 aliases are meant to be the same + m_system->mount(std::move(examplesHeaderArch),"nbl/examples"); + m_system->mount(std::move(examplesSourceArch),"nbl/examples"); + m_system->mount(std::move(examplesBuildArch),"nbl/examples"); + if (thisExampleArch) + m_system->mount(std::move(thisExampleArch),"app_resources"); + + if(thisExampleBuildArch) + m_system->mount(std::move(thisExampleBuildArch), "app_resources"); + + return true; + } +}; + +} + +#endif // _NBL_EXAMPLES_BUILTIN_RESOURCE_APPLICATION_HPP_INCLUDED_ \ No newline at end of file diff --git a/common/include/nbl/examples/common/CEventCallback.hpp b/common/include/nbl/examples/common/CEventCallback.hpp new file mode 100644 index 000000000..cae6dc7de --- /dev/null +++ b/common/include/nbl/examples/common/CEventCallback.hpp @@ -0,0 
+1,54 @@ +#ifndef _NBL_EXAMPLES_COMMON_C_EVENT_CALLBACK_HPP_INCLUDED_ +#define _NBL_EXAMPLES_COMMON_C_EVENT_CALLBACK_HPP_INCLUDED_ + + +#include "nbl/video/utilities/CSimpleResizeSurface.h" + +#include "nbl/examples/common/InputSystem.hpp" + + +namespace nbl::examples +{ +class CEventCallback : public nbl::video::ISimpleManagedSurface::ICallback +{ + public: + CEventCallback(nbl::core::smart_refctd_ptr&& m_inputSystem, nbl::system::logger_opt_smart_ptr&& logger) : m_inputSystem(std::move(m_inputSystem)), m_logger(std::move(logger)) {} + CEventCallback() {} + + void setLogger(nbl::system::logger_opt_smart_ptr& logger) + { + m_logger = logger; + } + void setInputSystem(nbl::core::smart_refctd_ptr&& m_inputSystem) + { + m_inputSystem = std::move(m_inputSystem); + } + + private: + void onMouseConnected_impl(nbl::core::smart_refctd_ptr&& mch) override + { + m_logger.log("A mouse %p has been connected", nbl::system::ILogger::ELL_INFO, mch.get()); + m_inputSystem.get()->add(m_inputSystem.get()->m_mouse, std::move(mch)); + } + void onMouseDisconnected_impl(nbl::ui::IMouseEventChannel* mch) override + { + m_logger.log("A mouse %p has been disconnected", nbl::system::ILogger::ELL_INFO, mch); + m_inputSystem.get()->remove(m_inputSystem.get()->m_mouse, mch); + } + void onKeyboardConnected_impl(nbl::core::smart_refctd_ptr&& kbch) override + { + m_logger.log("A keyboard %p has been connected", nbl::system::ILogger::ELL_INFO, kbch.get()); + m_inputSystem.get()->add(m_inputSystem.get()->m_keyboard, std::move(kbch)); + } + void onKeyboardDisconnected_impl(nbl::ui::IKeyboardEventChannel* kbch) override + { + m_logger.log("A keyboard %p has been disconnected", nbl::system::ILogger::ELL_INFO, kbch); + m_inputSystem.get()->remove(m_inputSystem.get()->m_keyboard, kbch); + } + + private: + nbl::core::smart_refctd_ptr m_inputSystem = nullptr; + nbl::system::logger_opt_smart_ptr m_logger = nullptr; +}; +} +#endif \ No newline at end of file diff --git a/common/include/nbl/examples/common/CSwapchainFramebuffersAndDepth.hpp b/common/include/nbl/examples/common/CSwapchainFramebuffersAndDepth.hpp new file mode 100644 index 000000000..c7d780fdf --- /dev/null +++ b/common/include/nbl/examples/common/CSwapchainFramebuffersAndDepth.hpp @@ -0,0 +1,108 @@ +// Copyright (C) 2023-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXAMPLES_COMMON_C_SWAPCHAIN_FRAMEBUFFERS_AND_DEPTH_HPP_INCLUDED_ +#define _NBL_EXAMPLES_COMMON_C_SWAPCHAIN_FRAMEBUFFERS_AND_DEPTH_HPP_INCLUDED_ + +// Build on top of the previous one +#include "nbl/application_templates/BasicMultiQueueApplication.hpp" + +namespace nbl::examples +{ + +class CSwapchainFramebuffersAndDepth final : public video::CDefaultSwapchainFramebuffers +{ + using base_t = CDefaultSwapchainFramebuffers; + + public: + template + inline CSwapchainFramebuffersAndDepth(video::ILogicalDevice* device, const asset::E_FORMAT _desiredDepthFormat, Args&&... args) : base_t(device,std::forward(args)...) 
+ { + // user didn't want any depth + if (_desiredDepthFormat==asset::EF_UNKNOWN) + return; + + using namespace nbl::asset; + using namespace nbl::video; + const IPhysicalDevice::SImageFormatPromotionRequest req = { + .originalFormat = _desiredDepthFormat, + .usages = {IGPUImage::EUF_RENDER_ATTACHMENT_BIT} + }; + m_depthFormat = m_device->getPhysicalDevice()->promoteImageFormat(req,IGPUImage::TILING::OPTIMAL); + + const static IGPURenderpass::SCreationParams::SDepthStencilAttachmentDescription depthAttachments[] = { + {{ + { + .format = m_depthFormat, + .samples = IGPUImage::ESCF_1_BIT, + .mayAlias = false + }, + /*.loadOp = */{IGPURenderpass::LOAD_OP::CLEAR}, + /*.storeOp = */{IGPURenderpass::STORE_OP::STORE}, + /*.initialLayout = */{IGPUImage::LAYOUT::UNDEFINED}, // because we clear we don't care about contents + /*.finalLayout = */{IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL} + }}, + IGPURenderpass::SCreationParams::DepthStencilAttachmentsEnd + }; + m_params.depthStencilAttachments = depthAttachments; + + static IGPURenderpass::SCreationParams::SSubpassDescription subpasses[] = { + m_params.subpasses[0], + IGPURenderpass::SCreationParams::SubpassesEnd + }; + subpasses[0].depthStencilAttachment.render = { .attachmentIndex = 0,.layout = IGPUImage::LAYOUT::ATTACHMENT_OPTIMAL }; + m_params.subpasses = subpasses; + } + + protected: + inline bool onCreateSwapchain_impl(const uint8_t qFam) override + { + using namespace nbl::asset; + using namespace nbl::video; + if (m_depthFormat!=asset::EF_UNKNOWN) + { + // DOCS: why are we not using `m_device` here? any particular reason? + auto device = const_cast(m_renderpass->getOriginDevice()); + + const auto depthFormat = m_renderpass->getCreationParameters().depthStencilAttachments[0].format; + const auto& sharedParams = getSwapchain()->getCreationParameters().sharedParams; + auto image = device->createImage({ IImage::SCreationParams{ + .type = IGPUImage::ET_2D, + .samples = IGPUImage::ESCF_1_BIT, + .format = depthFormat, + .extent = {sharedParams.width,sharedParams.height,1}, + .mipLevels = 1, + .arrayLayers = 1, + .depthUsage = IGPUImage::EUF_RENDER_ATTACHMENT_BIT + } }); + + device->allocate(image->getMemoryReqs(), image.get()); + + m_depthBuffer = device->createImageView({ + .flags = IGPUImageView::ECF_NONE, + .subUsages = IGPUImage::EUF_RENDER_ATTACHMENT_BIT, + .image = std::move(image), + .viewType = IGPUImageView::ET_2D, + .format = depthFormat, + .subresourceRange = {IGPUImage::EAF_DEPTH_BIT,0,1,0,1} + }); + } + const auto retval = base_t::onCreateSwapchain_impl(qFam); + m_depthBuffer = nullptr; + return retval; + } + + inline core::smart_refctd_ptr createFramebuffer(video::IGPUFramebuffer::SCreationParams&& params) override + { + if (m_depthBuffer) + params.depthStencilAttachments = &m_depthBuffer.get(); + return m_device->createFramebuffer(std::move(params)); + } + + asset::E_FORMAT m_depthFormat = asset::EF_UNKNOWN; + // only used to pass a parameter from `onCreateSwapchain_impl` to `createFramebuffer` + core::smart_refctd_ptr m_depthBuffer; +}; + +} +#endif \ No newline at end of file diff --git a/common/include/InputSystem.hpp b/common/include/nbl/examples/common/InputSystem.hpp similarity index 84% rename from common/include/InputSystem.hpp rename to common/include/nbl/examples/common/InputSystem.hpp index c42b738d0..c30fc1212 100644 --- a/common/include/InputSystem.hpp +++ b/common/include/nbl/examples/common/InputSystem.hpp @@ -4,16 +4,19 @@ #ifndef _NBL_EXAMPLES_COMMON_INPUT_SYSTEM_HPP_INCLUDED_ #define 
_NBL_EXAMPLES_COMMON_INPUT_SYSTEM_HPP_INCLUDED_ -class InputSystem : public nbl::core::IReferenceCounted +namespace nbl::examples +{ + +class InputSystem : public core::IReferenceCounted { public: template struct Channels { - nbl::core::mutex lock; + core::mutex lock; std::condition_variable added; - nbl::core::vector> channels; - nbl::core::vector timeStamps; + core::vector> channels; + core::vector timeStamps; uint32_t defaultChannelIndex = 0; }; // TODO: move to "nbl/ui/InputEventChannel.h" once the interface of this utility struct matures, also maybe rename to `Consumer` ? @@ -21,7 +24,7 @@ class InputSystem : public nbl::core::IReferenceCounted struct ChannelReader { template - inline void consumeEvents(F&& processFunc, nbl::system::logger_opt_ptr logger=nullptr) + inline void consumeEvents(F&& processFunc, system::logger_opt_ptr logger=nullptr) { auto events = channel->getEvents(); const auto frontBufferCapacity = channel->getFrontBufferCapacity(); @@ -29,7 +32,7 @@ class InputSystem : public nbl::core::IReferenceCounted { logger.log( "Detected overflow, %d unconsumed events in channel of size %d!", - nbl::system::ILogger::ELL_ERROR,events.size()-consumedCounter,frontBufferCapacity + system::ILogger::ELL_ERROR,events.size()-consumedCounter,frontBufferCapacity ); consumedCounter = events.size()-frontBufferCapacity; } @@ -38,22 +41,22 @@ class InputSystem : public nbl::core::IReferenceCounted consumedCounter = events.size(); } - nbl::core::smart_refctd_ptr channel = nullptr; + core::smart_refctd_ptr channel = nullptr; uint64_t consumedCounter = 0ull; }; - InputSystem(nbl::system::logger_opt_smart_ptr&& logger) : m_logger(std::move(logger)) {} + InputSystem(system::logger_opt_smart_ptr&& logger) : m_logger(std::move(logger)) {} - void getDefaultMouse(ChannelReader* reader) + void getDefaultMouse(ChannelReader* reader) { getDefault(m_mouse,reader); } - void getDefaultKeyboard(ChannelReader* reader) + void getDefaultKeyboard(ChannelReader* reader) { getDefault(m_keyboard,reader); } template - void add(Channels& channels, nbl::core::smart_refctd_ptr&& channel) + void add(Channels& channels, core::smart_refctd_ptr&& channel) { std::unique_lock lock(channels.lock); channels.channels.push_back(std::move(channel)); @@ -94,7 +97,7 @@ class InputSystem : public nbl::core::IReferenceCounted std::unique_lock lock(channels.lock); while (channels.channels.empty()) { - m_logger.log("Waiting For Input Device to be connected...",nbl::system::ILogger::ELL_INFO); + m_logger.log("Waiting For Input Device to be connected...",system::ILogger::ELL_INFO); channels.added.wait(lock); } @@ -159,7 +162,7 @@ class InputSystem : public nbl::core::IReferenceCounted } if(defaultIdx != newDefaultIdx) { - m_logger.log("Default InputChannel for ChannelType changed from %u to %u",nbl::system::ILogger::ELL_INFO, defaultIdx, newDefaultIdx); + m_logger.log("Default InputChannel for ChannelType changed from %u to %u",system::ILogger::ELL_INFO, defaultIdx, newDefaultIdx); defaultIdx = newDefaultIdx; channels.defaultChannelIndex = newDefaultIdx; @@ -177,10 +180,10 @@ class InputSystem : public nbl::core::IReferenceCounted reader->consumedCounter = consumedCounter; } - nbl::system::logger_opt_smart_ptr m_logger; - Channels m_mouse; - Channels m_keyboard; + system::logger_opt_smart_ptr m_logger; + Channels m_mouse; + Channels m_keyboard; }; - +} #endif diff --git a/common/include/nbl/examples/common/MonoWindowApplication.hpp b/common/include/nbl/examples/common/MonoWindowApplication.hpp new file mode 100644 index 
000000000..0f18012c0 --- /dev/null +++ b/common/include/nbl/examples/common/MonoWindowApplication.hpp @@ -0,0 +1,189 @@ +// Copyright (C) 2023-2023 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXAMPLES_COMMON_MONO_WINDOW_APPLICATION_HPP_INCLUDED_ +#define _NBL_EXAMPLES_COMMON_MONO_WINDOW_APPLICATION_HPP_INCLUDED_ + +// Build on top of the previous one +#include "nbl/examples/common/SimpleWindowedApplication.hpp" +#include "nbl/examples/common/CSwapchainFramebuffersAndDepth.hpp" +#include "nbl/examples/common/CEventCallback.hpp" + +namespace nbl::examples +{ + +// Virtual Inheritance because apps might end up doing diamond inheritance +class MonoWindowApplication : public virtual SimpleWindowedApplication +{ + using base_t = SimpleWindowedApplication; + + public: + // Maximum frames which can be simultaneously submitted, used to cycle through our per-frame resources like command buffers + constexpr static inline uint8_t MaxFramesInFlight = 3; + + template + MonoWindowApplication(const hlsl::uint16_t2 _initialResolution, const asset::E_FORMAT _depthFormat, Args&&... args) : + base_t(std::forward(args)...), m_initialResolution(_initialResolution), m_depthFormat(_depthFormat) {} + + // + inline core::vector getSurfaces() const override final + { + if (!m_surface) + { + using namespace nbl::core; + using namespace nbl::ui; + using namespace nbl::video; + { + auto windowCallback = make_smart_refctd_ptr(smart_refctd_ptr(m_inputSystem),smart_refctd_ptr(m_logger)); + IWindow::SCreationParams params = {}; + params.callback = make_smart_refctd_ptr(); + params.width = m_initialResolution[0]; + params.height = m_initialResolution[1]; + params.x = 32; + params.y = 32; + params.flags = ui::IWindow::ECF_HIDDEN | IWindow::ECF_BORDERLESS | IWindow::ECF_RESIZABLE; + params.windowCaption = "MonoWindowApplication"; + params.callback = windowCallback; + const_cast&>(m_window) = m_winMgr->createWindow(std::move(params)); + } + + auto surface = CSurfaceVulkanWin32::create(smart_refctd_ptr(m_api), smart_refctd_ptr_static_cast(m_window)); + const_cast&>(m_surface) = CSimpleResizeSurface::create(std::move(surface)); + } + + if (m_surface) + return { {m_surface->getSurface()/*,EQF_NONE*/} }; + + return {}; + } + + virtual inline bool onAppInitialized(core::smart_refctd_ptr&& system) override + { + using namespace nbl::core; + using namespace nbl::video; + // want to have a usable system and logger first + if (!MonoSystemMonoLoggerApplication::onAppInitialized(std::move(system))) + return false; + + m_inputSystem = make_smart_refctd_ptr(system::logger_opt_smart_ptr(smart_refctd_ptr(m_logger))); + if (!base_t::onAppInitialized(std::move(system))) + return false; + + ISwapchain::SCreationParams swapchainParams = { .surface = smart_refctd_ptr(m_surface->getSurface()) }; + if (!swapchainParams.deduceFormat(m_physicalDevice)) + return logFail("Could not choose a Surface Format for the Swapchain!"); + + // TODO: option without depth + auto scResources = std::make_unique(m_device.get(),m_depthFormat,swapchainParams.surfaceFormat.format,getDefaultSubpassDependencies()); + auto* renderpass = scResources->getRenderpass(); + + if (!renderpass) + return logFail("Failed to create Renderpass!"); + + auto gQueue = getGraphicsQueue(); + if (!m_surface || !m_surface->init(gQueue,std::move(scResources),swapchainParams.sharedParams)) + return logFail("Could not create Window & Surface or initialize the 
Surface!"); + + m_winMgr->setWindowSize(m_window.get(),m_initialResolution[0],m_initialResolution[1]); + m_surface->recreateSwapchain(); + + return true; + } + + // we do slight inversion of control here + inline void workLoopBody() override final + { + using namespace nbl::core; + using namespace nbl::video; + // framesInFlight: ensuring safe execution of command buffers and acquires, `framesInFlight` only affect semaphore waits, don't use this to index your resources because it can change with swapchain recreation. + const uint32_t framesInFlightCount = hlsl::min(MaxFramesInFlight,m_surface->getMaxAcquiresInFlight()); + // We block for semaphores for 2 reasons here: + // A) Resource: Can't use resource like a command buffer BEFORE previous use is finished! [MaxFramesInFlight] + // B) Acquire: Can't have more acquires in flight than a certain threshold returned by swapchain or your surface helper class. [MaxAcquiresInFlight] + if (m_framesInFlight.size()>=framesInFlightCount) + { + const ISemaphore::SWaitInfo framesDone[] = + { + { + .semaphore = m_framesInFlight.front().semaphore.get(), + .value = m_framesInFlight.front().value + } + }; + if (m_device->blockForSemaphores(framesDone)!=ISemaphore::WAIT_RESULT::SUCCESS) + return; + m_framesInFlight.pop_front(); + } + + auto updatePresentationTimestamp = [&]() + { + m_currentImageAcquire = m_surface->acquireNextImage(); + + // TODO: better frame pacing than this + oracle.reportEndFrameRecord(); + const auto timestamp = oracle.getNextPresentationTimeStamp(); + oracle.reportBeginFrameRecord(); + + return timestamp; + }; + + const auto nextPresentationTimestamp = updatePresentationTimestamp(); + + if (!m_currentImageAcquire) + return; + + const IQueue::SSubmitInfo::SSemaphoreInfo rendered[] = {renderFrame(nextPresentationTimestamp)}; + m_surface->present(m_currentImageAcquire.imageIndex,rendered); + if (rendered->semaphore) + m_framesInFlight.emplace_back(smart_refctd_ptr(rendered->semaphore),rendered->value); + } + + // + virtual inline bool keepRunning() override + { + if (m_surface->irrecoverable()) + return false; + + return true; + } + + // + virtual inline bool onAppTerminated() + { + m_inputSystem = nullptr; + m_device->waitIdle(); + m_framesInFlight.clear(); + m_surface = nullptr; + m_window = nullptr; + return base_t::onAppTerminated(); + } + + protected: + inline void onAppInitializedFinish() + { + m_winMgr->show(m_window.get()); + oracle.reportBeginFrameRecord(); + } + inline const auto& getCurrentAcquire() const {return m_currentImageAcquire;} + + virtual const video::IGPURenderpass::SCreationParams::SSubpassDependency* getDefaultSubpassDependencies() const = 0; + virtual video::IQueue::SSubmitInfo::SSemaphoreInfo renderFrame(const std::chrono::microseconds nextPresentationTimestamp) = 0; + + const hlsl::uint16_t2 m_initialResolution; + const asset::E_FORMAT m_depthFormat; + core::smart_refctd_ptr m_inputSystem; + core::smart_refctd_ptr m_window; + core::smart_refctd_ptr> m_surface; + + private: + struct SSubmittedFrame + { + core::smart_refctd_ptr semaphore; + uint64_t value; + }; + core::deque m_framesInFlight; + video::ISimpleManagedSurface::SAcquireResult m_currentImageAcquire = {}; + video::CDumbPresentationOracle oracle; +}; + +} +#endif \ No newline at end of file diff --git a/common/include/nbl/examples/common/SBasicViewParameters.hlsl b/common/include/nbl/examples/common/SBasicViewParameters.hlsl new file mode 100644 index 000000000..b7ad31cb6 --- /dev/null +++ b/common/include/nbl/examples/common/SBasicViewParameters.hlsl 
@@ -0,0 +1,29 @@ +#ifndef _NBL_EXAMPLES_S_BASIC_VIEW_PARAMETERS_HLSL_ +#define _NBL_EXAMPLES_S_BASIC_VIEW_PARAMETERS_HLSL_ + + +#include "nbl/builtin/hlsl/cpp_compat/matrix.hlsl" + + +namespace nbl +{ +namespace hlsl +{ +namespace examples +{ + +struct SBasicViewParameters +{ + float32_t4x4 MVP; + float32_t3x4 MV; + float32_t3x3 normalMat; +}; + +} +} +} +#endif + +/* + do not remove this text, WAVE is so bad that you can get errors if no proper ending xD +*/ \ No newline at end of file diff --git a/common/include/SimpleWindowedApplication.hpp b/common/include/nbl/examples/common/SimpleWindowedApplication.hpp similarity index 99% rename from common/include/SimpleWindowedApplication.hpp rename to common/include/nbl/examples/common/SimpleWindowedApplication.hpp index 802a93188..ddb510eb7 100644 --- a/common/include/SimpleWindowedApplication.hpp +++ b/common/include/nbl/examples/common/SimpleWindowedApplication.hpp @@ -88,5 +88,4 @@ class SimpleWindowedApplication : public virtual application_templates::BasicMul }; } - -#endif // _CAMERA_IMPL_ \ No newline at end of file +#endif \ No newline at end of file diff --git a/common/include/nbl/examples/examples.hpp b/common/include/nbl/examples/examples.hpp new file mode 100644 index 000000000..1450abc2a --- /dev/null +++ b/common/include/nbl/examples/examples.hpp @@ -0,0 +1,23 @@ +// Copyright (C) 2018-2025 - DevSH Graphics Programming Sp. z O.O. +// This file is part of the "Nabla Engine". +// For conditions of distribution and use, see copyright notice in nabla.h +#ifndef _NBL_EXAMPLES_HPP_ +#define _NBL_EXAMPLES_HPP_ + + +//! Precompiled header shared across all examples +#include "nbl/examples/PCH.hpp" + +//! Example specific headers that must not be included in the PCH +/* + NOTE: Add here if they depend on preprocessor definitions + or macros that are specific to individual example targets + (eg. defined in CMake) +*/ + +// #include "..." 
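+// (for instance, a hypothetical `#include "CThisExampleResources.hpp"` that only compiles once the target's own CMake definitions are in effect would belong here rather than in the PCH)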
+ +// cannot be in PCH because depends on definition of `this_example` for Example's builtins +#include "nbl/examples/common/BuiltinResourcesApplication.hpp" + +#endif // _NBL_EXAMPLES_HPP_ \ No newline at end of file diff --git a/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp b/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp new file mode 100644 index 000000000..2993725a0 --- /dev/null +++ b/common/include/nbl/examples/geometry/CGeometryCreatorScene.hpp @@ -0,0 +1,191 @@ +#ifndef _NBL_EXAMPLES_C_GEOMETRY_CREATOR_SCENE_H_INCLUDED_ +#define _NBL_EXAMPLES_C_GEOMETRY_CREATOR_SCENE_H_INCLUDED_ + + +#include +#include "nbl/asset/utils/CGeometryCreator.h" + + +namespace nbl::examples +{ + +class CGeometryCreatorScene : public core::IReferenceCounted +{ +#define EXPOSE_NABLA_NAMESPACES \ + using namespace nbl::core; \ + using namespace nbl::system; \ + using namespace nbl::asset; \ + using namespace nbl::video + public: + // + struct SCreateParams + { + video::IQueue* transferQueue; + video::IUtilities* utilities; + system::ILogger* logger; + std::span addtionalBufferOwnershipFamilies = {}; + }; + static inline core::smart_refctd_ptr create(SCreateParams&& params, const video::CAssetConverter::patch_t& geometryPatch) + { + EXPOSE_NABLA_NAMESPACES; + auto* logger = params.logger; + assert(logger); + if (!params.transferQueue) + { + logger->log("Pass a non-null `IQueue* transferQueue`!",ILogger::ELL_ERROR); + return nullptr; + } + if (!params.utilities) + { + logger->log("Pass a non-null `IUtilities* utilities`!",ILogger::ELL_ERROR); + return nullptr; + } + + + SInitParams init = {}; + core::vector> geometries; + // create our geometries + { + auto addGeometry = [&init,&geometries](const std::string_view name, smart_refctd_ptr&& geom)->void + { + init.geometryNames.emplace_back(name); + geometries.push_back(std::move(geom)); + }; + + auto creator = core::make_smart_refctd_ptr(); + /* TODO: others + ReferenceObjectCpu {.meta = {.type = OT_CUBE, .name = "Cube Mesh" }, .shadersType = GP_BASIC, .data = gc->createCubeMesh(nbl::core::vector3df(1.f, 1.f, 1.f)) }, + ReferenceObjectCpu {.meta = {.type = OT_SPHERE, .name = "Sphere Mesh" }, .shadersType = GP_BASIC, .data = gc->createSphereMesh(2, 16, 16) }, + ReferenceObjectCpu {.meta = {.type = OT_CYLINDER, .name = "Cylinder Mesh" }, .shadersType = GP_BASIC, .data = gc->createCylinderMesh(2, 2, 20) }, + ReferenceObjectCpu {.meta = {.type = OT_RECTANGLE, .name = "Rectangle Mesh" }, .shadersType = GP_BASIC, .data = gc->createRectangleMesh(nbl::core::vector2df_SIMD(1.5, 3)) }, + ReferenceObjectCpu {.meta = {.type = OT_DISK, .name = "Disk Mesh" }, .shadersType = GP_BASIC, .data = gc->createDiskMesh(2, 30) }, + ReferenceObjectCpu {.meta = {.type = OT_ARROW, .name = "Arrow Mesh" }, .shadersType = GP_BASIC, .data = gc->createArrowMesh() }, + ReferenceObjectCpu {.meta = {.type = OT_CONE, .name = "Cone Mesh" }, .shadersType = GP_CONE, .data = gc->createConeMesh(2, 3, 10) }, + ReferenceObjectCpu {.meta = {.type = OT_ICOSPHERE, .name = "Icosphere Mesh" }, .shadersType = GP_ICO, .data = gc->createIcoSphere(1, 3, true) } + */ + addGeometry("Cube",creator->createCube({1.f,1.f,1.f})); + addGeometry("Rectangle",creator->createRectangle({1.5f,3.f})); + addGeometry("Disk",creator->createDisk(2.f,30)); + addGeometry("Sphere", creator->createSphere(2, 16, 16)); + addGeometry("Cylinder", creator->createCylinder(2, 2, 20)); + addGeometry("Cone", creator->createCone(2, 3, 10)); + addGeometry("Icosphere", creator->createIcoSphere(1, 4, true)); + } + 
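+ // What follows is the CPU->GPU conversion: reserve GPU objects with the CAssetConverter, record and submit the uploads on the transfer queue, then keep only the geometries that actually converted; buffers are requested with shared ownership across the transfer family and any `addtionalBufferOwnershipFamilies`, so ownership release/acquire between those families shouldn't be needed afterwards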
init.geometries.reserve(init.geometryNames.size()); + + // convert the geometries + { + auto device = params.utilities->getLogicalDevice(); + smart_refctd_ptr converter = CAssetConverter::create({.device=device}); + + + const auto transferFamily = params.transferQueue->getFamilyIndex(); + + struct SInputs : CAssetConverter::SInputs + { + virtual inline std::span getSharedOwnershipQueueFamilies(const size_t groupCopyID, const asset::ICPUBuffer* buffer, const CAssetConverter::patch_t& patch) const + { + return sharedBufferOwnership; + } + + core::vector sharedBufferOwnership; + } inputs = {}; + core::vector> patches(geometries.size(),geometryPatch); + { + inputs.logger = logger; + std::get>(inputs.assets) = {&geometries.front().get(),geometries.size()}; + std::get>(inputs.patches) = patches; + // set up shared ownership so we don't have to + core::unordered_set families; + families.insert(transferFamily); + families.insert(params.addtionalBufferOwnershipFamilies.begin(),params.addtionalBufferOwnershipFamilies.end()); + if (families.size()>1) + for (const auto fam : families) + inputs.sharedBufferOwnership.push_back(fam); + } + + // reserve + auto reservation = converter->reserve(inputs); + if (!reservation) + { + logger->log("Failed to reserve GPU objects for CPU->GPU conversion!",ILogger::ELL_ERROR); + return nullptr; + } + + // convert + { + auto semaphore = device->createSemaphore(0u); + + constexpr auto MultiBuffering = 2; + std::array,MultiBuffering> commandBuffers = {}; + { + auto pool = device->createCommandPool(transferFamily,IGPUCommandPool::CREATE_FLAGS::RESET_COMMAND_BUFFER_BIT|IGPUCommandPool::CREATE_FLAGS::TRANSIENT_BIT); + pool->createCommandBuffers(IGPUCommandPool::BUFFER_LEVEL::PRIMARY,commandBuffers,smart_refctd_ptr(logger)); + } + commandBuffers.front()->begin(IGPUCommandBuffer::USAGE::ONE_TIME_SUBMIT_BIT); + + std::array commandBufferSubmits; + for (auto i=0; ilog("Failed to await submission feature!", ILogger::ELL_ERROR); + return nullptr; + } + } + + // assign outputs + { + auto inIt = reservation.getGPUObjects().data(); + for (auto outIt=init.geometryNames.begin(); outIt!=init.geometryNames.end(); inIt++) + { + if (inIt->value) + { + init.geometries.push_back(inIt->value); + outIt++; + } + else + { + logger->log("Failed to convert ICPUPolygonGeometry %s to GPU!",ILogger::ELL_ERROR,outIt->c_str()); + outIt = init.geometryNames.erase(outIt); + } + } + } + } + + return smart_refctd_ptr(new CGeometryCreatorScene(std::move(init)),dont_grab); + } + + // + struct SInitParams + { + core::vector> geometries; + core::vector geometryNames; + }; + const SInitParams& getInitParams() const {return m_init;} + + protected: + inline CGeometryCreatorScene(SInitParams&& _init) : m_init(std::move(_init)) {} + + SInitParams m_init; +#undef EXPOSE_NABLA_NAMESPACES +}; + +} +#endif \ No newline at end of file diff --git a/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp b/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp new file mode 100644 index 000000000..9a9e5c966 --- /dev/null +++ b/common/include/nbl/examples/geometry/CSimpleDebugRenderer.hpp @@ -0,0 +1,419 @@ +#ifndef _NBL_EXAMPLES_C_SIMPLE_DEBUG_RENDERER_H_INCLUDED_ +#define _NBL_EXAMPLES_C_SIMPLE_DEBUG_RENDERER_H_INCLUDED_ + +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" +#include "nbl/examples/geometry/SPushConstants.hlsl" + +namespace nbl::examples +{ + +class CSimpleDebugRenderer final : public core::IReferenceCounted +{ +#define EXPOSE_NABLA_NAMESPACES \ + using namespace nbl::core; \ + using 
namespace nbl::system; \ + using namespace nbl::asset; \ + using namespace nbl::video + + public: + // + constexpr static inline uint16_t VertexAttrubUTBDescBinding = 0; + // + struct SViewParams + { + inline SViewParams(const hlsl::float32_t3x4& _view, const hlsl::float32_t4x4& _viewProj) + { + view = _view; + viewProj = _viewProj; + using namespace nbl::hlsl; + normal = transpose(inverse(float32_t3x3(view))); + } + + inline auto computeForInstance(hlsl::float32_t3x4 world) const + { + using namespace nbl::hlsl; + hlsl::examples::geometry_creator_scene::SInstanceMatrices retval = { + .worldViewProj = float32_t4x4(math::linalg::promoted_mul(float64_t4x4(viewProj),float64_t3x4(world))) + }; + const auto sub3x3 = mul(float64_t3x3(viewProj),float64_t3x3(world)); + retval.normal = float32_t3x3(transpose(inverse(sub3x3))); + return retval; + } + + hlsl::float32_t3x4 view; + hlsl::float32_t4x4 viewProj; + hlsl::float32_t3x3 normal; + }; + // + struct SPackedGeometry + { + core::smart_refctd_ptr pipeline = {}; + asset::SBufferBinding indexBuffer = {}; + uint32_t elementCount = 0; + // indices into the descriptor set + constexpr static inline auto MissingView = hlsl::examples::geometry_creator_scene::SPushConstants::DescriptorCount; + uint16_t positionView = MissingView; + uint16_t normalView = MissingView; + asset::E_INDEX_TYPE indexType = asset::EIT_UNKNOWN; + }; + // + struct SInstance + { + using SPushConstants = hlsl::examples::geometry_creator_scene::SPushConstants; + inline SPushConstants computePushConstants(const SViewParams& viewParams) const + { + using namespace hlsl; + return { + .matrices = viewParams.computeForInstance(world), + .positionView = packedGeo->positionView, + .normalView = packedGeo->normalView + }; + } + + hlsl::float32_t3x4 world; + const SPackedGeometry* packedGeo; + }; + + // + constexpr static inline auto DefaultPolygonGeometryPatch = []()->video::CAssetConverter::patch_t + { + // we want to use the vertex data through UTBs + using usage_f = video::IGPUBuffer::E_USAGE_FLAGS; + video::CAssetConverter::patch_t patch = {}; + patch.positionBufferUsages = usage_f::EUF_UNIFORM_TEXEL_BUFFER_BIT; + patch.indexBufferUsages = usage_f::EUF_INDEX_BUFFER_BIT; + patch.otherBufferUsages = usage_f::EUF_UNIFORM_TEXEL_BUFFER_BIT; + return patch; + }(); + + // + static inline core::smart_refctd_ptr create(asset::IAssetManager* assMan, video::IGPURenderpass* renderpass, const uint32_t subpassIX) + { + EXPOSE_NABLA_NAMESPACES; + + if (!renderpass) + return nullptr; + auto device = const_cast(renderpass->getOriginDevice()); + auto logger = device->getLogger(); + + if (!assMan) + return nullptr; + + // load shader + smart_refctd_ptr shader; + { + auto key = "nbl/examples/" + nbl::builtin::examples::build::get_spirv_key<"shaders/geometry/unified">(device); + const auto bundle = assMan->getAsset(key.data(), {}); + + const auto contents = bundle.getContents(); + if (contents.empty() || bundle.getAssetType()!=IAsset::ET_SHADER) + return nullptr; + shader = IAsset::castDown(contents[0]); + + if (!shader) + return nullptr; + } + + SInitParams init; + + // create descriptor set + { + // create Descriptor Set Layout + smart_refctd_ptr dsLayout; + { + using binding_flags_t = IGPUDescriptorSetLayout::SBinding::E_CREATE_FLAGS; + const IGPUDescriptorSetLayout::SBinding bindings[] = + { + { + .binding = VertexAttrubUTBDescBinding, + .type = IDescriptor::E_TYPE::ET_UNIFORM_TEXEL_BUFFER, + // need this trifecta of flags for `SubAllocatedDescriptorSet` to accept the binding as suballocatable + 
.createFlags = binding_flags_t::ECF_UPDATE_AFTER_BIND_BIT|binding_flags_t::ECF_UPDATE_UNUSED_WHILE_PENDING_BIT |binding_flags_t::ECF_PARTIALLY_BOUND_BIT, + .stageFlags = IShader::E_SHADER_STAGE::ESS_VERTEX|IShader::E_SHADER_STAGE::ESS_FRAGMENT, + .count = SPackedGeometry::MissingView + } + }; + dsLayout = device->createDescriptorSetLayout(bindings); + if (!dsLayout) + { + logger->log("Could not create descriptor set layout!",ILogger::ELL_ERROR); + return nullptr; + } + } + + // create Descriptor Set + auto pool = device->createDescriptorPoolForDSLayouts(IDescriptorPool::ECF_UPDATE_AFTER_BIND_BIT,{&dsLayout.get(),1}); + auto ds = pool->createDescriptorSet(std::move(dsLayout)); + if (!ds) + { + logger->log("Could not create descriptor set!",ILogger::ELL_ERROR); + return nullptr; + } + init.subAllocDS = make_smart_refctd_ptr(std::move(ds)); + } + + // create pipeline layout + const SPushConstantRange ranges[] = {{ + .stageFlags = hlsl::ShaderStage::ESS_VERTEX|hlsl::ShaderStage::ESS_FRAGMENT, + .offset = 0, + .size = sizeof(SInstance::SPushConstants), + }}; + init.layout = device->createPipelineLayout(ranges,smart_refctd_ptr(init.subAllocDS->getDescriptorSet()->getLayout())); + + // create pipelines + using pipeline_e = SInitParams::PipelineType; + { + IGPUGraphicsPipeline::SCreationParams params[pipeline_e::Count] = {}; + params[pipeline_e::BasicTriangleList].vertexShader = {.shader=shader.get(),.entryPoint="BasicVS"}; + params[pipeline_e::BasicTriangleList].fragmentShader = {.shader=shader.get(),.entryPoint="BasicFS"}; + params[pipeline_e::BasicTriangleFan].vertexShader = {.shader=shader.get(),.entryPoint="BasicVS"}; + params[pipeline_e::BasicTriangleFan].fragmentShader = {.shader=shader.get(),.entryPoint="BasicFS"}; + params[pipeline_e::Cone].vertexShader = {.shader=shader.get(),.entryPoint="ConeVS"}; + params[pipeline_e::Cone].fragmentShader = {.shader=shader.get(),.entryPoint="ConeFS"}; + for (auto i=0; i(i); + switch (type) + { + case pipeline_e::BasicTriangleFan: + primitiveAssembly.primitiveType = E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_FAN; + break; + default: + primitiveAssembly.primitiveType = E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_LIST; + break; + } + primitiveAssembly.primitiveRestartEnable = false; + primitiveAssembly.tessPatchVertCount = 3; + rasterization.faceCullingMode = EFCM_NONE; + params[i].cached.subpassIx = subpassIX; + params[i].renderpass = renderpass; + } + if (!device->createGraphicsPipelines(nullptr,params,init.pipelines)) + { + logger->log("Could not create Graphics Pipelines!",ILogger::ELL_ERROR); + return nullptr; + } + } + + return smart_refctd_ptr(new CSimpleDebugRenderer(std::move(init)),dont_grab); + } + + // + static inline core::smart_refctd_ptr create(asset::IAssetManager* assMan, video::IGPURenderpass* renderpass, const uint32_t subpassIX, const std::span geometries) + { + auto retval = create(assMan,renderpass,subpassIX); + if (retval) + retval->addGeometries(geometries); + return retval; + } + + // + struct SInitParams + { + enum PipelineType : uint8_t + { + BasicTriangleList, + BasicTriangleFan, + Cone, // special case + Count + }; + + core::smart_refctd_ptr subAllocDS; + core::smart_refctd_ptr layout; + core::smart_refctd_ptr pipelines[PipelineType::Count]; + }; + inline const SInitParams& getInitParams() const {return m_params;} + + // + inline bool addGeometries(const std::span geometries) + { + EXPOSE_NABLA_NAMESPACES; + if (geometries.empty()) + return false; + auto device = const_cast(m_params.layout->getOriginDevice()); + + core::vector writes; + core::vector 
infos; + bool anyFailed = false; + auto allocateUTB = [&](const IGeometry::SDataView& view)->decltype(SubAllocatedDescriptorSet::invalid_value) + { + if (!view) + return SPackedGeometry::MissingView; + auto index = SubAllocatedDescriptorSet::invalid_value; + if (m_params.subAllocDS->multi_allocate(VertexAttrubUTBDescBinding,1,&index)!=0) + { + anyFailed = true; + return SPackedGeometry::MissingView; + } + const auto infosOffset = infos.size(); + infos.emplace_back().desc = device->createBufferView(view.src,view.composed.format); + writes.emplace_back() = { + .dstSet = m_params.subAllocDS->getDescriptorSet(), + .binding = VertexAttrubUTBDescBinding, + .arrayElement = index, + .count = 1, + .info = reinterpret_cast(infosOffset) + }; + return index; + }; + if (anyFailed) + device->getLogger()->log("Failed to allocate a UTB for some geometries, probably ran out of space in Descriptor Set!",system::ILogger::ELL_ERROR); + + auto sizeToSet = m_geoms.size(); + auto resetGeoms = core::makeRAIIExiter([&]()->void + { + for (auto& write : writes) + immediateDealloc(write.arrayElement); + m_geoms.resize(sizeToSet); + } + ); + for (const auto geom : geometries) + { + // could also check device origin on all buffers + if (!geom->valid()) + return false; + auto& out = m_geoms.emplace_back(); + using pipeline_e = SInitParams::PipelineType; + switch (geom->getIndexingCallback()->knownTopology()) + { + case E_PRIMITIVE_TOPOLOGY::EPT_TRIANGLE_FAN: + out.pipeline = m_params.pipelines[pipeline_e::BasicTriangleFan]; + break; + default: + out.pipeline = m_params.pipelines[pipeline_e::BasicTriangleList]; + break; + } + if (const auto& view=geom->getIndexView(); view) + { + out.indexBuffer.offset = view.src.offset; + out.indexBuffer.buffer = view.src.buffer; + switch (view.composed.format) + { + case E_FORMAT::EF_R16_UINT: + out.indexType = EIT_16BIT; + break; + case E_FORMAT::EF_R32_UINT: + out.indexType = EIT_32BIT; + break; + default: + return false; + } + } + out.elementCount = geom->getVertexReferenceCount(); + out.positionView = allocateUTB(geom->getPositionView()); + out.normalView = allocateUTB(geom->getNormalView()); + } + + // no geometry + if (infos.empty()) + return false; + + // unbase our pointers + for (auto& write : writes) + write.info = infos.data()+reinterpret_cast(write.info); + if (!device->updateDescriptorSets(writes,{})) + return false; + + // retain + writes.clear(); + sizeToSet = m_geoms.size(); + return true; + } + + // + inline void removeGeometry(const uint32_t ix, const video::ISemaphore::SWaitInfo& info) + { + EXPOSE_NABLA_NAMESPACES; + if (ix>=m_geoms.size()) + return; + + core::vector deferredFree; + deferredFree.reserve(3); + auto deallocate = [&](SubAllocatedDescriptorSet::value_type index)->void + { + if (index>=SPackedGeometry::MissingView) + return; + if (info.semaphore) + deferredFree.push_back(index); + else + immediateDealloc(index); + }; + auto geo = m_geoms.begin() + ix; + deallocate(geo->positionView); + deallocate(geo->normalView); + m_geoms.erase(geo); + + if (deferredFree.empty()) + return; + m_params.subAllocDS->multi_deallocate(VertexAttrubUTBDescBinding,deferredFree.size(),deferredFree.data(),info); + } + + // + inline void clearGeometries(const video::ISemaphore::SWaitInfo& info) + { + // back to front to avoid O(n^2) resize + while (!m_geoms.empty()) + removeGeometry(m_geoms.size()-1,info); + } + + // + inline const auto& getGeometries() const {return m_geoms;} + inline auto& getGeometry(const uint32_t ix) {return m_geoms[ix];} + + // + inline void 
render(video::IGPUCommandBuffer* cmdbuf, const SViewParams& viewParams) const + { + EXPOSE_NABLA_NAMESPACES; + + cmdbuf->beginDebugMarker("CSimpleDebugRenderer::render"); + + const auto* layout = m_params.layout.get(); + const auto ds = m_params.subAllocDS->getDescriptorSet(); + cmdbuf->bindDescriptorSets(E_PIPELINE_BIND_POINT::EPBP_GRAPHICS,layout,0,1,&ds); + + for (const auto& instance : m_instances) + { + const auto* geo = instance.packedGeo; + cmdbuf->bindGraphicsPipeline(geo->pipeline.get()); + const auto pc = instance.computePushConstants(viewParams); + cmdbuf->pushConstants(layout,hlsl::ShaderStage::ESS_VERTEX|hlsl::ShaderStage::ESS_FRAGMENT,0,sizeof(pc),&pc); + if (geo->indexBuffer) + { + cmdbuf->bindIndexBuffer(geo->indexBuffer,geo->indexType); + cmdbuf->drawIndexed(geo->elementCount,1,0,0,0); + } + else + cmdbuf->draw(geo->elementCount,1,0,0); + } + cmdbuf->endDebugMarker(); + } + + core::vector m_instances; + + protected: + inline CSimpleDebugRenderer(SInitParams&& _params) : m_params(std::move(_params)) {} + inline ~CSimpleDebugRenderer() + { + // clean shutdown, can also make SubAllocatedDescriptorSet resillient against that, and issue `device->waitIdle` if not everything is freed + const_cast(m_params.layout->getOriginDevice())->waitIdle(); + clearGeometries({}); + } + + inline void immediateDealloc(video::SubAllocatedDescriptorSet::value_type index) + { + video::IGPUDescriptorSet::SDropDescriptorSet dummy[1]; + m_params.subAllocDS->multi_deallocate(dummy,VertexAttrubUTBDescBinding,1,&index); + } + + SInitParams m_params; + core::vector m_geoms; +#undef EXPOSE_NABLA_NAMESPACES +}; + +} +#endif \ No newline at end of file diff --git a/common/include/nbl/examples/geometry/SPushConstants.hlsl b/common/include/nbl/examples/geometry/SPushConstants.hlsl new file mode 100644 index 000000000..74cbfd565 --- /dev/null +++ b/common/include/nbl/examples/geometry/SPushConstants.hlsl @@ -0,0 +1,40 @@ +#ifndef _NBL_EXAMPLES_S_PUSH_CONSTANTS_HLSL_ +#define _NBL_EXAMPLES_S_PUSH_CONSTANTS_HLSL_ + + +#include "nbl/builtin/hlsl/cpp_compat.hlsl" + + +namespace nbl +{ +namespace hlsl +{ +namespace examples +{ +namespace geometry_creator_scene +{ + +struct SInstanceMatrices +{ + float32_t4x4 worldViewProj; + float32_t3x3 normal; +}; + +struct SPushConstants +{ + NBL_CONSTEXPR_STATIC_INLINE uint32_t DescriptorCount = (0x1<<16)-1; + + SInstanceMatrices matrices; + uint32_t positionView : 16; + uint32_t normalView : 16; +}; + +} +} +} +} +#endif + +/* + do not remove this text, WAVE is so bad that you can get errors if no proper ending xD +*/ \ No newline at end of file diff --git a/common/include/nbl/examples/workgroup/DataAccessors.hlsl b/common/include/nbl/examples/workgroup/DataAccessors.hlsl new file mode 100644 index 000000000..ca5915f2c --- /dev/null +++ b/common/include/nbl/examples/workgroup/DataAccessors.hlsl @@ -0,0 +1,131 @@ +#ifndef _NBL_EXAMPLES_WORKGROUP_DATA_ACCESSORS_HLSL_ +#define _NBL_EXAMPLES_WORKGROUP_DATA_ACCESSORS_HLSL_ + + +#include "nbl/builtin/hlsl/bda/legacy_bda_accessor.hlsl" + + +namespace nbl +{ +namespace hlsl +{ +namespace examples +{ +namespace workgroup +{ + +struct ScratchProxy +{ + template + void get(const uint32_t ix, NBL_REF_ARG(AccessType) value) + { + value = scratch[ix]; + } + template + void set(const uint32_t ix, const AccessType value) + { + scratch[ix] = value; + } + + uint32_t atomicOr(const uint32_t ix, const uint32_t value) + { + return glsl::atomicOr(scratch[ix],value); + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + 
//glsl::memoryBarrierShared(); implied by the above + } +}; + +template +struct DataProxy +{ + using dtype_t = vector; + // function template AccessType should be the same as dtype_t + + static DataProxy create(const uint64_t inputBuf, const uint64_t outputBuf) + { + DataProxy retval; + const uint32_t workgroupOffset = glsl::gl_WorkGroupID().x * VirtualWorkgroupSize * sizeof(dtype_t); + retval.accessor = DoubleLegacyBdaAccessor::create(inputBuf + workgroupOffset, outputBuf + workgroupOffset); + return retval; + } + + template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + accessor.get(ix, value); + } + template + void set(const IndexType ix, const AccessType value) + { + accessor.set(ix, value); + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + //glsl::memoryBarrierShared(); implied by the above + } + + DoubleLegacyBdaAccessor accessor; +}; + +template +struct PreloadedDataProxy +{ + using dtype_t = vector; + + NBL_CONSTEXPR_STATIC_INLINE uint16_t WorkgroupSize = uint16_t(1u) << WorkgroupSizeLog2; + NBL_CONSTEXPR_STATIC_INLINE uint16_t PreloadedDataCount = VirtualWorkgroupSize / WorkgroupSize; + + static PreloadedDataProxy create(const uint64_t inputBuf, const uint64_t outputBuf) + { + PreloadedDataProxy retval; + retval.data = DataProxy::create(inputBuf, outputBuf); + return retval; + } + + template + void get(const IndexType ix, NBL_REF_ARG(AccessType) value) + { + value = preloaded[ix>>WorkgroupSizeLog2]; + } + template + void set(const IndexType ix, const AccessType value) + { + preloaded[ix>>WorkgroupSizeLog2] = value; + } + + void preload() + { + const uint16_t invocationIndex = hlsl::workgroup::SubgroupContiguousIndex(); + [unroll] + for (uint16_t idx = 0; idx < PreloadedDataCount; idx++) + data.template get(idx * WorkgroupSize + invocationIndex, preloaded[idx]); + } + void unload() + { + const uint16_t invocationIndex = hlsl::workgroup::SubgroupContiguousIndex(); + [unroll] + for (uint16_t idx = 0; idx < PreloadedDataCount; idx++) + data.template set(idx * WorkgroupSize + invocationIndex, preloaded[idx]); + } + + void workgroupExecutionAndMemoryBarrier() + { + glsl::barrier(); + //glsl::memoryBarrierShared(); implied by the above + } + + DataProxy data; + dtype_t preloaded[PreloadedDataCount]; +}; + +} +} +} +} +#endif diff --git a/common/src/CMakeLists.txt b/common/src/CMakeLists.txt deleted file mode 100644 index 1399b949e..000000000 --- a/common/src/CMakeLists.txt +++ /dev/null @@ -1,14 +0,0 @@ -# we add common libraries -# add_subdirectory(camera EXCLUDE_FROM_ALL) # header only currently -add_subdirectory(geometry EXCLUDE_FROM_ALL) - -# we get all available targets inclusive & below this directory -NBL_GET_ALL_TARGETS(NBL_SUBDIRECTORY_TARGETS) - -# then we expose common include search directories to all common libraries + create link interface -foreach(NBL_TARGET IN LISTS NBL_SUBDIRECTORY_TARGETS) - target_include_directories(${NBL_TARGET} PUBLIC $) - target_link_libraries(nblCommonAPI INTERFACE ${NBL_TARGET}) -endforeach() - -set(NBL_COMMON_API_TARGETS ${NBL_SUBDIRECTORY_TARGETS} PARENT_SCOPE) \ No newline at end of file diff --git a/common/src/camera/CMakeLists.txt b/common/src/camera/CMakeLists.txt deleted file mode 100644 index eedf690aa..000000000 --- a/common/src/camera/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -# header only currently - -#set(NBL_LIB_SOURCES -# "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp" -#) - -#nbl_create_ext_library_project(Camera "" "${NBL_LIB_SOURCES}" "" "" "") \ No newline at end of file diff --git 
a/common/src/empty.cpp b/common/src/empty.cpp deleted file mode 100644 index e69de29bb..000000000 diff --git a/common/src/geometry/CMakeLists.txt b/common/src/geometry/CMakeLists.txt deleted file mode 100644 index fb33ec637..000000000 --- a/common/src/geometry/CMakeLists.txt +++ /dev/null @@ -1 +0,0 @@ -add_subdirectory(creator EXCLUDE_FROM_ALL) \ No newline at end of file diff --git a/common/src/geometry/creator/CMakeLists.txt b/common/src/geometry/creator/CMakeLists.txt deleted file mode 100644 index 336d32fe5..000000000 --- a/common/src/geometry/creator/CMakeLists.txt +++ /dev/null @@ -1,69 +0,0 @@ -# shaders IO directories -set(NBL_THIS_EXAMPLE_INPUT_SHADERS_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/shaders") -get_filename_component(_THIS_EXAMPLE_SPIRV_BR_BUNDLE_SEARCH_DIRECTORY_ "${CMAKE_CURRENT_BINARY_DIR}/shaders/include" ABSOLUTE) -get_filename_component(_THIS_EXAMPLE_SPIRV_BR_OUTPUT_DIRECTORY_HEADER_ "${CMAKE_CURRENT_BINARY_DIR}/builtin/include" ABSOLUTE) -get_filename_component(_THIS_EXAMPLE_SPIRV_BR_OUTPUT_DIRECTORY_SOURCE_ "${CMAKE_CURRENT_BINARY_DIR}/builtin/src" ABSOLUTE) -set(NBL_THIS_EXAMPLE_OUTPUT_SPIRV_DIRECTORY "${_THIS_EXAMPLE_SPIRV_BR_BUNDLE_SEARCH_DIRECTORY_}/nbl/geometryCreator/spirv") - -# list of input source shaders -set(NBL_THIS_EXAMPLE_INPUT_SHADERS - # geometry creator - "${NBL_THIS_EXAMPLE_INPUT_SHADERS_DIRECTORY}/gc.basic.fragment.hlsl" - "${NBL_THIS_EXAMPLE_INPUT_SHADERS_DIRECTORY}/gc.basic.vertex.hlsl" - "${NBL_THIS_EXAMPLE_INPUT_SHADERS_DIRECTORY}/gc.cone.vertex.hlsl" - "${NBL_THIS_EXAMPLE_INPUT_SHADERS_DIRECTORY}/gc.ico.vertex.hlsl" - - # grid - "${NBL_THIS_EXAMPLE_INPUT_SHADERS_DIRECTORY}/grid.vertex.hlsl" - "${NBL_THIS_EXAMPLE_INPUT_SHADERS_DIRECTORY}/grid.fragment.hlsl" -) - -file(GLOB_RECURSE NBL_THIS_EXAMPLE_INPUT_COMMONS CONFIGURE_DEPENDS "${NBL_THIS_EXAMPLE_INPUT_SHADERS_DIRECTORY}/template/*.hlsl") - -include("${NBL_ROOT_PATH}/src/nbl/builtin/utils.cmake") - -foreach(NBL_INPUT_SHADER IN LISTS NBL_THIS_EXAMPLE_INPUT_SHADERS) - cmake_path(GET NBL_INPUT_SHADER FILENAME NBL_INPUT_SHADER_FILENAME) - cmake_path(GET NBL_INPUT_SHADER_FILENAME STEM LAST_ONLY NBL_SHADER_STEM) # filename without .hlsl extension - cmake_path(GET NBL_SHADER_STEM EXTENSION LAST_ONLY NBL_SHADER_TYPE) # . 
- - set(NBL_OUTPUT_SPIRV_FILENAME "${NBL_SHADER_STEM}.spv") - set(NBL_OUTPUT_SPIRV_PATH "${NBL_THIS_EXAMPLE_OUTPUT_SPIRV_DIRECTORY}/${NBL_OUTPUT_SPIRV_FILENAME}") - - if(NBL_SHADER_TYPE STREQUAL .vertex) - set(NBL_NSC_COMPILE_OPTIONS -T vs_6_7 -E VSMain) - elseif(NBL_SHADER_TYPE STREQUAL .geometry) - set(NBL_NSC_COMPILE_OPTIONS -T gs_6_7 -E GSMain) - elseif(NBL_SHADER_TYPE STREQUAL .fragment) - set(NBL_NSC_COMPILE_OPTIONS -T ps_6_7 -E PSMain) - else() - message(FATAL_ERROR "Input shader is supposed to be ..hlsl!") - endif() - - set(NBL_NSC_COMPILE_COMMAND - "$" - -Fc "${NBL_OUTPUT_SPIRV_PATH}" - -I "${NBL_COMMON_API_INCLUDE_DIRECTORY}" - ${NBL_NSC_COMPILE_OPTIONS} # this should come from shader's [#pragma WAVE ] but our NSC doesn't seem to work properly currently - "${NBL_INPUT_SHADER}" - ) - - set(NBL_DEPENDS - "${NBL_INPUT_SHADER}" - ${NBL_THIS_EXAMPLE_INPUT_COMMONS} - ) - - add_custom_command(OUTPUT "${NBL_OUTPUT_SPIRV_PATH}" - COMMAND ${NBL_NSC_COMPILE_COMMAND} - DEPENDS ${NBL_DEPENDS} - WORKING_DIRECTORY "${NBL_THIS_EXAMPLE_INPUT_SHADERS_DIRECTORY}" - COMMENT "Generating \"${NBL_OUTPUT_SPIRV_PATH}\"" - VERBATIM - COMMAND_EXPAND_LISTS - ) - - list(APPEND NBL_THIS_EXAMPLE_OUTPUT_SPIRV_BUILTINS "${NBL_OUTPUT_SPIRV_PATH}") - LIST_BUILTIN_RESOURCE(GEOMETRY_CREATOR_SPIRV_RESOURCES_TO_EMBED "geometryCreator/spirv/${NBL_OUTPUT_SPIRV_FILENAME}") -endforeach() - -ADD_CUSTOM_BUILTIN_RESOURCES(geometryCreatorSpirvBRD GEOMETRY_CREATOR_SPIRV_RESOURCES_TO_EMBED "${_THIS_EXAMPLE_SPIRV_BR_BUNDLE_SEARCH_DIRECTORY_}" "nbl" "geometry::creator::spirv::builtin" "${_THIS_EXAMPLE_SPIRV_BR_OUTPUT_DIRECTORY_HEADER_}" "${_THIS_EXAMPLE_SPIRV_BR_OUTPUT_DIRECTORY_SOURCE_}" "STATIC" "INTERNAL") \ No newline at end of file diff --git a/common/src/geometry/creator/shaders/gc.basic.fragment.hlsl b/common/src/geometry/creator/shaders/gc.basic.fragment.hlsl deleted file mode 100644 index 3dc9b9f1d..000000000 --- a/common/src/geometry/creator/shaders/gc.basic.fragment.hlsl +++ /dev/null @@ -1,6 +0,0 @@ -#include "template/gc.common.hlsl" - -float4 PSMain(PSInput input) : SV_Target0 -{ - return input.color; -} \ No newline at end of file diff --git a/common/src/geometry/creator/shaders/gc.basic.vertex.hlsl b/common/src/geometry/creator/shaders/gc.basic.vertex.hlsl deleted file mode 100644 index 1afd468d9..000000000 --- a/common/src/geometry/creator/shaders/gc.basic.vertex.hlsl +++ /dev/null @@ -1,6 +0,0 @@ -#include "template/gc.basic.vertex.input.hlsl" -#include "template/gc.vertex.hlsl" - -/* - do not remove this text, WAVE is so bad that you can get errors if no proper ending xD -*/ diff --git a/common/src/geometry/creator/shaders/gc.cone.vertex.hlsl b/common/src/geometry/creator/shaders/gc.cone.vertex.hlsl deleted file mode 100644 index ee0c42431..000000000 --- a/common/src/geometry/creator/shaders/gc.cone.vertex.hlsl +++ /dev/null @@ -1,6 +0,0 @@ -#include "template/gc.cone.vertex.input.hlsl" -#include "template/gc.vertex.hlsl" - -/* - do not remove this text, WAVE is so bad that you can get errors if no proper ending xD -*/ diff --git a/common/src/geometry/creator/shaders/gc.ico.vertex.hlsl b/common/src/geometry/creator/shaders/gc.ico.vertex.hlsl deleted file mode 100644 index d63fdc809..000000000 --- a/common/src/geometry/creator/shaders/gc.ico.vertex.hlsl +++ /dev/null @@ -1,6 +0,0 @@ -#include "template/gc.ico.vertex.input.hlsl" -#include "template/gc.vertex.hlsl" - -/* - do not remove this text, WAVE is so bad that you can get errors if no proper ending xD -*/ diff --git 
a/common/src/geometry/creator/shaders/grid.fragment.hlsl b/common/src/geometry/creator/shaders/grid.fragment.hlsl deleted file mode 100644 index 4b4c1e691..000000000 --- a/common/src/geometry/creator/shaders/grid.fragment.hlsl +++ /dev/null @@ -1,12 +0,0 @@ -#include "template/grid.common.hlsl" - -float4 PSMain(PSInput input) : SV_Target0 -{ - float2 uv = (input.uv - float2(0.5, 0.5)) + 0.5 / 30.0; - float grid = gridTextureGradBox(uv, ddx(input.uv), ddy(input.uv)); - float4 fragColor = float4(1.0 - grid, 1.0 - grid, 1.0 - grid, 1.0); - fragColor *= 0.25; - fragColor *= 0.3 + 0.6 * smoothstep(0.0, 0.1, 1.0 - length(input.uv) / 5.5); - - return fragColor; -} \ No newline at end of file diff --git a/common/src/geometry/creator/shaders/grid.vertex.hlsl b/common/src/geometry/creator/shaders/grid.vertex.hlsl deleted file mode 100644 index 167b981d3..000000000 --- a/common/src/geometry/creator/shaders/grid.vertex.hlsl +++ /dev/null @@ -1,17 +0,0 @@ -#include "template/grid.common.hlsl" - -// set 1, binding 0 -[[vk::binding(0, 1)]] -cbuffer CameraData -{ - SBasicViewParameters params; -}; - -PSInput VSMain(VSInput input) -{ - PSInput output; - output.position = mul(params.MVP, float4(input.position, 1.0)); - output.uv = (input.uv - float2(0.5, 0.5)) * abs(input.position.xy); - - return output; -} \ No newline at end of file diff --git a/common/src/geometry/creator/shaders/template/gc.basic.vertex.input.hlsl b/common/src/geometry/creator/shaders/template/gc.basic.vertex.input.hlsl deleted file mode 100644 index d9e2fa172..000000000 --- a/common/src/geometry/creator/shaders/template/gc.basic.vertex.input.hlsl +++ /dev/null @@ -1,16 +0,0 @@ -#ifndef _THIS_EXAMPLE_GC_BASIC_VERTEX_INPUT_HLSL_ -#define _THIS_EXAMPLE_GC_BASIC_VERTEX_INPUT_HLSL_ - -struct VSInput -{ - [[vk::location(0)]] float3 position : POSITION; - [[vk::location(1)]] float4 color : COLOR; - [[vk::location(2)]] float2 uv : TEXCOORD; - [[vk::location(3)]] float3 normal : NORMAL; -}; - -#endif // _THIS_EXAMPLE_GC_BASIC_VERTEX_INPUT_HLSL_ - -/* - do not remove this text, WAVE is so bad that you can get errors if no proper ending xD -*/ diff --git a/common/src/geometry/creator/shaders/template/gc.common.hlsl b/common/src/geometry/creator/shaders/template/gc.common.hlsl deleted file mode 100644 index 4590cd4a3..000000000 --- a/common/src/geometry/creator/shaders/template/gc.common.hlsl +++ /dev/null @@ -1,18 +0,0 @@ -#ifndef _THIS_EXAMPLE_GC_COMMON_HLSL_ -#define _THIS_EXAMPLE_GC_COMMON_HLSL_ - -#ifdef __HLSL_VERSION - struct PSInput - { - float4 position : SV_Position; - float4 color : COLOR0; - }; -#endif // __HLSL_VERSION - -#include "SBasicViewParameters.hlsl" - -#endif // _THIS_EXAMPLE_GC_COMMON_HLSL_ - -/* - do not remove this text, WAVE is so bad that you can get errors if no proper ending xD -*/ \ No newline at end of file diff --git a/common/src/geometry/creator/shaders/template/gc.cone.vertex.input.hlsl b/common/src/geometry/creator/shaders/template/gc.cone.vertex.input.hlsl deleted file mode 100644 index 66221fef1..000000000 --- a/common/src/geometry/creator/shaders/template/gc.cone.vertex.input.hlsl +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _THIS_EXAMPLE_GC_CONE_VERTEX_INPUT_HLSL_ -#define _THIS_EXAMPLE_GC_CONE_VERTEX_INPUT_HLSL_ - -struct VSInput -{ - [[vk::location(0)]] float3 position : POSITION; - [[vk::location(1)]] float4 color : COLOR; - [[vk::location(2)]] float3 normal : NORMAL; -}; - -#endif // _THIS_EXAMPLE_GC_CONE_VERTEX_INPUT_HLSL_ - -/* - do not remove this text, WAVE is so bad that you can get errors if no proper 
ending xD -*/ diff --git a/common/src/geometry/creator/shaders/template/gc.ico.vertex.input.hlsl b/common/src/geometry/creator/shaders/template/gc.ico.vertex.input.hlsl deleted file mode 100644 index 6b85486d9..000000000 --- a/common/src/geometry/creator/shaders/template/gc.ico.vertex.input.hlsl +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef _THIS_EXAMPLE_GC_ICO_VERTEX_INPUT_HLSL_ -#define _THIS_EXAMPLE_GC_ICO_VERTEX_INPUT_HLSL_ - -struct VSInput -{ - [[vk::location(0)]] float3 position : POSITION; - [[vk::location(1)]] float3 normal : NORMAL; - [[vk::location(2)]] float2 uv : TEXCOORD; -}; - -#endif // _THIS_EXAMPLE_GC_ICO_VERTEX_INPUT_HLSL_ - -/* - do not remove this text, WAVE is so bad that you can get errors if no proper ending xD -*/ diff --git a/common/src/geometry/creator/shaders/template/gc.vertex.hlsl b/common/src/geometry/creator/shaders/template/gc.vertex.hlsl deleted file mode 100644 index 5a8f26722..000000000 --- a/common/src/geometry/creator/shaders/template/gc.vertex.hlsl +++ /dev/null @@ -1,22 +0,0 @@ -#include "gc.common.hlsl" - -// set 1, binding 0 -[[vk::binding(0, 1)]] -cbuffer CameraData -{ - SBasicViewParameters params; -}; - -PSInput VSMain(VSInput input) -{ - PSInput output; - - output.position = mul(params.MVP, float4(input.position, 1.0)); - output.color = float4(input.normal * 0.5 + 0.5, 1.0); - - return output; -} - -/* - do not remove this text, WAVE is so bad that you can get errors if no proper ending xD -*/ diff --git a/common/src/geometry/creator/shaders/template/grid.common.hlsl b/common/src/geometry/creator/shaders/template/grid.common.hlsl deleted file mode 100644 index bc6516600..000000000 --- a/common/src/geometry/creator/shaders/template/grid.common.hlsl +++ /dev/null @@ -1,40 +0,0 @@ -#ifndef _THIS_EXAMPLE_GRID_COMMON_HLSL_ -#define _THIS_EXAMPLE_GRID_COMMON_HLSL_ - -#ifdef __HLSL_VERSION - struct VSInput - { - [[vk::location(0)]] float3 position : POSITION; - [[vk::location(1)]] float4 color : COLOR; - [[vk::location(2)]] float2 uv : TEXCOORD; - [[vk::location(3)]] float3 normal : NORMAL; - }; - - struct PSInput - { - float4 position : SV_Position; - float2 uv : TEXCOORD0; - }; - - float gridTextureGradBox(float2 p, float2 ddx, float2 ddy) - { - float N = 30.0; // grid ratio - float2 w = max(abs(ddx), abs(ddy)) + 0.01; // filter kernel - - // analytic (box) filtering - float2 a = p + 0.5 * w; - float2 b = p - 0.5 * w; - float2 i = (floor(a) + min(frac(a) * N, 1.0) - floor(b) - min(frac(b) * N, 1.0)) / (N * w); - - // pattern - return (1.0 - i.x) * (1.0 - i.y); - } -#endif // __HLSL_VERSION - -#include "SBasicViewParameters.hlsl" - -#endif // _THIS_EXAMPLE_GRID_COMMON_HLSL_ - -/* - do not remove this text, WAVE is so bad that you can get errors if no proper ending xD -*/ \ No newline at end of file diff --git a/common/src/nbl/examples/CMakeLists.txt b/common/src/nbl/examples/CMakeLists.txt new file mode 100644 index 000000000..e486b2b22 --- /dev/null +++ b/common/src/nbl/examples/CMakeLists.txt @@ -0,0 +1,89 @@ +set(OUTPUT_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/auto-gen") +set(ARGS + # meta INTERFACE target with NSC compilation rules + TARGET NblExtExamplesAPISPIRV + + # build directory for its SPIRV outputs + BINARY_DIR ${OUTPUT_DIRECTORY} + + # preprocessor #define for BINARY_DIR bind point + MOUNT_POINT_DEFINE NBL_EXAMPLES_BUILD_MOUNT_POINT + + # extra NSC compile options + COMMON_OPTIONS -I "${COMMON_INCLUDE_DIRECTORY}" + + # out variable to which SPIRV access keys are appended to (including permutations), relative to BINARY_DIR + OUTPUT_VAR KEYS + + # 
include file with inline template key getters, use with #include directive on downstream targets + INCLUDE nbl/examples/common/build/spirv/keys.hpp + + # namespace for key getters in the include file + NAMESPACE nbl::builtin::examples::build +) + +# note json is array of objects, you can register all rules at once +set(JSON [=[ +[ + { + "INPUT": "shaders/geometry/unified.hlsl", + "KEY": "shaders/geometry/unified", + "COMPILE_OPTIONS": ["-T", "lib_6_6"], + "DEPENDS": [], + "CAPS": [] + } +] +]=]) + +NBL_CREATE_NSC_COMPILE_RULES(${ARGS} INPUTS ${JSON}) + +set(JSON [=[ +[ + { + "INPUT": "shaders/geometry/unified.hlsl", + "KEY": "shaders/geometry/unified-caps", + "COMPILE_OPTIONS": ["-T", "lib_6_6"], + "DEPENDS": [], + "CAPS": [ + { + "name": "shaderFloat64", + "type": "bool", + "values": [1, 0] + }, + { + "name": "subgroupSize", + "type": "uint16_t", + "values": [32, 64] + } + ] + }, + { + "INPUT": "shaders/geometry/unified.hlsl", + "KEY": "shaders/geometry/unified-caps-2", + "COMPILE_OPTIONS": ["-T", "lib_6_6"], + "DEPENDS": [], + "CAPS": [ + { + "name": "shaderFloat64", + "type": "bool", + "values": [1, 0] + } + ] + } +] +]=]) + +# it also supports incremental rule updates, uncomment to add rules with permutation caps (testing purposes, remove after review) +# NBL_CREATE_NSC_COMPILE_RULES(${ARGS} INPUTS ${JSON}) + +# note we can add more inputs from build dir which keys can be part of the same archive/mount point, +# ex. one could auto generate bc texture or whatever and add here like +# file(WRITE "${OUTPUT_DIRECTORY}/dummy.txt" "dummy, test") +# list(APPEND KEYS dummy.txt) + +NBL_CREATE_RESOURCE_ARCHIVE( + TARGET NblExtExamplesAPIBuiltinsBuild + BIND "${OUTPUT_DIRECTORY}" + BUILTINS ${KEYS} + NAMESPACE nbl::builtin::examples::build +) diff --git a/common/src/nbl/examples/pch.cpp b/common/src/nbl/examples/pch.cpp new file mode 100644 index 000000000..39a146f1d --- /dev/null +++ b/common/src/nbl/examples/pch.cpp @@ -0,0 +1 @@ +#include "nbl/examples/PCH.hpp" \ No newline at end of file diff --git a/common/src/nbl/examples/shaders/geometry/unified.hlsl b/common/src/nbl/examples/shaders/geometry/unified.hlsl new file mode 100644 index 000000000..07bdbbd5e --- /dev/null +++ b/common/src/nbl/examples/shaders/geometry/unified.hlsl @@ -0,0 +1,66 @@ +// +#include "nbl/examples/geometry/SPushConstants.hlsl" +using namespace nbl::hlsl; +using namespace nbl::hlsl::examples::geometry_creator_scene; + +// for dat sweet programmable pulling +[[vk::binding(0)]] Buffer utbs[SPushConstants::DescriptorCount]; + +// +[[vk::push_constant]] SPushConstants pc; + +// +struct SInterpolants +{ + float32_t4 ndc : SV_Position; + float32_t3 meta : COLOR1; +}; +#include "nbl/builtin/hlsl/math/linalg/fast_affine.hlsl" + +float32_t3 reconstructGeometricNormal(float32_t3 pos) +{ + const float32_t2x3 dPos_dScreen = float32_t2x3( + ddx(pos), + ddy(pos) + ); + return cross(dPos_dScreen[0],dPos_dScreen[1]); +} + +// +[shader("vertex")] +SInterpolants BasicVS(uint32_t VertexIndex : SV_VertexID) +{ + const float32_t3 position = utbs[pc.positionView][VertexIndex].xyz; + + SInterpolants output; + output.ndc = math::linalg::promoted_mul(pc.matrices.worldViewProj,position); + if (pc.normalView(0.5f),1.f); +} + +// TODO: do smooth normals on the cone +[shader("vertex")] +SInterpolants ConeVS(uint32_t VertexIndex : SV_VertexID) +{ + const float32_t3 position = utbs[pc.positionView][VertexIndex].xyz; + + SInterpolants output; + output.ndc = math::linalg::promoted_mul(pc.matrices.worldViewProj,position); + output.meta = 
mul(inverse(transpose(pc.matrices.normal)),position); + return output; +} +[shader("pixel")] +float32_t4 ConeFS(SInterpolants input) : SV_Target0 +{ + const float32_t3 normal = reconstructGeometricNormal(input.meta); + return float32_t4(normalize(normal)*0.5f+promote(0.5f),1.f); +} \ No newline at end of file diff --git a/media b/media index a98646358..c24f4e139 160000 --- a/media +++ b/media @@ -1 +1 @@ -Subproject commit a9864635879e5a616ac400eecd8b6451b498fbf1 +Subproject commit c24f4e13901554abc9fdf87081108cc7dca1db57 diff --git a/old_to_refactor/03_GPU_Mesh/CMakeLists.txt b/old_to_refactor/03_GPU_Mesh/CMakeLists.txt deleted file mode 100644 index a476b6203..000000000 --- a/old_to_refactor/03_GPU_Mesh/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ - -include(common RESULT_VARIABLE RES) -if(NOT RES) - message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") -endif() - -nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") \ No newline at end of file diff --git a/old_to_refactor/03_GPU_Mesh/main.cpp b/old_to_refactor/03_GPU_Mesh/main.cpp deleted file mode 100644 index cc871bb9f..000000000 --- a/old_to_refactor/03_GPU_Mesh/main.cpp +++ /dev/null @@ -1,244 +0,0 @@ - -#include "CCamera.hpp" - - -#include "nbl/nblpack.h" -struct VertexStruct -{ - /// every member needs to be at location aligned to its type size for GLSL - float Pos[3]; /// uses float hence need 4 byte alignment - uint8_t Col[2]; /// same logic needs 1 byte alignment - uint8_t uselessPadding[2]; /// so if there is a member with 4 byte alignment then whole struct needs 4 byte align, so pad it -} PACK_STRUCT; -#include "nbl/nblunpack.h" - -const char* vertexSource = R"===( -#version 430 core - -layout(location = 0) in vec4 vPos; //only a 3d position is passed from Nabla, but last (the W) coordinate gets filled with default 1.0 -layout(location = 1) in vec4 vCol; - -layout( push_constant, row_major ) uniform Block { - mat4 modelViewProj; -} PushConstants; - -layout(location = 0) out vec4 Color; //per vertex output color, will be interpolated across the triangle - -void main() -{ - gl_Position = PushConstants.modelViewProj*vPos; //only thing preventing the shader from being core-compliant - Color = vCol; -} -)==="; - -const char* fragmentSource = R"===( -#version 430 core - -layout(location = 0) in vec4 Color; //per vertex output color, will be interpolated across the triangle - -layout(location = 0) out vec4 pixelColor; - -void main() -{ - pixelColor = Color; -} -)==="; - -class GPUMesh : public ApplicationBase -{ - -public: - - nbl::core::smart_refctd_ptr gpuTransferFence; - nbl::core::smart_refctd_ptr gpuComputeFence; - nbl::video::IGPUObjectFromAssetConverter cpu2gpu; - - CommonAPI::InputSystem::ChannelReader mouse; - CommonAPI::InputSystem::ChannelReader keyboard; - Camera camera = Camera(vectorSIMDf(0, 0, 0), vectorSIMDf(0, 0, 0), matrix4SIMD()); - - int resourceIx = -1; - uint32_t acquiredNextFBO = {}; - std::chrono::system_clock::time_point lastTime; - bool frameDataFilled = false; - size_t frame_count = 0ull; - double time_sum = 0; - double dtList[NBL_FRAMES_TO_AVERAGE] = {}; - - core::smart_refctd_ptr frameComplete[FRAMES_IN_FLIGHT] = { nullptr }; - core::smart_refctd_ptr imageAcquire[FRAMES_IN_FLIGHT] = { nullptr }; - core::smart_refctd_ptr renderFinished[FRAMES_IN_FLIGHT] = { nullptr }; - core::smart_refctd_ptr commandBuffers[FRAMES_IN_FLIGHT]; - - nbl::video::ISwapchain::SCreationParams m_swapchainCreationParams; - - - - - void onAppInitialized_impl() 
override - { - - for (size_t i = 0ull; i < NBL_FRAMES_TO_AVERAGE; ++i) - dtList[i] = 0.0; - - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(WIN_W) / WIN_H, 0.1, 1000); - camera = Camera(core::vectorSIMDf(-4, 0, 0), core::vectorSIMDf(0, 0, 0), projectionMatrix); - } - - void workLoopBody() override - { - - auto renderStart = std::chrono::system_clock::now(); - const auto renderDt = std::chrono::duration_cast(renderStart - lastTime).count(); - lastTime = renderStart; - { // Calculate Simple Moving Average for FrameTime - time_sum -= dtList[frame_count]; - time_sum += renderDt; - dtList[frame_count] = renderDt; - frame_count++; - if (frame_count >= NBL_FRAMES_TO_AVERAGE) - { - frameDataFilled = true; - frame_count = 0; - } - - } - const double averageFrameTime = frameDataFilled ? (time_sum / (double)NBL_FRAMES_TO_AVERAGE) : (time_sum / frame_count); - -#ifdef NBL_MORE_LOGS - logger->log("renderDt = %f ------ averageFrameTime = %f", system::ILogger::ELL_INFO, renderDt, averageFrameTime); -#endif // NBL_MORE_LOGS - - auto averageFrameTimeDuration = std::chrono::duration(averageFrameTime); - auto nextPresentationTime = renderStart + averageFrameTimeDuration; - auto nextPresentationTimeStamp = std::chrono::duration_cast(nextPresentationTime.time_since_epoch()); - - inputSystem->getDefaultMouse(&mouse); - inputSystem->getDefaultKeyboard(&keyboard); - - camera.beginInputProcessing(nextPresentationTimeStamp); - mouse.consumeEvents([&](const ui::IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); }, logger.get()); - keyboard.consumeEvents([&](const ui::IKeyboardEventChannel::range_t& events) -> void { camera.keyboardProcess(events); }, logger.get()); - camera.endInputProcessing(nextPresentationTimeStamp); - - const auto& mvp = camera.getConcatenatedMatrix(); - - - - - - - - - asset::SViewport viewport; - viewport.minDepth = 1.f; - viewport.maxDepth = 0.f; - viewport.x = 0u; - viewport.y = 0u; - viewport.width = WIN_W; - viewport.height = WIN_H; - commandBuffer->setViewport(0u, 1u, &viewport); - - - - - - - //! 
Stress test for memleaks aside from demo how to create meshes that live on the GPU RAM - { - VertexStruct vertices[8]; - vertices[0] = VertexStruct{ {-1.f,-1.f,-1.f},{ 0, 0} }; - vertices[1] = VertexStruct{ { 1.f,-1.f,-1.f},{127, 0} }; - vertices[2] = VertexStruct{ {-1.f, 1.f,-1.f},{255, 0} }; - vertices[3] = VertexStruct{ { 1.f, 1.f,-1.f},{ 0,127} }; - vertices[4] = VertexStruct{ {-1.f,-1.f, 1.f},{127,127} }; - vertices[5] = VertexStruct{ { 1.f,-1.f, 1.f},{255,127} }; - vertices[6] = VertexStruct{ {-1.f, 1.f, 1.f},{ 0,255} }; - vertices[7] = VertexStruct{ { 1.f, 1.f, 1.f},{127,255} }; - - uint16_t indices_indexed16[] = - { - 0,1,2,1,2,3, - 4,5,6,5,6,7, - 0,1,4,1,4,5, - 2,3,6,3,6,7, - 0,2,4,2,4,6, - 1,3,5,3,5,7 - }; - - // auto upStreamBuff = driver->getDefaultUpStreamingBuffer(); - // core::smart_refctd_ptr upStreamRef(upStreamBuff->getBuffer()); - - // const void* dataToPlace[2] = { vertices,indices_indexed16 }; - // uint32_t offsets[2] = { video::StreamingTransientDataBufferMT<>::invalid_address,video::StreamingTransientDataBufferMT<>::invalid_address }; - // uint32_t alignments[2] = { sizeof(decltype(vertices[0u])),sizeof(decltype(indices_indexed16[0u])) }; - // uint32_t sizes[2] = { sizeof(vertices),sizeof(indices_indexed16) }; - // upStreamBuff->multi_place(2u, (const void* const*)dataToPlace, (uint32_t*)offsets, (uint32_t*)sizes, (uint32_t*)alignments); - // if (upStreamBuff->needsManualFlushOrInvalidate()) - // { - // auto upStreamMem = upStreamBuff->getBuffer()->getBoundMemory(); - // driver->flushMappedMemoryRanges({ video::IDeviceMemoryAllocation::MappedMemoryRange(upStreamMem,offsets[0],sizes[0]),video::IDeviceMemoryAllocation::MappedMemoryRange(upStreamMem,offsets[1],sizes[1]) }); - // } - - // asset::SPushConstantRange range[1] = { asset::ISpecializedShader::ESS_VERTEX,0u,sizeof(core::matrix4SIMD) }; - - // auto createSpecializedShaderFromSource = [=](const char* source, asset::ISpecializedShader::E_SHADER_STAGE stage) - // { - // auto spirv = device->getAssetManager()->getGLSLCompiler()->createSPIRVFromGLSL(source, stage, "main", "runtimeID"); - // auto unspec = driver->createShader(std::move(spirv)); - // return driver->createSpecializedShader(unspec.get(), { nullptr,nullptr,"main",stage }); - // }; - // // origFilepath is only relevant when you have filesystem #includes in your shader - // auto createSpecializedShaderFromSourceWithIncludes = [&](const char* source, asset::ISpecializedShader::E_SHADER_STAGE stage, const char* origFilepath) - // { - // auto resolved_includes = device->getAssetManager()->getGLSLCompiler()->resolveIncludeDirectives(source, stage, origFilepath); - // return createSpecializedShaderFromSource(reinterpret_cast(resolved_includes->getContent()->getPointer()), stage); - // }; - // core::smart_refctd_ptr shaders[2] = - // { - // createSpecializedShaderFromSourceWithIncludes(vertexSource,asset::ISpecializedShader::ESS_VERTEX, "shader.vert"), - // createSpecializedShaderFromSource(fragmentSource,asset::ISpecializedShader::ESS_FRAGMENT) - // }; - // auto shadersPtr = reinterpret_cast(shaders); - - // asset::SVertexInputParams inputParams; - // inputParams.enabledAttribFlags = 0b11u; - // inputParams.enabledBindingFlags = 0b1u; - // inputParams.attributes[0].binding = 0u; - // inputParams.attributes[0].format = asset::EF_R32G32B32_SFLOAT; - // inputParams.attributes[0].relativeOffset = offsetof(VertexStruct, Pos[0]); - // inputParams.attributes[1].binding = 0u; - // inputParams.attributes[1].format = asset::EF_R8G8_UNORM; - // 
inputParams.attributes[1].relativeOffset = offsetof(VertexStruct, Col[0]); - // inputParams.bindings[0].stride = sizeof(VertexStruct); - // inputParams.bindings[0].inputRate = asset::EVIR_PER_VERTEX; - - // asset::SBlendParams blendParams; // defaults are sane - - // asset::SPrimitiveAssemblyParams assemblyParams = { asset::EPT_TRIANGLE_LIST,false,1u }; - - // asset::SStencilOpParams defaultStencil; - // asset::SRasterizationParams rasterParams; - // rasterParams.faceCullingMode = asset::EFCM_NONE; - // auto pipeline = driver->createRenderpassIndependentPipeline(nullptr, driver->createPipelineLayout(range, range + 1u, nullptr, nullptr, nullptr, nullptr), - // shadersPtr, shadersPtr + sizeof(shaders) / sizeof(core::smart_refctd_ptr), - // inputParams, blendParams, assemblyParams, rasterParams); - - // asset::SBufferBinding bindings[video::IGPUMeshBuffer::MAX_ATTR_BUF_BINDING_COUNT]; - // bindings[0u] = { offsets[0],upStreamRef }; - // auto mb = core::make_smart_refctd_ptr(std::move(pipeline), nullptr, bindings, asset::SBufferBinding{offsets[1], upStreamRef}); - // { - // mb->setIndexType(asset::EIT_16BIT); - // mb->setIndexCount(2 * 3 * 6); - // } - - // driver->bindGraphicsPipeline(mb->getPipeline()); - // driver->pushConstants(mb->getPipeline()->getLayout(), asset::ISpecializedShader::ESS_VERTEX, 0u, sizeof(core::matrix4SIMD), mvp.pointer()); - // driver->drawMeshBuffer(mb.get()); - - // upStreamBuff->multi_free(2u, (uint32_t*)&offsets, (uint32_t*)&sizes, driver->placeFence()); - //} - //driver->endScene(); - } - } -}; diff --git a/old_to_refactor/05_NablaTutorialExample/CMakeLists.txt b/old_to_refactor/05_NablaTutorialExample/CMakeLists.txt deleted file mode 100644 index a476b6203..000000000 --- a/old_to_refactor/05_NablaTutorialExample/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ - -include(common RESULT_VARIABLE RES) -if(NOT RES) - message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory") -endif() - -nbl_create_executable_project("" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}") \ No newline at end of file diff --git a/old_to_refactor/05_NablaTutorialExample/config.json.template b/old_to_refactor/05_NablaTutorialExample/config.json.template deleted file mode 100644 index f961745c1..000000000 --- a/old_to_refactor/05_NablaTutorialExample/config.json.template +++ /dev/null @@ -1,28 +0,0 @@ -{ - "enableParallelBuild": true, - "threadsPerBuildProcess" : 2, - "isExecuted": false, - "scriptPath": "", - "cmake": { - "configurations": [ "Release", "Debug", "RelWithDebInfo" ], - "buildModes": [], - "requiredOptions": [] - }, - "profiles": [ - { - "backend": "vulkan", - "platform": "windows", - "buildModes": [], - "runConfiguration": "Release", - "gpuArchitectures": [] - } - ], - "dependencies": [], - "data": [ - { - "dependencies": [], - "command": [""], - "outputs": [] - } - ] -} \ No newline at end of file diff --git a/old_to_refactor/05_NablaTutorialExample/main.cpp b/old_to_refactor/05_NablaTutorialExample/main.cpp deleted file mode 100644 index abebb882c..000000000 --- a/old_to_refactor/05_NablaTutorialExample/main.cpp +++ /dev/null @@ -1,593 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". -// For conditions of distribution and use, see copyright notice in nabla.h - -#define _NBL_STATIC_LIB_ -#include -#include -#include - -#include "CCamera.hpp" -#include "../common/CommonAPI.h" - -/* - General namespaces. Entire engine consists of those bellow. 
-*/ - -using namespace nbl; -using namespace asset; -using namespace video; -using namespace core; - -/* - Uncomment for more detailed logging -*/ - -// #define NBL_MORE_LOGS - -class NablaTutorialExampleApp : public ApplicationBase -{ - /* - SIrrlichtCreationParameters holds some specific initialization information - about driver being used, size of window, stencil buffer or depth buffer. - Used to create a device. - */ - - constexpr static uint32_t WIN_W = 1280; - constexpr static uint32_t WIN_H = 720; - constexpr static uint32_t SC_IMG_COUNT = 3u; - constexpr static uint32_t FRAMES_IN_FLIGHT = 5u; - constexpr static uint64_t MAX_TIMEOUT = 99999999999999ull; - constexpr static size_t NBL_FRAMES_TO_AVERAGE = 100ull; - - static_assert(FRAMES_IN_FLIGHT > SC_IMG_COUNT); - -public: - /* - Most important objects to manage literally whole stuff are bellow. - By their usage you can create for example GPU objects, load or write - assets or manage objects on a scene. - */ - - nbl::core::smart_refctd_ptr windowManager; - nbl::core::smart_refctd_ptr window; - nbl::core::smart_refctd_ptr windowCb; - nbl::core::smart_refctd_ptr apiConnection; - nbl::core::smart_refctd_ptr surface; - nbl::core::smart_refctd_ptr utilities; - nbl::core::smart_refctd_ptr logicalDevice; - nbl::video::IPhysicalDevice* physicalDevice; - std::array queues = { nullptr, nullptr, nullptr, nullptr }; - nbl::core::smart_refctd_ptr swapchain; - nbl::core::smart_refctd_ptr renderpass; - nbl::core::smart_refctd_dynamic_array> fbo; - std::array, CommonAPI::InitOutput::MaxFramesInFlight>, CommonAPI::InitOutput::MaxQueuesCount> commandPools; // TODO: Multibuffer and reset the commandpools - nbl::core::smart_refctd_ptr system; - nbl::core::smart_refctd_ptr assetManager; - nbl::video::IGPUObjectFromAssetConverter::SParams cpu2gpuParams; - nbl::core::smart_refctd_ptr logger; - nbl::core::smart_refctd_ptr inputSystem; - - nbl::core::smart_refctd_ptr gpuTransferFence; - nbl::core::smart_refctd_ptr gpuComputeFence; - nbl::video::IGPUObjectFromAssetConverter cpu2gpu; - - core::smart_refctd_ptr gpuMeshBuffer; - core::smart_refctd_ptr gpuRenderpassIndependentPipeline; - core::smart_refctd_ptr gpuubo; - core::smart_refctd_ptr gpuDescriptorSet1; - core::smart_refctd_ptr gpuDescriptorSet3; - core::smart_refctd_ptr gpuGraphicsPipeline; - - core::smart_refctd_ptr frameComplete[FRAMES_IN_FLIGHT] = { nullptr }; - core::smart_refctd_ptr imageAcquire[FRAMES_IN_FLIGHT] = { nullptr }; - core::smart_refctd_ptr renderFinished[FRAMES_IN_FLIGHT] = { nullptr }; - core::smart_refctd_ptr commandBuffers[FRAMES_IN_FLIGHT]; - - nbl::video::ISwapchain::SCreationParams m_swapchainCreationParams; - - CommonAPI::InputSystem::ChannelReader mouse; - CommonAPI::InputSystem::ChannelReader keyboard; - Camera camera = Camera(vectorSIMDf(0, 0, 0), vectorSIMDf(0, 0, 0), matrix4SIMD()); - - uint32_t ds1UboBinding = 0; - int resourceIx; - uint32_t acquiredNextFBO = {}; - std::chrono::system_clock::time_point lastTime; - bool frameDataFilled = false; - size_t frame_count = 0ull; - double time_sum = 0; - double dtList[NBL_FRAMES_TO_AVERAGE] = {}; - - void setWindow(core::smart_refctd_ptr&& wnd) override - { - window = std::move(wnd); - } - void setSystem(core::smart_refctd_ptr&& s) override - { - system = std::move(s); - } - nbl::ui::IWindow* getWindow() override - { - return window.get(); - } - video::IAPIConnection* getAPIConnection() override - { - return apiConnection.get(); - } - video::ILogicalDevice* getLogicalDevice() override - { - return logicalDevice.get(); - } - 
video::IGPURenderpass* getRenderpass() override - { - return renderpass.get(); - } - void setSurface(core::smart_refctd_ptr&& s) override - { - surface = std::move(s); - } - void setFBOs(std::vector>& f) override - { - for (int i = 0; i < f.size(); i++) - { - fbo->begin()[i] = core::smart_refctd_ptr(f[i]); - } - } - void setSwapchain(core::smart_refctd_ptr&& s) override - { - swapchain = std::move(s); - } - uint32_t getSwapchainImageCount() override - { - return swapchain->getImageCount(); - } - virtual nbl::asset::E_FORMAT getDepthFormat() override - { - return nbl::asset::EF_D32_SFLOAT; - } - - APP_CONSTRUCTOR(NablaTutorialExampleApp) - - void onAppInitialized_impl() override - { - const auto swapchainImageUsage = static_cast(asset::IImage::EUF_COLOR_ATTACHMENT_BIT); - CommonAPI::InitParams initParams; - initParams.window = core::smart_refctd_ptr(window); - initParams.apiType = video::EAT_VULKAN; - initParams.appName = { _NBL_APP_NAME_ }; - initParams.framesInFlight = FRAMES_IN_FLIGHT; - initParams.windowWidth = WIN_W; - initParams.windowHeight = WIN_H; - initParams.swapchainImageCount = SC_IMG_COUNT; - initParams.swapchainImageUsage = swapchainImageUsage; - initParams.depthFormat = nbl::asset::EF_D32_SFLOAT; - auto initOutput = CommonAPI::InitWithDefaultExt(std::move(initParams)); - - window = std::move(initParams.window); - windowCb = std::move(initParams.windowCb); - apiConnection = std::move(initOutput.apiConnection); - surface = std::move(initOutput.surface); - utilities = std::move(initOutput.utilities); - logicalDevice = std::move(initOutput.logicalDevice); - physicalDevice = initOutput.physicalDevice; - queues = std::move(initOutput.queues); - renderpass = std::move(initOutput.renderToSwapchainRenderpass); - commandPools = std::move(initOutput.commandPools); - system = std::move(initOutput.system); - assetManager = std::move(initOutput.assetManager); - cpu2gpuParams = std::move(initOutput.cpu2gpuParams); - logger = std::move(initOutput.logger); - inputSystem = std::move(initOutput.inputSystem); - m_swapchainCreationParams = std::move(initOutput.swapchainCreationParams); - - CommonAPI::createSwapchain(std::move(logicalDevice), m_swapchainCreationParams, WIN_W, WIN_H, swapchain); - assert(swapchain); - fbo = CommonAPI::createFBOWithSwapchainImages( - swapchain->getImageCount(), WIN_W, WIN_H, - logicalDevice, swapchain, renderpass, - nbl::asset::EF_D32_SFLOAT - ); - - gpuTransferFence = logicalDevice->createFence(static_cast(0)); - gpuComputeFence = logicalDevice->createFence(static_cast(0)); - - /* - Helpfull class for managing basic geometry objects. - Thanks to it you can get half filled pipeline for your - geometries such as cubes, cones or spheres. - */ - - auto geometryCreator = assetManager->getGeometryCreator(); - auto rectangleGeometry = geometryCreator->createRectangleMesh(nbl::core::vector2df_SIMD(1.5, 3)); - - /* - Loading an asset bundle. You can specify some flags - and parameters to have an impact on extraordinary - tasks while loading for example. - */ - - asset::IAssetLoader::SAssetLoadParams loadingParams; - auto images_bundle = assetManager->getAsset("../../media/color_space_test/R8G8B8A8_1.png", loadingParams); - assert(!images_bundle.getContents().empty()); - auto image = images_bundle.getContents().begin()[0]; - - /* - By default an image that comes out of an image loader will only have the TRANSFER_DST usage flag. - We need to add more usages, as only we know what we'll do with the image farther along in the pipeline. 
- */ - auto image_raw = static_cast(image.get()); - image_raw->addImageUsageFlags(asset::IImage::EUF_SAMPLED_BIT); - - /* - Specifing gpu image view parameters to create a gpu - image view through the driver. - */ - - cpu2gpuParams.beginCommandBuffers(); - auto gpuImage = cpu2gpu.getGPUObjectsFromAssets(&image_raw, &image_raw + 1, cpu2gpuParams)->front(); - cpu2gpuParams.waitForCreationToComplete(); - auto& gpuParams = gpuImage->getCreationParameters(); - - IImageView::SCreationParams gpuImageViewParams = {}; - // Compute mipmap creation in Asset Converter tends to create some extra raw UINT views with STORAGE of the original image, - // so we need to declare that we won't be using STORAGE on this view or we wouldn't be able to use the SRGB format for it - gpuImageViewParams.subUsages = IGPUImage::EUF_SAMPLED_BIT; - gpuImageViewParams.image = gpuImage; - gpuImageViewParams.viewType = IGPUImageView::ET_2D; - gpuImageViewParams.format = gpuParams.format; - auto gpuImageView = logicalDevice->createImageView(std::move(gpuImageViewParams)); - - /* - Specifying cache key to default exsisting cached asset bundle - and specifying it's size where end is determined by - static_cast(0u) - */ - - const IAsset::E_TYPE types[]{ IAsset::E_TYPE::ET_SPECIALIZED_SHADER, IAsset::E_TYPE::ET_SPECIALIZED_SHADER, static_cast(0u) }; - - auto cpuVertexShader = core::smart_refctd_ptr_static_cast(assetManager->findAssets("nbl/builtin/material/lambertian/singletexture/specialized_shader.vert", types)->front().getContents().begin()[0]); - auto cpuFragmentShader = core::smart_refctd_ptr_static_cast(assetManager->findAssets("nbl/builtin/material/lambertian/singletexture/specialized_shader.frag", types)->front().getContents().begin()[0]); - - cpu2gpuParams.beginCommandBuffers(); - auto gpuVertexShader = cpu2gpu.getGPUObjectsFromAssets(&cpuVertexShader.get(), &cpuVertexShader.get() + 1, cpu2gpuParams)->front(); - auto gpuFragmentShader = cpu2gpu.getGPUObjectsFromAssets(&cpuFragmentShader.get(), &cpuFragmentShader.get() + 1, cpu2gpuParams)->front(); - cpu2gpuParams.waitForCreationToComplete(); - std::array gpuShaders = { gpuVertexShader.get(), gpuFragmentShader.get() }; - - size_t ds0SamplerBinding = 0, ds1UboBinding = 0; - { - /* - SBinding for the texture (sampler). - */ - - IGPUDescriptorSetLayout::SBinding gpuSamplerBinding; - gpuSamplerBinding.binding = ds0SamplerBinding; - gpuSamplerBinding.type = asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER; - gpuSamplerBinding.count = 1u; - gpuSamplerBinding.stageFlags = static_cast(IGPUShader::ESS_FRAGMENT); - gpuSamplerBinding.samplers = nullptr; - - /* - SBinding for UBO - basic view parameters. - */ - - IGPUDescriptorSetLayout::SBinding gpuUboBinding; - gpuUboBinding.count = 1u; - gpuUboBinding.binding = ds1UboBinding; - gpuUboBinding.stageFlags = static_cast(asset::ICPUShader::ESS_VERTEX | asset::ICPUShader::ESS_FRAGMENT); - gpuUboBinding.type = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER; - - /* - Creating specific descriptor set layouts from specialized bindings. - Those layouts needs to attached to pipeline layout if required by user. - IrrlichtBaW provides 4 places for descriptor set layout usage. - */ - - auto gpuDs1Layout = logicalDevice->createDescriptorSetLayout(&gpuUboBinding, &gpuUboBinding + 1); - auto gpuDs3Layout = logicalDevice->createDescriptorSetLayout(&gpuSamplerBinding, &gpuSamplerBinding + 1); - - /* - Creating gpu UBO with appropiate size. 
- - We know ahead of time that `SBasicViewParameters` struct is the expected structure of the only UBO block in the descriptor set nr. 1 of the shader. - */ - { - IGPUBuffer::SCreationParams creationParams = {}; - creationParams.usage = core::bitflag(asset::IBuffer::EUF_UNIFORM_BUFFER_BIT)|asset::IBuffer::EUF_TRANSFER_DST_BIT|asset::IBuffer::EUF_INLINE_UPDATE_VIA_CMDBUF; - creationParams.size = sizeof(SBasicViewParameters); - gpuubo = logicalDevice->createBuffer(std::move(creationParams)); - - IDeviceMemoryBacked::SDeviceMemoryRequirements memReq = gpuubo->getMemoryReqs(); - memReq.memoryTypeBits &= physicalDevice->getDeviceLocalMemoryTypeBits(); - logicalDevice->allocate(memReq, gpuubo.get()); - } - - /* - Creating descriptor sets - texture (sampler) and basic view parameters (UBO). - Specifying info and write parameters for updating certain descriptor set to the driver. - - We know ahead of time that `SBasicViewParameters` struct is the expected structure of the only UBO block in the descriptor set nr. 1 of the shader. - */ - - nbl::core::smart_refctd_ptr descriptorPool = nullptr; - { - constexpr uint32_t DescriptorSetCount = 2u; - - video::IDescriptorPool::SCreateInfo createInfo = {}; - createInfo.maxSets = DescriptorSetCount; - createInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER)] = 1; // DS1 uses one UBO descriptor. - createInfo.maxDescriptorCount[static_cast(asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER)] = 1; // DS3 uses one combined image sampler descriptor. - - descriptorPool = logicalDevice->createDescriptorPool(std::move(createInfo)); - } - - gpuDescriptorSet3 = descriptorPool->createDescriptorSet(gpuDs3Layout); - { - video::IGPUDescriptorSet::SWriteDescriptorSet write; - write.dstSet = gpuDescriptorSet3.get(); - write.binding = ds0SamplerBinding; - write.count = 1u; - write.arrayElement = 0u; - write.descriptorType = asset::IDescriptor::E_TYPE::ET_COMBINED_IMAGE_SAMPLER; - IGPUDescriptorSet::SDescriptorInfo info; - { - info.desc = std::move(gpuImageView); - ISampler::SParams samplerParams = { ISampler::ETC_CLAMP_TO_EDGE,ISampler::ETC_CLAMP_TO_EDGE,ISampler::ETC_CLAMP_TO_EDGE,ISampler::ETBC_FLOAT_OPAQUE_BLACK,ISampler::ETF_LINEAR,ISampler::ETF_LINEAR,ISampler::ESMM_LINEAR,0u,false,ECO_ALWAYS }; - info.info.image = { logicalDevice->createSampler(samplerParams),IGPUImage::EL_SHADER_READ_ONLY_OPTIMAL }; - } - write.info = &info; - logicalDevice->updateDescriptorSets(1u, &write, 0u, nullptr); - } - - gpuDescriptorSet1 = descriptorPool->createDescriptorSet(gpuDs1Layout); - { - video::IGPUDescriptorSet::SWriteDescriptorSet write; - write.dstSet = gpuDescriptorSet1.get(); - write.binding = ds1UboBinding; - write.count = 1u; - write.arrayElement = 0u; - write.descriptorType = asset::IDescriptor::E_TYPE::ET_UNIFORM_BUFFER; - video::IGPUDescriptorSet::SDescriptorInfo info; - { - info.desc = gpuubo; - info.info.buffer.offset = 0ull; - info.info.buffer.size = sizeof(SBasicViewParameters); - } - write.info = &info; - logicalDevice->updateDescriptorSets(1u, &write, 0u, nullptr); - } - - auto gpuPipelineLayout = logicalDevice->createPipelineLayout(nullptr, nullptr, nullptr, std::move(gpuDs1Layout), nullptr, std::move(gpuDs3Layout)); - - /* - Preparing required pipeline parameters and filling choosen one. - Note that some of them are returned from geometry creator according - to what I mentioned in returning half pipeline parameters. 
- */ - - asset::SBlendParams blendParams; - asset::SRasterizationParams rasterParams; - rasterParams.faceCullingMode = asset::EFCM_NONE; - - /* - Creating gpu pipeline with it's pipeline layout and specilized parameters. - Attaching vertex shader and fragment shaders. - */ - - gpuRenderpassIndependentPipeline = logicalDevice->createRenderpassIndependentPipeline(nullptr, std::move(gpuPipelineLayout), gpuShaders.data(), gpuShaders.data() + gpuShaders.size(), rectangleGeometry.inputParams, blendParams, rectangleGeometry.assemblyParams, rasterParams); - - nbl::video::IGPUGraphicsPipeline::SCreationParams graphicsPipelineParams; - graphicsPipelineParams.renderpassIndependent = core::smart_refctd_ptr(gpuRenderpassIndependentPipeline.get()); - graphicsPipelineParams.renderpass = core::smart_refctd_ptr(renderpass); - gpuGraphicsPipeline = logicalDevice->createGraphicsPipeline(nullptr, std::move(graphicsPipelineParams)); - - core::vectorSIMDf cameraPosition(-5, 0, 0); - matrix4SIMD projectionMatrix = matrix4SIMD::buildProjectionMatrixPerspectiveFovLH(core::radians(60.0f), float(WIN_W) / WIN_H, 0.01, 1000); - camera = Camera(cameraPosition, core::vectorSIMDf(0, 0, 0), projectionMatrix, 10.f, 1.f); - - /* - Creating gpu meshbuffer from parameters fetched from geometry creator return value. - */ - - constexpr auto MAX_ATTR_BUF_BINDING_COUNT = video::IGPUMeshBuffer::MAX_ATTR_BUF_BINDING_COUNT; - constexpr auto MAX_DATA_BUFFERS = MAX_ATTR_BUF_BINDING_COUNT + 1; - core::vector cpubuffers; - cpubuffers.reserve(MAX_DATA_BUFFERS); - for (auto i = 0; i < MAX_ATTR_BUF_BINDING_COUNT; i++) - { - auto buf = rectangleGeometry.bindings[i].buffer.get(); - if (buf) - cpubuffers.push_back(buf); - } - auto cpuindexbuffer = rectangleGeometry.indexBuffer.buffer.get(); - if (cpuindexbuffer) - cpubuffers.push_back(cpuindexbuffer); - - cpu2gpuParams.beginCommandBuffers(); - auto gpubuffers = cpu2gpu.getGPUObjectsFromAssets(cpubuffers.data(), cpubuffers.data() + cpubuffers.size(), cpu2gpuParams); - cpu2gpuParams.waitForCreationToComplete(); - - asset::SBufferBinding bindings[MAX_DATA_BUFFERS]; - for (auto i = 0, j = 0; i < MAX_ATTR_BUF_BINDING_COUNT; i++) - { - if (!rectangleGeometry.bindings[i].buffer) - continue; - auto buffPair = gpubuffers->operator[](j++); - bindings[i].offset = buffPair->getOffset(); - bindings[i].buffer = core::smart_refctd_ptr(buffPair->getBuffer()); - } - if (cpuindexbuffer) - { - auto buffPair = gpubuffers->back(); - bindings[MAX_ATTR_BUF_BINDING_COUNT].offset = buffPair->getOffset(); - bindings[MAX_ATTR_BUF_BINDING_COUNT].buffer = core::smart_refctd_ptr(buffPair->getBuffer()); - } - - gpuMeshBuffer = core::make_smart_refctd_ptr(core::smart_refctd_ptr(gpuRenderpassIndependentPipeline), nullptr, bindings, std::move(bindings[MAX_ATTR_BUF_BINDING_COUNT])); - { - gpuMeshBuffer->setIndexType(rectangleGeometry.indexType); - gpuMeshBuffer->setIndexCount(rectangleGeometry.indexCount); - gpuMeshBuffer->setBoundingBox(rectangleGeometry.bbox); - } - } - - const auto& graphicsCommandPools = commandPools[CommonAPI::InitOutput::EQT_GRAPHICS]; - for (uint32_t i = 0u; i < FRAMES_IN_FLIGHT; i++) - { - logicalDevice->createCommandBuffers(graphicsCommandPools[i].get(), video::IGPUCommandBuffer::EL_PRIMARY, 1, commandBuffers+i); - imageAcquire[i] = logicalDevice->createSemaphore(); - renderFinished[i] = logicalDevice->createSemaphore(); - } - } - - /* - Hot loop for rendering a scene. 
- */ - - void workLoopBody() override - { - ++resourceIx; - if (resourceIx >= FRAMES_IN_FLIGHT) - resourceIx = 0; - - auto& commandBuffer = commandBuffers[resourceIx]; - auto& fence = frameComplete[resourceIx]; - - if (fence) - { - logicalDevice->blockForFences(1u,&fence.get()); - logicalDevice->resetFences(1u,&fence.get()); - } - else - fence = logicalDevice->createFence(static_cast(0)); - - auto renderStart = std::chrono::system_clock::now(); - const auto renderDt = std::chrono::duration_cast(renderStart - lastTime).count(); - lastTime = renderStart; - { // Calculate Simple Moving Average for FrameTime - time_sum -= dtList[frame_count]; - time_sum += renderDt; - dtList[frame_count] = renderDt; - frame_count++; - if (frame_count >= NBL_FRAMES_TO_AVERAGE) - { - frameDataFilled = true; - frame_count = 0; - } - - } - const double averageFrameTime = frameDataFilled ? (time_sum / (double)NBL_FRAMES_TO_AVERAGE) : (time_sum / frame_count); - -#ifdef NBL_MORE_LOGS - logger->log("renderDt = %f ------ averageFrameTime = %f", system::ILogger::ELL_INFO, renderDt, averageFrameTime); -#endif // NBL_MORE_LOGS - - auto averageFrameTimeDuration = std::chrono::duration(averageFrameTime); - auto nextPresentationTime = renderStart + averageFrameTimeDuration; - auto nextPresentationTimeStamp = std::chrono::duration_cast(nextPresentationTime.time_since_epoch()); - - inputSystem->getDefaultMouse(&mouse); - inputSystem->getDefaultKeyboard(&keyboard); - - camera.beginInputProcessing(nextPresentationTimeStamp); - mouse.consumeEvents([&](const ui::IMouseEventChannel::range_t& events) -> void { camera.mouseProcess(events); }, logger.get()); - keyboard.consumeEvents([&](const ui::IKeyboardEventChannel::range_t& events) -> void { camera.keyboardProcess(events); }, logger.get()); - camera.endInputProcessing(nextPresentationTimeStamp); - - const auto& viewMatrix = camera.getViewMatrix(); - const auto& viewProjectionMatrix = camera.getConcatenatedMatrix(); - - commandBuffer->reset(nbl::video::IGPUCommandBuffer::ERF_RELEASE_RESOURCES_BIT); - commandBuffer->begin(IGPUCommandBuffer::EU_NONE); - - asset::SViewport viewport; - viewport.minDepth = 1.f; - viewport.maxDepth = 0.f; - viewport.x = 0u; - viewport.y = 0u; - viewport.width = WIN_W; - viewport.height = WIN_H; - commandBuffer->setViewport(0u, 1u, &viewport); - VkRect2D scissor; - scissor.offset = {0u,0u}; - scissor.extent = {WIN_W,WIN_H}; - commandBuffer->setScissor(0u,1u,&scissor); - - const auto viewProjection = camera.getConcatenatedMatrix(); - core::matrix3x4SIMD modelMatrix; - modelMatrix.setRotation(nbl::core::quaternion(0, 1, 0)); - - auto mv = core::concatenateBFollowedByA(camera.getViewMatrix(), modelMatrix); - auto mvp = core::concatenateBFollowedByA(viewProjection, modelMatrix); - core::matrix3x4SIMD normalMat; - mv.getSub3x3InverseTranspose(normalMat); - - /* - Updating UBO for basic view parameters and sending - updated data to staging buffer that will redirect - the data to graphics card - to vertex shader. 
- */ - - SBasicViewParameters uboData; - memcpy(uboData.MV, mv.pointer(), sizeof(mv)); - memcpy(uboData.MVP, mvp.pointer(), sizeof(mvp)); - memcpy(uboData.NormalMat, normalMat.pointer(), sizeof(normalMat)); - commandBuffer->updateBuffer(gpuubo.get(), 0ull, gpuubo->getSize(), &uboData); - - swapchain->acquireNextImage(MAX_TIMEOUT, imageAcquire[resourceIx].get(), nullptr, &acquiredNextFBO); - - nbl::video::IGPUCommandBuffer::SRenderpassBeginInfo beginInfo; - { - VkRect2D area; - area.offset = { 0,0 }; - area.extent = { WIN_W, WIN_H }; - asset::SClearValue clear[2] = {}; - clear[0].color.float32[0] = 0.f; - clear[0].color.float32[1] = 0.f; - clear[0].color.float32[2] = 0.f; - clear[0].color.float32[3] = 1.f; - clear[1].depthStencil.depth = 0.f; - - beginInfo.clearValueCount = 2u; - beginInfo.framebuffer = fbo->begin()[acquiredNextFBO]; - beginInfo.renderpass = renderpass; - beginInfo.renderArea = area; - beginInfo.clearValues = clear; - } - commandBuffer->beginRenderPass(&beginInfo, nbl::asset::ESC_INLINE); - - /* - Binding the most important objects needed to - render anything on the screen with textures: - - - gpu pipeline - - gpu descriptor sets - */ - - commandBuffer->bindGraphicsPipeline(gpuGraphicsPipeline.get()); - commandBuffer->bindDescriptorSets(asset::EPBP_GRAPHICS, gpuRenderpassIndependentPipeline->getLayout(), 1u, 1u, &gpuDescriptorSet1.get(), 0u); - commandBuffer->bindDescriptorSets(asset::EPBP_GRAPHICS, gpuRenderpassIndependentPipeline->getLayout(), 3u, 1u, &gpuDescriptorSet3.get(), 0u); - - /* - Drawing a mesh (created rectangle) with it's gpu mesh buffer usage. - */ - - commandBuffer->drawMeshBuffer(gpuMeshBuffer.get()); - - commandBuffer->endRenderPass(); - commandBuffer->end(); - - CommonAPI::Submit(logicalDevice.get(), commandBuffer.get(), queues[CommonAPI::InitOutput::EQT_GRAPHICS], imageAcquire[resourceIx].get(), renderFinished[resourceIx].get(), fence.get()); - CommonAPI::Present(logicalDevice.get(), swapchain.get(), queues[CommonAPI::InitOutput::EQT_GRAPHICS], renderFinished[resourceIx].get(), acquiredNextFBO); - } - - bool keepRunning() override - { - return windowCb->isWindowOpen(); - } - - void onAppTerminated_impl() override { logicalDevice->waitIdle(); } -}; - -NBL_COMMON_API_MAIN(NablaTutorialExampleApp) diff --git a/old_to_refactor/05_NablaTutorialExample/pipeline.groovy b/old_to_refactor/05_NablaTutorialExample/pipeline.groovy deleted file mode 100644 index 31cadf9e9..000000000 --- a/old_to_refactor/05_NablaTutorialExample/pipeline.groovy +++ /dev/null @@ -1,50 +0,0 @@ -import org.DevshGraphicsProgramming.Agent -import org.DevshGraphicsProgramming.BuilderInfo -import org.DevshGraphicsProgramming.IBuilder - -class CNablaTutorialExampleBuilder extends IBuilder -{ - public CNablaTutorialExampleBuilder(Agent _agent, _info) - { - super(_agent, _info) - } - - @Override - public boolean prepare(Map axisMapping) - { - return true - } - - @Override - public boolean build(Map axisMapping) - { - IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION") - IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE") - - def nameOfBuildDirectory = getNameOfBuildDirectory(buildType) - def nameOfConfig = getNameOfConfig(config) - - agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v") - - return true - } - - @Override - public boolean test(Map axisMapping) - { - return true - } - - @Override - public boolean install(Map axisMapping) - { - 
return true
-	}
-}
-
-def create(Agent _agent, _info)
-{
-	return new CNablaTutorialExampleBuilder(_agent, _info)
-}
-
-return this
\ No newline at end of file
diff --git a/old_to_refactor/20_Megatexture/main.cpp b/old_to_refactor/20_Megatexture/main.cpp
index 35d0692af..5c309ff24 100644
--- a/old_to_refactor/20_Megatexture/main.cpp
+++ b/old_to_refactor/20_Megatexture/main.cpp
@@ -684,7 +684,7 @@ APP_CONSTRUCTOR(MegaTextureApp)
 	video::IGPUBuffer::SCreationParams bufferCreationParams;
 	bufferCreationParams.usage = asset::IBuffer::EUF_STORAGE_BUFFER_BIT;
 	bufferCreationParams.size = sizeof(video::IGPUVirtualTexture::SPrecomputedData);
-	core::smart_refctd_ptr utilities = core::make_smart_refctd_ptr(core::smart_refctd_ptr(logicalDevice));
+	core::smart_refctd_ptr utilities = video::IUtilities::create(core::smart_refctd_ptr(logicalDevice));
 	core::smart_refctd_ptr buffer = utilities->createFilledDeviceLocalBufferOnDedMem(queues[CommonAPI::InitOutput::EQT_TRANSFER_UP], std::move(bufferCreationParams), &gpuvt->getPrecomputedData());
 	{
diff --git a/old_to_refactor/49_ComputeFFT/CMakeLists.txt b/old_to_refactor/49_ComputeFFT/CMakeLists.txt
deleted file mode 100644
index b591db9e9..000000000
--- a/old_to_refactor/49_ComputeFFT/CMakeLists.txt
+++ /dev/null
@@ -1,11 +0,0 @@
-
-include(common RESULT_VARIABLE RES)
-if(NOT RES)
-	message(FATAL_ERROR "common.cmake not found. Should be in {repo_root}/cmake directory")
-endif()
-
-set(EXAMPLE_SOURCES
-	../../src/nbl/ext/FFT/FFT.cpp
-)
-
-nbl_create_executable_project("${EXAMPLE_SOURCES}" "" "" "" "${NBL_EXECUTABLE_PROJECT_CREATION_PCH_TARGET}")
\ No newline at end of file
diff --git a/old_to_refactor/49_ComputeFFT/config.json.template b/old_to_refactor/49_ComputeFFT/config.json.template
deleted file mode 100644
index f961745c1..000000000
--- a/old_to_refactor/49_ComputeFFT/config.json.template
+++ /dev/null
@@ -1,28 +0,0 @@
-{
-	"enableParallelBuild": true,
-	"threadsPerBuildProcess" : 2,
-	"isExecuted": false,
-	"scriptPath": "",
-	"cmake": {
-		"configurations": [ "Release", "Debug", "RelWithDebInfo" ],
-		"buildModes": [],
-		"requiredOptions": []
-	},
-	"profiles": [
-		{
-			"backend": "vulkan",
-			"platform": "windows",
-			"buildModes": [],
-			"runConfiguration": "Release",
-			"gpuArchitectures": []
-		}
-	],
-	"dependencies": [],
-	"data": [
-		{
-			"dependencies": [],
-			"command": [""],
-			"outputs": []
-		}
-	]
-}
\ No newline at end of file
diff --git a/old_to_refactor/49_ComputeFFT/extra_parameters.glsl b/old_to_refactor/49_ComputeFFT/extra_parameters.glsl
deleted file mode 100644
index 032f4c363..000000000
--- a/old_to_refactor/49_ComputeFFT/extra_parameters.glsl
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O.
-// This file is part of the "Nabla Engine".
-// For conditions of distribution and use, see copyright notice in nabla.h - -#include "nbl/builtin/glsl/ext/FFT/parameters_struct.glsl" -struct convolve_parameters_t -{ - nbl_glsl_ext_FFT_Parameters_t fft; - vec2 kernel_half_pixel_size; -}; - -struct image_store_parameters_t -{ - nbl_glsl_ext_FFT_Parameters_t fft; - ivec2 unpad_offset; -}; \ No newline at end of file diff --git a/old_to_refactor/49_ComputeFFT/fft_convolve_ifft.comp b/old_to_refactor/49_ComputeFFT/fft_convolve_ifft.comp deleted file mode 100644 index 18702fe81..000000000 --- a/old_to_refactor/49_ComputeFFT/fft_convolve_ifft.comp +++ /dev/null @@ -1,109 +0,0 @@ -layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in; - -layout(set=0, binding=2) uniform sampler2D NormalizedKernel[3]; - -/* TODO: Hardcode the parameters for the frequent FFTs -uvec3 nbl_glsl_ext_FFT_Parameters_t_getDimensions() -{ - return uvec3(1280u,1024u,1u); -} -bool nbl_glsl_ext_FFT_Parameters_t_getIsInverse() -{ - return false; -} -uint nbl_glsl_ext_FFT_Parameters_t_getDirection() -{ - return 0u; -} -uint nbl_glsl_ext_FFT_Parameters_t_getMaxChannel() -{ - return 2u; -} -uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize() -{ - return 11u; -} -uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType() -{ - return 3u; // _NBL_GLSL_EXT_FFT_PAD_MIRROR_; -} -uvec4 nbl_glsl_ext_FFT_Parameters_t_getInputStrides() -{ - return uvec4(1024u,1u,0u,1024u*1280u); -} -uvec4 nbl_glsl_ext_FFT_Parameters_t_getOutputStrides() -{ - return uvec4(1u,1280u,0u,1280u*1024u); -} -#define _NBL_GLSL_EXT_FFT_PARAMETERS_METHODS_DECLARED_ -*/ - -#include "extra_parameters.glsl" -layout(push_constant) uniform PushConstants -{ - convolve_parameters_t params; -} pc; -#define _NBL_GLSL_EXT_FFT_PUSH_CONSTANTS_DEFINED_ - -nbl_glsl_ext_FFT_Parameters_t nbl_glsl_ext_FFT_getParameters() -{ - return pc.params.fft; -} -#define _NBL_GLSL_EXT_FFT_GET_PARAMETERS_DEFINED_ - -#define _NBL_GLSL_EXT_FFT_MAIN_DEFINED_ -#include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp" - -void convolve(in uint item_per_thread_count, in uint ch) -{ - // TODO: decouple kernel size from image size (can't get the math to work in my head) - for(uint t=0u; t>1u; - const uint shifted = tid-padding; - if (tid>=padding && shifted -nbl_glsl_complex nbl_glsl_ext_FFT_getPaddedData(in ivec3 coordinate, in uint channel) -{ - ivec2 inputImageSize = textureSize(inputImage, 0); - vec2 normalizedCoords = (vec2(coordinate.xy)+vec2(0.5f))/(vec2(inputImageSize)*KERNEL_SCALE); - vec4 texelValue = textureLod(inputImage, normalizedCoords+vec2(0.5-0.5/KERNEL_SCALE), -log2(KERNEL_SCALE)); - return nbl_glsl_complex(texelValue[channel], 0.0f); -} -#define _NBL_GLSL_EXT_FFT_GET_PADDED_DATA_DEFINED_ - - -/* TODO: Hardcode the parameters for the frequent FFTs -#if _NBL_GLSL_EXT_FFT_MAX_DIM_SIZE_>512 -uvec3 nbl_glsl_ext_FFT_Parameters_t_getDimensions() -{ - return uvec3(1280u,720u,1u); -} -bool nbl_glsl_ext_FFT_Parameters_t_getIsInverse() -{ - return false; -} -uint nbl_glsl_ext_FFT_Parameters_t_getDirection() -{ - return 1u; -} -uint nbl_glsl_ext_FFT_Parameters_t_getMaxChannel() -{ - return 2u; -} -uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize() -{ - return 10u; -} -uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType() -{ - return 3u; // _NBL_GLSL_EXT_FFT_PAD_MIRROR_; -} -uvec4 nbl_glsl_ext_FFT_Parameters_t_getInputStrides() -{ - return uvec4(0xdeadbeefu); -} -uvec4 nbl_glsl_ext_FFT_Parameters_t_getOutputStrides() -{ - return uvec4(1024u,1u,0u,1024u*1280u); -} -#define _NBL_GLSL_EXT_FFT_PARAMETERS_METHODS_DECLARED_ 
-#endif -*/ - -#include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp" \ No newline at end of file diff --git a/old_to_refactor/49_ComputeFFT/last_fft.comp b/old_to_refactor/49_ComputeFFT/last_fft.comp deleted file mode 100644 index 2183ef63c..000000000 --- a/old_to_refactor/49_ComputeFFT/last_fft.comp +++ /dev/null @@ -1,72 +0,0 @@ -layout(local_size_x=_NBL_GLSL_WORKGROUP_SIZE_, local_size_y=1, local_size_z=1) in; - -// Output Descriptor -layout(set=0, binding=1, rgba16f) uniform image2D outImage; -#define _NBL_GLSL_EXT_FFT_OUTPUT_DESCRIPTOR_DEFINED_ - -/* TODO: Hardcode the parameters for the frequent FFTs -uvec3 nbl_glsl_ext_FFT_Parameters_t_getDimensions() -{ - return uvec3(1280u,1024u,1u); -} -bool nbl_glsl_ext_FFT_Parameters_t_getIsInverse() -{ - return true; -} -uint nbl_glsl_ext_FFT_Parameters_t_getDirection() -{ - return 1u; -} -uint nbl_glsl_ext_FFT_Parameters_t_getMaxChannel() -{ - return 2u; -} -uint nbl_glsl_ext_FFT_Parameters_t_getLog2FFTSize() -{ - return 10u; -} -uint nbl_glsl_ext_FFT_Parameters_t_getPaddingType() -{ - return 3u; // _NBL_GLSL_EXT_FFT_PAD_MIRROR_; -} -uvec4 nbl_glsl_ext_FFT_Parameters_t_getInputStrides() -{ - return uvec4(1u,1280u,0u,1280u*1024u); -} -uvec4 nbl_glsl_ext_FFT_Parameters_t_getOutputStrides() -{ - return uvec4(0xdeadbeefu); -} -#define _NBL_GLSL_EXT_FFT_PARAMETERS_METHODS_DECLARED_ -*/ - -#include "extra_parameters.glsl" -layout(push_constant) uniform PushConstants -{ - image_store_parameters_t params; -} pc; -#define _NBL_GLSL_EXT_FFT_PUSH_CONSTANTS_DEFINED_ - -nbl_glsl_ext_FFT_Parameters_t nbl_glsl_ext_FFT_getParameters() -{ - return pc.params.fft; -} -#define _NBL_GLSL_EXT_FFT_GET_PARAMETERS_DEFINED_ - - -#include -void nbl_glsl_ext_FFT_setData(in uvec3 coordinate, in uint channel, in nbl_glsl_complex complex_value) -{ - const ivec2 coords = ivec2(coordinate.xy)-pc.params.unpad_offset; - - if (all(lessThanEqual(ivec2(0),coords)) && all(greaterThan(imageSize(outImage),coords))) - { - vec4 color_value = imageLoad(outImage, coords); - color_value[channel] = complex_value.x; - imageStore(outImage, coords, color_value); - } -} -#define _NBL_GLSL_EXT_FFT_SET_DATA_DEFINED_ - - -#include "nbl/builtin/glsl/ext/FFT/default_compute_fft.comp" \ No newline at end of file diff --git a/old_to_refactor/49_ComputeFFT/main.cpp b/old_to_refactor/49_ComputeFFT/main.cpp deleted file mode 100644 index ba2b7e33e..000000000 --- a/old_to_refactor/49_ComputeFFT/main.cpp +++ /dev/null @@ -1,753 +0,0 @@ -// Copyright (C) 2018-2020 - DevSH Graphics Programming Sp. z O.O. -// This file is part of the "Nabla Engine". 
-// For conditions of distribution and use, see copyright notice in nabla.h - -#define _NBL_STATIC_LIB_ -#include -#include -#include - -#include "nbl/ext/FFT/FFT.h" -#include "../common/QToQuitEventReceiver.h" - -using namespace nbl; -using namespace nbl::core; -using namespace nbl::asset; -using namespace nbl::video; - -using FFTClass = ext::FFT::FFT; - -constexpr uint32_t channelCountOverride = 3u; - -inline core::smart_refctd_ptr createShader( - video::IVideoDriver* driver, - const uint32_t maxFFTlen, - const bool useHalfStorage, - const char* includeMainName, - float kernelScale = 1.f) -{ - const char* sourceFmt = -R"===(#version 430 core - -#define _NBL_GLSL_WORKGROUP_SIZE_ %u -#define _NBL_GLSL_EXT_FFT_MAX_DIM_SIZE_ %u -#define _NBL_GLSL_EXT_FFT_HALF_STORAGE_ %u - -#define KERNEL_SCALE %f - -#include "%s" - -)==="; - - const size_t extraSize = 4u+8u+8u+128u; - - constexpr uint32_t DEFAULT_WORK_GROUP_SIZE = FFTClass::DEFAULT_WORK_GROUP_SIZE; - auto shader = core::make_smart_refctd_ptr(strlen(sourceFmt)+extraSize+1u); - snprintf( - reinterpret_cast(shader->getPointer()),shader->getSize(), sourceFmt, - DEFAULT_WORK_GROUP_SIZE, - maxFFTlen, - useHalfStorage ? 1u:0u, - kernelScale, - includeMainName - ); - - auto cpuSpecializedShader = core::make_smart_refctd_ptr( - core::make_smart_refctd_ptr(std::move(shader),ICPUShader::buffer_contains_glsl), - ISpecializedShader::SInfo{nullptr, nullptr, "main", asset::ISpecializedShader::ESS_COMPUTE} - ); - - auto gpuShader = driver->createShader(nbl::core::smart_refctd_ptr(cpuSpecializedShader->getUnspecialized())); - - auto gpuSpecializedShader = driver->createSpecializedShader(gpuShader.get(), cpuSpecializedShader->getSpecializationInfo()); - - return gpuSpecializedShader; -} - - - -inline void updateDescriptorSet_Convolution ( - video::IVideoDriver * driver, - video::IGPUDescriptorSet * set, - core::smart_refctd_ptr inputBufferDescriptor, - core::smart_refctd_ptr outputBufferDescriptor, - const core::smart_refctd_ptr* kernelNormalizedSpectrumImageDescriptors) -{ - constexpr uint32_t descCount = 3u; - video::IGPUDescriptorSet::SDescriptorInfo pInfos[descCount-1u+channelCountOverride]; - video::IGPUDescriptorSet::SWriteDescriptorSet pWrites[descCount]; - - for (auto i = 0; i < descCount; i++) - { - pWrites[i].binding = i; - pWrites[i].dstSet = set; - pWrites[i].arrayElement = 0u; - pWrites[i].info = pInfos+i; - } - - // Input Buffer - pWrites[0].descriptorType = asset::EDT_STORAGE_BUFFER; - pWrites[0].count = 1; - pInfos[0].desc = inputBufferDescriptor; - pInfos[0].buffer.size = inputBufferDescriptor->getSize(); - pInfos[0].buffer.offset = 0u; - - // - pWrites[1].descriptorType = asset::EDT_STORAGE_BUFFER; - pWrites[1].count = 1; - pInfos[1].desc = outputBufferDescriptor; - pInfos[1].buffer.size = outputBufferDescriptor->getSize(); - pInfos[1].buffer.offset = 0u; - - // Kernel Buffer - pWrites[2].descriptorType = asset::EDT_COMBINED_IMAGE_SAMPLER; - pWrites[2].count = channelCountOverride; - for (uint32_t i=0u; iupdateDescriptorSets(descCount, pWrites, 0u, nullptr); -} -inline void updateDescriptorSet_LastFFT ( - video::IVideoDriver * driver, - video::IGPUDescriptorSet * set, - core::smart_refctd_ptr inputBufferDescriptor, - core::smart_refctd_ptr outputImageDescriptor) -{ - video::IGPUDescriptorSet::SDescriptorInfo pInfos[2]; - video::IGPUDescriptorSet::SWriteDescriptorSet pWrites[2]; - - for (auto i = 0; i< 2; i++) - { - pWrites[i].dstSet = set; - pWrites[i].arrayElement = 0u; - pWrites[i].count = 1u; - pWrites[i].info = pInfos+i; - } - - // 
Input Buffer - pWrites[0].binding = 0; - pWrites[0].descriptorType = asset::EDT_STORAGE_BUFFER; - pWrites[0].count = 1; - pInfos[0].desc = inputBufferDescriptor; - pInfos[0].buffer.size = inputBufferDescriptor->getSize(); - pInfos[0].buffer.offset = 0u; - - // Output Buffer - pWrites[1].binding = 1; - pWrites[1].descriptorType = asset::EDT_STORAGE_IMAGE; - pWrites[1].count = 1; - pInfos[1].desc = outputImageDescriptor; - pInfos[1].image.sampler = nullptr; - pInfos[1].image.imageLayout = static_cast(0u);; - - driver->updateDescriptorSets(2u, pWrites, 0u, nullptr); -} - -using nbl_glsl_ext_FFT_Parameters_t = ext::FFT::FFT::Parameters_t; -struct vec2 -{ - float x,y; -}; -struct ivec2 -{ - int32_t x,y; -}; -#include "extra_parameters.glsl" - - -int main() -{ - nbl::SIrrlichtCreationParameters deviceParams; - deviceParams.Bits = 24; //may have to set to 32bit for some platforms - deviceParams.ZBufferBits = 24; //we'd like 32bit here - deviceParams.DriverType = EDT_OPENGL; //! Only Well functioning driver, software renderer left for sake of 2D image drawing - deviceParams.WindowSize = dimension2d(1280, 720); - deviceParams.Fullscreen = false; - deviceParams.Vsync = true; //! If supported by target platform - deviceParams.Doublebuffer = true; - deviceParams.Stencilbuffer = false; //! This will not even be a choice soon - - auto device = createDeviceEx(deviceParams); - if (!device) - return 1; // could not create selected driver. - - QToQuitEventReceiver receiver; - device->setEventReceiver(&receiver); - - IVideoDriver* driver = device->getVideoDriver(); - - nbl::io::IFileSystem* filesystem = device->getFileSystem(); - IAssetManager* am = device->getAssetManager(); - // Loading SrcImage and Kernel Image from File - - IAssetLoader::SAssetLoadParams lp; - auto srcImageBundle = am->getAsset("../../media/colorexr.exr", lp); - auto kerImageBundle = am->getAsset("../../media/kernels/physical_flare_256.exr", lp); - - // get GPU image views - smart_refctd_ptr srcImageView; - { - auto srcGpuImages = driver->getGPUObjectsFromAssets(srcImageBundle.getContents()); - - IGPUImageView::SCreationParams srcImgViewInfo; - srcImgViewInfo.flags = static_cast(0u); - srcImgViewInfo.image = srcGpuImages->operator[](0u); - srcImgViewInfo.viewType = IGPUImageView::ET_2D; - srcImgViewInfo.format = srcImgViewInfo.image->getCreationParameters().format; - srcImgViewInfo.subresourceRange.aspectMask = static_cast(0u); - srcImgViewInfo.subresourceRange.baseMipLevel = 0; - srcImgViewInfo.subresourceRange.levelCount = 1; - srcImgViewInfo.subresourceRange.baseArrayLayer = 0; - srcImgViewInfo.subresourceRange.layerCount = 1; - srcImageView = driver->createImageView(std::move(srcImgViewInfo)); - } - smart_refctd_ptr kerImageView; - { - auto kerGpuImages = driver->getGPUObjectsFromAssets(kerImageBundle.getContents()); - - IGPUImageView::SCreationParams kerImgViewInfo; - kerImgViewInfo.flags = static_cast(0u); - kerImgViewInfo.image = kerGpuImages->operator[](0u); - kerImgViewInfo.viewType = IGPUImageView::ET_2D; - kerImgViewInfo.format = kerImgViewInfo.image->getCreationParameters().format; - kerImgViewInfo.subresourceRange.aspectMask = static_cast(0u); - kerImgViewInfo.subresourceRange.baseMipLevel = 0; - kerImgViewInfo.subresourceRange.levelCount = kerImgViewInfo.image->getCreationParameters().mipLevels; - kerImgViewInfo.subresourceRange.baseArrayLayer = 0; - kerImgViewInfo.subresourceRange.layerCount = 1; - kerImageView = driver->createImageView(std::move(kerImgViewInfo)); - } - - // agree on formats - const E_FORMAT srcFormat = 
srcImageView->getCreationParameters().format; - uint32_t srcNumChannels = getFormatChannelCount(srcFormat); - uint32_t kerNumChannels = getFormatChannelCount(kerImageView->getCreationParameters().format); - //! OVERRIDE (we dont need alpha) - srcNumChannels = channelCountOverride; - kerNumChannels = channelCountOverride; - assert(srcNumChannels == kerNumChannels); // Just to make sure, because the other case is not handled in this example - - // Create Out Image - smart_refctd_ptr outImg; - smart_refctd_ptr outImgView; - { - auto dstImgViewInfo = srcImageView->getCreationParameters(); - - auto dstImgInfo = dstImgViewInfo.image->getCreationParameters(); - outImg = driver->createDeviceLocalGPUImageOnDedMem(std::move(dstImgInfo)); - - dstImgViewInfo.image = outImg; - outImgView = driver->createImageView(IGPUImageView::SCreationParams(dstImgViewInfo)); - } - - // input pipeline - auto imageFirstFFTPipelineLayout = [driver]() -> auto - { - IGPUDescriptorSetLayout::SBinding bnd[] = - { - { - 0u, - EDT_COMBINED_IMAGE_SAMPLER, - 1u, - ISpecializedShader::ESS_COMPUTE, - nullptr - }, - { - 1u, - EDT_STORAGE_BUFFER, - 1u, - ISpecializedShader::ESS_COMPUTE, - nullptr - } - }; - - core::SRange pcRange = FFTClass::getDefaultPushConstantRanges(); - core::SRange bindings = {bnd,bnd+sizeof(bnd)/sizeof(IGPUDescriptorSetLayout::SBinding)}; - - return driver->createPipelineLayout( - pcRange.begin(),pcRange.end(), - driver->createDescriptorSetLayout(bindings.begin(),bindings.end()),nullptr,nullptr,nullptr - ); - }(); - auto convolvePipelineLayout = [driver]() -> auto - { - IGPUSampler::SParams params = - { - { - ISampler::ETC_REPEAT, - ISampler::ETC_REPEAT, - ISampler::ETC_REPEAT, - ISampler::ETBC_FLOAT_OPAQUE_BLACK, - ISampler::ETF_LINEAR, // is it needed? - ISampler::ETF_LINEAR, - ISampler::ESMM_NEAREST, - 0u, - 0u, - ISampler::ECO_ALWAYS - } - }; - auto sampler = driver->createSampler(std::move(params)); - smart_refctd_ptr samplers[channelCountOverride]; - std::fill_n(samplers,channelCountOverride,sampler); - - IGPUDescriptorSetLayout::SBinding bnd[] = - { - { - 0u, - EDT_STORAGE_BUFFER, - 1u, - ISpecializedShader::ESS_COMPUTE, - nullptr - }, - { - 1u, - EDT_STORAGE_BUFFER, - 1u, - ISpecializedShader::ESS_COMPUTE, - nullptr - }, - { - 2u, - EDT_COMBINED_IMAGE_SAMPLER, - channelCountOverride, - ISpecializedShader::ESS_COMPUTE, - samplers - } - }; - - const asset::SPushConstantRange pcRange = {ISpecializedShader::ESS_COMPUTE,0u,sizeof(convolve_parameters_t)}; - core::SRange bindings = {bnd,bnd+sizeof(bnd)/sizeof(IGPUDescriptorSetLayout::SBinding)}; - - return driver->createPipelineLayout( - &pcRange,&pcRange+1, - driver->createDescriptorSetLayout(bindings.begin(),bindings.end()),nullptr,nullptr,nullptr - ); - }(); - auto lastFFTPipelineLayout = [driver]() -> auto - { - IGPUDescriptorSetLayout::SBinding bnd[] = - { - { - 0u, - EDT_STORAGE_BUFFER, - 1u, - ISpecializedShader::ESS_COMPUTE, - nullptr - }, - { - 1u, - EDT_STORAGE_IMAGE, - 1u, - ISpecializedShader::ESS_COMPUTE, - nullptr - }, - }; - - const asset::SPushConstantRange pcRange = {ISpecializedShader::ESS_COMPUTE,0u,sizeof(image_store_parameters_t)}; - core::SRange bindings = {bnd, bnd+sizeof(bnd)/sizeof(IGPUDescriptorSetLayout::SBinding)};; - - return driver->createPipelineLayout( - &pcRange,&pcRange+1, - driver->createDescriptorSetLayout(bindings.begin(),bindings.end()),nullptr,nullptr,nullptr - ); - }(); - - const float bloomRelativeScale = 0.25f; - const auto kerDim = kerImageView->getCreationParameters().image->getCreationParameters().extent; - 
const auto srcDim = srcImageView->getCreationParameters().image->getCreationParameters().extent; - const float bloomScale = core::min(float(srcDim.width)/float(kerDim.width),float(srcDim.height)/float(kerDim.height))*bloomRelativeScale; - if (bloomScale>1.f) - std::cout << "WARNING: Bloom Kernel will Clip and loose sharpness, increase resolution of bloom kernel!" << std::endl; - const auto marginSrcDim = [srcDim,kerDim,bloomScale]() -> auto - { - auto tmp = srcDim; - for (auto i=0u; i<3u; i++) - { - const auto coord = (&kerDim.width)[i]; - if (coord>1u) - (&tmp.width)[i] += core::max(coord*bloomScale,1u)-1u; - } - return tmp; - }(); - constexpr bool useHalfFloats = true; - // Allocate Output Buffer - auto fftOutputBuffer_0 = driver->createDeviceLocalGPUBufferOnDedMem(FFTClass::getOutputBufferSize(useHalfFloats,marginSrcDim,srcNumChannels)); - auto fftOutputBuffer_1 = driver->createDeviceLocalGPUBufferOnDedMem(FFTClass::getOutputBufferSize(useHalfFloats,marginSrcDim,srcNumChannels)); - core::smart_refctd_ptr kernelNormalizedSpectrums[channelCountOverride]; - - auto updateDescriptorSet = [driver](video::IGPUDescriptorSet* set, core::smart_refctd_ptr inputImageDescriptor, asset::ISampler::E_TEXTURE_CLAMP textureWrap, core::smart_refctd_ptr outputBufferDescriptor) -> void - { - IGPUSampler::SParams params = - { - { - textureWrap, - textureWrap, - textureWrap, - ISampler::ETBC_FLOAT_OPAQUE_BLACK, - ISampler::ETF_LINEAR, - ISampler::ETF_LINEAR, - ISampler::ESMM_LINEAR, - 8u, - 0u, - ISampler::ECO_ALWAYS - } - }; - auto sampler = driver->createSampler(std::move(params)); - - constexpr auto kDescriptorCount = 2u; - video::IGPUDescriptorSet::SDescriptorInfo pInfos[kDescriptorCount]; - video::IGPUDescriptorSet::SWriteDescriptorSet pWrites[kDescriptorCount]; - - for (auto i=0; i(0u); - - // Output Buffer - pWrites[1].binding = 1; - pWrites[1].descriptorType = asset::EDT_STORAGE_BUFFER; - pWrites[1].count = 1; - pInfos[1].desc = outputBufferDescriptor; - pInfos[1].buffer.size = outputBufferDescriptor->getSize(); - pInfos[1].buffer.offset = 0u; - - driver->updateDescriptorSets(2u, pWrites, 0u, nullptr); - }; - - // Precompute Kernel FFT - { - const VkExtent3D paddedKerDim = FFTClass::padDimensions(kerDim); - - // create kernel spectrums - auto createKernelSpectrum = [&]() -> auto - { - video::IGPUImage::SCreationParams imageParams; - imageParams.flags = static_cast(0u); - imageParams.type = asset::IImage::ET_2D; - imageParams.format = useHalfFloats ? EF_R16G16_SFLOAT:EF_R32G32_SFLOAT; - imageParams.extent = { paddedKerDim.width,paddedKerDim.height,1u}; - imageParams.mipLevels = 1u; - imageParams.arrayLayers = 1u; - imageParams.samples = asset::IImage::ESCF_1_BIT; - - video::IGPUImageView::SCreationParams viewParams; - viewParams.flags = static_cast(0u); - viewParams.image = driver->createGPUImageOnDedMem(std::move(imageParams),driver->getDeviceLocalGPUMemoryReqs()); - viewParams.viewType = video::IGPUImageView::ET_2D; - viewParams.format = useHalfFloats ? 
EF_R16G16_SFLOAT:EF_R32G32_SFLOAT; - viewParams.components = {}; - viewParams.subresourceRange = {}; - viewParams.subresourceRange.levelCount = 1u; - viewParams.subresourceRange.layerCount = 1u; - return driver->createImageView(std::move(viewParams)); - }; - for (uint32_t i=0u; i fftPipeline_SSBOInput(core::make_smart_refctd_ptr(driver,0x1u<getDefaultPipeline()); - - // descriptor sets - core::smart_refctd_ptr fftDescriptorSet_Ker_FFT[2] = - { - driver->createDescriptorSet(core::smart_refctd_ptr(imageFirstFFTPipelineLayout->getDescriptorSetLayout(0u))), - driver->createDescriptorSet(core::smart_refctd_ptr(fftPipeline_SSBOInput->getLayout()->getDescriptorSetLayout(0u))) - }; - updateDescriptorSet(fftDescriptorSet_Ker_FFT[0].get(), kerImageView, ISampler::ETC_CLAMP_TO_BORDER, fftOutputBuffer_0); - FFTClass::updateDescriptorSet(driver,fftDescriptorSet_Ker_FFT[1].get(), fftOutputBuffer_0, fftOutputBuffer_1); - - // Normalization of FFT spectrum - struct NormalizationPushConstants - { - ext::FFT::uvec4 stride; - ext::FFT::uvec4 bitreverse_shift; - }; - auto fftPipelineLayout_KernelNormalization = [&]() -> auto - { - IGPUDescriptorSetLayout::SBinding bnd[] = - { - { - 0u, - EDT_STORAGE_BUFFER, - 1u, - ISpecializedShader::ESS_COMPUTE, - nullptr - }, - { - 1u, - EDT_STORAGE_IMAGE, - channelCountOverride, - ISpecializedShader::ESS_COMPUTE, - nullptr - }, - }; - SPushConstantRange pc_rng; - pc_rng.offset = 0u; - pc_rng.size = sizeof(NormalizationPushConstants); - pc_rng.stageFlags = ISpecializedShader::ESS_COMPUTE; - return driver->createPipelineLayout( - &pc_rng,&pc_rng+1u, - driver->createDescriptorSetLayout(bnd,bnd+2),nullptr,nullptr,nullptr - ); - }(); - auto fftDescriptorSet_KernelNormalization = [&]() -> auto - { - auto dset = driver->createDescriptorSet(core::smart_refctd_ptr(fftPipelineLayout_KernelNormalization->getDescriptorSetLayout(0u))); - - video::IGPUDescriptorSet::SDescriptorInfo pInfos[1+channelCountOverride]; - video::IGPUDescriptorSet::SWriteDescriptorSet pWrites[2]; - - for (auto i = 0; i < 2; i++) - { - pWrites[i].dstSet = dset.get(); - pWrites[i].arrayElement = 0u; - pWrites[i].count = 1u; - pWrites[i].info = pInfos + i; - } - - // In Buffer - pWrites[0].binding = 0; - pWrites[0].descriptorType = asset::EDT_STORAGE_BUFFER; - pWrites[0].count = 1; - pInfos[0].desc = fftOutputBuffer_1; - pInfos[0].buffer.size = fftOutputBuffer_1->getSize(); - pInfos[0].buffer.offset = 0u; - - // Out Buffer - pWrites[1].binding = 1; - pWrites[1].descriptorType = asset::EDT_STORAGE_IMAGE; - pWrites[1].count = channelCountOverride; - for (uint32_t i=0u; iupdateDescriptorSets(2u, pWrites, 0u, nullptr); - return dset; - }(); - - // Ker Image First Axis FFT - { - auto fftPipeline_ImageInput = driver->createComputePipeline(nullptr,core::smart_refctd_ptr(imageFirstFFTPipelineLayout),createShader(driver,0x1u<bindComputePipeline(fftPipeline_ImageInput.get()); - driver->bindDescriptorSets(EPBP_COMPUTE, imageFirstFFTPipelineLayout.get(), 0u, 1u, &fftDescriptorSet_Ker_FFT[0].get(), nullptr); - FFTClass::dispatchHelper(driver, imageFirstFFTPipelineLayout.get(), fftPushConstants[0], fftDispatchInfo[0]); - } - - // Ker Image Last Axis FFT - driver->bindComputePipeline(fftPipeline_SSBOInput.get()); - driver->bindDescriptorSets(EPBP_COMPUTE, fftPipeline_SSBOInput->getLayout(), 0u, 1u, &fftDescriptorSet_Ker_FFT[1].get(), nullptr); - FFTClass::dispatchHelper(driver, fftPipeline_SSBOInput->getLayout(), fftPushConstants[1], fftDispatchInfo[1]); - - // Ker Normalization - auto fftPipeline_KernelNormalization = 
-		driver->bindComputePipeline(fftPipeline_KernelNormalization.get());
-		driver->bindDescriptorSets(EPBP_COMPUTE, fftPipelineLayout_KernelNormalization.get(), 0u, 1u, &fftDescriptorSet_KernelNormalization.get(), nullptr);
-		{
-			NormalizationPushConstants normalizationPC;
-			normalizationPC.stride = fftPushConstants[1].output_strides;
-			normalizationPC.bitreverse_shift.x = 32-core::findMSB(paddedKerDim.width);
-			normalizationPC.bitreverse_shift.y = 32-core::findMSB(paddedKerDim.height);
-			normalizationPC.bitreverse_shift.z = 0;
-			driver->pushConstants(fftPipelineLayout_KernelNormalization.get(),ICPUSpecializedShader::ESS_COMPUTE,0u,sizeof(normalizationPC),&normalizationPC);
-		}
-		{
-			const uint32_t dispatchSizeX = (paddedKerDim.width-1u)/16u+1u;
-			const uint32_t dispatchSizeY = (paddedKerDim.height-1u)/16u+1u;
-			driver->dispatch(dispatchSizeX,dispatchSizeY,kerNumChannels);
-			FFTClass::defaultBarrier();
-		}
-	}
-
-	FFTClass::Parameters_t fftPushConstants[3];
-	FFTClass::DispatchInfo_t fftDispatchInfo[3];
-	const ISampler::E_TEXTURE_CLAMP fftPadding[2] = {ISampler::ETC_MIRROR,ISampler::ETC_MIRROR};
-	const auto passes = FFTClass::buildParameters(false,srcNumChannels,srcDim,fftPushConstants,fftDispatchInfo,fftPadding,marginSrcDim);
-	{
-		// override for less work and storage (dont need to store the extra padding of the last axis after iFFT)
-		fftPushConstants[1].output_strides.x = fftPushConstants[0].input_strides.x;
-		fftPushConstants[1].output_strides.y = fftPushConstants[0].input_strides.y;
-		fftPushConstants[1].output_strides.z = fftPushConstants[1].input_strides.z;
-		fftPushConstants[1].output_strides.w = fftPushConstants[1].input_strides.w;
-		// iFFT
-		fftPushConstants[2].input_dimensions = fftPushConstants[1].input_dimensions;
-		{
-			fftPushConstants[2].input_dimensions.w = fftPushConstants[0].input_dimensions.w^0x80000000u;
-			fftPushConstants[2].input_strides = fftPushConstants[1].output_strides;
-			fftPushConstants[2].output_strides = fftPushConstants[0].input_strides;
-		}
-		fftDispatchInfo[2] = fftDispatchInfo[0];
-	}
-	assert(passes==2);
-	// pipelines
-	auto fftPipeline_ImageInput = driver->createComputePipeline(nullptr,core::smart_refctd_ptr(imageFirstFFTPipelineLayout),createShader(driver,0x1u<createComputePipeline(nullptr, std::move(convolvePipelineLayout), createShader(driver,0x1u<createComputePipeline(nullptr, std::move(lastFFTPipelineLayout), createShader(driver,0x1u<createDescriptorSet(core::smart_refctd_ptr(imageFirstFFTPipelineLayout->getDescriptorSetLayout(0u)));
-	updateDescriptorSet(fftDescriptorSet_Src_FirstFFT.get(), srcImageView, ISampler::ETC_MIRROR, fftOutputBuffer_0);
-
-	// Convolution
-	auto convolveDescriptorSet = driver->createDescriptorSet(core::smart_refctd_ptr(convolvePipeline->getLayout()->getDescriptorSetLayout(0u)));
-	updateDescriptorSet_Convolution(driver, convolveDescriptorSet.get(), fftOutputBuffer_0, fftOutputBuffer_1, kernelNormalizedSpectrums);
-
-	// Last Axis IFFT
-	auto lastFFTDescriptorSet = driver->createDescriptorSet(core::smart_refctd_ptr(lastFFTPipeline->getLayout()->getDescriptorSetLayout(0u)));
-	updateDescriptorSet_LastFFT(driver, lastFFTDescriptorSet.get(), fftOutputBuffer_1, outImgView);
-
-	uint32_t outBufferIx = 0u;
-	auto lastPresentStamp = std::chrono::high_resolution_clock::now();
-	bool savedToFile = false;
-
-	auto downloadStagingArea = driver->getDefaultDownStreamingBuffer();
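The `bitreverse_shift` push constant computed above relies on a small bit trick: for a power-of-two FFT length N, reversing an index within its log2(N) significant bits is the same as reversing all 32 bits and shifting right by 32 - log2(N), which is what `32-core::findMSB(dim)` produces. A self-contained sketch, with a plain C++ stand-in for GLSL's `bitfieldReverse()`; the names and sizes are illustrative:

	#include <cassert>
	#include <cstdint>

	// Reverse all 32 bits of `v` (stand-in for GLSL's bitfieldReverse()).
	static uint32_t reverseBits32(uint32_t v)
	{
		v = ((v & 0x55555555u) << 1) | ((v >> 1) & 0x55555555u);
		v = ((v & 0x33333333u) << 2) | ((v >> 2) & 0x33333333u);
		v = ((v & 0x0F0F0F0Fu) << 4) | ((v >> 4) & 0x0F0F0F0Fu);
		v = ((v & 0x00FF00FFu) << 8) | ((v >> 8) & 0x00FF00FFu);
		return (v << 16) | (v >> 16);
	}

	int main()
	{
		const uint32_t paddedDim = 512u;      // power-of-two FFT size
		const uint32_t log2Dim = 9u;          // findMSB(512) == 9
		const uint32_t shift = 32u - log2Dim; // the push-constant value
		// Reversing log2Dim bits == reversing all 32 bits, then dropping the rest.
		const uint32_t index = 3u;                                // 000000011b
		const uint32_t reversed = reverseBits32(index) >> shift;  // 110000000b
		assert(reversed == 384u);
		return 0;
	}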
-
-	auto blitFBO = driver->addFrameBuffer();
-	blitFBO->attach(video::EFAP_COLOR_ATTACHMENT0, std::move(outImgView));
-
-	while (device->run() && receiver.keepOpen())
-	{
-		driver->beginScene(false, false);
-
-		// Src Image First Axis FFT
-		driver->bindComputePipeline(fftPipeline_ImageInput.get());
-		driver->bindDescriptorSets(EPBP_COMPUTE, imageFirstFFTPipelineLayout.get(), 0u, 1u, &fftDescriptorSet_Src_FirstFFT.get(), nullptr);
-		FFTClass::dispatchHelper(driver, imageFirstFFTPipelineLayout.get(), fftPushConstants[0], fftDispatchInfo[0]);
-
-		// Src Image Last Axis FFT + Convolution + Convolved Last Axis IFFT Y
-		driver->bindComputePipeline(convolvePipeline.get());
-		driver->bindDescriptorSets(EPBP_COMPUTE, convolvePipeline->getLayout(), 0u, 1u, &convolveDescriptorSet.get(), nullptr);
-		{
-			const auto& kernelImgExtent = kernelNormalizedSpectrums[0]->getCreationParameters().image->getCreationParameters().extent;
-			vec2 kernel_half_pixel_size{0.5f,0.5f};
-			kernel_half_pixel_size.x /= kernelImgExtent.width;
-			kernel_half_pixel_size.y /= kernelImgExtent.height;
-			driver->pushConstants(convolvePipeline->getLayout(),ISpecializedShader::ESS_COMPUTE,offsetof(convolve_parameters_t,kernel_half_pixel_size),sizeof(convolve_parameters_t::kernel_half_pixel_size),&kernel_half_pixel_size);
-		}
-		FFTClass::dispatchHelper(driver, convolvePipeline->getLayout(), fftPushConstants[1], fftDispatchInfo[1]);
-
-		// Last FFT Padding and Copy to GPU Image
-		driver->bindComputePipeline(lastFFTPipeline.get());
-		driver->bindDescriptorSets(EPBP_COMPUTE, lastFFTPipeline->getLayout(), 0u, 1u, &lastFFTDescriptorSet.get(), nullptr);
-		{
-			const auto paddedSrcDim = FFTClass::padDimensions(marginSrcDim);
-			ivec2 unpad_offset = { 0,0 };
-			for (auto i=0u; i<2u; i++)
-				if (fftDispatchInfo[2].workGroupCount[i]==1u)
-					(&unpad_offset.x)[i] = ((&paddedSrcDim.width)[i]-(&srcDim.width)[i])>>1u;
-			driver->pushConstants(lastFFTPipeline->getLayout(),ISpecializedShader::ESS_COMPUTE,offsetof(image_store_parameters_t,unpad_offset),sizeof(image_store_parameters_t::unpad_offset),&unpad_offset);
-		}
-		FFTClass::dispatchHelper(driver, lastFFTPipeline->getLayout(), fftPushConstants[2], fftDispatchInfo[2]);
-
-		if(!savedToFile)
-		{
-			savedToFile = true;
-
-			core::smart_refctd_ptr imageView;
-			const uint32_t colorBufferBytesize = srcDim.height * srcDim.width * asset::getTexelOrBlockBytesize(srcFormat);
-
-			// create image
-			ICPUImage::SCreationParams imgParams;
-			imgParams.flags = static_cast(0u); // no flags
-			imgParams.type = ICPUImage::ET_2D;
-			imgParams.format = srcFormat;
-			imgParams.extent = srcDim;
-			imgParams.mipLevels = 1u;
-			imgParams.arrayLayers = 1u;
-			imgParams.samples = ICPUImage::ESCF_1_BIT;
-			auto image = ICPUImage::create(std::move(imgParams));
-
-			constexpr uint64_t timeoutInNanoSeconds = 300000000000u;
-			const auto waitPoint = std::chrono::high_resolution_clock::now()+std::chrono::nanoseconds(timeoutInNanoSeconds);
-
-			uint32_t address = std::remove_pointer::type::invalid_address; // remember without initializing the address to be allocated to invalid_address you won't get an allocation!
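Two of the push constants computed inside this render loop are plain texture arithmetic: the convolution pass samples the kernel spectrum at texel centers, so it needs a half-texel UV offset, and the final store pass skips half of the padding margin. A simplified sketch of both; the struct and function names are illustrative, and the single-workgroup condition from the deleted code is omitted:

	#include <cstdint>
	#include <cstdio>

	struct float2 { float x, y; };
	struct int2 { int32_t x, y; };

	// Offset from a texel corner to its center in normalized UV space; the deleted
	// convolution pass pushes this so the kernel spectrum is sampled at texel centers.
	static float2 halfPixelSize(uint32_t width, uint32_t height)
	{
		return { 0.5f/float(width), 0.5f/float(height) };
	}

	// When the padded FFT domain is larger than the source image, the valid region
	// sits centered in the padding, so the store pass skips (padded-src)/2 texels.
	static int2 unpadOffset(uint32_t paddedW, uint32_t paddedH, uint32_t srcW, uint32_t srcH)
	{
		return { int32_t((paddedW-srcW)>>1u), int32_t((paddedH-srcH)>>1u) };
	}

	int main()
	{
		const float2 hp = halfPixelSize(512u,512u);
		const int2 off = unpadOffset(1024u,1024u,900u,900u);
		std::printf("half pixel = (%f,%f), unpad offset = (%d,%d)\n",hp.x,hp.y,off.x,off.y);
		return 0;
	}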
-			const uint32_t alignment = 4096u; // common page size
-			auto unallocatedSize = downloadStagingArea->multi_alloc(waitPoint, 1u, &address, &colorBufferBytesize, &alignment);
-			if (unallocatedSize)
-			{
-				os::Printer::log("Could not download the buffer from the GPU!", ELL_ERROR);
-			}
-
-			// set up regions
-			auto regions = core::make_refctd_dynamic_array >(1u);
-			{
-				auto& region = regions->front();
-
-				region.bufferOffset = 0u;
-				region.bufferRowLength = 0u;
-				region.bufferImageHeight = 0u;
-				//region.imageSubresource.aspectMask = wait for Vulkan;
-				region.imageSubresource.mipLevel = 0u;
-				region.imageSubresource.baseArrayLayer = 0u;
-				region.imageSubresource.layerCount = 1u;
-				region.imageOffset = { 0u,0u,0u };
-				region.imageExtent = imgParams.extent;
-			}
-
-			driver->copyImageToBuffer(outImg.get(), downloadStagingArea->getBuffer(), 1, &regions->front());
-
-			auto downloadFence = driver->placeFence(true);
-
-			auto* data = reinterpret_cast(downloadStagingArea->getBufferPointer()) + address;
-			auto cpubufferalias = core::make_smart_refctd_ptr > >(colorBufferBytesize, data, core::adopt_memory);
-			image->setBufferAndRegions(std::move(cpubufferalias),regions);
-
-			// wait for download fence and then invalidate the CPU cache
-			{
-				auto result = downloadFence->waitCPU(timeoutInNanoSeconds,true);
-				if (result==E_DRIVER_FENCE_RETVAL::EDFR_TIMEOUT_EXPIRED||result==E_DRIVER_FENCE_RETVAL::EDFR_FAIL)
-				{
-					os::Printer::log("Could not download the buffer from the GPU, fence not signalled!", ELL_ERROR);
-					downloadStagingArea->multi_free(1u, &address, &colorBufferBytesize, nullptr);
-					continue;
-				}
-				if (downloadStagingArea->needsManualFlushOrInvalidate())
-					driver->invalidateMappedMemoryRanges({{downloadStagingArea->getBuffer()->getBoundMemory(),address,colorBufferBytesize}});
-			}
-
-			// create image view
-			ICPUImageView::SCreationParams imgViewParams;
-			imgViewParams.flags = static_cast(0u);
-			imgViewParams.format = image->getCreationParameters().format;
-			imgViewParams.image = std::move(image);
-			imgViewParams.viewType = ICPUImageView::ET_2D;
-			imgViewParams.subresourceRange = {static_cast(0u),0u,1u,0u,1u};
-			imageView = ICPUImageView::create(std::move(imgViewParams));
-
-			IAssetWriter::SAssetWriteParams wp(imageView.get());
-			volatile bool success = am->writeAsset("convolved_exr.exr", wp);
-			assert(success);
-		}
-
-		driver->blitRenderTargets(blitFBO, nullptr, false, false);
-
-		driver->endScene();
-	}
-
-	return 0;
-}
\ No newline at end of file
diff --git a/old_to_refactor/49_ComputeFFT/normalization.comp b/old_to_refactor/49_ComputeFFT/normalization.comp
deleted file mode 100644
index b3926090d..000000000
--- a/old_to_refactor/49_ComputeFFT/normalization.comp
+++ /dev/null
@@ -1,34 +0,0 @@
-layout(local_size_x=16, local_size_y=16, local_size_z=1) in;
-
-#include
-
-layout(set=0, binding=0) restrict readonly buffer InBuffer
-{
-	nbl_glsl_ext_FFT_storage_t in_data[];
-};
-
-layout(set=0, binding=1, rg16f) uniform image2D NormalizedKernel[3];
-
-layout(push_constant) uniform PushConstants
-{
-	uvec4 strides;
-	uvec4 bitreverse_shift;
-} pc;
-
-#include
-
-void main()
-{
-	nbl_glsl_complex value = nbl_glsl_ext_FFT_storage_t_get(in_data[nbl_glsl_dot(gl_GlobalInvocationID,pc.strides.xyz)]);
-
-	// imaginary component will be 0, image shall be positive
-	vec3 avg;
-	for (uint i=0u; i<3u; i++)
-		avg[i] = nbl_glsl_ext_FFT_storage_t_get(in_data[pc.strides.z*i]).x;
-	const float power = (nbl_glsl_scRGBtoXYZ*avg).y;
-
-	const uvec2 coord = bitfieldReverse(gl_GlobalInvocationID.xy)>>pc.bitreverse_shift.xy;
-	const nbl_glsl_complex shift = nbl_glsl_expImaginary(-nbl_glsl_PI*float(coord.x+coord.y));
-	value = nbl_glsl_complex_mul(value,shift)/power;
-	imageStore(NormalizedKernel[gl_WorkGroupID.z],ivec2(coord),vec4(value,0.0,0.0));
-}
\ No newline at end of file
diff --git a/old_to_refactor/49_ComputeFFT/pipeline.groovy b/old_to_refactor/49_ComputeFFT/pipeline.groovy
deleted file mode 100644
index 64874da2a..000000000
--- a/old_to_refactor/49_ComputeFFT/pipeline.groovy
+++ /dev/null
@@ -1,50 +0,0 @@
-import org.DevshGraphicsProgramming.Agent
-import org.DevshGraphicsProgramming.BuilderInfo
-import org.DevshGraphicsProgramming.IBuilder
-
-class CComputeFFTBuilder extends IBuilder
-{
-	public CComputeFFTBuilder(Agent _agent, _info)
-	{
-		super(_agent, _info)
-	}
-
-	@Override
-	public boolean prepare(Map axisMapping)
-	{
-		return true
-	}
-
-	@Override
-	public boolean build(Map axisMapping)
-	{
-		IBuilder.CONFIGURATION config = axisMapping.get("CONFIGURATION")
-		IBuilder.BUILD_TYPE buildType = axisMapping.get("BUILD_TYPE")
-
-		def nameOfBuildDirectory = getNameOfBuildDirectory(buildType)
-		def nameOfConfig = getNameOfConfig(config)
-
-		agent.execute("cmake --build ${info.rootProjectPath}/${nameOfBuildDirectory}/${info.targetProjectPathRelativeToRoot} --target ${info.targetBaseName} --config ${nameOfConfig} -j12 -v")
-
-		return true
-	}
-
-	@Override
-	public boolean test(Map axisMapping)
-	{
-		return true
-	}
-
-	@Override
-	public boolean install(Map axisMapping)
-	{
-		return true
-	}
-}
-
-def create(Agent _agent, _info)
-{
-	return new CComputeFFTBuilder(_agent, _info)
-}
-
-return this
\ No newline at end of file
diff --git a/60_ClusteredRendering/CMakeLists.txt b/old_to_refactor/60_ClusteredRendering/CMakeLists.txt
similarity index 100%
rename from 60_ClusteredRendering/CMakeLists.txt
rename to old_to_refactor/60_ClusteredRendering/CMakeLists.txt
diff --git a/42_FragmentShaderPathTracer/config.json.template b/old_to_refactor/60_ClusteredRendering/config.json.template
similarity index 100%
rename from 42_FragmentShaderPathTracer/config.json.template
rename to old_to_refactor/60_ClusteredRendering/config.json.template
diff --git a/60_ClusteredRendering/main.cpp b/old_to_refactor/60_ClusteredRendering/main.cpp
similarity index 100%
rename from 60_ClusteredRendering/main.cpp
rename to old_to_refactor/60_ClusteredRendering/main.cpp
diff --git a/60_ClusteredRendering/pipeline.groovy b/old_to_refactor/60_ClusteredRendering/pipeline.groovy
similarity index 100%
rename from 60_ClusteredRendering/pipeline.groovy
rename to old_to_refactor/60_ClusteredRendering/pipeline.groovy
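For reference on the normalization shader deleted above: dividing by `power` (the luminance of the DC term) rescales the kernel so the convolution preserves overall brightness, while multiplying by `exp(-i*pi*(x+y))` applies the Fourier shift theorem for a half-period translation, i.e. it re-centers the spatial kernel and degenerates to a sign flip of every other sample. A small check of that identity, assuming nothing beyond the C++ standard library:

	#include <cassert>
	#include <cmath>
	#include <complex>
	#include <cstdint>

	int main()
	{
		const float pi = 3.14159265358979f;
		for (uint32_t x=0u; x<4u; x++)
		for (uint32_t y=0u; y<4u; y++)
		{
			// exp(-i*pi*(x+y)) == (-1)^(x+y): the half-period shift used to center the kernel
			const std::complex<float> shift = std::exp(std::complex<float>(0.f,-pi*float(x+y)));
			const float sign = ((x+y)&0x1u) ? -1.f:1.f;
			assert(std::abs(shift.real()-sign)<1e-4f && std::abs(shift.imag())<1e-4f);
		}
		return 0;
	}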