diff --git a/Include/NRD.h b/Include/NRD.h index 3907d6d2..2a3cfe9e 100644 --- a/Include/NRD.h +++ b/Include/NRD.h @@ -28,9 +28,9 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. #include #define NRD_VERSION_MAJOR 4 -#define NRD_VERSION_MINOR 9 -#define NRD_VERSION_BUILD 4 -#define NRD_VERSION_DATE "20 September 2024" +#define NRD_VERSION_MINOR 10 +#define NRD_VERSION_BUILD 0 +#define NRD_VERSION_DATE "9 October 2024" #if defined(_MSC_VER) #define NRD_CALL __fastcall diff --git a/Include/NRDDescs.h b/Include/NRDDescs.h index 1b93046d..41f8e907 100644 --- a/Include/NRDDescs.h +++ b/Include/NRDDescs.h @@ -11,7 +11,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. #pragma once #define NRD_DESCS_VERSION_MAJOR 4 -#define NRD_DESCS_VERSION_MINOR 9 +#define NRD_DESCS_VERSION_MINOR 10 static_assert(NRD_VERSION_MAJOR == NRD_DESCS_VERSION_MAJOR && NRD_VERSION_MINOR == NRD_DESCS_VERSION_MINOR, "Please, update all NRD SDK files"); diff --git a/Include/NRDSettings.h b/Include/NRDSettings.h index c9cf1551..59e9e99f 100644 --- a/Include/NRDSettings.h +++ b/Include/NRDSettings.h @@ -11,7 +11,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. #pragma once #define NRD_SETTINGS_VERSION_MAJOR 4 -#define NRD_SETTINGS_VERSION_MINOR 9 +#define NRD_SETTINGS_VERSION_MINOR 10 static_assert(NRD_VERSION_MAJOR == NRD_SETTINGS_VERSION_MAJOR && NRD_VERSION_MINOR == NRD_SETTINGS_VERSION_MINOR, "Please, update all NRD SDK files"); diff --git a/Integration/NRDIntegration.h b/Integration/NRDIntegration.h index ef09d654..386500ba 100644 --- a/Integration/NRDIntegration.h +++ b/Integration/NRDIntegration.h @@ -23,10 +23,11 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. 
#include #define NRD_INTEGRATION_MAJOR 1 -#define NRD_INTEGRATION_MINOR 12 -#define NRD_INTEGRATION_DATE "17 April 2024" +#define NRD_INTEGRATION_MINOR 13 +#define NRD_INTEGRATION_DATE "7 October 2024" #define NRD_INTEGRATION 1 +// Debugging #define NRD_INTEGRATION_DEBUG_LOGGING 0 #ifndef NRD_INTEGRATION_ASSERT @@ -34,56 +35,70 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. #define NRD_INTEGRATION_ASSERT(expr, msg) assert(msg && expr) #endif -// "state->texture" represents the resource, the rest represents the state. -struct NrdIntegrationTexture +#define NRD_INTEGRATION_ABORT_ON_FAILURE(result) if ((result) != nri::Result::SUCCESS) NRD_INTEGRATION_ASSERT(false, "Abort on failure!") + +namespace nrd { - nri::TextureBarrierDesc* state; - nri::Format format; -}; -typedef std::array NrdUserPool; +// "TextureBarrierDesc::texture" represents the resource, the rest represents the state +typedef std::array UserPool; -// User pool must contain valid entries only for resources, which are required for requested denoisers, but -// the entire pool must be zero-ed during initialization -inline void NrdIntegration_SetResource(NrdUserPool& pool, nrd::ResourceType slot, const NrdIntegrationTexture& texture) +// User pool must contain valid entries for resources, which are required for requested denoisers, +// but the entire pool must be zero-ed during initialization +inline void Integration_SetResource(UserPool& pool, ResourceType slot, nri::TextureBarrierDesc* texture) { - NRD_INTEGRATION_ASSERT( texture.state->texture != nullptr, "Invalid texture!" ); - NRD_INTEGRATION_ASSERT( texture.format != nri::Format::UNKNOWN, "Invalid format!" 
); + NRD_INTEGRATION_ASSERT(texture != nullptr, "Invalid texture!"); pool[(size_t)slot] = texture; } -class NrdIntegration +struct IntegrationCreationDesc +{ + // Not so long name + const char* name = ""; + + // Resource dimensions + uint16_t resourceWidth = 0; + uint16_t resourceHeight = 0; + + // (1-3) the application must provide number of buffered frames, it's needed to guarantee + // that constant data and descriptor sets are not overwritten while being executed on the GPU + uint8_t bufferedFramesNum = 2; + + // true - enables descriptor caching for the whole lifetime of an Integration instance + // false - descriptors are cached only within a single "Denoise" call + bool enableDescriptorCaching = false; + + // Demote FP32 to FP16 (slightly improves performance in exchange of precision loss) + // (FP32 is used only for viewZ under the hood, all denoisers are FP16 compatible) + bool demoteFloat32to16 = false; + + // Promote FP16 to FP32 (overkill, kills performance) + bool promoteFloat16to32 = false; +}; + +class Integration { public: - // bufferedFramesNum (usually 2-3 frames): - // The application must provide number of buffered frames, it's needed to guarantee that - // constant data and descriptor sets are not overwritten while being executed on the GPU. - // enableDescriptorCaching: - // true - enables descriptor caching for the whole lifetime of an NrdIntegration instance - // false - descriptors are cached only within a single "Denoise" call - NrdIntegration(uint32_t bufferedFramesNum, bool enableDescriptorCaching, const char* persistentName = "") : - m_Name(persistentName) - , m_BufferedFramesNum(bufferedFramesNum) - , m_IsDescriptorCachingEnabled(enableDescriptorCaching) + inline Integration() {} - ~NrdIntegration() - { NRD_INTEGRATION_ASSERT( m_NRI == nullptr, "m_NRI must be NULL at this point!" 
); } + inline ~Integration() + { NRD_INTEGRATION_ASSERT(m_NRI == nullptr, "m_NRI must be NULL at this point!"); } // There is no "Resize" functionality, because NRD full recreation costs nothing. // The main cost comes from render targets resizing, which needs to be done in any case // (call Destroy beforehand) - bool Initialize(uint16_t resourceWidth, uint16_t resourceHeight, const nrd::InstanceCreationDesc& instanceCreationDesc, nri::Device& nriDevice, const nri::CoreInterface& nriCore, const nri::HelperInterface& nriHelper); + bool Initialize(const IntegrationCreationDesc& nrdIntegrationDesc, const InstanceCreationDesc& instanceCreationDesc, nri::Device& nriDevice, const nri::CoreInterface& nriCore, const nri::HelperInterface& nriHelper); // Must be called once on a frame start void NewFrame(); // Explicitly calls eponymous NRD API functions - bool SetCommonSettings(const nrd::CommonSettings& commonSettings); - bool SetDenoiserSettings(nrd::Identifier denoiser, const void* denoiserSettings); + bool SetCommonSettings(const CommonSettings& commonSettings); + bool SetDenoiserSettings(Identifier denoiser, const void* denoiserSettings); - void Denoise(const nrd::Identifier* denoisers, uint32_t denoisersNum, nri::CommandBuffer& commandBuffer, const NrdUserPool& userPool); + void Denoise(const Identifier* denoisers, uint32_t denoisersNum, nri::CommandBuffer& commandBuffer, const UserPool& userPool); // This function assumes that the device is in the IDLE state, i.e. 
there is no work in flight void Destroy(); @@ -102,17 +117,16 @@ class NrdIntegration { return double(m_TransientPoolSize) / (1024.0 * 1024.0); } private: - NrdIntegration(const NrdIntegration&) = delete; + Integration(const Integration&) = delete; void CreateResources(uint16_t resourceWidth, uint16_t resourceHeight); void AllocateAndBindMemory(); - void Dispatch(nri::CommandBuffer& commandBuffer, nri::DescriptorPool& descriptorPool, const nrd::DispatchDesc& dispatchDesc, const NrdUserPool& userPool); + void Dispatch(nri::CommandBuffer& commandBuffer, nri::DescriptorPool& descriptorPool, const DispatchDesc& dispatchDesc, const UserPool& userPool); private: - std::vector m_TexturePool; + std::vector m_TexturePool; std::map m_CachedDescriptors; std::vector> m_DescriptorsInFlight; - std::vector m_ResourceState; std::vector m_PipelineLayouts; std::vector m_Pipelines; std::vector m_MemoryAllocations; @@ -124,18 +138,20 @@ class NrdIntegration nri::Device* m_Device = nullptr; nri::Buffer* m_ConstantBuffer = nullptr; nri::Descriptor* m_ConstantBufferView = nullptr; - nrd::Instance* m_Instance = nullptr; - const char* m_Name = nullptr; + Instance* m_Instance = nullptr; uint64_t m_PermanentPoolSize = 0; uint64_t m_TransientPoolSize = 0; uint64_t m_ConstantBufferSize = 0; uint32_t m_ConstantBufferViewSize = 0; uint32_t m_ConstantBufferOffset = 0; - uint32_t m_BufferedFramesNum = 0; uint32_t m_DescriptorPoolIndex = 0; uint32_t m_FrameIndex = 0; - bool m_IsShadersReloadRequested = false; - bool m_IsDescriptorCachingEnabled = false; + uint8_t m_BufferedFramesNum = 0; + char m_Name[32] = {}; + bool m_ReloadShaders = false; + bool m_EnableDescriptorCaching = false; + bool m_DemoteFloat32to16 = false; + bool m_PromoteFloat16to32 = false; }; -#define NRD_INTEGRATION_ABORT_ON_FAILURE(result) if ((result) != nri::Result::SUCCESS) NRD_INTEGRATION_ASSERT(false, "Abort on failure!") +} diff --git a/Integration/NRDIntegration.hpp b/Integration/NRDIntegration.hpp index f1e87c0a..df3f13ae 
100644 --- a/Integration/NRDIntegration.hpp +++ b/Integration/NRDIntegration.hpp @@ -10,14 +10,20 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. #include "NRDIntegration.h" -static_assert(NRD_VERSION_MAJOR >= 4 && NRD_VERSION_MINOR >= 9, "Unsupported NRD version!"); -static_assert(NRI_VERSION_MAJOR >= 1 && NRI_VERSION_MINOR >= 151, "Unsupported NRI version!"); - +#include // strncpy #ifdef _WIN32 - #define alloca _alloca + #include +#else + #include #endif -constexpr std::array g_NRD_NrdToNriFormat = +static_assert(NRD_VERSION_MAJOR >= 4 && NRD_VERSION_MINOR >= 10, "Unsupported NRD version!"); +static_assert(NRI_VERSION_MAJOR >= 1 && NRI_VERSION_MINOR >= 152, "Unsupported NRI version!"); + +namespace nrd +{ + +constexpr std::array g_NrdFormatToNri = { nri::Format::R8_UNORM, nri::Format::R8_SNORM, @@ -65,13 +71,13 @@ constexpr std::array g_NRD_NrdToNriFo nri::Format::R9_G9_B9_E5_UFLOAT, }; -static inline uint16_t NRD_DivideUp(uint32_t x, uint16_t y) +static inline uint16_t DivideUp(uint32_t x, uint16_t y) { return uint16_t((x + y - 1) / y); } -static inline nri::Format NRD_GetNriFormat(nrd::Format format) -{ return g_NRD_NrdToNriFormat[(uint32_t)format]; } +static inline nri::Format GetNriFormat(Format format) +{ return g_NrdFormatToNri[(uint32_t)format]; } -static inline uint64_t NRD_CreateDescriptorKey(uint64_t texture, bool isStorage) +static inline uint64_t CreateDescriptorKey(uint64_t texture, bool isStorage) { uint64_t key = uint64_t(isStorage ? 
1 : 0) << 63ull; key |= texture & ((1ull << 63ull) - 1); @@ -79,43 +85,52 @@ static inline uint64_t NRD_CreateDescriptorKey(uint64_t texture, bool isStorage) return key; } -template constexpr T NRD_GetAlignedSize(const T& size, A alignment) +template constexpr T GetAlignedSize(const T& size, A alignment) { return T(((size + alignment - 1) / alignment) * alignment); } -bool NrdIntegration::Initialize(uint16_t resourceWidth, uint16_t resourceHeight, const nrd::InstanceCreationDesc& instanceCreationDesc, nri::Device& nriDevice, const nri::CoreInterface& nriCore, const nri::HelperInterface& nriHelper) +bool Integration::Initialize(const IntegrationCreationDesc& integrationDesc, const InstanceCreationDesc& instanceDesc, nri::Device& nriDevice, const nri::CoreInterface& nriCore, const nri::HelperInterface& nriHelper) { NRD_INTEGRATION_ASSERT(!m_Instance, "Already initialized! Did you forget to call 'Destroy'?"); + NRD_INTEGRATION_ASSERT(!integrationDesc.promoteFloat16to32 || !integrationDesc.demoteFloat32to16, "Can't be 'true' both"); const nri::DeviceDesc& deviceDesc = nriCore.GetDeviceDesc(nriDevice); if (deviceDesc.nriVersionMajor != NRI_VERSION_MAJOR || deviceDesc.nriVersionMinor != NRI_VERSION_MINOR) { NRD_INTEGRATION_ASSERT(false, "NRI version mismatch detected!"); + return false; } - const nrd::LibraryDesc& libraryDesc = nrd::GetLibraryDesc(); + const LibraryDesc& libraryDesc = GetLibraryDesc(); if (libraryDesc.versionMajor != NRD_VERSION_MAJOR || libraryDesc.versionMinor != NRD_VERSION_MINOR) { NRD_INTEGRATION_ASSERT(false, "NRD version mismatch detected!"); + return false; } - if (nrd::CreateInstance(instanceCreationDesc, m_Instance) != nrd::Result::SUCCESS) + if (CreateInstance(instanceDesc, m_Instance) != Result::SUCCESS) return false; + m_BufferedFramesNum = integrationDesc.bufferedFramesNum; + m_EnableDescriptorCaching = integrationDesc.enableDescriptorCaching; + m_PromoteFloat16to32 = integrationDesc.promoteFloat16to32; + m_DemoteFloat32to16 = 
integrationDesc.demoteFloat32to16; m_Device = &nriDevice; m_NRI = &nriCore; m_NRIHelper = &nriHelper; + strncpy(m_Name, integrationDesc.name, sizeof(m_Name)); + CreatePipelines(); - CreateResources(resourceWidth, resourceHeight); + CreateResources(integrationDesc.resourceWidth, integrationDesc.resourceHeight); return true; } -void NrdIntegration::CreatePipelines() +void Integration::CreatePipelines() { // Assuming that the device is in IDLE state for (nri::Pipeline* pipeline : m_Pipelines) @@ -126,7 +141,7 @@ void NrdIntegration::CreatePipelines() utils::ShaderCodeStorage shaderCodeStorage; #endif - const nrd::InstanceDesc& instanceDesc = nrd::GetInstanceDesc(*m_Instance); + const InstanceDesc& instanceDesc = GetInstanceDesc(*m_Instance); const nri::DeviceDesc& deviceDesc = m_NRI->GetDeviceDesc(*m_Device); uint32_t constantBufferOffset = 0; @@ -135,7 +150,7 @@ void NrdIntegration::CreatePipelines() uint32_t storageTextureAndBufferOffset = 0; if (m_NRI->GetDeviceDesc(*m_Device).graphicsAPI == nri::GraphicsAPI::VK) { - const nrd::LibraryDesc& nrdLibraryDesc = nrd::GetLibraryDesc(); + const LibraryDesc& nrdLibraryDesc = GetLibraryDesc(); constantBufferOffset = nrdLibraryDesc.spirvBindingOffsets.constantBufferOffset; samplerOffset = nrdLibraryDesc.spirvBindingOffsets.samplerOffset; textureOffset = nrdLibraryDesc.spirvBindingOffsets.textureOffset; @@ -163,7 +178,7 @@ void NrdIntegration::CreatePipelines() uint32_t resourceRangesNum = 0; for (uint32_t i = 0; i < instanceDesc.pipelinesNum; i++) { - const nrd::PipelineDesc& nrdPipelineDesc = instanceDesc.pipelines[i]; + const PipelineDesc& nrdPipelineDesc = instanceDesc.pipelines[i]; resourceRangesNum = std::max(resourceRangesNum, nrdPipelineDesc.resourceRangesNum); } resourceRangesNum += 1; // samplers @@ -187,15 +202,15 @@ void NrdIntegration::CreatePipelines() // Pipelines for (uint32_t i = 0; i < instanceDesc.pipelinesNum; i++) { - const nrd::PipelineDesc& nrdPipelineDesc = instanceDesc.pipelines[i]; - const 
nrd::ComputeShaderDesc& nrdComputeShader = (&nrdPipelineDesc.computeShaderDXBC)[std::max((int32_t)deviceDesc.graphicsAPI - 1, 0)]; + const PipelineDesc& nrdPipelineDesc = instanceDesc.pipelines[i]; + const ComputeShaderDesc& nrdComputeShader = (&nrdPipelineDesc.computeShaderDXBC)[std::max((int32_t)deviceDesc.graphicsAPI - 1, 0)]; // Resources for (uint32_t j = 0; j < nrdPipelineDesc.resourceRangesNum; j++) { - const nrd::ResourceRangeDesc& nrdResourceRange = nrdPipelineDesc.resourceRanges[j]; + const ResourceRangeDesc& nrdResourceRange = nrdPipelineDesc.resourceRanges[j]; - if (nrdResourceRange.descriptorType == nrd::DescriptorType::TEXTURE) + if (nrdResourceRange.descriptorType == DescriptorType::TEXTURE) { resourcesRanges[j].baseRegisterIndex = textureOffset + nrdResourceRange.baseRegisterIndex; resourcesRanges[j].descriptorType = nri::DescriptorType::TEXTURE; @@ -241,7 +256,7 @@ void NrdIntegration::CreatePipelines() // Pipeline nri::ShaderDesc computeShader = {}; #ifdef PROJECT_NAME - if (nrdComputeShader.bytecode && !m_IsShadersReloadRequested) + if (nrdComputeShader.bytecode && !m_ReloadShaders) { #endif computeShader.bytecode = nrdComputeShader.bytecode; @@ -263,30 +278,48 @@ void NrdIntegration::CreatePipelines() m_Pipelines.push_back(pipeline); } - m_IsShadersReloadRequested = true; + m_ReloadShaders = true; } -void NrdIntegration::CreateResources(uint16_t resourceWidth, uint16_t resourceHeight) +void Integration::CreateResources(uint16_t resourceWidth, uint16_t resourceHeight) { - const nrd::InstanceDesc& instanceDesc = nrd::GetInstanceDesc(*m_Instance); + const InstanceDesc& instanceDesc = GetInstanceDesc(*m_Instance); const uint32_t poolSize = instanceDesc.permanentPoolSize + instanceDesc.transientPoolSize; - m_ResourceState.resize(poolSize); // No reallocation! - m_TexturePool.resize(poolSize); + m_TexturePool.resize(poolSize); // No reallocation! 
// Texture pool for (uint32_t i = 0; i < poolSize; i++) { // Create NRI texture - const nrd::TextureDesc& nrdTextureDesc = (i < instanceDesc.permanentPoolSize) ? instanceDesc.permanentPool[i] : instanceDesc.transientPool[i - instanceDesc.permanentPoolSize]; - const nri::Format format = NRD_GetNriFormat(nrdTextureDesc.format); + const TextureDesc& nrdTextureDesc = (i < instanceDesc.permanentPoolSize) ? instanceDesc.permanentPool[i] : instanceDesc.transientPool[i - instanceDesc.permanentPoolSize]; - uint16_t w = NRD_DivideUp(resourceWidth, nrdTextureDesc.downsampleFactor); - uint16_t h = NRD_DivideUp(resourceHeight, nrdTextureDesc.downsampleFactor); + nri::Format format = GetNriFormat(nrdTextureDesc.format); + if (m_PromoteFloat16to32) + { + if (format == nri::Format::R16_SFLOAT) + format = nri::Format::R32_SFLOAT; + else if (format == nri::Format::RG16_SFLOAT) + format = nri::Format::RG32_SFLOAT; + else if (format == nri::Format::RGBA16_SFLOAT) + format = nri::Format::RGBA32_SFLOAT; + } + else if (m_DemoteFloat32to16) + { + if (format == nri::Format::R32_SFLOAT) + format = nri::Format::R16_SFLOAT; + else if (format == nri::Format::RG32_SFLOAT) + format = nri::Format::RG16_SFLOAT; + else if (format == nri::Format::RGBA32_SFLOAT) + format = nri::Format::RGBA16_SFLOAT; + } + + uint16_t w = DivideUp(resourceWidth, nrdTextureDesc.downsampleFactor); + uint16_t h = DivideUp(resourceHeight, nrdTextureDesc.downsampleFactor); nri::TextureDesc textureDesc = {}; textureDesc.type = nri::TextureType::TEXTURE_2D; - textureDesc.usageMask = nri::TextureUsageBits::SHADER_RESOURCE | nri::TextureUsageBits::SHADER_RESOURCE_STORAGE; + textureDesc.usage = nri::TextureUsageBits::SHADER_RESOURCE | nri::TextureUsageBits::SHADER_RESOURCE_STORAGE; textureDesc.format = format; textureDesc.width = w; textureDesc.height = h; @@ -303,12 +336,8 @@ void NrdIntegration::CreateResources(uint16_t resourceWidth, uint16_t resourceHe m_NRI->SetTextureDebugName(*texture, name); // Construct NRD texture - 
NrdIntegrationTexture nrdTexture = {}; - nrdTexture.state = &m_ResourceState[i]; - nrdTexture.format = format; - m_TexturePool[i] = nrdTexture; - - nrdTexture.state[0] = nri::TextureBarrierFromUnknown(texture, {nri::AccessBits::UNKNOWN, nri::Layout::UNKNOWN}, 0, 1); + nri::TextureBarrierDesc& nrdTexture = m_TexturePool[i]; + nrdTexture = nri::TextureBarrierFromUnknown(texture, {nri::AccessBits::UNKNOWN, nri::Layout::UNKNOWN}, 0, 1); // Adjust memory usage nri::MemoryDesc memoryDesc = {}; @@ -331,12 +360,12 @@ void NrdIntegration::CreateResources(uint16_t resourceWidth, uint16_t resourceHe // Samplers for (uint32_t i = 0; i < instanceDesc.samplersNum; i++) { - nrd::Sampler nrdSampler = instanceDesc.samplers[i]; + Sampler nrdSampler = instanceDesc.samplers[i]; nri::SamplerDesc samplerDesc = {}; samplerDesc.addressModes = {nri::AddressMode::CLAMP_TO_EDGE, nri::AddressMode::CLAMP_TO_EDGE}; - samplerDesc.filters.min = nrdSampler == nrd::Sampler::NEAREST_CLAMP ? nri::Filter::NEAREST : nri::Filter::LINEAR; - samplerDesc.filters.mag = nrdSampler == nrd::Sampler::NEAREST_CLAMP ? nri::Filter::NEAREST : nri::Filter::LINEAR; + samplerDesc.filters.min = nrdSampler == Sampler::NEAREST_CLAMP ? nri::Filter::NEAREST : nri::Filter::LINEAR; + samplerDesc.filters.mag = nrdSampler == Sampler::NEAREST_CLAMP ? 
nri::Filter::NEAREST : nri::Filter::LINEAR; nri::Descriptor* descriptor = nullptr; NRD_INTEGRATION_ABORT_ON_FAILURE(m_NRI->CreateSampler(*m_Device, samplerDesc, descriptor)); @@ -345,12 +374,12 @@ void NrdIntegration::CreateResources(uint16_t resourceWidth, uint16_t resourceHe // Constant buffer const nri::DeviceDesc& deviceDesc = m_NRI->GetDeviceDesc(*m_Device); - m_ConstantBufferViewSize = NRD_GetAlignedSize(instanceDesc.constantBufferMaxDataSize, deviceDesc.constantBufferOffsetAlignment); + m_ConstantBufferViewSize = GetAlignedSize(instanceDesc.constantBufferMaxDataSize, deviceDesc.constantBufferOffsetAlignment); m_ConstantBufferSize = uint64_t(m_ConstantBufferViewSize) * instanceDesc.descriptorPoolDesc.setsMaxNum * m_BufferedFramesNum; nri::BufferDesc bufferDesc = {}; bufferDesc.size = m_ConstantBufferSize; - bufferDesc.usageMask = nri::BufferUsageBits::CONSTANT_BUFFER; + bufferDesc.usage = nri::BufferUsageBits::CONSTANT_BUFFER; NRD_INTEGRATION_ABORT_ON_FAILURE(m_NRI->CreateBuffer(*m_Device, bufferDesc, m_ConstantBuffer)); AllocateAndBindMemory(); @@ -380,11 +409,11 @@ void NrdIntegration::CreateResources(uint16_t resourceWidth, uint16_t resourceHe } } -void NrdIntegration::AllocateAndBindMemory() +void Integration::AllocateAndBindMemory() { std::vector textures(m_TexturePool.size(), nullptr); for (size_t i = 0; i < m_TexturePool.size(); i++) - textures[i] = (nri::Texture*)m_TexturePool[i].state->texture; + textures[i] = m_TexturePool[i].texture; nri::ResourceGroupDesc resourceGroupDesc = {}; resourceGroupDesc.memoryLocation = nri::MemoryLocation::DEVICE; @@ -406,7 +435,7 @@ void NrdIntegration::AllocateAndBindMemory() NRD_INTEGRATION_ABORT_ON_FAILURE(m_NRIHelper->AllocateAndBindMemory(*m_Device, resourceGroupDesc, m_MemoryAllocations.data() + baseAllocation)); } -void NrdIntegration::NewFrame() +void Integration::NewFrame() { NRD_INTEGRATION_ASSERT(m_Instance, "Uninitialized! 
Did you forget to call 'Initialize'?"); @@ -422,7 +451,7 @@ void NrdIntegration::NewFrame() m_DescriptorSetSamplers[m_DescriptorPoolIndex] = nullptr; // Referenced by the GPU descriptors can't be destroyed... - if (!m_IsDescriptorCachingEnabled) + if (!m_EnableDescriptorCaching) { for (const auto& entry : m_DescriptorsInFlight[m_DescriptorPoolIndex]) m_NRI->DestroyDescriptor(*entry); @@ -432,36 +461,66 @@ void NrdIntegration::NewFrame() m_FrameIndex++; } -bool NrdIntegration::SetCommonSettings(const nrd::CommonSettings& commonSettings) +bool Integration::SetCommonSettings(const CommonSettings& commonSettings) { NRD_INTEGRATION_ASSERT(m_Instance, "Uninitialized! Did you forget to call 'Initialize'?"); - nrd::Result result = nrd::SetCommonSettings(*m_Instance, commonSettings); - NRD_INTEGRATION_ASSERT(result == nrd::Result::SUCCESS, "nrd::SetCommonSettings(): failed!"); + Result result = nrd::SetCommonSettings(*m_Instance, commonSettings); + NRD_INTEGRATION_ASSERT(result == Result::SUCCESS, "SetCommonSettings(): failed!"); - return result == nrd::Result::SUCCESS; + return result == Result::SUCCESS; } -bool NrdIntegration::SetDenoiserSettings(nrd::Identifier denoiser, const void* denoiserSettings) +bool Integration::SetDenoiserSettings(Identifier denoiser, const void* denoiserSettings) { NRD_INTEGRATION_ASSERT(m_Instance, "Uninitialized! 
Did you forget to call 'Initialize'?"); - nrd::Result result = nrd::SetDenoiserSettings(*m_Instance, denoiser, denoiserSettings); - NRD_INTEGRATION_ASSERT(result == nrd::Result::SUCCESS, "nrd::SetDenoiserSettings(): failed!"); + Result result = nrd::SetDenoiserSettings(*m_Instance, denoiser, denoiserSettings); + NRD_INTEGRATION_ASSERT(result == Result::SUCCESS, "SetDenoiserSettings(): failed!"); - return result == nrd::Result::SUCCESS; + return result == Result::SUCCESS; } -void NrdIntegration::Denoise(const nrd::Identifier* denoisers, uint32_t denoisersNum, nri::CommandBuffer& commandBuffer, const NrdUserPool& userPool) +void Integration::Denoise(const Identifier* denoisers, uint32_t denoisersNum, nri::CommandBuffer& commandBuffer, const UserPool& userPool) { NRD_INTEGRATION_ASSERT(m_Instance, "Uninitialized! Did you forget to call 'Initialize'?"); - const nrd::DispatchDesc* dispatchDescs = nullptr; + // One time sanity check + if (m_FrameIndex == 0) + { + const nri::Texture* normalRoughnessTexture = userPool[(size_t)ResourceType::IN_NORMAL_ROUGHNESS]->texture; + const nri::TextureDesc& normalRoughnessDesc = m_NRI->GetTextureDesc(*normalRoughnessTexture); + const LibraryDesc& nrdLibraryDesc = GetLibraryDesc(); + + bool isNormalRoughnessFormatValid = false; + switch(nrdLibraryDesc.normalEncoding) + { + case NormalEncoding::RGBA8_UNORM: + isNormalRoughnessFormatValid = normalRoughnessDesc.format == nri::Format::RGBA8_UNORM; + break; + case NormalEncoding::RGBA8_SNORM: + isNormalRoughnessFormatValid = normalRoughnessDesc.format == nri::Format::RGBA8_SNORM; + break; + case NormalEncoding::R10_G10_B10_A2_UNORM: + isNormalRoughnessFormatValid = normalRoughnessDesc.format == nri::Format::R10_G10_B10_A2_UNORM; + break; + case NormalEncoding::RGBA16_UNORM: + isNormalRoughnessFormatValid = normalRoughnessDesc.format == nri::Format::RGBA16_UNORM; + break; + case NormalEncoding::RGBA16_SNORM: + isNormalRoughnessFormatValid = normalRoughnessDesc.format == 
nri::Format::RGBA16_SNORM || normalRoughnessDesc.format == nri::Format::RGBA16_SFLOAT || normalRoughnessDesc.format == nri::Format::RGBA32_SFLOAT; + break; + } + + NRD_INTEGRATION_ASSERT(isNormalRoughnessFormatValid, "IN_NORMAL_ROUGHNESS format doesn't match NRD normal encoding"); + } + + const DispatchDesc* dispatchDescs = nullptr; uint32_t dispatchDescsNum = 0; - nrd::GetComputeDispatches(*m_Instance, denoisers, denoisersNum, dispatchDescs, dispatchDescsNum); + GetComputeDispatches(*m_Instance, denoisers, denoisersNum, dispatchDescs, dispatchDescsNum); // Even if descriptor caching is disabled it's better to cache descriptors inside a single "Denoise" call - if (!m_IsDescriptorCachingEnabled) + if (!m_EnableDescriptorCaching) m_CachedDescriptors.clear(); nri::DescriptorPool* descriptorPool = m_DescriptorPools[m_DescriptorPoolIndex]; @@ -469,7 +528,7 @@ void NrdIntegration::Denoise(const nrd::Identifier* denoisers, uint32_t denoiser for (uint32_t i = 0; i < dispatchDescsNum; i++) { - const nrd::DispatchDesc& dispatchDesc = dispatchDescs[i]; + const DispatchDesc& dispatchDesc = dispatchDescs[i]; m_NRI->CmdBeginAnnotation(commandBuffer, dispatchDesc.name); Dispatch(commandBuffer, *descriptorPool, dispatchDesc, userPool); @@ -478,10 +537,10 @@ void NrdIntegration::Denoise(const nrd::Identifier* denoisers, uint32_t denoiser } } -void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::DescriptorPool& descriptorPool, const nrd::DispatchDesc& dispatchDesc, const NrdUserPool& userPool) +void Integration::Dispatch(nri::CommandBuffer& commandBuffer, nri::DescriptorPool& descriptorPool, const DispatchDesc& dispatchDesc, const UserPool& userPool) { - const nrd::InstanceDesc& instanceDesc = nrd::GetInstanceDesc(*m_Instance); - const nrd::PipelineDesc& pipelineDesc = instanceDesc.pipelines[dispatchDesc.pipelineIndex]; + const InstanceDesc& instanceDesc = GetInstanceDesc(*m_Instance); + const PipelineDesc& pipelineDesc = 
instanceDesc.pipelines[dispatchDesc.pipelineIndex]; nri::Descriptor** descriptors = (nri::Descriptor**)alloca(sizeof(nri::Descriptor*) * dispatchDesc.resourcesNum); memset(descriptors, 0, sizeof(nri::Descriptor*) * dispatchDesc.resourcesNum); @@ -498,45 +557,46 @@ void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::Descriptor uint32_t n = 0; for (uint32_t i = 0; i < pipelineDesc.resourceRangesNum; i++) { - const nrd::ResourceRangeDesc& resourceRange = pipelineDesc.resourceRanges[i]; - const bool isStorage = resourceRange.descriptorType == nrd::DescriptorType::STORAGE_TEXTURE; + const ResourceRangeDesc& resourceRange = pipelineDesc.resourceRanges[i]; + const bool isStorage = resourceRange.descriptorType == DescriptorType::STORAGE_TEXTURE; resourceRanges[i].descriptors = descriptors + n; resourceRanges[i].descriptorNum = resourceRange.descriptorsNum; for (uint32_t j = 0; j < resourceRange.descriptorsNum; j++) { - const nrd::ResourceDesc& nrdResource = dispatchDesc.resources[n]; + const ResourceDesc& nrdResource = dispatchDesc.resources[n]; - NrdIntegrationTexture* nrdTexture = nullptr; - if (nrdResource.type == nrd::ResourceType::TRANSIENT_POOL) + nri::TextureBarrierDesc* nrdTexture = nullptr; + if (nrdResource.type == ResourceType::TRANSIENT_POOL) nrdTexture = &m_TexturePool[nrdResource.indexInPool + instanceDesc.permanentPoolSize]; - else if (nrdResource.type == nrd::ResourceType::PERMANENT_POOL) + else if (nrdResource.type == ResourceType::PERMANENT_POOL) nrdTexture = &m_TexturePool[nrdResource.indexInPool]; else { - nrdTexture = (NrdIntegrationTexture*)&userPool[(uint32_t)nrdResource.type]; - - NRD_INTEGRATION_ASSERT(nrdTexture && nrdTexture->state && nrdTexture->state->texture, "'userPool' entry can't be NULL if it's in use!"); - NRD_INTEGRATION_ASSERT(nrdTexture->format != nri::Format::UNKNOWN, "Format must be valid!"); + nrdTexture = userPool[(uint32_t)nrdResource.type]; + NRD_INTEGRATION_ASSERT(nrdTexture && nrdTexture->texture, "'userPool' 
entry can't be NULL if it's in use!"); } - const nri::AccessBits nextAccess = nrdResource.descriptorType == nrd::DescriptorType::TEXTURE ? nri::AccessBits::SHADER_RESOURCE : nri::AccessBits::SHADER_RESOURCE_STORAGE; - const nri::Layout nextLayout = nrdResource.descriptorType == nrd::DescriptorType::TEXTURE ? nri::Layout::SHADER_RESOURCE : nri::Layout::SHADER_RESOURCE_STORAGE; - bool isStateChanged = nextAccess != nrdTexture->state->after.access || nextLayout != nrdTexture->state->after.layout; - bool isStorageBarrier = nextAccess == nri::AccessBits::SHADER_RESOURCE_STORAGE && nrdTexture->state->after.access == nri::AccessBits::SHADER_RESOURCE_STORAGE; + const nri::AccessBits nextAccess = nrdResource.descriptorType == DescriptorType::TEXTURE ? nri::AccessBits::SHADER_RESOURCE : nri::AccessBits::SHADER_RESOURCE_STORAGE; + const nri::Layout nextLayout = nrdResource.descriptorType == DescriptorType::TEXTURE ? nri::Layout::SHADER_RESOURCE : nri::Layout::SHADER_RESOURCE_STORAGE; + bool isStateChanged = nextAccess != nrdTexture->after.access || nextLayout != nrdTexture->after.layout; + bool isStorageBarrier = nextAccess == nri::AccessBits::SHADER_RESOURCE_STORAGE && nrdTexture->after.access == nri::AccessBits::SHADER_RESOURCE_STORAGE; if (isStateChanged || isStorageBarrier) - transitions[transitionBarriers.textureNum++] = nri::TextureBarrierFromState(*nrdTexture->state, {nextAccess, nextLayout}, 0, 1); + transitions[transitionBarriers.textureNum++] = nri::TextureBarrierFromState(*nrdTexture, {nextAccess, nextLayout}, 0, 1); - uint64_t resource = m_NRI->GetTextureNativeObject(*nrdTexture->state->texture); - uint64_t key = NRD_CreateDescriptorKey(resource, isStorage); + uint64_t resource = m_NRI->GetTextureNativeObject(*nrdTexture->texture); + uint64_t key = CreateDescriptorKey(resource, isStorage); const auto& entry = m_CachedDescriptors.find(key); nri::Descriptor* descriptor = nullptr; if (entry == m_CachedDescriptors.end()) { - nri::Texture2DViewDesc desc = 
{nrdTexture->state->texture, isStorage ? nri::Texture2DViewType::SHADER_RESOURCE_STORAGE_2D : nri::Texture2DViewType::SHADER_RESOURCE_2D, nrdTexture->format, 0, 1}; + const nri::TextureDesc& textureDesc = m_NRI->GetTextureDesc(*nrdTexture->texture); + + nri::Texture2DViewDesc desc = {nrdTexture->texture, isStorage ? nri::Texture2DViewType::SHADER_RESOURCE_STORAGE_2D : nri::Texture2DViewType::SHADER_RESOURCE_2D, textureDesc.format, 0, 1}; NRD_INTEGRATION_ABORT_ON_FAILURE(m_NRI->CreateTexture2DView(desc, descriptor)); + m_CachedDescriptors.insert( std::make_pair(key, descriptor) ); m_DescriptorsInFlight[m_DescriptorPoolIndex].push_back(descriptor); } @@ -620,15 +680,15 @@ void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::Descriptor printf("Pipeline #%u : %s\n\t", dispatchDesc.pipelineIndex, dispatchDesc.name); for( uint32_t i = 0; i < dispatchDesc.resourcesNum; i++ ) { - const nrd::ResourceDesc& r = dispatchDesc.resources[i]; + const ResourceDesc& r = dispatchDesc.resources[i]; - if( r.type == nrd::ResourceType::PERMANENT_POOL ) + if( r.type == ResourceType::PERMANENT_POOL ) printf("P(%u) ", r.indexInPool); - else if( r.type == nrd::ResourceType::TRANSIENT_POOL ) + else if( r.type == ResourceType::TRANSIENT_POOL ) printf("T(%u) ", r.indexInPool); else { - const char* s = nrd::GetResourceTypeString(r.type); + const char* s = GetResourceTypeString(r.type); printf("%s ", s); } } @@ -636,12 +696,10 @@ void NrdIntegration::Dispatch(nri::CommandBuffer& commandBuffer, nri::Descriptor #endif } -void NrdIntegration::Destroy() +void Integration::Destroy() { NRD_INTEGRATION_ASSERT(m_Instance, "Already destroyed! 
Did you forget to call 'Initialize'?"); - m_ResourceState.clear(); - m_NRI->DestroyDescriptor(*m_ConstantBufferView); m_NRI->DestroyBuffer(*m_ConstantBuffer); @@ -654,8 +712,8 @@ void NrdIntegration::Destroy() m_DescriptorsInFlight.clear(); m_CachedDescriptors.clear(); - for (const NrdIntegrationTexture& nrdTexture : m_TexturePool) - m_NRI->DestroyTexture(*(nri::Texture*)nrdTexture.state->texture); + for (const nri::TextureBarrierDesc& nrdTexture : m_TexturePool) + m_NRI->DestroyTexture(*nrdTexture.texture); m_TexturePool.clear(); for (nri::Descriptor* descriptor : m_Samplers) @@ -679,7 +737,7 @@ void NrdIntegration::Destroy() m_DescriptorPools.clear(); m_DescriptorSetSamplers.clear(); - nrd::DestroyInstance(*m_Instance); + DestroyInstance(*m_Instance); m_NRI = nullptr; m_NRIHelper = nullptr; @@ -687,7 +745,6 @@ void NrdIntegration::Destroy() m_ConstantBuffer = nullptr; m_ConstantBufferView = nullptr; m_Instance = nullptr; - m_Name = nullptr; m_PermanentPoolSize = 0; m_TransientPoolSize = 0; m_ConstantBufferSize = 0; @@ -696,6 +753,8 @@ void NrdIntegration::Destroy() m_BufferedFramesNum = 0; m_DescriptorPoolIndex = 0; m_FrameIndex = 0; - m_IsShadersReloadRequested = false; - m_IsDescriptorCachingEnabled = false; + m_ReloadShaders = false; + m_EnableDescriptorCaching = false; +} + } diff --git a/README.md b/README.md index 08bdeb6e..87cd8ce0 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# NVIDIA REAL-TIME DENOISERS v4.9.4 (NRD) +# NVIDIA REAL-TIME DENOISERS v4.10.0 (NRD) [![Build NRD SDK](https://github.com/NVIDIAGameWorks/RayTracingDenoiser/actions/workflows/build.yml/badge.svg)](https://github.com/NVIDIAGameWorks/RayTracingDenoiser/actions/workflows/build.yml) @@ -490,7 +490,6 @@ NRI.CreateCommandBufferD3D12(*nriDevice, commandBufferDesc, nriCommandBuffer); // Wrap required textures (better do it only once on initialization) nri::TextureBarrierDesc entryDescs[N] = {}; -nri::Format entryFormat[N] = {}; for (uint32_t i = 0; i < N; i++) { @@ -540,11 +539,10 
@@ NRD.SetDenoiserSettings(identifier2, &settings2); // Fill up the user pool NrdUserPool userPool = {}; { - // Fill only required "in-use" inputs and outputs in appropriate slots using entryDescs & entryFormat, - // applying remapping if necessary. Unused slots will be {nullptr, nri::Format::UNKNOWN} - NrdIntegration_SetResource(userPool, ...); + // Set "entryDescs" into the "in-use" slots, applying remapping if necessary + NrdIntegration_SetResource(userPool, nrd::ResourceType::IN_NORMAL_ROUGHNESS, &entryDescs[0]); + NrdIntegration_SetResource(userPool, nrd::ResourceType::IN_VIEWZ, &entryDescs[1]); ... - NrdIntegration_SetResource(userPool, ...); }; const nrd::Identifier denoisers[] = {identifier1, identifier2}; diff --git a/Resources/Version.h b/Resources/Version.h index fafa2e72..376edbad 100644 --- a/Resources/Version.h +++ b/Resources/Version.h @@ -22,7 +22,7 @@ Versioning rules: */ #define VERSION_MAJOR 4 -#define VERSION_MINOR 9 -#define VERSION_BUILD 4 +#define VERSION_MINOR 10 +#define VERSION_BUILD 0 #define VERSION_STRING STR(VERSION_MAJOR.VERSION_MINOR.VERSION_BUILD encoding=NRD_NORMAL_ENCODING.NRD_ROUGHNESS_ENCODING) diff --git a/Shaders/Include/Common.hlsli b/Shaders/Include/Common.hlsli index 955797f1..2a2c6ccb 100644 --- a/Shaders/Include/Common.hlsli +++ b/Shaders/Include/Common.hlsli @@ -58,17 +58,12 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. #define NRD_CURVATURE_Z_THRESHOLD 0.1 // normalized % #define NRD_MAX_ALLOWED_VIRTUAL_MOTION_ACCELERATION 15.0 // keep relatively high to avoid ruining concave mirrors -// IMPORTANT: if == 1, then for 0-roughness "GetEncodingAwareNormalWeight" can return values < 1 even for same normals due to data re-packing -// IMPORTANT: suits for REBLUR and RELAX because both use RGBA8 normals internally -#define NRD_NORMAL_ULP ( 1.5 / 255.0 ) - -// IMPORTANT: best fit is critical for non oct-packed variants! 
#if( NRD_NORMAL_ENCODING < NRD_NORMAL_ENCODING_R10G10B10A2_UNORM ) - #define NRD_NORMAL_ENCODING_ERROR ( 0.5 / 255.0 ) + #define NRD_NORMAL_ENCODING_ERROR ( 1.50 / 255.0 ) #elif( NRD_NORMAL_ENCODING == NRD_NORMAL_ENCODING_R10G10B10A2_UNORM ) - #define NRD_NORMAL_ENCODING_ERROR ( 0.5 / 1023.0 ) + #define NRD_NORMAL_ENCODING_ERROR ( 0.75 / 255.0 ) #else - #define NRD_NORMAL_ENCODING_ERROR ( 0.5 / 65535.0 ) + #define NRD_NORMAL_ENCODING_ERROR ( 0.50 / 255.0 ) #endif //================================================================================================================== @@ -201,10 +196,7 @@ static const float3 g_Special8[ 8 ] = #define CompareMaterials( m0, m, mask ) 1.0 #endif -float UnpackViewZ( float z ) -{ - return abs( z * gViewZScale ); -} +#define UnpackViewZ( z ) abs( z * gViewZScale ) float PixelRadiusToWorld( float unproject, float orthoMode, float pixelRadius, float viewZ ) { @@ -335,6 +327,21 @@ float GetSpecLobeTanHalfAngle( float roughness, float percentOfVolume = 0.75 ) return roughness * roughness * percentOfVolume / ( 1.0 - percentOfVolume + NRD_EPS ); } +float2 StochasticBilinear( float2 uv, float2 texSize ) +{ +#if( REBLUR_USE_STF == 1 ) + // Requires: Rng::Hash::Initialize( pixelPos, gFrameIndex ) + Filtering::Bilinear f = Filtering::GetBilinearFilter( uv, texSize ); + + float2 rnd = Rng::Hash::GetFloat2( ); + f.origin += step( rnd, f.weights ); + + return ( f.origin + 0.5 ) / texSize; +#else + return uv; +#endif +} + // Thin lens float ApplyThinLensEquation( float hitDist, float curvature ) @@ -481,17 +488,17 @@ float GetGaussianWeight( float r ) // Encoding precision aware weight functions ( for reprojection ) -float GetEncodingAwareNormalWeight( float3 Ncurr, float3 Nprev, float maxAngle, float angleThreshold = 0.0 ) +float GetEncodingAwareNormalWeight( float3 Ncurr, float3 Nprev, float maxAngle, float curvatureAngle, float thresholdAngle ) { - // Anything below "angleThreshold" is ignored - angleThreshold += NRD_NORMAL_ULP; + // 
Anything below "thresholdAngle" is ignored + curvatureAngle += thresholdAngle; float cosa = dot( Ncurr, Nprev ); float a = 1.0 / maxAngle; float d = Math::AcosApprox( cosa ); - float w = Math::SmoothStep01( 1.0 - ( d - angleThreshold ) * a ); + float w = Math::SmoothStep01( 1.0 - ( d - curvatureAngle ) * a ); // Needed to mitigate imprecision issues because prev normals are RGBA8 ( test 3, 43 if roughness is low ) w = Math::SmoothStep( 0.05, 0.95, w ); diff --git a/Shaders/Include/REBLUR_Blur.hlsli b/Shaders/Include/REBLUR_Blur.hlsli index 1dc659b3..5595dea5 100644 --- a/Shaders/Include/REBLUR_Blur.hlsli +++ b/Shaders/Include/REBLUR_Blur.hlsli @@ -17,9 +17,10 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : return; // Early out - float viewZ = UnpackViewZ( gIn_ViewZ[ WithRectOrigin( pixelPos ) ] ); - gOut_ViewZ[ pixelPos ] = REBLUR_PackViewZ( viewZ ); + float viewZpacked = gIn_ViewZ[ WithRectOrigin( pixelPos ) ]; + gOut_ViewZ[ pixelPos ] = viewZpacked; + float viewZ = UnpackViewZ( viewZpacked ); if( viewZ > gDenoisingRange ) return; diff --git a/Shaders/Include/REBLUR_Common.hlsli b/Shaders/Include/REBLUR_Common.hlsli index f099c7b0..75928046 100644 --- a/Shaders/Include/REBLUR_Common.hlsli +++ b/Shaders/Include/REBLUR_Common.hlsli @@ -22,24 +22,6 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. 
// Internal data ( from the previous frame ) -#define REBLUR_PackViewZ( p ) min( p * REBLUR_FP16_VIEWZ_SCALE, NRD_FP16_MAX ) -#define REBLUR_UnpackViewZ( p ) ( p / REBLUR_FP16_VIEWZ_SCALE ) - -float4 PackNormalRoughness( float4 p ) -{ - return float4( p.xyz * 0.5 + 0.5, p.w ); -} - -float4 UnpackNormalAndRoughness( float4 p, bool isNormalized = true ) -{ - p.xyz = p.xyz * 2.0 - 1.0; - - if( isNormalized ) - p.xyz = _NRD_SafeNormalize( p.xyz ); - - return p; -} - uint PackInternalData( float diffAccumSpeed, float specAccumSpeed, float materialID ) { float3 t = float3( diffAccumSpeed, specAccumSpeed, materialID ); @@ -151,6 +133,13 @@ float GetNonLinearAccumSpeed( float accumSpeed, float maxAccumSpeed, float confi return nonLinearAccumSpeed; } +float RemapRoughnessToResponsiveFactor( float roughness ) +{ + float amount = ( roughness + NRD_EPS ) / ( gResponsiveAccumulationRoughnessThreshold + NRD_EPS ); + + return Math::SmoothStep01( amount ); +} + // Misc ( templates ) // Hit distance is normalized @@ -283,13 +272,6 @@ float ComputeAntilag( REBLUR_TYPE history, REBLUR_TYPE signal, REBLUR_TYPE sigma // Kernel -float GetResponsiveAccumulationAmount( float roughness ) -{ - float amount = 1.0 - ( roughness + NRD_EPS ) / ( gResponsiveAccumulationRoughnessThreshold + NRD_EPS ); - - return Math::SmoothStep01( amount ); -} - float2x3 GetKernelBasis( float3 D, float3 N, float NoD, float roughness = 1.0, float anisoFade = 1.0 ) { float3x3 basis = Geometry::GetBasis( N ); @@ -320,7 +302,7 @@ float GetNormalWeightParams( float nonLinearAccumSpeed, float roughness = 1.0 ) float percentOfVolume = REBLUR_MAX_PERCENT_OF_LOBE_VOLUME * lerp( gLobeAngleFraction, 1.0, nonLinearAccumSpeed ); float angle = atan( ImportanceSampling::GetSpecularLobeTanHalfAngle( roughness, percentOfVolume ) ); - return 1.0 / max( angle, NRD_NORMAL_ULP ); + return 1.0 / max( angle, REBLUR_NORMAL_ULP ); } float2 GetTemporalAccumulationParams( float isInScreenMulFootprintQuality, float accumSpeed ) diff --git 
a/Shaders/Include/REBLUR_Common_DiffuseSpatialFilter.hlsli b/Shaders/Include/REBLUR_Common_DiffuseSpatialFilter.hlsli index 71ebe832..1a86a92a 100644 --- a/Shaders/Include/REBLUR_Common_DiffuseSpatialFilter.hlsli +++ b/Shaders/Include/REBLUR_Common_DiffuseSpatialFilter.hlsli @@ -112,7 +112,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. // Fetch data #if( REBLUR_SPATIAL_MODE == REBLUR_POST_BLUR ) - float zs = REBLUR_UnpackViewZ( gIn_ViewZ.SampleLevel( gNearestClamp, uvScaled, 0 ) ); + float zs = UnpackViewZ( gIn_ViewZ.SampleLevel( gNearestClamp, uvScaled, 0 ) ); #else float zs = UnpackViewZ( gIn_ViewZ.SampleLevel( gNearestClamp, WithRectOffset( uvScaled ), 0 ) ); #endif diff --git a/Shaders/Include/REBLUR_Common_SpecularSpatialFilter.hlsli b/Shaders/Include/REBLUR_Common_SpecularSpatialFilter.hlsli index 141caf21..849ef272 100644 --- a/Shaders/Include/REBLUR_Common_SpecularSpatialFilter.hlsli +++ b/Shaders/Include/REBLUR_Common_SpecularSpatialFilter.hlsli @@ -123,7 +123,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. // Fetch data #if( REBLUR_SPATIAL_MODE == REBLUR_POST_BLUR ) - float zs = REBLUR_UnpackViewZ( gIn_ViewZ.SampleLevel( gNearestClamp, uvScaled, 0 ) ); + float zs = UnpackViewZ( gIn_ViewZ.SampleLevel( gNearestClamp, uvScaled, 0 ) ); #else float zs = UnpackViewZ( gIn_ViewZ.SampleLevel( gNearestClamp, WithRectOffset( uvScaled ), 0 ) ); #endif diff --git a/Shaders/Include/REBLUR_Config.hlsli b/Shaders/Include/REBLUR_Config.hlsli index ae8fa95a..f1932c69 100644 --- a/Shaders/Include/REBLUR_Config.hlsli +++ b/Shaders/Include/REBLUR_Config.hlsli @@ -19,6 +19,7 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. 
#define REBLUR_USE_YCOCG 1 #define REBLUR_USE_ANTIFIREFLY 1 #define REBLUR_USE_CONFIDENCE_NON_LINEARLY 1 +#define REBLUR_USE_STF 1 // gives very minor IQ boost visible only in debug visualization // Switches ( default 0 ) #define REBLUR_USE_SCREEN_SPACE_SAMPLING 0 @@ -66,7 +67,8 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. #define REBLUR_HIT_DIST_MIN_WEIGHT( smc ) ( 0.1 * smc ) // was 0.1 -#define REBLUR_FP16_VIEWZ_SCALE ( gViewZScale * 0.125 ) // TODO: tuned for meters, i.e. gViewZScale = 1.0 +#define REBLUR_NORMAL_ULP NRD_NORMAL_ENCODING_ERROR // was 1.5 / 255.0 ( too much for 0 roughness ) +#define REBLUR_ALMOST_ZERO_ANGLE cos( Math::DegToRad( 89.0 ) ) #define REBLUR_MAX_PERCENT_OF_LOBE_VOLUME 0.75 #define REBLUR_VIRTUAL_MOTION_PREV_PREV_WEIGHT_ITERATION_NUM 1 #define REBLUR_FIREFLY_SUPPRESSOR_MAX_RELATIVE_INTENSITY 38.0 @@ -186,6 +188,9 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. #undef REBLUR_USE_SCREEN_SPACE_SAMPLING #define REBLUR_USE_SCREEN_SPACE_SAMPLING 1 + #undef REBLUR_USE_STF + #define REBLUR_USE_STF 0 + #undef REBLUR_POISSON_SAMPLE_NUM #define REBLUR_POISSON_SAMPLE_NUM 6 diff --git a/Shaders/Include/REBLUR_PostBlur.hlsli b/Shaders/Include/REBLUR_PostBlur.hlsli index 1060cd34..70467474 100644 --- a/Shaders/Include/REBLUR_PostBlur.hlsli +++ b/Shaders/Include/REBLUR_PostBlur.hlsli @@ -14,26 +14,17 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : // Tile-based early out float isSky = gIn_Tiles[ pixelPos >> 4 ]; if( isSky != 0.0 || any( pixelPos > gRectSizeMinusOne ) ) - { - // ~0 normal is needed to allow bilinear filter in TA ( 0 can't be used due to "division by zero" in "UnpackNormalRoughness" ) - gOut_Normal_Roughness[ pixelPos ] = PackNormalRoughness( 1.0 / 255.0 ); - return; // IMPORTANT: no data output, must be rejected by the "viewZ" check! 
- } // Early out - float viewZ = REBLUR_UnpackViewZ( gIn_ViewZ[ pixelPos ] ); + float viewZ = UnpackViewZ( gIn_ViewZ[ pixelPos ] ); if( viewZ > gDenoisingRange ) - { - // ~0 normal is needed to allow bilinear filter in TA ( 0 can't be used due to "division by zero" in "UnpackNormalRoughness" ) - gOut_Normal_Roughness[ pixelPos ] = PackNormalRoughness( 1.0 / 255.0 ); - return; // IMPORTANT: no data output, must be rejected by the "viewZ" check! - } // Normal and roughness float materialID; - float4 normalAndRoughness = NRD_FrontEnd_UnpackNormalAndRoughness( gIn_Normal_Roughness[ WithRectOrigin( pixelPos ) ], materialID ); + float4 normalAndRoughnessPacked = gIn_Normal_Roughness[ WithRectOrigin( pixelPos ) ]; + float4 normalAndRoughness = NRD_FrontEnd_UnpackNormalAndRoughness( normalAndRoughnessPacked, materialID ); float3 N = normalAndRoughness.xyz; float3 Nv = Geometry::RotateVectorInverse( gViewToWorld, N ); float roughness = normalAndRoughness.w; @@ -49,7 +40,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : float NoV = abs( dot( Nv, Vv ) ); // Output - gOut_Normal_Roughness[ pixelPos ] = PackNormalRoughness( normalAndRoughness ); + gOut_Normal_Roughness[ pixelPos ] = normalAndRoughnessPacked; #ifdef REBLUR_NO_TEMPORAL_STABILIZATION gOut_InternalData[ pixelPos ] = PackInternalData( data1.x + 1.0, data1.y + 1.0, materialID ); // increment history length #endif diff --git a/Shaders/Include/REBLUR_TemporalAccumulation.hlsli b/Shaders/Include/REBLUR_TemporalAccumulation.hlsli index ed69f0c0..abab886f 100644 --- a/Shaders/Include/REBLUR_TemporalAccumulation.hlsli +++ b/Shaders/Include/REBLUR_TemporalAccumulation.hlsli @@ -111,6 +111,10 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : // Hit distance for tracking ( tests 8, 110, 139, e3, e9 without normal map, e24 ) #ifdef REBLUR_SPECULAR + #if( REBLUR_USE_STF == 1 ) + Rng::Hash::Initialize( pixelPos, gFrameIndex ); + #endif + float 
hitDistNormalization = _REBLUR_GetHitDistanceNormalization( viewZ, gHitDistParams, roughness ); hitDistForTracking = hitDistForTracking == NRD_INF ? 0.0 : hitDistForTracking; @@ -168,17 +172,38 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : float4 smbViewZ2 = gIn_Prev_ViewZ.GatherRed( gNearestClamp, smbCatromGatherUv, float2( 1, 3 ) ).wzxy; float4 smbViewZ3 = gIn_Prev_ViewZ.GatherRed( gNearestClamp, smbCatromGatherUv, float2( 3, 3 ) ).wzxy; - float3 prevViewZ0 = REBLUR_UnpackViewZ( smbViewZ0.yzw ); - float3 prevViewZ1 = REBLUR_UnpackViewZ( smbViewZ1.xzw ); - float3 prevViewZ2 = REBLUR_UnpackViewZ( smbViewZ2.xyw ); - float3 prevViewZ3 = REBLUR_UnpackViewZ( smbViewZ3.xyz ); + float3 prevViewZ0 = UnpackViewZ( smbViewZ0.yzw ); + float3 prevViewZ1 = UnpackViewZ( smbViewZ1.xzw ); + float3 prevViewZ2 = UnpackViewZ( smbViewZ2.xyw ); + float3 prevViewZ3 = UnpackViewZ( smbViewZ3.xyz ); // Previous normal averaged for all pixels in 2x2 footprint // IMPORTANT: bilinear filter can touch sky pixels, due to this reason "Post Blur" writes special values into sky-pixels Filtering::Bilinear smbBilinearFilter = Filtering::GetBilinearFilter( smbPixelUv, gRectSizePrev ); - float2 smbBilinearGatherUv = ( smbBilinearFilter.origin + 1.0 ) * gResourceSizeInvPrev; - float3 prevNavg = UnpackNormalAndRoughness( gIn_Prev_Normal_Roughness.SampleLevel( gLinearClamp, smbBilinearGatherUv, 0 ), false ).xyz; - prevNavg = Geometry::RotateVector( gWorldPrevToWorld, prevNavg ); + float3 smbNavg = 0; // TODO: the sum works well, but probably there is a potential to use just "1 smart sample" (see test 168) + { + uint2 p = uint2( smbBilinearFilter.origin ); + float sum = 0.0; + + float w = float( prevViewZ0.z < gDenoisingRange ); + smbNavg = NRD_FrontEnd_UnpackNormalAndRoughness( gIn_Prev_Normal_Roughness[ p ] ).xyz * w; + sum += w; + + w = float( prevViewZ1.y < gDenoisingRange ); + smbNavg += NRD_FrontEnd_UnpackNormalAndRoughness( gIn_Prev_Normal_Roughness[ p + uint2( 
1, 0 ) ] ).xyz * w; + sum += w; + + w = float( prevViewZ2.y < gDenoisingRange ); + smbNavg += NRD_FrontEnd_UnpackNormalAndRoughness( gIn_Prev_Normal_Roughness[ p + uint2( 0, 1 ) ] ).xyz * w; + sum += w; + + w = float( prevViewZ3.x < gDenoisingRange ); + smbNavg += NRD_FrontEnd_UnpackNormalAndRoughness( gIn_Prev_Normal_Roughness[ p + uint2( 1, 1 ) ] ).xyz * w; + sum += w; + + smbNavg /= sum == 0.0 ? 1.0 : sum; + } + smbNavg = Geometry::RotateVector( gWorldPrevToWorld, smbNavg ); // Disocclusion: threshold float pixelSize = PixelRadiusToWorld( gUnproject, gOrthoMode, 1.0, viewZ ); @@ -196,7 +221,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : float NoV = abs( dot( N, V ) ); float disocclusionThresholdSlopeScale = 1.0 / lerp( lerp( 0.05, 1.0, NoV ), 1.0, saturate( smbParallaxInPixels / 30.0 ) ); float4 smbDisocclusionThreshold = saturate( disocclusionThreshold * disocclusionThresholdSlopeScale ) * frustumSize; - smbDisocclusionThreshold *= float( dot( prevNavg, Navg ) > cos( Math::DegToRad( 89.0 ) ) ); + smbDisocclusionThreshold *= float( dot( smbNavg, Navg ) > REBLUR_ALMOST_ZERO_ANGLE ); // good for smb smbDisocclusionThreshold *= IsInScreenBilinear( smbBilinearFilter.origin, gRectSizePrev ); smbDisocclusionThreshold -= NRD_EPS; @@ -231,6 +256,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : uint4 smbInternalData = uint4( smbInternalData0.w, smbInternalData1.z, smbInternalData2.y, smbInternalData3.x ); #else + float2 smbBilinearGatherUv = ( smbBilinearFilter.origin + 1.0 ) * gResourceSizeInvPrev; uint4 smbInternalData = gIn_Prev_InternalData.GatherRed( gNearestClamp, smbBilinearGatherUv ).wzxy; #endif @@ -261,7 +287,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : // Footprint quality float3 smbVprev = GetViewVectorPrev( Xprev, gCameraDelta.xyz ); - float NoVprev = abs( dot( N, smbVprev ) ); // TODO: should be prevNavg ( normalized? 
), but jittering breaks logic + float NoVprev = abs( dot( N, smbVprev ) ); // TODO: should be smbNavg ( normalized? ), but jittering breaks logic float sizeQuality = ( NoVprev + 1e-3 ) / ( NoV + 1e-3 ); // this order because we need to fix stretching only, shrinking is OK sizeQuality *= sizeQuality; sizeQuality = lerp( 0.1, 1.0, saturate( sizeQuality ) ); @@ -328,7 +354,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : - curvature on bumpy surfaces is just wrong, pulling virtual positions into a surface and introducing lags - suboptimal reprojection if curvature changes signs under motion */ - float curvature; + float curvature = 0.0; { // IMPORTANT: this code allows to get non-zero parallax on objects attached to the camera float2 uvForZeroParallax = gOrthoMode == 0.0 ? smbPixelUv : pixelUv; @@ -362,27 +388,16 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : float2 motionUvHigh = pixelUv + deltaUvLenFixed * deltaUv * gRectSizeInv; if( NRD_USE_HIGH_PARALLAX_CURVATURE && deltaUvLenFixed > 1.0 && IsInScreenNearest( motionUvHigh ) ) { + float2 pos = StochasticBilinear( motionUvHigh, gRectSize ); + pos = WithRectOffset( pos * gResolutionScale ); + // Construct the other edge point "xHigh" - float zHigh = UnpackViewZ( gIn_ViewZ.SampleLevel( gLinearClamp, WithRectOffset( motionUvHigh * gResolutionScale ), 0 ) ); - float3 xHigh = Geometry::ReconstructViewPosition( motionUvHigh, gFrustum, zHigh, gOrthoMode ); + float zHigh = UnpackViewZ( gIn_ViewZ.SampleLevel( gNearestClamp, pos, 0 ) ); + float3 xHigh = Geometry::ReconstructViewPosition( pos, gFrustum, zHigh, gOrthoMode ); xHigh = Geometry::RotateVector( gViewToWorld, xHigh ); // Interpolate normal at "xHigh" - #if( NRD_NORMAL_ENCODING == NRD_NORMAL_ENCODING_R10G10B10A2_UNORM ) - f = Filtering::GetBilinearFilter( motionUvHigh, gRectSize ); - - f.origin = clamp( f.origin, 0, gRectSize - 2.0 ); - pos = gRectOrigin + int2( f.origin ); - - n00 = 
NRD_FrontEnd_UnpackNormalAndRoughness( gIn_Normal_Roughness[ pos ] ).xyz; - n10 = NRD_FrontEnd_UnpackNormalAndRoughness( gIn_Normal_Roughness[ pos + int2( 1, 0 ) ] ).xyz; - n01 = NRD_FrontEnd_UnpackNormalAndRoughness( gIn_Normal_Roughness[ pos + int2( 0, 1 ) ] ).xyz; - n11 = NRD_FrontEnd_UnpackNormalAndRoughness( gIn_Normal_Roughness[ pos + int2( 1, 1 ) ] ).xyz; - - float3 nHigh = _NRD_SafeNormalize( Filtering::ApplyBilinearFilter( n00, n10, n01, n11, f ) ); - #else - float3 nHigh = NRD_FrontEnd_UnpackNormalAndRoughness( gIn_Normal_Roughness.SampleLevel( gLinearClamp, WithRectOffset( motionUvHigh * gResolutionScale ), 0 ) ).xyz; - #endif + float3 nHigh = NRD_FrontEnd_UnpackNormalAndRoughness( gIn_Normal_Roughness.SampleLevel( gNearestClamp, pos, 0 ) ).xyz; // Replace if same surface float zError = abs( zHigh - viewZ ) * rcp( max( zHigh, viewZ ) ); @@ -397,16 +412,11 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : float edgeLenSq = Math::LengthSquared( edge ); curvature = dot( n - N, edge ) * Math::PositiveRcp( edgeLenSq ); - // Correction #1 - values below this threshold get turned into garbage due to numerical imprecision - float d = Math::ManhattanDistance( N, n ); - float s = Math::LinearStep( NRD_NORMAL_ENCODING_ERROR, 2.0 * NRD_NORMAL_ENCODING_ERROR, d ); - curvature *= s; - - // Correction #2 - this is needed if camera is "inside" a concave mirror ( tests 133, 164, 171 - 176 ) + // Correction #1 - this is needed if camera is "inside" a concave mirror ( tests 133, 164, 171 - 176 ) if( length( X ) < -1.0 / curvature ) // TODO: test 78 curvature *= NoV; - // Correction #3 - very negative inconsistent with previous frame curvature blows up reprojection ( tests 164, 171 - 176 ) + // Correction #2 - very negative inconsistent with previous frame curvature blows up reprojection ( tests 164, 171 - 176 ) float2 uv1 = Geometry::GetScreenUv( gWorldToClipPrev, X - V * ApplyThinLensEquation( hitDistForTracking, curvature ) ); float2 uv2 
= Geometry::GetScreenUv( gWorldToClipPrev, X ); float a = length( ( uv1 - uv2 ) * gRectSize ); @@ -425,29 +435,34 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : Filtering::Bilinear vmbBilinearFilter = Filtering::GetBilinearFilter( vmbPixelUv, gRectSizePrev ); float2 vmbBilinearGatherUv = ( vmbBilinearFilter.origin + 1.0 ) * gResourceSizeInvPrev; float2 relaxedRoughnessWeightParams = GetRelaxedRoughnessWeightParams( roughness * roughness, gRoughnessFraction, REBLUR_ROUGHNESS_SENSITIVITY_IN_TA ); + #if( NRD_NORMAL_ENCODING == NRD_NORMAL_ENCODING_R10G10B10A2_UNORM ) + float4 vmbRoughness = gIn_Prev_Normal_Roughness.GatherBlue( gNearestClamp, vmbBilinearGatherUv ).wzxy; + #else float4 vmbRoughness = gIn_Prev_Normal_Roughness.GatherAlpha( gNearestClamp, vmbBilinearGatherUv ).wzxy; + #endif float4 roughnessWeight = ComputeNonExponentialWeightWithSigma( vmbRoughness * vmbRoughness, relaxedRoughnessWeightParams.x, relaxedRoughnessWeightParams.y, roughnessSigma ); roughnessWeight = lerp( Math::SmoothStep( 1.0, 0.0, smbParallaxInPixels ), 1.0, roughnessWeight ); // jitter friendly float virtualHistoryRoughnessBasedConfidence = Filtering::ApplyBilinearFilter( roughnessWeight.x, roughnessWeight.y, roughnessWeight.z, roughnessWeight.w, vmbBilinearFilter ); // Virtual motion - normal: parallax ( test 132 ) - float4 vmbNormalAndRoughness = UnpackNormalAndRoughness( gIn_Prev_Normal_Roughness.SampleLevel( gLinearClamp, vmbPixelUv * gResolutionScalePrev, 0 ) ); + float4 vmbNormalAndRoughness = NRD_FrontEnd_UnpackNormalAndRoughness( gIn_Prev_Normal_Roughness.SampleLevel( gNearestClamp, StochasticBilinear( vmbPixelUv, gRectSizePrev ) * gResolutionScalePrev, 0 ) ); float3 vmbN = Geometry::RotateVector( gWorldPrevToWorld, vmbNormalAndRoughness.xyz ); - float virtualHistoryNormalBasedConfidence = 1.0 / ( 1.0 + 0.5 * Dfactor * saturate( length( N - vmbN ) - NRD_NORMAL_ULP ) * vmbPixelsTraveled ); + float virtualHistoryNormalBasedConfidence = 1.0 / ( 1.0 
+ 0.5 * Dfactor * saturate( length( N - vmbN ) - REBLUR_NORMAL_ULP ) * vmbPixelsTraveled ); // Virtual motion - disocclusion: plane distance and roughness float4 vmbOcclusion; { float4 vmbOcclusionThreshold = disocclusionThreshold * frustumSize; vmbOcclusionThreshold *= lerp( 0.25, 1.0, NoV ); // yes, "*" not "/" // TODO: it's from commit "fixed suboptimal "vmb" reprojection behavior in disocclusions", but is it really needed? - vmbOcclusionThreshold *= float( dot( vmbN, N ) > 0.0 ); // TODO: Navg? + vmbOcclusionThreshold *= float( dot( vmbN, N ) > REBLUR_ALMOST_ZERO_ANGLE ); // good for vmb + vmbOcclusionThreshold *= float( dot( smbNavg, vmbN ) > REBLUR_ALMOST_ZERO_ANGLE ); // bonus check for test 168 vmbOcclusionThreshold *= IsInScreenBilinear( vmbBilinearFilter.origin, gRectSizePrev ); vmbOcclusionThreshold -= NRD_EPS; - float4 vmbViewZ = REBLUR_UnpackViewZ( gIn_Prev_ViewZ.GatherRed( gNearestClamp, vmbBilinearGatherUv ).wzxy ); + float4 vmbViewZ = UnpackViewZ( gIn_Prev_ViewZ.GatherRed( gNearestClamp, vmbBilinearGatherUv ).wzxy ); float3 vmbVv = Geometry::ReconstructViewPosition( vmbPixelUv, gFrustumPrev, 1.0 ); // unnormalized, orthoMode = 0 float3 vmbV = Geometry::RotateVectorInverse( gWorldToViewPrev, vmbVv ); - float NoXcurr = dot( N, X - gCameraDelta.xyz ); + float NoXcurr = dot( N, Xprev - gCameraDelta.xyz ); float4 NoXprev = ( N.x * vmbV.x + N.y * vmbV.y ) * ( gOrthoMode == 0 ? vmbViewZ : gOrthoMode ) + N.z * vmbV.z * vmbViewZ; float4 vmbPlaneDist = abs( NoXprev - NoXcurr ); @@ -490,15 +505,16 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : // angle is PI ( most left and most right points on a hemisphere ), it can be achieved by using "tan" instead of angle. float curvatureAngleTan = pixelSize * abs( curvature ); // tana = pixelSize / curvatureRadius = pixelSize * curvature curvatureAngleTan *= max( vmbPixelsTraveled / max( NoV, 0.01 ), 1.0 ); // path length + curvatureAngleTan *= 2.0; // TODO: why it's here? 
it's needed to allow REBLUR_NORMAL_ULP values < "1.5 / 255.0" + float curvatureAngle = atan( curvatureAngleTan ); float lobeTanHalfAngle = ImportanceSampling::GetSpecularLobeTanHalfAngle( roughnessModified, REBLUR_MAX_PERCENT_OF_LOBE_VOLUME ); - lobeTanHalfAngle *= 1.0 / ( 1.0 + vmbSpecAccumSpeed ); // make more strict if history is long - + lobeTanHalfAngle *= 1.0 / ( 1.0 + vmbSpecAccumSpeed ); // make more strict if history is long, modifying "tan" works better float lobeHalfAngle = atan( lobeTanHalfAngle ); - float curvatureAngle = atan( curvatureAngleTan ); + lobeHalfAngle = max( lobeHalfAngle, REBLUR_NORMAL_ULP ); // Virtual motion - normal: lobe overlapping ( test 107 ) - float normalWeight = GetEncodingAwareNormalWeight( N, vmbN, lobeHalfAngle, curvatureAngle ); + float normalWeight = GetEncodingAwareNormalWeight( N, vmbN, lobeHalfAngle, curvatureAngle, REBLUR_NORMAL_ULP ); normalWeight = lerp( Math::SmoothStep( 1.0, 0.0, vmbPixelsTraveled ), 1.0, normalWeight ); // jitter friendly virtualHistoryNormalBasedConfidence = min( virtualHistoryNormalBasedConfidence, normalWeight ); @@ -537,10 +553,10 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : for( i = 1; i <= REBLUR_VIRTUAL_MOTION_PREV_PREV_WEIGHT_ITERATION_NUM; i++ ) { float2 vmbPixelUvPrev = vmbPixelUv + vmbDelta * i * stepBetweenTaps; - float4 vmbNormalAndRoughnessPrev = UnpackNormalAndRoughness( gIn_Prev_Normal_Roughness.SampleLevel( gLinearClamp, vmbPixelUvPrev * gResolutionScalePrev, 0 ) ); + float4 vmbNormalAndRoughnessPrev = NRD_FrontEnd_UnpackNormalAndRoughness( gIn_Prev_Normal_Roughness.SampleLevel( gNearestClamp, StochasticBilinear( vmbPixelUvPrev, gRectSizePrev ) * gResolutionScalePrev, 0 ) ); float2 w; - w.x = GetEncodingAwareNormalWeight( vmbNormalAndRoughness.xyz, vmbNormalAndRoughnessPrev.xyz, lobeHalfAngle, curvatureAngle * ( 1.0 + i * stepBetweenTaps ) ); + w.x = GetEncodingAwareNormalWeight( vmbNormalAndRoughness.xyz, vmbNormalAndRoughnessPrev.xyz, 
lobeHalfAngle, curvatureAngle * ( 1.0 + i * stepBetweenTaps ), REBLUR_NORMAL_ULP ); w.y = ComputeNonExponentialWeightWithSigma( vmbNormalAndRoughnessPrev.w * vmbNormalAndRoughnessPrev.w, relaxedRoughnessWeightParams.x, relaxedRoughnessWeightParams.y, roughnessSigma ); w = IsInScreenNearest( vmbPixelUvPrev ) ? w : 1.0; @@ -591,20 +607,33 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : surfaceHistoryConfidence = Math::Pow01( f, 4.0 ); } + // Responsive accumulation + float2 maxResponsiveFrameNum = gMaxAccumulatedFrameNum; + { + float responsiveFactor = RemapRoughnessToResponsiveFactor( roughness ); + float smc = GetSpecMagicCurve( roughnessModified ); + + float2 f; + f.x = dot( N, normalize( smbNavg ) ); + f.y = dot( N, vmbN ); + f = lerp( smc, 1.0, responsiveFactor ) * Math::Pow01( f, lerp( 32.0, 1.0, smc ) * ( 1.0 - responsiveFactor ) ); + + maxResponsiveFrameNum = max( gMaxAccumulatedFrameNum * f, gHistoryFixFrameNum ); + } + // Surface motion: max allowed frames - float smbMaxFrameNumNoBoost = gMaxAccumulatedFrameNum * surfaceHistoryConfidence; + float smbMaxFrameNumNoBoost = gMaxAccumulatedFrameNum; + smbMaxFrameNumNoBoost *= surfaceHistoryConfidence; + smbMaxFrameNumNoBoost = min( smbMaxFrameNumNoBoost, maxResponsiveFrameNum.x ); // Ensure that HistoryFix pass doesn't pop up without a disocclusion in critical cases float smbMaxFrameNum = max( smbMaxFrameNumNoBoost, gHistoryFixFrameNum * ( 1.0 - virtualHistoryConfidenceForSmbRelaxation ) ); // Virtual motion: max allowed frames - float responsiveAccumulationAmount = GetResponsiveAccumulationAmount( roughness ); - float smc = GetSpecMagicCurve( roughnessModified ); - responsiveAccumulationAmount = lerp( 1.0, smc, responsiveAccumulationAmount ); - - float vmbMaxFrameNum = gMaxAccumulatedFrameNum * responsiveAccumulationAmount; + float vmbMaxFrameNum = gMaxAccumulatedFrameNum; vmbMaxFrameNum *= virtualHistoryParallaxBasedConfidence; vmbMaxFrameNum *= 
virtualHistoryNormalBasedConfidence; + vmbMaxFrameNum = min( vmbMaxFrameNum, maxResponsiveFrameNum.y ); // Limit number of accumulated frames float smbSpecAccumSpeedNoBoost = min( smbSpecAccumSpeed, smbMaxFrameNumNoBoost ); @@ -725,6 +754,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : #elif( REBLUR_SHOW == REBLUR_SHOW_VIRTUAL_HISTORY_PARALLAX_CONFIDENCE ) virtualHistoryAmount = virtualHistoryParallaxBasedConfidence; #elif( REBLUR_SHOW == REBLUR_SHOW_HIT_DIST_FOR_TRACKING ) + float smc = GetSpecMagicCurve( roughness ); virtualHistoryAmount = hitDistForTracking * lerp( 1.0, 5.0, smc ) / ( 1.0 + hitDistForTracking * lerp( 1.0, 5.0, smc ) ); #endif #else diff --git a/Shaders/Include/REBLUR_TemporalStabilization.hlsli b/Shaders/Include/REBLUR_TemporalStabilization.hlsli index 0ab18a6a..b467981a 100644 --- a/Shaders/Include/REBLUR_TemporalStabilization.hlsli +++ b/Shaders/Include/REBLUR_TemporalStabilization.hlsli @@ -47,7 +47,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : return; // Early out - float viewZ = REBLUR_UnpackViewZ( gIn_ViewZ[ WithRectOrigin( pixelPos ) ] ); + float viewZ = UnpackViewZ( gIn_ViewZ[ WithRectOrigin( pixelPos ) ] ); if( viewZ > gDenoisingRange ) return; // IMPORTANT: no data output, must be rejected by the "viewZ" check! @@ -403,7 +403,11 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : float specHistoryWeight = specTemporalAccumulationParams.x; specHistoryWeight *= specAntilag; // this is important specHistoryWeight *= specStabilizationStrength; - specHistoryWeight *= materialID == gStrandMaterialID ? 0.5 : 1.0; + + float responsiveFactor = RemapRoughnessToResponsiveFactor( roughness ); + float smc = GetSpecMagicCurve( roughness ); + float acceleration = lerp( smc, 1.0, 0.5 + responsiveFactor * 0.5 ); + specHistoryWeight *= materialID == gStrandMaterialID ? 
0.5 : acceleration; specHistory = Color::Clamp( specM1, specSigma * specTemporalAccumulationParams.y, specHistory ); diff --git a/Shaders/Include/RELAX_AntiFirefly.hlsli b/Shaders/Include/RELAX_AntiFirefly.hlsli index 52d84d25..e1245526 100644 --- a/Shaders/Include/RELAX_AntiFirefly.hlsli +++ b/Shaders/Include/RELAX_AntiFirefly.hlsli @@ -170,7 +170,7 @@ NRD_EXPORT void NRD_CS_MAIN(uint2 pixelPos : SV_DispatchThreadId, uint2 threadPo return; // Early out if linearZ is beyond denoising range - float centerViewZ = abs(gViewZ[pixelPos]); + float centerViewZ = UnpackViewZ(gViewZ[pixelPos]); if (centerViewZ > gDenoisingRange) return; diff --git a/Shaders/Include/RELAX_Atrous.hlsli b/Shaders/Include/RELAX_Atrous.hlsli index a6ec7c25..e66acdc6 100644 --- a/Shaders/Include/RELAX_Atrous.hlsli +++ b/Shaders/Include/RELAX_Atrous.hlsli @@ -17,7 +17,7 @@ NRD_EXPORT void NRD_CS_MAIN(uint2 pixelPos : SV_DispatchThreadId) return; // Early out if linearZ is beyond denoising range - float centerViewZ = abs(gViewZ[pixelPos]); + float centerViewZ = UnpackViewZ(gViewZ[pixelPos]); if (centerViewZ > gDenoisingRange) return; @@ -146,7 +146,7 @@ NRD_EXPORT void NRD_CS_MAIN(uint2 pixelPos : SV_DispatchThreadId) float4 sampleNormalRoughnes = NRD_FrontEnd_UnpackNormalAndRoughness(gNormalRoughness[p], sampleMaterialID); float3 sampleNormal = sampleNormalRoughnes.rgb; float sampleRoughness = sampleNormalRoughnes.a; - float sampleViewZ = abs(gViewZ[p]); + float sampleViewZ = UnpackViewZ(gViewZ[p]); // Calculating sample world position float3 sampleWorldPos = GetCurrentWorldPosFromPixelPos(p, sampleViewZ); diff --git a/Shaders/Include/RELAX_AtrousSmem.hlsli b/Shaders/Include/RELAX_AtrousSmem.hlsli index ea3146e4..3e286a19 100644 --- a/Shaders/Include/RELAX_AtrousSmem.hlsli +++ b/Shaders/Include/RELAX_AtrousSmem.hlsli @@ -103,7 +103,7 @@ void Preload(uint2 sharedPos, int2 globalPos) float materialID; sharedNormalRoughness[sharedPos.y][sharedPos.x] = 
NRD_FrontEnd_UnpackNormalAndRoughness(gNormalRoughness[globalPos], materialID); - float viewZ = abs(gViewZ[globalPos]); + float viewZ = UnpackViewZ(gViewZ[globalPos]); sharedWorldPosMaterialID[sharedPos.y][sharedPos.x] = float4(GetCurrentWorldPosFromPixelPos(globalPos, viewZ), materialID); } @@ -115,16 +115,14 @@ NRD_EXPORT void NRD_CS_MAIN(int2 pixelPos : SV_DispatchThreadId, uint2 threadPos float isSky = gTiles[pixelPos >> 4]; PRELOAD_INTO_SMEM_WITH_TILE_CHECK; - int2 sharedMemoryIndex = threadPos.xy + int2(BORDER, BORDER); - - float4 centerWorldPosMaterialID = sharedWorldPosMaterialID[sharedMemoryIndex.y][sharedMemoryIndex.x]; - float3 centerWorldPos = centerWorldPosMaterialID.xyz; - float centerMaterialID = centerWorldPosMaterialID.w; - float centerViewZ = abs(gViewZ[pixelPos]); - gOutViewZ[pixelPos] = centerViewZ; + // Prev ViewZ + float viewZpacked = gViewZ[pixelPos]; + gOutViewZ[pixelPos] = viewZpacked; - // Repacking normal and roughness to prev normal roughness to be used in the next frame + // Prev normal and roughness + int2 sharedMemoryIndex = threadPos.xy + int2(BORDER, BORDER); float4 normalRoughness = sharedNormalRoughness[sharedMemoryIndex.y][sharedMemoryIndex.x]; + float centerViewZ = UnpackViewZ(viewZpacked); if (centerViewZ > gDenoisingRange) { // Setting normal and roughness to close to zero for out of range pixels @@ -132,6 +130,10 @@ NRD_EXPORT void NRD_CS_MAIN(int2 pixelPos : SV_DispatchThreadId, uint2 threadPos } gOutNormalRoughness[pixelPos] = PackPrevNormalRoughness(normalRoughness); + float4 centerWorldPosMaterialID = sharedWorldPosMaterialID[sharedMemoryIndex.y][sharedMemoryIndex.x]; + float3 centerWorldPos = centerWorldPosMaterialID.xyz; + float centerMaterialID = centerWorldPosMaterialID.w; + #if( NRD_NORMAL_ENCODING == NRD_NORMAL_ENCODING_R10G10B10A2_UNORM ) gOutMaterialID[pixelPos] = centerMaterialID; #endif diff --git a/Shaders/Include/RELAX_Common.hlsli b/Shaders/Include/RELAX_Common.hlsli index 32c70f9f..cd33d2c0 100644 --- 
a/Shaders/Include/RELAX_Common.hlsli +++ b/Shaders/Include/RELAX_Common.hlsli @@ -144,7 +144,7 @@ float GetSpecularNormalWeight_ATrous(float2 params0, float3 n0, float3 n, float3 float GetNormalWeightParams(float roughness, float angleFraction = 0.75) { float angle = atan(GetSpecLobeTanHalfAngle(roughness, angleFraction)); - angle = 1.0 / max(angle, NRD_NORMAL_ULP); + angle = 1.0 / max(angle, RELAX_NORMAL_ULP); return angle; } diff --git a/Shaders/Include/RELAX_Config.hlsli b/Shaders/Include/RELAX_Config.hlsli index b81bdff3..88f82a5a 100644 --- a/Shaders/Include/RELAX_Config.hlsli +++ b/Shaders/Include/RELAX_Config.hlsli @@ -11,6 +11,9 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. #define RELAX // Settings +// IMPORTANT: if == 1, then for 0-roughness "GetEncodingAwareNormalWeight" can return values < 1 even for same normals due to data re-packing +#define RELAX_NORMAL_ULP ( 1.5 / 255.0 ) + #define RELAX_MAX_ACCUM_FRAME_NUM 255 #define RELAX_HIT_DIST_MIN_WEIGHT 0.2 // Sacrifices spatial fidelity to improve temporal stability. Should be set to 0 for relatively clean input signals like RTXDI and 0.1 .. 
0.2 for lower quality input signals #define RELAX_ANTILAG_ACCELERATION_AMOUNT_SCALE 10.0 // Multiplier used to put RelaxAntilagSettings::accelerationAmount to convenient [0; 1] range diff --git a/Shaders/Include/RELAX_HistoryFix.hlsli b/Shaders/Include/RELAX_HistoryFix.hlsli index 1abd1371..b08fc3a8 100644 --- a/Shaders/Include/RELAX_HistoryFix.hlsli +++ b/Shaders/Include/RELAX_HistoryFix.hlsli @@ -31,7 +31,7 @@ NRD_EXPORT void NRD_CS_MAIN(uint2 pixelPos : SV_DispatchThreadId) // Early out if linearZ is beyond denoising range // Early out if no disocclusion detected - float centerViewZ = abs(gViewZ[pixelPos]); + float centerViewZ = UnpackViewZ(gViewZ[pixelPos]); float historyLength = 255.0 * gHistoryLength[pixelPos]; if ((centerViewZ > gDenoisingRange) || (historyLength > gHistoryFixFrameNum || gHistoryFixFrameNum == 1.0)) return; @@ -89,7 +89,7 @@ NRD_EXPORT void NRD_CS_MAIN(uint2 pixelPos : SV_DispatchThreadId) float sampleMaterialID; float3 sampleNormal = NRD_FrontEnd_UnpackNormalAndRoughness(gNormalRoughness[samplePosInt], sampleMaterialID).rgb; - float sampleViewZ = abs(gViewZ[samplePosInt]); + float sampleViewZ = UnpackViewZ(gViewZ[samplePosInt]); float3 sampleWorldPos = GetCurrentWorldPosFromPixelPos(samplePosInt, sampleViewZ); float geometryWeight = GetPlaneDistanceWeight_Atrous( centerWorldPos, diff --git a/Shaders/Include/RELAX_HitDistReconstruction.hlsli b/Shaders/Include/RELAX_HitDistReconstruction.hlsli index 4cccdd2e..18d16b6c 100644 --- a/Shaders/Include/RELAX_HitDistReconstruction.hlsli +++ b/Shaders/Include/RELAX_HitDistReconstruction.hlsli @@ -16,7 +16,7 @@ float GetNormalWeightParams(float nonLinearAccumSpeed, float fraction, float rou float angle = atan(GetSpecLobeTanHalfAngle(roughness)); angle *= lerp(saturate(fraction), 1.0, nonLinearAccumSpeed); // TODO: use as "percentOfVolume" instead? 
- return 1.0 / max(angle, NRD_NORMAL_ULP); + return 1.0 / max(angle, RELAX_NORMAL_ULP); } void Preload(uint2 sharedPos, int2 globalPos) @@ -25,7 +25,7 @@ void Preload(uint2 sharedPos, int2 globalPos) // It's ok that we don't use materialID in Hitdist reconstruction float4 normalRoughness = NRD_FrontEnd_UnpackNormalAndRoughness(gNormalRoughness[WithRectOrigin(globalPos)]); - float viewZ = abs(gViewZ[WithRectOrigin(globalPos)]); + float viewZ = UnpackViewZ(gViewZ[WithRectOrigin(globalPos)]); float2 hitDist = gDenoisingRange; #ifdef RELAX_SPECULAR diff --git a/Shaders/Include/RELAX_PrePass.hlsli b/Shaders/Include/RELAX_PrePass.hlsli index b088ffe5..42b9e3e2 100644 --- a/Shaders/Include/RELAX_PrePass.hlsli +++ b/Shaders/Include/RELAX_PrePass.hlsli @@ -20,7 +20,7 @@ NRD_EXPORT void NRD_CS_MAIN(int2 pixelPos : SV_DispatchThreadId, uint2 threadPos return; // Early out if linearZ is beyond denoising range - float centerViewZ = abs(gViewZ[WithRectOrigin(pixelPos)]); + float centerViewZ = UnpackViewZ(gViewZ[WithRectOrigin(pixelPos)]); if (centerViewZ > gDenoisingRange) return; @@ -42,8 +42,8 @@ NRD_EXPORT void NRD_CS_MAIN(int2 pixelPos : SV_DispatchThreadId, uint2 threadPos if (gSpecCheckerboard != 2) #endif { - float viewZ0 = abs(gViewZ[WithRectOrigin(checkerboardPos.xz)]); - float viewZ1 = abs(gViewZ[WithRectOrigin(checkerboardPos.yz)]); + float viewZ0 = UnpackViewZ(gViewZ[WithRectOrigin(checkerboardPos.xz)]); + float viewZ1 = UnpackViewZ(gViewZ[WithRectOrigin(checkerboardPos.yz)]); #if( NRD_NORMAL_ENCODING == NRD_NORMAL_ENCODING_R10G10B10A2_UNORM ) NRD_FrontEnd_UnpackNormalAndRoughness(gNormalRoughness[WithRectOrigin(checkerboardPos.xz)], materialID0); diff --git a/Shaders/Include/RELAX_TemporalAccumulation.hlsli b/Shaders/Include/RELAX_TemporalAccumulation.hlsli index e7f0303e..c61d651e 100644 --- a/Shaders/Include/RELAX_TemporalAccumulation.hlsli +++ b/Shaders/Include/RELAX_TemporalAccumulation.hlsli @@ -86,10 +86,10 @@ float loadSurfaceMotionBasedPrevData( float2 
gatherOrigin10 = (float2(bilinearOrigin) + float2(2.0, 0.0)) * gResourceSizeInvPrev; float2 gatherOrigin01 = (float2(bilinearOrigin) + float2(0.0, 2.0)) * gResourceSizeInvPrev; float2 gatherOrigin11 = (float2(bilinearOrigin) + float2(2.0, 2.0)) * gResourceSizeInvPrev; - float4 prevViewZs00 = gPrevViewZ.GatherRed(gNearestClamp, gatherOrigin00).wzxy; - float4 prevViewZs10 = gPrevViewZ.GatherRed(gNearestClamp, gatherOrigin10).wzxy; - float4 prevViewZs01 = gPrevViewZ.GatherRed(gNearestClamp, gatherOrigin01).wzxy; - float4 prevViewZs11 = gPrevViewZ.GatherRed(gNearestClamp, gatherOrigin11).wzxy; + float4 prevViewZs00 = UnpackViewZ(gPrevViewZ.GatherRed(gNearestClamp, gatherOrigin00).wzxy); + float4 prevViewZs10 = UnpackViewZ(gPrevViewZ.GatherRed(gNearestClamp, gatherOrigin10).wzxy); + float4 prevViewZs01 = UnpackViewZ(gPrevViewZ.GatherRed(gNearestClamp, gatherOrigin01).wzxy); + float4 prevViewZs11 = UnpackViewZ(gPrevViewZ.GatherRed(gNearestClamp, gatherOrigin11).wzxy); float4 prevMaterialIDs00 = gPrevMaterialID.GatherRed(gNearestClamp, gatherOrigin00).wzxy; float4 prevMaterialIDs10 = gPrevMaterialID.GatherRed(gNearestClamp, gatherOrigin10).wzxy; float4 prevMaterialIDs01 = gPrevMaterialID.GatherRed(gNearestClamp, gatherOrigin01).wzxy; @@ -274,7 +274,7 @@ float loadVirtualMotionBasedPrevData( vmbDisocclusionThreshold -= NRD_EPS; // Checking bilinear footprint only for virtual motion based specular reprojection - float4 prevViewZs = gPrevViewZ.GatherRed(gNearestClamp, gatherOrigin).wzxy; + float4 prevViewZs = UnpackViewZ(gPrevViewZ.GatherRed(gNearestClamp, gatherOrigin).wzxy); float4 prevMaterialIDs = gPrevMaterialID.GatherRed(gNearestClamp, gatherOrigin).wzxy; float3 prevWorldPosInTap; float4 bilinearTapsValid; @@ -376,7 +376,7 @@ NRD_EXPORT void NRD_CS_MAIN(uint2 pixelPos : SV_DispatchThreadId, uint2 threadPo return; // Early out if linearZ is beyond denoising range - float currentLinearZ = abs(gViewZ[WithRectOrigin(pixelPos)]); + float currentLinearZ = 
UnpackViewZ(gViewZ[WithRectOrigin(pixelPos)]); if (currentLinearZ > gDenoisingRange) return; @@ -698,16 +698,11 @@ NRD_EXPORT void NRD_CS_MAIN(uint2 pixelPos : SV_DispatchThreadId, uint2 threadPo float edgeLenSq = Math::LengthSquared(edge); float curvature = dot(n - currentNormal, edge) * Math::PositiveRcp(edgeLenSq); - // Correction #1 - values below this threshold get turned into garbage due to numerical imprecision - float d = Math::ManhattanDistance(currentNormal, n); - float s = Math::LinearStep(NRD_NORMAL_ENCODING_ERROR, 2.0 * NRD_NORMAL_ENCODING_ERROR, d); - curvature *= s; - - // Correction #2 - this is needed if camera is "inside" a concave mirror ( tests 133, 164, 171 - 176 ) + // Correction #1 - this is needed if camera is "inside" a concave mirror ( tests 133, 164, 171 - 176 ) if (length(currentWorldPos) < -1.0 / curvature) curvature *= NoV; - // Correction #3 - very negative inconsistent with previous frame curvature blows up reprojection ( tests 164, 171 - 176 ) + // Correction #2 - very negative inconsistent with previous frame curvature blows up reprojection ( tests 164, 171 - 176 ) float2 uv1 = Geometry::GetScreenUv(gWorldToClipPrev, currentWorldPos - V * ApplyThinLensEquation(hitDist, curvature)); float2 uv2 = Geometry::GetScreenUv(gWorldToClipPrev, currentWorldPos); float a = length((uv1 - uv2) * gRectSize); @@ -775,9 +770,9 @@ NRD_EXPORT void NRD_CS_MAIN(uint2 pixelPos : SV_DispatchThreadId, uint2 threadPo float curvatureAngle = atan(tanCurvature); // Normal weight for virtual motion based reprojection - float lobeHalfAngle = max(atan(GetSpecLobeTanHalfAngle(currentRoughnessModified)), NRD_NORMAL_ULP); + float lobeHalfAngle = max(atan(GetSpecLobeTanHalfAngle(currentRoughnessModified)), RELAX_NORMAL_ULP); float angle = lobeHalfAngle + curvatureAngle; - float normalWeight = GetEncodingAwareNormalWeight(currentNormal, prevNormalVMB, lobeHalfAngle, curvatureAngle); + float normalWeight = GetEncodingAwareNormalWeight(currentNormal, prevNormalVMB, 
lobeHalfAngle, curvatureAngle, RELAX_NORMAL_ULP); virtualHistoryAmount *= lerp(1.0 - saturate(uvDiffLengthInPixels), 1.0, normalWeight); // jitter friendly // Roughness weight for virtual motion based reprojection @@ -797,8 +792,8 @@ NRD_EXPORT void NRD_CS_MAIN(uint2 pixelPos : SV_DispatchThreadId, uint2 threadPo float4 backNormalRoughness2 = UnpackPrevNormalRoughness(gPrevNormalRoughness.SampleLevel(gLinearClamp, backUV2 * gResolutionScalePrev, 0)); backNormalRoughness1.rgb = Geometry::RotateVector(gWorldPrevToWorld, backNormalRoughness1.rgb); backNormalRoughness2.rgb = Geometry::RotateVector(gWorldPrevToWorld, backNormalRoughness2.rgb); - float prevPrevNormalWeight = IsInScreenNearest(backUV1) ? GetEncodingAwareNormalWeight(prevNormalVMB, backNormalRoughness1.rgb, lobeHalfAngle, curvatureAngle * 2.0) : 1.0; - prevPrevNormalWeight *= IsInScreenNearest(backUV2) ? GetEncodingAwareNormalWeight(prevNormalVMB, backNormalRoughness2.rgb, lobeHalfAngle, curvatureAngle * 3.0) : 1.0; + float prevPrevNormalWeight = IsInScreenNearest(backUV1) ? GetEncodingAwareNormalWeight(prevNormalVMB, backNormalRoughness1.rgb, lobeHalfAngle, curvatureAngle * 2.0, RELAX_NORMAL_ULP) : 1.0; + prevPrevNormalWeight *= IsInScreenNearest(backUV2) ? 
GetEncodingAwareNormalWeight(prevNormalVMB, backNormalRoughness2.rgb, lobeHalfAngle, curvatureAngle * 3.0, RELAX_NORMAL_ULP) : 1.0; virtualHistoryAmount *= 0.33 + 0.67 * prevPrevNormalWeight; specVMBConfidence *= 0.33 + 0.67 * prevPrevNormalWeight; // Taking in account roughness 1 and 2 frames back helps cleaning up surfaces wigh varying roughness diff --git a/Shaders/Include/SIGMA_Blur.hlsli b/Shaders/Include/SIGMA_Blur.hlsli index 98e36c9a..26369cbe 100644 --- a/Shaders/Include/SIGMA_Blur.hlsli +++ b/Shaders/Include/SIGMA_Blur.hlsli @@ -91,7 +91,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : float frustumSize = GetFrustumSize( gMinRectDimMulUnproject, gOrthoMode, viewZ ); float2 geometryWeightParams = GetGeometryWeightParams( gPlaneDistSensitivity, frustumSize, Xv, Nv, 1.0 ); - // Estimate penumbra size and filter shadow ( pass 1: dense 3x3 or 5x5 ) + // Estimate penumbra size and filter shadow ( dense ) float2 sum = 0; float penumbra = 0; SIGMA_TYPE result = 0; @@ -133,13 +133,13 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : } float2 ww = w; - ww.y *= !IsLit( penum ); + ww.y *= saturate( 1.0 - s.x ); // TODO: non linear? float penumInPixels = penum / unprojectZ; ww.y /= 1.0 + penumInPixels; // prefer smaller penumbra result += s * ww.x; - penumbra += penum * ww.y; + penumbra += ww.y == 0.0 ? 
0.0 : penum * ww.y; sum += ww; } } @@ -182,7 +182,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : // Random rotation float4 rotator = GetBlurKernelRotation( SIGMA_ROTATOR_MODE, pixelPos, gRotator, gFrameIndex ); - // Estimate penumbra size and filter shadow ( pass 2: sparse 8-taps ) + // Estimate penumbra size and filter shadow ( sparse ) float invEstimatedPenumbra = 1.0 / max( penumbra, NRD_EPS ); [unroll] @@ -232,13 +232,13 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : // Accumulate float2 ww = w; - ww.y *= !IsLit( penum ); + ww.y *= saturate( 1.0 - s.x ); // TODO: non linear? float penumInPixels = penum / unprojectZ; ww.y /= 1.0 + penumInPixels; // prefer smaller penumbra result += s * ww.x; - penumbra += penum * ww.y; + penumbra += ww.y == 0.0 ? 0.0 : penum * ww.y; sum += ww; } diff --git a/Shaders/Include/SIGMA_Config.hlsli b/Shaders/Include/SIGMA_Config.hlsli index f5423987..528ec1ee 100644 --- a/Shaders/Include/SIGMA_Config.hlsli +++ b/Shaders/Include/SIGMA_Config.hlsli @@ -11,12 +11,13 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. #define SIGMA // Switches ( default 1 ) +#define SIGMA_USE_EARLY_OUT_IN_TS 1 // improves performance in regions with hard shadow #define SIGMA_USE_CATROM 1 // sharper reprojection #define SIGMA_5X5_TEMPORAL_KERNEL 1 // provides variance estimation in a wider radius #define SIGMA_5X5_BLUR_RADIUS_ESTIMATION_KERNEL 1 // helps to improve stability, but adds 10% of overhead // Switches ( default 0 ) -#define SIGMA_SHOW 0 // 1 - tiles, 2 - history weight +#define SIGMA_SHOW 0 // 1 - tiles, 2 - history weight, 3 - penumbra size in pixels #define SIGMA_SHOW_PENUMBRA_SIZE 0 // Settings @@ -25,13 +26,8 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. 
#define SIGMA_POISSON_SAMPLES g_Special8 #define SIGMA_MAX_PIXEL_RADIUS 32.0 #define SIGMA_TS_SIGMA_SCALE 3.0 -#define SIGMA_TS_MAX_HISTORY_WEIGHT 0.95 -#define SIGMA_TS_Z_FALLOFF 1.0 // exp2( -SIGMA_TS_Z_FALLOFF * dz ) -#define SIGMA_TS_MOTION_MAX_REUSE 0.11 -#define SIGMA_TS_EARLY_OUT_THRESHOLD 0.25 -#define SIGMA_ANTILAG_SIGMA_SCALE 0.25 -#define SIGMA_ANTILAG_POWER 1.0 -#define SIGMA_ANTILAG_EPS 0.05 +#define SIGMA_TS_MAX_HISTORY_WEIGHT 0.8 // 4 frames ( longer accumulation worsens shadows in motion, since there is no shadow MV ) +#define SIGMA_TS_ANTILAG_POWER 1.0 // Data type #ifdef SIGMA_TRANSLUCENT diff --git a/Shaders/Include/SIGMA_TemporalStabilization.hlsli b/Shaders/Include/SIGMA_TemporalStabilization.hlsli index 93d1c438..2c26b1bd 100644 --- a/Shaders/Include/SIGMA_TemporalStabilization.hlsli +++ b/Shaders/Include/SIGMA_TemporalStabilization.hlsli @@ -49,11 +49,12 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : if( viewZ > gDenoisingRange ) return; - // Early out - float unprojectZ = PixelRadiusToWorld( gUnproject, gOrthoMode, 1.0, viewZ ); - float penumbraInPixels = centerPenumbra / unprojectZ; + // Tile-based early out ( potentially ) + float2 pixelUv = float2( pixelPos + 0.5 ) * gRectSizeInv; + float tileValue = TextureCubic( gIn_Tiles, pixelUv * gResolutionScale ); + bool isHardShadow = ( ( tileValue == 0.0 && NRD_USE_TILE_CHECK ) || centerPenumbra == 0.0 ) && SIGMA_USE_EARLY_OUT_IN_TS; - if( penumbraInPixels <= SIGMA_TS_EARLY_OUT_THRESHOLD && SIGMA_SHOW == 0 ) + if( isHardShadow && SIGMA_SHOW == 0 ) { gOut_Shadow_Translucency[ pixelPos ] = PackShadow( s_Shadow_Translucency[ smemPos.y ][ smemPos.x ] ); @@ -88,7 +89,7 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : input = s; else { - w = exp2( -SIGMA_TS_Z_FALLOFF * abs( z - viewZ ) ); // soft Z test // TODO: use relative difference? + w = abs( z - viewZ ) / max( z, viewZ ) < 0.02; // TODO: slope scale? 
w *= IsLit( penum ) == IsLit( centerPenumbra ); // no-harm on a flat surface due to wide spatials, needed to prevent bleeding from one surface to another w *= float( z < gDenoisingRange ); // ignore sky w *= float( centerSignNoL == signNoL ); // ignore samples with different NoL signs @@ -113,7 +114,6 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : SIGMA_TYPE sigma = GetStdDev( m1, m2 ); // Compute previous pixel position - float2 pixelUv = ( float2( pixelPos ) + 0.5 ) * gRectSizeInv; float3 Xv = Geometry::ReconstructViewPosition( pixelUv, gFrustum, viewZnearest, gOrthoMode ); float3 X = Geometry::RotateVectorInverse( gWorldToView, Xv ); float3 mv = gIn_Mv[ WithRectOrigin( pixelPos ) + offseti - BORDER ] * gMvScale.xyz; @@ -129,52 +129,51 @@ NRD_EXPORT void NRD_CS_MAIN( int2 threadPos : SV_GroupThreadId, int2 pixelPos : history = saturate( history ); history = SIGMA_BackEnd_UnpackShadow( history ); - // Antilag - float fast = m1.x; - float slow = history.x; - - float a = abs( slow - fast ) - SIGMA_ANTILAG_SIGMA_SCALE * sigma.x - SIGMA_ANTILAG_EPS; - float b = max( slow, fast ) + SIGMA_ANTILAG_SIGMA_SCALE * sigma.x + SIGMA_ANTILAG_EPS; - float antilag = a / b; - - antilag = Math::SmoothStep01( 1.0 - antilag ); - antilag = Math::Pow01( antilag, SIGMA_ANTILAG_POWER ); - // Clamp history SIGMA_TYPE inputMin = m1 - sigma * SIGMA_TS_SIGMA_SCALE; SIGMA_TYPE inputMax = m1 + sigma * SIGMA_TS_SIGMA_SCALE; SIGMA_TYPE historyClamped = clamp( history, inputMin, inputMax ); + // Antilag + float antilag = abs( historyClamped.x - history.x ); + antilag = Math::Pow01( antilag, SIGMA_TS_ANTILAG_POWER ); + antilag = 1.0 - antilag; + + // Dark magic ( helps to smooth out "penumbra to 1" regions ) + historyClamped = lerp( historyClamped, history, 0.5 ); + // History weight float historyWeight = SIGMA_TS_MAX_HISTORY_WEIGHT; historyWeight *= IsInScreenNearest( pixelUvPrev ); historyWeight *= antilag; - historyWeight *= Math::SmoothStep( 
SIGMA_TS_EARLY_OUT_THRESHOLD, 1.0, penumbraInPixels ); historyWeight *= gStabilizationStrength; // Combine with current frame SIGMA_TYPE result = lerp( input, historyClamped, historyWeight ); // Debug - #if( SIGMA_SHOW != 0 ) - #if( SIGMA_SHOW == 1 ) - float tileValue = gIn_Tiles[ pixelPos >> 4 ].x; - tileValue = float( tileValue != 0.0 ); // optional, just to show fully discarded tiles - - #ifdef SIGMA_TRANSLUCENT - result = lerp( float4( 0, 0, 1, 0 ), result, tileValue ); - #else - result = tileValue; - #endif - - // Show grid ( works badly with TAA ) - result *= all( ( pixelPos & 15 ) != 0 ); - #elif( SIGMA_SHOW == 2 ) - // .x - is used in antilag computations! - #ifdef SIGMA_TRANSLUCENT - result.yzw = SIGMA_BackEnd_UnpackShadow( historyWeight ); - #endif + #if( SIGMA_SHOW == 1 ) + tileValue = gIn_Tiles[ pixelPos >> 4 ].x; + tileValue = float( tileValue != 0.0 ); // optional, just to show fully discarded tiles + + #ifdef SIGMA_TRANSLUCENT + result = lerp( float4( 0, 0, 1, 0 ), result, tileValue ); + #else + result = tileValue; + #endif + + // Show grid ( works badly with TAA ) + result *= all( ( pixelPos & 15 ) != 0 ); + #elif( SIGMA_SHOW == 2 ) + // .x - is used in antilag computations! + #ifdef SIGMA_TRANSLUCENT + historyWeight *= float( !isHardShadow ); + result.yzw = historyWeight; #endif + #elif( SIGMA_SHOW == 3 ) + float unprojectZ = PixelRadiusToWorld( gUnproject, gOrthoMode, 1.0, viewZ ); + float penumbraInPixels = centerPenumbra / unprojectZ; + result = saturate( penumbraInPixels / 10.0 ); #endif // Output diff --git a/Source/Reblur.cpp b/Source/Reblur.cpp index 1a4795cc..6c479139 100644 --- a/Source/Reblur.cpp +++ b/Source/Reblur.cpp @@ -46,9 +46,20 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. 
#define REBLUR_FORMAT_DIRECTIONAL_OCCLUSION_FAST_HISTORY REBLUR_FORMAT_OCCLUSION_FAST_HISTORY #define REBLUR_FORMAT_PREV_VIEWZ Format::R32_SFLOAT -#define REBLUR_FORMAT_PREV_NORMAL_ROUGHNESS Format::RGBA8_UNORM #define REBLUR_FORMAT_PREV_INTERNAL_DATA Format::R16_UINT +#if (NRD_NORMAL_ENCODING == 0) + #define REBLUR_FORMAT_PREV_NORMAL_ROUGHNESS Format::RGBA8_UNORM +#elif (NRD_NORMAL_ENCODING == 1) + #define REBLUR_FORMAT_PREV_NORMAL_ROUGHNESS Format::RGBA8_SNORM +#elif (NRD_NORMAL_ENCODING == 2) + #define REBLUR_FORMAT_PREV_NORMAL_ROUGHNESS Format::R10_G10_B10_A2_UNORM +#elif (NRD_NORMAL_ENCODING == 3) + #define REBLUR_FORMAT_PREV_NORMAL_ROUGHNESS Format::RGBA16_UNORM +#elif (NRD_NORMAL_ENCODING == 4) + #define REBLUR_FORMAT_PREV_NORMAL_ROUGHNESS Format::RGBA16_SFLOAT +#endif + #define REBLUR_FORMAT_HITDIST_FOR_TRACKING Format::R16_SFLOAT // Other diff --git a/Source/Wrapper.cpp b/Source/Wrapper.cpp index edffa441..a46a6c81 100644 --- a/Source/Wrapper.cpp +++ b/Source/Wrapper.cpp @@ -17,6 +17,8 @@ license agreement from NVIDIA CORPORATION is strictly prohibited. 
static_assert(VERSION_MAJOR == NRD_VERSION_MAJOR, "VERSION_MAJOR & NRD_VERSION_MAJOR don't match!"); static_assert(VERSION_MINOR == NRD_VERSION_MINOR, "VERSION_MINOR & NRD_VERSION_MINOR don't match!"); static_assert(VERSION_BUILD == NRD_VERSION_BUILD, "VERSION_BUILD & NRD_VERSION_BUILD don't match!"); +static_assert(NRD_NORMAL_ENCODING >= 0 && NRD_NORMAL_ENCODING < (uint32_t)nrd::NormalEncoding::MAX_NUM, "NRD_NORMAL_ENCODING out of bounds!"); +static_assert(NRD_ROUGHNESS_ENCODING >= 0 && NRD_ROUGHNESS_ENCODING < (uint32_t)nrd::RoughnessEncoding::MAX_NUM, "NRD_ROUGHNESS_ENCODING out of bounds!"); constexpr std::array g_NrdSupportedDenoisers = { diff --git a/UPDATE.md b/UPDATE.md index 2c180eb3..5a474d06 100644 --- a/UPDATE.md +++ b/UPDATE.md @@ -259,3 +259,12 @@ A single NRD instance can now include any combination of denoisers, including re - exposed `CommonSettings::strandThickness`, defining how NRD adapts to sub-pixel thick details. It works in conjunction with `CommonSettings::disocclusionThresholdAlternate` for `CommonSettings::strandMaterialID` without a need to provide `IN_DISOCCLUSION_THRESHOLD_MIX` texture - *REBLUR*: - changed usage of `maxBlurRadius` and its default value, old values should be scaled by `2` + +## To v4.10 + +- *NRD INTEGRATION*: + - `Nrd` prefix replaced with `nrd::` namespace + - removed `format` from `NrdIntegrationTexture`, which is now just `nri::TextureBarrierDesc` + - removed the constructor with arguments + - creation parameters are now grouped into `IntegrationCreationDesc`, which needs to be passed to `Initialize` + - added a few potentially useful flags to `IntegrationCreationDesc`