-
Notifications
You must be signed in to change notification settings - Fork 62
/
Copy pathPreprocessEnvironmentMap.hlsl
161 lines (125 loc) · 5.79 KB
/
PreprocessEnvironmentMap.hlsl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
/***************************************************************************
# Copyright (c) 2021-2023, NVIDIA CORPORATION. All rights reserved.
#
# NVIDIA CORPORATION and its licensors retain all intellectual property
# and proprietary rights in and to this software, related documentation
# and any modifications thereto. Any use, reproduction, disclosure or
# distribution of this software and related documentation without an express
# license agreement from NVIDIA CORPORATION is strictly prohibited.
**************************************************************************/
#include "ShaderParameters.h"
#include "HelperFunctions.hlsli"
#include <Rtxdi/Utils/Math.hlsli>
#include <donut/shaders/binding_helpers.hlsli>
// Unbounded array of single-channel read/write textures starting at register u0.
// The shader indexes it by mip level: u_IntegratedMips[level][pixel].
RWTexture2D<float> u_IntegratedMips[] : register(u0);
// Per-dispatch constants at register b0. This shader reads sourceSize, sourceMipLevel and numDestMipLevels from it.
// NOTE(review): VK_PUSH_CONSTANT is defined outside this file - presumably it marks the buffer as a push constant on Vulkan; confirm.
VK_PUSH_CONSTANT ConstantBuffer<PreprocessEnvironmentMapConstants> g_Const : register(b0);
#if INPUT_ENVIRONMENT_MAP
Texture2D<float4> t_EnvironmentMap : register(t0);

// Returns the relative sampling weight of the environment map texel at `position`:
// its non-negative luminance scaled by the cosine of the texel row's elevation and
// limited to the float16 range. Texels with an infinite or NaN luminance get weight 0.
float getPixelWeight(uint2 position)
{
    // Largest finite value that a float16 texture can hold.
    const float maxWeight = 65504.0;

    const float3 texel = t_EnvironmentMap[position].rgb;
    const float luminance = max(calcLuminance(texel), 0);

    // Invalid colors must never be sampled.
    const bool usable = !isinf(luminance) && !isnan(luminance);
    if (!usable)
        return 0;

    // Equirectangular projection is assumed: the row center maps to an elevation in
    // (-pi/2, pi/2), and cos(elevation) is proportional to the texel's solid angle.
    // Only the scale relative to the other texels matters, not the absolute value.
    const float rowCenter = float(position.y) + 0.5;
    const float elevation = (rowCenter / float(g_Const.sourceSize.y) - 0.5) * c_pi;
    const float relativeSolidAngle = cos(elevation);

    return clamp(luminance * relativeSolidAngle, 0, maxWeight);
}
#endif
// Group-shared scratch space: one partial result per run of 16 consecutive threads
// (256 threads / 16 = 16 entries). main() fills it in step 2 and reads it back in step 3.
groupshared float s_weights[16];
// Entry point. In one dispatch, each 256-thread group produces up to five successive mip levels of
// u_IntegratedMips (sourceMipLevel + 1 .. sourceMipLevel + 5), stopping early once
// numDestMipLevels is reached. Every output pixel is the plain average of the 2x2 block below it.
// When INPUT_ENVIRONMENT_MAP is defined and sourceMipLevel == 0, level 0 itself is first filled
// with per-texel weights computed from the environment map.
//   GroupIndex  - thread group coordinate (SV_GroupID); each group owns a 16x16 tile of level sourceMipLevel + 1.
//   ThreadIndex - linear index of the thread inside the group (SV_GroupThreadID), 0..255.
// Warning: do not change the group size. The algorithm is hardcoded to process 16x16 tiles.
[numthreads(256, 1, 1)]
void main(uint2 GroupIndex : SV_GroupID, uint ThreadIndex : SV_GroupThreadID)
{
// Convert the linear thread index into a 2D position inside the tile (Z-curve order, per the helper's name),
// then offset by the group's tile origin to get the pixel this thread writes in level sourceMipLevel + 1.
uint2 LocalIndex = RTXDI_LinearIndexToZCurve(ThreadIndex);
uint2 GlobalIndex = (GroupIndex * 16) + LocalIndex;
// Step 0: Load a 2x2 quad of pixels from the source texture or the source mip level.
float4 sourceWeights;
#if INPUT_ENVIRONMENT_MAP
if (g_Const.sourceMipLevel == 0)
{
// First pass: derive the four weights from the environment map...
uint2 sourcePos = GlobalIndex.xy * 2;
sourceWeights.x = getPixelWeight(sourcePos + int2(0, 0));
sourceWeights.y = getPixelWeight(sourcePos + int2(0, 1));
sourceWeights.z = getPixelWeight(sourcePos + int2(1, 0));
sourceWeights.w = getPixelWeight(sourcePos + int2(1, 1));
// ...and store them as level 0 of the output, so later passes can read them back.
RWTexture2D<float> dest = u_IntegratedMips[0];
dest[sourcePos + int2(0, 0)] = sourceWeights.x;
dest[sourcePos + int2(0, 1)] = sourceWeights.y;
dest[sourcePos + int2(1, 0)] = sourceWeights.z;
dest[sourcePos + int2(1, 1)] = sourceWeights.w;
}
else
#endif
{
// Otherwise the source is a mip level that already exists in the output array.
uint2 sourcePos = GlobalIndex.xy * 2;
RWTexture2D<float> src = u_IntegratedMips[g_Const.sourceMipLevel];
sourceWeights.x = src[sourcePos + int2(0, 0)];
sourceWeights.y = src[sourcePos + int2(0, 1)];
sourceWeights.z = src[sourcePos + int2(1, 0)];
sourceWeights.w = src[sourcePos + int2(1, 1)];
}
// Number of levels above the source level that still have to be produced.
// It depends only on g_Const, so each early return below is taken by all threads of the group alike.
// NOTE(review): the result is stored as uint; if numDestMipLevels <= sourceMipLevel this would wrap
// to a huge value - presumably the host never dispatches in that state; confirm.
uint mipLevelsToWrite = g_Const.numDestMipLevels - g_Const.sourceMipLevel - 1;
if (mipLevelsToWrite < 1) return;
// Average those weights and write out the first mip.
float weight = (sourceWeights.x + sourceWeights.y + sourceWeights.z + sourceWeights.w) * 0.25;
u_IntegratedMips[g_Const.sourceMipLevel + 1][GlobalIndex.xy] = weight;
if (mipLevelsToWrite < 2) return;
// The following sequence is an optimized hierarchical downsampling algorithm using wave ops.
// It assumes that the wave size is at least 16 lanes, which is true for both NV and AMD GPUs.
// It also assumes that the threads are laid out in the group using the Z-curve pattern.
// Step 1: Average 2x2 groups of pixels.
// Every lane sums its own value with the next three lanes; only the lanes whose index is a
// multiple of 4 combine exactly one aligned group of four, and only those write the result.
uint lane = WaveGetLaneIndex();
weight = (weight
+ WaveReadLaneAt(weight, lane + 1)
+ WaveReadLaneAt(weight, lane + 2)
+ WaveReadLaneAt(weight, lane + 3)) * 0.25;
if ((lane & 3) == 0)
{
u_IntegratedMips[g_Const.sourceMipLevel + 2][GlobalIndex.xy >> 1] = weight;
}
if (mipLevelsToWrite < 3) return;
// Step 2: Average the previous results from 2 pixels away.
// The valid step-1 results live on lanes 0, 4, 8, 12 of each aligned run of 16 lanes,
// so only lanes whose index is a multiple of 16 hold a meaningful average here.
weight = (weight
+ WaveReadLaneAt(weight, lane + 4)
+ WaveReadLaneAt(weight, lane + 8)
+ WaveReadLaneAt(weight, lane + 12)) * 0.25;
if ((lane & 15) == 0)
{
u_IntegratedMips[g_Const.sourceMipLevel + 3][GlobalIndex.xy >> 2] = weight;
// Store the intermediate result into shared memory.
// One slot per run of 16 threads: ThreadIndex >> 4 ranges over 0..15.
s_weights[ThreadIndex >> 4] = weight;
}
if (mipLevelsToWrite < 4) return;
// Wait until every thread in the group has passed this point so all s_weights entries are visible.
GroupMemoryBarrierWithGroupSync();
// The rest operates on a 4x4 group of values for the entire thread group
// Only the first 16 threads continue, one per s_weights entry.
if (ThreadIndex >= 16)
return;
// Load the intermediate results
weight = s_weights[ThreadIndex];
// Change the output texture addressing because we'll be only writing a 2x2 block of pixels
GlobalIndex = (GroupIndex * 2) + (LocalIndex >> 1);
// Step 3: Average the previous results from adjacent threads, meaning from 4 pixels away.
// NOTE(review): `lane` is reused from before the barrier; this presumably relies on threads 0..15
// occupying lanes 0..15 of a single wave - consistent with the wave-size assumption above; confirm.
weight = (weight
+ WaveReadLaneAt(weight, lane + 1)
+ WaveReadLaneAt(weight, lane + 2)
+ WaveReadLaneAt(weight, lane + 3)) * 0.25;
if ((lane & 3) == 0)
{
u_IntegratedMips[g_Const.sourceMipLevel + 4][GlobalIndex.xy] = weight;
}
if (mipLevelsToWrite < 5) return;
// Step 4: Average the previous results from 8 pixels away.
// Lane 0 combines the four step-3 results (lanes 0, 4, 8, 12) into the single pixel
// that this group contributes to the last level written by this pass.
weight = (weight
+ WaveReadLaneAt(weight, lane + 4)
+ WaveReadLaneAt(weight, lane + 8)
+ WaveReadLaneAt(weight, lane + 12)) * 0.25;
if (lane == 0)
{
u_IntegratedMips[g_Const.sourceMipLevel + 5][GlobalIndex.xy >> 1] = weight;
}
}