  // MultiScaleVORender.compute
  //
  // This is a modified version of the SSAO renderer from Microsoft's MiniEngine
  // library. The copyright notice from the original version is included below.
  //
  // The original source code of MiniEngine is available on GitHub.
  // https://github.com/Microsoft/DirectX-Graphics-Samples
  //
  //
  // Copyright (c) Microsoft. All rights reserved.
  // This code is licensed under the MIT License (MIT).
  // THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
  // ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
  // IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
  // PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
  //
  // Developed by Minigraph
  //
  // Author: James Stanard
  //
  // NOTE(review): 3568 is presumably the compiler's "unknown pragma" warning, silenced
  // because of the exclude_renderers pragma below -- confirm.
  #pragma warning(disable : 3568)
  #pragma exclude_renderers gles gles3 d3d11_9x

  // Four kernel variants. Each line binds MAIN to the kernel's own name and optionally
  // defines MSAA and/or INTERLEAVE_RESULT for that variant.
  #pragma kernel MultiScaleVORender MAIN=MultiScaleVORender
  #pragma kernel MultiScaleVORender_interleaved MAIN=MultiScaleVORender_interleaved INTERLEAVE_RESULT
  #pragma kernel MultiScaleVORender_MSAA MAIN=MultiScaleVORender_MSAA MSAA
  #pragma kernel MultiScaleVORender_MSAA_interleaved MAIN=MultiScaleVORender_MSAA_interleaved MSAA INTERLEAVE_RESULT

  #include "../StdLib.hlsl"

  // Variants that do not interleave their result sample with the wide pattern.
  #if !defined(INTERLEAVE_RESULT)
  #define WIDE_SAMPLING 1
  #endif

  #if WIDE_SAMPLING
  // 16x16 threads per group working on a 32x32 depth cache: the 16x16 in the center forms
  // the area of focus and the 8-pixel perimeter around it is used for wide gathering.
  #define THREAD_COUNT_X 16
  #define THREAD_COUNT_Y 16
  #define TILE_DIM 32
  #else
  // 8x8 threads per group working on a 16x16 depth cache: the 8x8 in the center forms
  // the area of focus and the 4-pixel perimeter around it is used for gathering.
  #define THREAD_COUNT_X 8
  #define THREAD_COUNT_Y 8
  #define TILE_DIM 16
  #endif
  // Input depth texture. It is an array texture when the result is interleaved and has
  // two channels per texel when MSAA is defined.
  #if defined(MSAA) && defined(INTERLEAVE_RESULT)
  Texture2DArray<float2> DepthTex;
  #elif defined(MSAA)
  Texture2D<float2> DepthTex;
  #elif defined(INTERLEAVE_RESULT)
  Texture2DArray<float> DepthTex;
  #else
  Texture2D<float> DepthTex;
  #endif

  // Output texture and the group-shared depth cache (one TILE_DIM x TILE_DIM tile per
  // thread group). Both use the same element type as DepthTex.
  #ifdef MSAA
  RWTexture2D<float2> Occlusion;
  groupshared float2 DepthSamples[TILE_DIM * TILE_DIM];
  #else
  RWTexture2D<float> Occlusion;
  groupshared float DepthSamples[TILE_DIM * TILE_DIM];
  #endif

  // Sampler passed to the Gather fetches on DepthTex.
  SamplerState samplerDepthTex;

  CBUFFER_START(CB1)
      // Per-tap inverse thickness and weight; the kernel always reads the same
      // [row].component slot from both tables for a given tap.
      float4 gInvThicknessTable[3];
      float4 gSampleWeightTable[3];
      // .xy scales integer texel coordinates into the UVs used for gathering.
      float4 gInvSliceDimension;
      // .x = reject fadeoff, .y = intensity (see the aliases below).
      float2 AdditionalParams;
  CBUFFER_END

  // Readable aliases for the packed parameters.
  #define gRejectFadeoff AdditionalParams.x
  #define gIntensity AdditionalParams.y
  // Element type of the cached depth samples and of every occlusion term derived from
  // them: two channels when MSAA is defined, one otherwise. The helpers below are written
  // once against this type so the MSAA and non-MSAA variants cannot drift apart.
  #ifdef MSAA
  #define VO_VALUE float2
  #else
  #define VO_VALUE float
  #endif

  // Tests the two cached depth samples located at DepthSamples[base + offset] and
  // DepthSamples[base - offset], i.e. mirrored around the cache cell `base`.
  //   frontDepth - value subtracted from each scaled sample depth
  //   invRange   - scale applied to each sample depth before the subtraction
  //   base       - index of the center cell in DepthSamples
  //   offset     - linear distance (in cells) from `base` to each sample of the pair
  // Returns the combined term for the pair, saturated to [0, 1].
  VO_VALUE TestSamplePair(float frontDepth, VO_VALUE invRange, uint base, int offset)
  {
      // "Disocclusion" measures the penetration distance of the depth sample within the sphere.
      // Disocclusion < 0 (full occlusion) -> the sample fell in front of the sphere
      // Disocclusion > 1 (no occlusion) -> the sample fell behind the sphere
      VO_VALUE disocclusion1 = DepthSamples[base + offset] * invRange - frontDepth;
      VO_VALUE disocclusion2 = DepthSamples[base - offset] * invRange - frontDepth;

      // Saturated, gRejectFadeoff-scaled versions of the same measurements.
      VO_VALUE pseudoDisocclusion1 = saturate(gRejectFadeoff * disocclusion1);
      VO_VALUE pseudoDisocclusion2 = saturate(gRejectFadeoff * disocclusion2);

      // Each sample's disocclusion is bounded below by the *other* sample's
      // pseudo-disocclusion and above by 1 before the two are combined.
      return saturate(
          clamp(disocclusion1, pseudoDisocclusion2, 1.0) +
          clamp(disocclusion2, pseudoDisocclusion1, 1.0) -
          pseudoDisocclusion1 * pseudoDisocclusion2);
  }

  // Averages the sample pairs that belong to the tap (x, y) around the cache cell
  // `centerIdx`. With WIDE_SAMPLING both x and y are doubled first.
  //   centerIdx    - index of the center cell in DepthSamples
  //   x, y         - tap offset in cache cells (y == 0: axial, x == y: diagonal,
  //                  anything else: L-shaped)
  //   invDepth     - reciprocal of the center depth
  //   invThickness - inverse thickness for this tap
  // Returns the mean of TestSamplePair over the 2 (axial, diagonal) or 4 (L-shaped) pairs.
  VO_VALUE TestSamples(uint centerIdx, uint x, uint y, VO_VALUE invDepth, float invThickness)
  {
  #if WIDE_SAMPLING
      x <<= 1;
      y <<= 1;
  #endif

      VO_VALUE invRange = invThickness * invDepth;
      float frontDepth = invThickness - 0.5;

      if (y == 0)
      {
          // Axial: one pair along the row, one pair along the column.
          return 0.5 * (
              TestSamplePair(frontDepth, invRange, centerIdx, x) +
              TestSamplePair(frontDepth, invRange, centerIdx, x * TILE_DIM)
              );
      }
      else if (x == y)
      {
          // Diagonal: one pair along each diagonal.
          return 0.5 * (
              TestSamplePair(frontDepth, invRange, centerIdx, x * TILE_DIM - x) +
              TestSamplePair(frontDepth, invRange, centerIdx, x * TILE_DIM + x)
              );
      }
      else
      {
          // L-Shaped: the offset (x, y), its mirror image, and both with x and y swapped.
          return 0.25 * (
              TestSamplePair(frontDepth, invRange, centerIdx, y * TILE_DIM + x) +
              TestSamplePair(frontDepth, invRange, centerIdx, y * TILE_DIM - x) +
              TestSamplePair(frontDepth, invRange, centerIdx, x * TILE_DIM + y) +
              TestSamplePair(frontDepth, invRange, centerIdx, x * TILE_DIM - y)
              );
      }
  }

  #undef VO_VALUE
  #ifdef DISABLE_COMPUTE_SHADERS

  TRIVIAL_COMPUTE_KERNEL(MAIN)

  #else

  // Kernel entry point (MAIN is bound to the actual kernel name by the #pragma kernel lines).
  // Every thread gathers one 2x2 quad of depths into the group-shared cache, then computes
  // the occlusion term for its own pixel from the cached neighbourhood and writes it to
  // Occlusion.
  [numthreads(THREAD_COUNT_X, THREAD_COUNT_Y, 1)]
  void MAIN(uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex, uint3 GTid : SV_GroupThreadID, uint3 DTid : SV_DispatchThreadID)
  {
      // Number of extra cache cells on each side of the group's focus area:
      // 8 for the 32x32 tile (WIDE_SAMPLING), 4 for the 16x16 tile.
      const uint apron = (TILE_DIM - THREAD_COUNT_X) / 2;

      // Gather location of this thread's quad, expressed as a texel coordinate scaled to UV.
      const float2 quadCenterUV = int2(DTid.xy + GTid.xy - (apron - 1)) * gInvSliceDimension.xy;

  #ifdef INTERLEAVE_RESULT
      // The depth input is an array texture here; DTid.z selects the slice.
      const float3 gatherCoord = float3(quadCenterUV, DTid.z);
  #else
      const float2 gatherCoord = quadCenterUV;
  #endif

      // Cache cell that receives the first gathered value of this thread's 2x2 quad.
      const uint quadIdx = (GTid.y * TILE_DIM + GTid.x) * 2;

      // Fetch four depths and store them in LDS: components w and z fill one cache row,
      // x and y fill the row after it.
  #ifdef MSAA
      const float4 gatheredR = DepthTex.GatherRed(samplerDepthTex, gatherCoord);
      const float4 gatheredG = DepthTex.GatherGreen(samplerDepthTex, gatherCoord);
      DepthSamples[quadIdx] = float2(gatheredR.w, gatheredG.w);
      DepthSamples[quadIdx + 1] = float2(gatheredR.z, gatheredG.z);
      DepthSamples[quadIdx + TILE_DIM] = float2(gatheredR.x, gatheredG.x);
      DepthSamples[quadIdx + TILE_DIM + 1] = float2(gatheredR.y, gatheredG.y);
  #else
      const float4 gathered = DepthTex.Gather(samplerDepthTex, gatherCoord);
      DepthSamples[quadIdx] = gathered.w;
      DepthSamples[quadIdx + 1] = gathered.z;
      DepthSamples[quadIdx + TILE_DIM] = gathered.x;
      DepthSamples[quadIdx + TILE_DIM + 1] = gathered.y;
  #endif

      // Every thread reads cells written by other threads below, so wait for the whole group.
      GroupMemoryBarrierWithGroupSync();

      // Cache cell of this thread's own pixel: its position inside the focus area,
      // shifted past the apron in both directions.
      const uint thisIdx = (GTid.y + apron) * TILE_DIM + (GTid.x + apron);

  #ifdef MSAA
      const float2 thisDepth = DepthSamples[thisIdx];
      const float2 invThisDepth = float2(1.0 / thisDepth.x, 1.0 / thisDepth.y);
      float2 ao = 0.0;
  #else
      const float invThisDepth = 1.0 / DepthSamples[thisIdx];
      float ao = 0.0;
  #endif

      // Accumulates one weighted tap. The weight and the inverse thickness of a tap are
      // always read from the same [row].component slot of their respective tables.
  #define VO_ADD_TAP(row, comp, dx, dy) \
      ao += gSampleWeightTable[row].comp * TestSamples(thisIdx, dx, dy, invThisDepth, gInvThicknessTable[row].comp)

  //#define SAMPLE_EXHAUSTIVELY
  #ifdef SAMPLE_EXHAUSTIVELY
      // 68 samples: sample all cells within a circular radius of 5
      VO_ADD_TAP(0, x, 1, 0);
      VO_ADD_TAP(0, y, 2, 0);
      VO_ADD_TAP(0, z, 3, 0);
      VO_ADD_TAP(0, w, 4, 0);
      VO_ADD_TAP(1, x, 1, 1);
      VO_ADD_TAP(2, x, 2, 2);
      VO_ADD_TAP(2, w, 3, 3);
      VO_ADD_TAP(1, y, 1, 2);
      VO_ADD_TAP(1, z, 1, 3);
      VO_ADD_TAP(1, w, 1, 4);
      VO_ADD_TAP(2, y, 2, 3);
      VO_ADD_TAP(2, z, 2, 4);
  #else // SAMPLE_CHECKER
      // 36 samples: sample every-other cell in a checker board pattern
      VO_ADD_TAP(0, y, 2, 0);
      VO_ADD_TAP(0, w, 4, 0);
      VO_ADD_TAP(1, x, 1, 1);
      VO_ADD_TAP(2, x, 2, 2);
      VO_ADD_TAP(2, w, 3, 3);
      VO_ADD_TAP(1, z, 1, 3);
      VO_ADD_TAP(2, z, 2, 4);
  #endif

  #undef VO_ADD_TAP

  #ifdef INTERLEAVE_RESULT
      // Scatter the slice back into the full-size output: DTid.xy picks a 4x4 block and
      // DTid.z the pixel inside it (column z & 3, row z >> 2).
      // NOTE(review): presumably DTid.z is in [0, 16) -- confirm against the dispatch.
      const uint2 outPixel = (DTid.xy << 2) | uint2(DTid.z & 3, DTid.z >> 2);
  #else
      const uint2 outPixel = DTid.xy;
  #endif

      // gIntensity blends between no occlusion (1) and the accumulated term.
      Occlusion[outPixel] = lerp(1, ao, gIntensity);
  }

  #endif // DISABLE_COMPUTE_SHADERS