MultiScaleVOUpsample.compute 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432
  1. //
  2. // This is a modified version of the SSAO renderer from Microsoft's MiniEngine
  3. // library. The copyright notice from the original version is included below.
  4. //
  5. // The original source code of MiniEngine is available on GitHub.
  6. // https://github.com/Microsoft/DirectX-Graphics-Samples
  7. //
  8. //
  9. // Copyright (c) Microsoft. All rights reserved.
  10. // This code is licensed under the MIT License (MIT).
  11. // THIS CODE IS PROVIDED *AS IS* WITHOUT WARRANTY OF
  12. // ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING ANY
  13. // IMPLIED WARRANTIES OF FITNESS FOR A PARTICULAR
  14. // PURPOSE, MERCHANTABILITY, OR NON-INFRINGEMENT.
  15. //
  16. // Developed by Minigraph
  17. //
  18. // Author: James Stanard
  19. //
  #pragma warning(disable : 3568)
  #pragma exclude_renderers gles gles3 d3d11_9x

  // Kernel variants. Every line compiles the MAIN function below under its own entry-point
  // name, with the listed preprocessor symbols defined:
  //   INVERT                       - write 1 - AO instead of AO
  //   COMBINE_LOWER_RESOLUTIONS    - take min(LoResAO1, LoResAO2) while prefetching
  //   BLEND_WITH_HIGHER_RESOLUTION - multiply the upsampled AO by HiResAO
  //   MSAA                         - use the two-channel (float2) textures and caches
  #pragma kernel MultiScaleVOUpSample MAIN=MultiScaleVOUpSample
  #pragma kernel MultiScaleVOUpSample_invert MAIN=MultiScaleVOUpSample_invert INVERT
  #pragma kernel MultiScaleVOUpSample_premin MAIN=MultiScaleVOUpSample_premin COMBINE_LOWER_RESOLUTIONS
  #pragma kernel MultiScaleVOUpSample_blendout MAIN=MultiScaleVOUpSample_blendout BLEND_WITH_HIGHER_RESOLUTION
  #pragma kernel MultiScaleVOUpSample_premin_blendout MAIN=MultiScaleVOUpSample_premin_blendout COMBINE_LOWER_RESOLUTIONS BLEND_WITH_HIGHER_RESOLUTION

  // The base MSAA kernel needs the MSAA symbol like every other *_MSAA_* variant; without it
  // the kernel is compiled against the single-channel declarations.
  #pragma kernel MultiScaleVOUpSample_MSAA MAIN=MultiScaleVOUpSample_MSAA MSAA
  #pragma kernel MultiScaleVOUpSample_MSAA_invert MAIN=MultiScaleVOUpSample_MSAA_invert MSAA INVERT
  #pragma kernel MultiScaleVOUpSample_MSAA_premin MAIN=MultiScaleVOUpSample_MSAA_premin MSAA COMBINE_LOWER_RESOLUTIONS
  #pragma kernel MultiScaleVOUpSample_MSAA_blendout MAIN=MultiScaleVOUpSample_MSAA_blendout MSAA BLEND_WITH_HIGHER_RESOLUTION
  #pragma kernel MultiScaleVOUpSample_MSAA_premin_blendout MAIN=MultiScaleVOUpSample_MSAA_premin_blendout MSAA COMBINE_LOWER_RESOLUTIONS BLEND_WITH_HIGHER_RESOLUTION

  #include "../StdLib.hlsl"
  // Resource declarations. The MSAA variants carry two channels per texel (float2); the
  // non-MSAA variants carry one (float). Apart from the element type both halves are the same.
  #ifdef MSAA

  // Input textures
  Texture2D<float2> LoResDB;
  SamplerState samplerLoResDB;

  Texture2D<float2> HiResDB;
  SamplerState samplerHiResDB;

  Texture2D<float2> LoResAO1;
  SamplerState samplerLoResAO1;

  #ifdef COMBINE_LOWER_RESOLUTIONS
  // Second low-resolution AO input; combined with LoResAO1 using min()
  Texture2D<float2> LoResAO2;
  SamplerState samplerLoResAO2;
  #endif

  #ifdef BLEND_WITH_HIGHER_RESOLUTION
  // High-resolution AO that the upsampled result is multiplied with
  Texture2D<float2> HiResAO;
  SamplerState samplerHiResAO;
  #endif

  // Output textures
  RWTexture2D<float2> AoResult;

  // Shared memory: 256 entries each, addressed as rows of 16
  groupshared float2 DepthCache[256];
  groupshared float2 AOCache1[256];
  groupshared float2 AOCache2[256];

  #else

  // Input textures
  Texture2D<float> LoResDB;
  SamplerState samplerLoResDB;

  Texture2D<float> HiResDB;
  SamplerState samplerHiResDB;

  Texture2D<float> LoResAO1;
  SamplerState samplerLoResAO1;

  #ifdef COMBINE_LOWER_RESOLUTIONS
  // Second low-resolution AO input; combined with LoResAO1 using min()
  Texture2D<float> LoResAO2;
  SamplerState samplerLoResAO2;
  #endif

  #ifdef BLEND_WITH_HIGHER_RESOLUTION
  // High-resolution AO that the upsampled result is multiplied with
  Texture2D<float> HiResAO;
  SamplerState samplerHiResAO;
  #endif

  // Output textures
  RWTexture2D<float> AoResult;

  // Shared memory: 256 entries each, addressed as rows of 16
  groupshared float DepthCache[256];
  groupshared float AOCache1[256];
  groupshared float AOCache2[256];

  #endif

  CBUFFER_START(CB1)
      float4 InvLowResolution;   // only .xy is read in this file
      float4 InvHighResolution;  // only .xy is read in this file
      float4 AdditionalParams;   // unpacked by the macros below
  CBUFFER_END

  // Named views onto the packed AdditionalParams vector
  #define NoiseFilterStrength AdditionalParams.x
  #define StepSize AdditionalParams.y
  #define kBlurTolerance AdditionalParams.z
  #define kUpsampleTolerance AdditionalParams.w
  // Loads one 2x2 gather footprint of low-resolution AO and depth into group-shared memory.
  //   index - cache slot that receives the gather's .w component; .z, .x and .y go to
  //           index + 1, index + 16 and index + 17 respectively (16 = cache row stride)
  //   uv    - coordinate handed to every Gather call
  // AOCache1 receives the AO values (the min of LoResAO1 and LoResAO2 when
  // COMBINE_LOWER_RESOLUTIONS is defined). DepthCache receives 1.0 / LoResDB.
  void PrefetchData(uint index, float2 uv)
  {
      const uint slotW = index;
      const uint slotZ = index + 1;
      const uint slotX = index + 16;
      const uint slotY = index + 17;

  #ifdef MSAA
      // Red and green channels are gathered separately and re-packed per texel.
      float4 aoRed = LoResAO1.GatherRed(samplerLoResAO1, uv);
      float4 aoGreen = LoResAO1.GatherGreen(samplerLoResAO1, uv);

  #ifdef COMBINE_LOWER_RESOLUTIONS
      aoRed = min(aoRed, LoResAO2.GatherRed(samplerLoResAO2, uv));
      aoGreen = min(aoGreen, LoResAO2.GatherGreen(samplerLoResAO2, uv));
  #endif

      AOCache1[slotW] = float2(aoRed.w, aoGreen.w);
      AOCache1[slotZ] = float2(aoRed.z, aoGreen.z);
      AOCache1[slotX] = float2(aoRed.x, aoGreen.x);
      AOCache1[slotY] = float2(aoRed.y, aoGreen.y);

      float4 invDepthRed = 1.0 / LoResDB.GatherRed(samplerLoResDB, uv);
      float4 invDepthGreen = 1.0 / LoResDB.GatherGreen(samplerLoResDB, uv);

      DepthCache[slotW] = float2(invDepthRed.w, invDepthGreen.w);
      DepthCache[slotZ] = float2(invDepthRed.z, invDepthGreen.z);
      DepthCache[slotX] = float2(invDepthRed.x, invDepthGreen.x);
      DepthCache[slotY] = float2(invDepthRed.y, invDepthGreen.y);
  #else
      float4 ao = LoResAO1.Gather(samplerLoResAO1, uv);

  #ifdef COMBINE_LOWER_RESOLUTIONS
      ao = min(ao, LoResAO2.Gather(samplerLoResAO2, uv));
  #endif

      AOCache1[slotW] = ao.w;
      AOCache1[slotZ] = ao.z;
      AOCache1[slotX] = ao.x;
      AOCache1[slotY] = ao.y;

      float4 invDepth = 1.0 / LoResDB.Gather(samplerLoResDB, uv);

      DepthCache[slotW] = invDepth.w;
      DepthCache[slotZ] = invDepth.z;
      DepthCache[slotX] = invDepth.x;
      DepthCache[slotY] = invDepth.y;
  #endif
  }
  // Five-tap blur over a, b, c, d, e (c is the centre) with weights 1/8, 1/4, 1/4, 1/4, 1/8.
  // The three flags decide which neighbours contribute:
  //   - b is kept when Left or Middle is set, d when Right or Middle is set; otherwise the
  //     tap is replaced by the centre value c.
  //   - a is kept only when Left is set, e only when Right is set; otherwise the tap is
  //     replaced by the (possibly already replaced) inner tap on the same side.
  float SmartBlur(float a, float b, float c, float d, float e, bool Left, bool Middle, bool Right)
  {
      // Inner taps first, because the outer taps may fall back onto them.
      if (!(Left || Middle))
          b = c;
      if (!(Right || Middle))
          d = c;

      if (!Left)
          a = b;
      if (!Right)
          e = d;

      return ((a + e) / 2.0 + b + c + d) / 4.0;
  }
  // Returns true when (d1 * d2 + StepSize)^2 exceeds l1 * l2 * kBlurTolerance.
  // The blur passes call it with two neighbouring depth deltas (d1, d2) and their
  // "delta^2 + StepSize" terms (l1, l2); the result is used as a SmartBlur flag.
  bool CompareDeltas(float d1, float d2, float l1, float l2)
  {
      const float biasedProduct = d1 * d2 + StepSize;
      const float lhs = biasedProduct * biasedProduct;
      const float rhs = l1 * l2 * kBlurTolerance;
      return lhs > rhs;
  }
  // Horizontal pass of the depth-aware blur.
  // Reads seven consecutive entries of AOCache1 and DepthCache starting at leftMostIndex and
  // writes three blurred values to AOCache2[leftMostIndex .. leftMostIndex + 2]. Output k is
  // a five-tap SmartBlur over inputs k .. k + 4, i.e. centred on input k + 2.
  // In the MSAA variant the .x and .y channels are filtered independently.
  void BlurHorizontally(uint leftMostIndex)
  {
  #ifdef MSAA
      // AO taps
      float2 a0 = AOCache1[leftMostIndex];
      float2 a1 = AOCache1[leftMostIndex + 1];
      float2 a2 = AOCache1[leftMostIndex + 2];
      float2 a3 = AOCache1[leftMostIndex + 3];
      float2 a4 = AOCache1[leftMostIndex + 4];
      float2 a5 = AOCache1[leftMostIndex + 5];
      float2 a6 = AOCache1[leftMostIndex + 6];

      // Matching depth-cache taps (PrefetchData stores 1.0 / depth)
      float2 d0 = DepthCache[leftMostIndex];
      float2 d1 = DepthCache[leftMostIndex + 1];
      float2 d2 = DepthCache[leftMostIndex + 2];
      float2 d3 = DepthCache[leftMostIndex + 3];
      float2 d4 = DepthCache[leftMostIndex + 4];
      float2 d5 = DepthCache[leftMostIndex + 5];
      float2 d6 = DepthCache[leftMostIndex + 6];

      // Differences between neighbouring taps
      float2 d01 = d1 - d0;
      float2 d12 = d2 - d1;
      float2 d23 = d3 - d2;
      float2 d34 = d4 - d3;
      float2 d45 = d5 - d4;
      float2 d56 = d6 - d5;

      // Squared differences biased by StepSize
      float2 l01 = d01 * d01 + StepSize;
      float2 l12 = d12 * d12 + StepSize;
      float2 l23 = d23 * d23 + StepSize;
      float2 l34 = d34 * d34 + StepSize;
      float2 l45 = d45 * d45 + StepSize;
      float2 l56 = d56 * d56 + StepSize;

      // One flag per pair of adjacent deltas; cXY spans taps X..Y. Suffix _0 is the .x
      // channel, _1 the .y channel.
      bool c02_0 = CompareDeltas(d01.x, d12.x, l01.x, l12.x);
      bool c13_0 = CompareDeltas(d12.x, d23.x, l12.x, l23.x);
      bool c24_0 = CompareDeltas(d23.x, d34.x, l23.x, l34.x);
      bool c35_0 = CompareDeltas(d34.x, d45.x, l34.x, l45.x);
      bool c46_0 = CompareDeltas(d45.x, d56.x, l45.x, l56.x);
      bool c02_1 = CompareDeltas(d01.y, d12.y, l01.y, l12.y);
      bool c13_1 = CompareDeltas(d12.y, d23.y, l12.y, l23.y);
      bool c24_1 = CompareDeltas(d23.y, d34.y, l23.y, l34.y);
      bool c35_1 = CompareDeltas(d34.y, d45.y, l34.y, l45.y);
      bool c46_1 = CompareDeltas(d45.y, d56.y, l45.y, l56.y);

      // Three overlapping five-tap windows, each shifted by one tap.
      AOCache2[leftMostIndex] = float2(SmartBlur(a0.x, a1.x, a2.x, a3.x, a4.x, c02_0, c13_0, c24_0), SmartBlur(a0.y, a1.y, a2.y, a3.y, a4.y, c02_1, c13_1, c24_1));
      AOCache2[leftMostIndex + 1] = float2(SmartBlur(a1.x, a2.x, a3.x, a4.x, a5.x, c13_0, c24_0, c35_0), SmartBlur(a1.y, a2.y, a3.y, a4.y, a5.y, c13_1, c24_1, c35_1));
      AOCache2[leftMostIndex + 2] = float2(SmartBlur(a2.x, a3.x, a4.x, a5.x, a6.x, c24_0, c35_0, c46_0), SmartBlur(a2.y, a3.y, a4.y, a5.y, a6.y, c24_1, c35_1, c46_1));
  #else
      // AO taps
      float a0 = AOCache1[leftMostIndex];
      float a1 = AOCache1[leftMostIndex + 1];
      float a2 = AOCache1[leftMostIndex + 2];
      float a3 = AOCache1[leftMostIndex + 3];
      float a4 = AOCache1[leftMostIndex + 4];
      float a5 = AOCache1[leftMostIndex + 5];
      float a6 = AOCache1[leftMostIndex + 6];

      // Matching depth-cache taps (PrefetchData stores 1.0 / depth)
      float d0 = DepthCache[leftMostIndex];
      float d1 = DepthCache[leftMostIndex + 1];
      float d2 = DepthCache[leftMostIndex + 2];
      float d3 = DepthCache[leftMostIndex + 3];
      float d4 = DepthCache[leftMostIndex + 4];
      float d5 = DepthCache[leftMostIndex + 5];
      float d6 = DepthCache[leftMostIndex + 6];

      // Differences between neighbouring taps
      float d01 = d1 - d0;
      float d12 = d2 - d1;
      float d23 = d3 - d2;
      float d34 = d4 - d3;
      float d45 = d5 - d4;
      float d56 = d6 - d5;

      // Squared differences biased by StepSize
      float l01 = d01 * d01 + StepSize;
      float l12 = d12 * d12 + StepSize;
      float l23 = d23 * d23 + StepSize;
      float l34 = d34 * d34 + StepSize;
      float l45 = d45 * d45 + StepSize;
      float l56 = d56 * d56 + StepSize;

      // One flag per pair of adjacent deltas; cXY spans taps X..Y.
      bool c02 = CompareDeltas(d01, d12, l01, l12);
      bool c13 = CompareDeltas(d12, d23, l12, l23);
      bool c24 = CompareDeltas(d23, d34, l23, l34);
      bool c35 = CompareDeltas(d34, d45, l34, l45);
      bool c46 = CompareDeltas(d45, d56, l45, l56);

      // Three overlapping five-tap windows, each shifted by one tap.
      AOCache2[leftMostIndex] = SmartBlur(a0, a1, a2, a3, a4, c02, c13, c24);
      AOCache2[leftMostIndex + 1] = SmartBlur(a1, a2, a3, a4, a5, c13, c24, c35);
      AOCache2[leftMostIndex + 2] = SmartBlur(a2, a3, a4, a5, a6, c24, c35, c46);
  #endif
  }
  // Vertical pass of the depth-aware blur.
  // Reads six vertically adjacent entries of AOCache2 (row stride 16) starting at
  // topMostIndex and writes two blurred values to AOCache1[topMostIndex] and
  // AOCache1[topMostIndex + 16]. Depth taps are read two columns to the right of the AO taps,
  // matching BlurHorizontally, whose output at slot i is centred on input column i + 2.
  // In the MSAA variant the .x and .y channels are filtered independently.
  void BlurVertically(uint topMostIndex)
  {
      const uint rowStride = 16;
      const uint depthBase = topMostIndex + 2;

  #ifdef MSAA
      float2 ao0 = AOCache2[topMostIndex];
      float2 ao1 = AOCache2[topMostIndex + 1 * rowStride];
      float2 ao2 = AOCache2[topMostIndex + 2 * rowStride];
      float2 ao3 = AOCache2[topMostIndex + 3 * rowStride];
      float2 ao4 = AOCache2[topMostIndex + 4 * rowStride];
      float2 ao5 = AOCache2[topMostIndex + 5 * rowStride];

      float2 z0 = DepthCache[depthBase];
      float2 z1 = DepthCache[depthBase + 1 * rowStride];
      float2 z2 = DepthCache[depthBase + 2 * rowStride];
      float2 z3 = DepthCache[depthBase + 3 * rowStride];
      float2 z4 = DepthCache[depthBase + 4 * rowStride];
      float2 z5 = DepthCache[depthBase + 5 * rowStride];

      // Row-to-row differences and their StepSize-biased squares
      float2 dz01 = z1 - z0;
      float2 dz12 = z2 - z1;
      float2 dz23 = z3 - z2;
      float2 dz34 = z4 - z3;
      float2 dz45 = z5 - z4;

      float2 sq01 = dz01 * dz01 + StepSize;
      float2 sq12 = dz12 * dz12 + StepSize;
      float2 sq23 = dz23 * dz23 + StepSize;
      float2 sq34 = dz34 * dz34 + StepSize;
      float2 sq45 = dz45 * dz45 + StepSize;

      // SmartBlur flags for the .x channel ...
      bool flag02_x = CompareDeltas(dz01.x, dz12.x, sq01.x, sq12.x);
      bool flag13_x = CompareDeltas(dz12.x, dz23.x, sq12.x, sq23.x);
      bool flag24_x = CompareDeltas(dz23.x, dz34.x, sq23.x, sq34.x);
      bool flag35_x = CompareDeltas(dz34.x, dz45.x, sq34.x, sq45.x);

      // ... and for the .y channel
      bool flag02_y = CompareDeltas(dz01.y, dz12.y, sq01.y, sq12.y);
      bool flag13_y = CompareDeltas(dz12.y, dz23.y, sq12.y, sq23.y);
      bool flag24_y = CompareDeltas(dz23.y, dz34.y, sq23.y, sq34.y);
      bool flag35_y = CompareDeltas(dz34.y, dz45.y, sq34.y, sq45.y);

      // Two overlapping five-tap windows: rows 0..4 and rows 1..5
      float2 upperRow;
      upperRow.x = SmartBlur(ao0.x, ao1.x, ao2.x, ao3.x, ao4.x, flag02_x, flag13_x, flag24_x);
      upperRow.y = SmartBlur(ao0.y, ao1.y, ao2.y, ao3.y, ao4.y, flag02_y, flag13_y, flag24_y);

      float2 lowerRow;
      lowerRow.x = SmartBlur(ao1.x, ao2.x, ao3.x, ao4.x, ao5.x, flag13_x, flag24_x, flag35_x);
      lowerRow.y = SmartBlur(ao1.y, ao2.y, ao3.y, ao4.y, ao5.y, flag13_y, flag24_y, flag35_y);

      AOCache1[topMostIndex] = upperRow;
      AOCache1[topMostIndex + rowStride] = lowerRow;
  #else
      float ao0 = AOCache2[topMostIndex];
      float ao1 = AOCache2[topMostIndex + 1 * rowStride];
      float ao2 = AOCache2[topMostIndex + 2 * rowStride];
      float ao3 = AOCache2[topMostIndex + 3 * rowStride];
      float ao4 = AOCache2[topMostIndex + 4 * rowStride];
      float ao5 = AOCache2[topMostIndex + 5 * rowStride];

      float z0 = DepthCache[depthBase];
      float z1 = DepthCache[depthBase + 1 * rowStride];
      float z2 = DepthCache[depthBase + 2 * rowStride];
      float z3 = DepthCache[depthBase + 3 * rowStride];
      float z4 = DepthCache[depthBase + 4 * rowStride];
      float z5 = DepthCache[depthBase + 5 * rowStride];

      // Row-to-row differences and their StepSize-biased squares
      float dz01 = z1 - z0;
      float dz12 = z2 - z1;
      float dz23 = z3 - z2;
      float dz34 = z4 - z3;
      float dz45 = z5 - z4;

      float sq01 = dz01 * dz01 + StepSize;
      float sq12 = dz12 * dz12 + StepSize;
      float sq23 = dz23 * dz23 + StepSize;
      float sq34 = dz34 * dz34 + StepSize;
      float sq45 = dz45 * dz45 + StepSize;

      bool flag02 = CompareDeltas(dz01, dz12, sq01, sq12);
      bool flag13 = CompareDeltas(dz12, dz23, sq12, sq23);
      bool flag24 = CompareDeltas(dz23, dz34, sq23, sq34);
      bool flag35 = CompareDeltas(dz34, dz45, sq34, sq45);

      // Two overlapping five-tap windows: rows 0..4 and rows 1..5
      float upperRow = SmartBlur(ao0, ao1, ao2, ao3, ao4, flag02, flag13, flag24);
      float lowerRow = SmartBlur(ao1, ao2, ao3, ao4, ao5, flag13, flag24, flag35);

      AOCache1[topMostIndex] = upperRow;
      AOCache1[topMostIndex + rowStride] = lowerRow;
  #endif
  }
  // Depth-weighted upsample of four low-resolution AO samples to one high-resolution pixel.
  //
  // There are effectively five weights: one per low-res sample, plus NoiseFilterStrength,
  // which is added to both the weighted sum and the total weight and therefore acts like an
  // extra sample with AO = 1 that takes over when none of the four depths match well.
  // Each low-res weight is (9, 3, 1, 3) / (|HiDepth - LowDepth| + kUpsampleTolerance), so
  // the smaller the tolerance, the more a close depth match dominates. Original author's
  // note: a perfect match yields a weight of about 10^6, completely superseding the noise
  // filter (this depends on the value supplied for kUpsampleTolerance). The noise filter is
  // intended to soften shimmering when the high-res depth buffer has many small holes that
  // the low-res depth buffer represents inaccurately.
  //
  //   HiDepth   - depth of the high-resolution pixel
  //   HiAO      - factor multiplied onto the final result (the noise term itself is not
  //               scaled by it)
  //   LowDepths - depths of the four low-resolution samples
  //   LowAO     - AO of the four low-resolution samples, in the same order as LowDepths
  float BilateralUpsample(float HiDepth, float HiAO, float4 LowDepths, float4 LowAO)
  {
      const float4 depthMismatch = abs(HiDepth - LowDepths) + kUpsampleTolerance;
      const float4 tapWeights = float4(9, 3, 1, 3) / depthMismatch;

      const float weightTotal = dot(tapWeights, 1) + NoiseFilterStrength;
      const float aoTotal = dot(LowAO, tapWeights) + NoiseFilterStrength;

      return HiAO * aoTotal / weightTotal;
  }
  #ifdef DISABLE_COMPUTE_SHADERS

  TRIVIAL_COMPUTE_KERNEL(MAIN)

  #else

  // Kernel entry point (compiled once per "#pragma kernel" line via the MAIN macro).
  // Each 8x8 thread group prefetches low-res AO and depth into shared memory, blurs them
  // horizontally and then vertically, and finally every thread upsamples one low-res texel
  // into a 2x2 quad of AoResult.
  [numthreads(8, 8, 1)]
  void MAIN(uint3 Gid : SV_GroupID, uint GI : SV_GroupIndex, uint3 GTid : SV_GroupThreadID, uint3 DTid : SV_DispatchThreadID)
  {
      //
      // Stage 1: every thread gathers a 2x2 footprint, filling the 16x16 shared caches.
      // Slot = (2 * GTid.x) + (2 * GTid.y) * 16.
      //
      const uint prefetchSlot = GTid.x << 1 | GTid.y << 5;
      const float2 prefetchUV = int2(DTid.xy + GTid.xy - 2) * InvLowResolution.xy;
      PrefetchData(prefetchSlot, prefetchUV);
      GroupMemoryBarrierWithGroupSync();

      // Goal: a blurred 9x9 patch to upsample from. The blur radius is 2 texels, so the
      // passes start from a 13x13 area.

      //
      // Stage 2: horizontal blur, 13x13 -> 9x13. 39 threads = 13 rows x 3 calls, and each
      // call produces 3 outputs.
      //
      if (GI < 39)
      {
          const uint blurRow = GI / 3;
          const uint blurSegment = GI % 3;
          BlurHorizontally(blurRow * 16 + blurSegment * 3);
      }
      GroupMemoryBarrierWithGroupSync();

      //
      // Stage 3: vertical blur, 9x13 -> 9x9. 45 threads = 5 row pairs x 9 columns, and each
      // call produces 2 outputs.
      //
      if (GI < 45)
      {
          const uint rowPair = GI / 9;
          const uint column = GI % 9;
          BlurVertically(rowPair * 32 + column);
      }
      GroupMemoryBarrierWithGroupSync();

      //
      // Stage 4: bilateral upsample.
      //
      // Blurred low-res AO around this thread, packed as (x, y, z, w) =
      // (slot + 16, slot + 17, slot + 1, slot) - the same slot-to-component mapping
      // PrefetchData uses for gather results.
      const uint cacheSlot = GTid.x + GTid.y * 16;

  #ifdef MSAA
      const float2 aoAtSlot = AOCache1[cacheSlot];
      const float2 aoAtSlot1 = AOCache1[cacheSlot + 1];
      const float2 aoAtSlot16 = AOCache1[cacheSlot + 16];
      const float2 aoAtSlot17 = AOCache1[cacheSlot + 17];

      float4 LoSSAOs0 = float4(aoAtSlot16.x, aoAtSlot17.x, aoAtSlot1.x, aoAtSlot.x);
      float4 LoSSAOs1 = float4(aoAtSlot16.y, aoAtSlot17.y, aoAtSlot1.y, aoAtSlot.y);
  #else
      float4 LoSSAOs = float4(AOCache1[cacheSlot + 16], AOCache1[cacheSlot + 17], AOCache1[cacheSlot + 1], AOCache1[cacheSlot]);
  #endif

      // Work on a quad of output pixels at once so that one gather fetches four high-res and
      // four low-res depth values.
      const float2 loUV = DTid.xy * InvLowResolution.xy;
      const float2 hiUV = DTid.xy * 2 * InvHighResolution.xy;
      const int2 quadOrigin = DTid.xy << 1;

  #ifdef MSAA

  #ifdef BLEND_WITH_HIGHER_RESOLUTION
      float4 HiSSAOs0 = HiResAO.GatherRed(samplerHiResAO, hiUV);
      float4 HiSSAOs1 = HiResAO.GatherGreen(samplerHiResAO, hiUV);
  #else
      float4 HiSSAOs0 = 1.0;
      float4 HiSSAOs1 = 1.0;
  #endif

      float4 LoDepths0 = LoResDB.GatherRed(samplerLoResDB, loUV);
      float4 LoDepths1 = LoResDB.GatherGreen(samplerLoResDB, loUV);
      float4 HiDepths0 = HiResDB.GatherRed(samplerHiResDB, hiUV);
      float4 HiDepths1 = HiResDB.GatherGreen(samplerHiResDB, hiUV);

      // One result per output pixel and channel. The low-res vectors are rotated so that the
      // first component handed to BilateralUpsample changes with the output pixel.
      float4 upsampled0;
      upsampled0.x = BilateralUpsample(HiDepths0.x, HiSSAOs0.x, LoDepths0.xyzw, LoSSAOs0.xyzw);
      upsampled0.y = BilateralUpsample(HiDepths0.y, HiSSAOs0.y, LoDepths0.yzwx, LoSSAOs0.yzwx);
      upsampled0.z = BilateralUpsample(HiDepths0.z, HiSSAOs0.z, LoDepths0.zwxy, LoSSAOs0.zwxy);
      upsampled0.w = BilateralUpsample(HiDepths0.w, HiSSAOs0.w, LoDepths0.wxyz, LoSSAOs0.wxyz);

      float4 upsampled1;
      upsampled1.x = BilateralUpsample(HiDepths1.x, HiSSAOs1.x, LoDepths1.xyzw, LoSSAOs1.xyzw);
      upsampled1.y = BilateralUpsample(HiDepths1.y, HiSSAOs1.y, LoDepths1.yzwx, LoSSAOs1.yzwx);
      upsampled1.z = BilateralUpsample(HiDepths1.z, HiSSAOs1.z, LoDepths1.zwxy, LoSSAOs1.zwxy);
      upsampled1.w = BilateralUpsample(HiDepths1.w, HiSSAOs1.w, LoDepths1.wxyz, LoSSAOs1.wxyz);

  #ifdef INVERT
      upsampled0 = 1.0 - upsampled0;
      upsampled1 = 1.0 - upsampled1;
  #endif

      AoResult[quadOrigin + int2(-1, 0)] = float2(upsampled0.x, upsampled1.x);
      AoResult[quadOrigin + int2( 0, 0)] = float2(upsampled0.y, upsampled1.y);
      AoResult[quadOrigin + int2( 0, -1)] = float2(upsampled0.z, upsampled1.z);
      AoResult[quadOrigin + int2(-1, -1)] = float2(upsampled0.w, upsampled1.w);

  #else

  #ifdef BLEND_WITH_HIGHER_RESOLUTION
      float4 HiSSAOs = HiResAO.Gather(samplerHiResAO, hiUV);
  #else
      float4 HiSSAOs = 1.0;
  #endif

      float4 LoDepths = LoResDB.Gather(samplerLoResDB, loUV);
      float4 HiDepths = HiResDB.Gather(samplerHiResDB, hiUV);

      // One result per output pixel. The low-res vectors are rotated so that the first
      // component handed to BilateralUpsample changes with the output pixel.
      float4 upsampled;
      upsampled.x = BilateralUpsample(HiDepths.x, HiSSAOs.x, LoDepths.xyzw, LoSSAOs.xyzw);
      upsampled.y = BilateralUpsample(HiDepths.y, HiSSAOs.y, LoDepths.yzwx, LoSSAOs.yzwx);
      upsampled.z = BilateralUpsample(HiDepths.z, HiSSAOs.z, LoDepths.zwxy, LoSSAOs.zwxy);
      upsampled.w = BilateralUpsample(HiDepths.w, HiSSAOs.w, LoDepths.wxyz, LoSSAOs.wxyz);

  #ifdef INVERT
      upsampled = 1.0 - upsampled;
  #endif

      AoResult[quadOrigin + int2(-1, 0)] = upsampled.x;
      AoResult[quadOrigin + int2( 0, 0)] = upsampled.y;
      AoResult[quadOrigin + int2( 0, -1)] = upsampled.z;
      AoResult[quadOrigin + int2(-1, -1)] = upsampled.w;

  #endif
  }

  #endif // DISABLE_COMPUTE_SHADERS