ExposureHistogram.compute 2.8 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586
  1. // Set the following define to 0 (or comment it out) to disable vignette weighting
  2. #define USE_VIGNETTE_WEIGHTING 1
  3. #pragma warning(disable : 3568)
  4. #pragma exclude_renderers gles gles3 d3d11_9x
  5. #include "../StdLib.hlsl"
  6. #include "../Colors.hlsl"
  7. #include "ExposureHistogram.hlsl"
  // Global histogram of unsigned counters, one per bin. KEyeHistogram accumulates into it with
  // atomic adds and KEyeHistogramClear zeroes its first HISTOGRAM_BINS entries.
  8. RWStructuredBuffer<uint> _HistogramBuffer;
  // Input image whose luminance is histogrammed; KEyeHistogram samples it at mip level 0.
  9. Texture2D<float4> _Source;
  // Sampler used to read _Source. Presumably linear filtering with clamp addressing, going by
  // its name - the sampler state itself is not configured in this file.
  10. SamplerState sampler_LinearClamp;
  11. CBUFFER_START(Params)
  12. float4 _ScaleOffsetRes; // x: scale, y: offset (xy is forwarded to GetHistogramBinFromLuminance), z: width, w: height (zw bounds and normalizes the sampled pixel positions)
  13. CBUFFER_END
  // Per-thread-group partial histogram. KEyeHistogram fills it with atomic adds and then merges
  // it into _HistogramBuffer.
  14. groupshared uint gs_histogram[HISTOGRAM_BINS];
  // Declares KEyeHistogram as a compute kernel entry point. This pragma sits outside the
  // #ifdef, so it applies to both the fallback and the real implementation.
  15. #pragma kernel KEyeHistogram
  // When DISABLE_COMPUTE_SHADERS is defined, both kernels are emitted through
  // TRIVIAL_COMPUTE_KERNEL instead of the real implementations in the #else branch.
  // NOTE(review): TRIVIAL_COMPUTE_KERNEL is not defined in this file; presumably it comes from
  // one of the includes and generates a stub kernel - confirm.
  16. #ifdef DISABLE_COMPUTE_SHADERS
  17. TRIVIAL_COMPUTE_KERNEL(KEyeHistogram)
  18. TRIVIAL_COMPUTE_KERNEL(KEyeHistogramClear)
  19. #else
  // Builds a weighted luminance histogram of _Source.
  //
  // Each thread handles the pixel position 2 * dispatchThreadId, so one thread stands in for a
  // 2x2 footprint of the source. Every thread group first accumulates into the groupshared
  // gs_histogram with atomic adds, then merges that partial result into _HistogramBuffer.
  //
  // dispatchThreadId : global thread coordinate, doubled to obtain the sampled pixel position.
  // groupThreadId    : coordinate inside the group, flattened to pick the bin a thread clears/merges.
  //
  // Original author's note: a plain atomic-op gather was measured to be the fastest variant on
  // desktop Nvidia (Kepler/Maxwell) and PS4 compared to approaches without atomics or with heavy
  // LDS usage; desktop GCN had not been measured.
  [numthreads(HISTOGRAM_THREAD_X, HISTOGRAM_THREAD_Y, 1)]
  void KEyeHistogram(uint2 dispatchThreadId : SV_DispatchThreadID, uint2 groupThreadId : SV_GroupThreadID)
  {
      // Flattened index of this thread within its group. Threads whose index is below the bin
      // count each own one bin for the clear and merge steps.
      // NOTE: every bin is only covered if the group has at least HISTOGRAM_BINS threads.
      const uint flatIndex = groupThreadId.x + groupThreadId.y * HISTOGRAM_THREAD_X;
      const bool ownsBin = flatIndex < HISTOGRAM_BINS;

      // Step 1: zero the group-local histogram.
      if (ownsBin)
      {
          gs_histogram[flatIndex] = 0u;
      }

      // All bins must be cleared before any thread starts accumulating.
      GroupMemoryBarrierWithGroupSync();

      // Step 2: accumulate this thread's sample into the group-local histogram.
      const float2 pixelPos = 2.0 * float2(dispatchThreadId);
      const float2 resolution = _ScaleOffsetRes.zw;

      // Threads that land outside the image contribute nothing, but they must still reach the
      // barriers, hence a branch rather than an early return.
      if (pixelPos.x < resolution.x && pixelPos.y < resolution.y)
      {
          const float2 uv = pixelPos / resolution;

      #if USE_VIGNETTE_WEIGHTING
          // Weight samples by their squared distance to the screen center so the middle of the
          // image counts more: the weight is 64 at the center and decreases towards the edges.
          const float2 fromCenter = abs(uv - (0.5).xx);
          const float falloff = saturate(1.0 - dot(fromCenter, fromCenter));
          const uint weight = (uint)(64.0 * (falloff * falloff));
      #else
          const uint weight = 1u;
      #endif

          // Single fetch at mip 0; per the original comment this relies on the sampler's
          // bilinear filtering to act as a 2x downsample.
          const float3 rgb = _Source.SampleLevel(sampler_LinearClamp, uv, 0.0).xyz;
          const float lum = Luminance(rgb);

          // Map luminance to a normalized bin position, then scale it to a bin index.
          // NOTE(review): assumes GetHistogramBinFromLuminance returns a value in [0, 1] so the
          // index stays inside gs_histogram - confirm against ExposureHistogram.hlsl.
          const float binPos = GetHistogramBinFromLuminance(lum, _ScaleOffsetRes.xy);
          const uint bin = (uint)(binPos * (HISTOGRAM_BINS - 1u));

          InterlockedAdd(gs_histogram[bin], weight);
      }

      // Wait until the whole group has finished accumulating before merging.
      GroupMemoryBarrierWithGroupSync();

      // Step 3: fold the group-local histogram into the global one, one bin per thread.
      if (ownsBin)
      {
          InterlockedAdd(_HistogramBuffer[flatIndex], gs_histogram[flatIndex]);
      }
  }
  58. #pragma kernel KEyeHistogramClear
  // Resets the global histogram: each thread zeroes the _HistogramBuffer entry matching its
  // dispatch index, for indices below HISTOGRAM_BINS.
  //
  // dispatchThreadId : global thread index along X, used directly as the bin index.
  [numthreads(HISTOGRAM_THREAD_X, 1, 1)]
  void KEyeHistogramClear(uint dispatchThreadId : SV_DispatchThreadID)
  {
      // Surplus threads past the last bin have nothing to clear.
      if (dispatchThreadId >= HISTOGRAM_BINS)
      {
          return;
      }

      _HistogramBuffer[dispatchThreadId] = 0u;
  }
  65. #endif // DISABLE_COMPUTE_SHADERS