Optimize glow and tonemap gather step in the mobile renderer

Mobile devices are typically bandwidth-bound, which means we need to do as few texture samples as possible.
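
As a rough back-of-the-envelope illustration of why sample count matters so much (the resolution, texture format, and tap count below are assumptions chosen for the example, not measurements from this commit):

#include <cstdint>
#include <cstdio>

int main() {
	// Assumed numbers, for illustration only: 1080p target, RGBA16F texels, 13 taps.
	const uint64_t width = 1920, height = 1080;
	const uint64_t bytes_per_texel = 8; // RGBA16F
	const uint64_t taps = 13;           // texture taps per output pixel
	const uint64_t bytes_read = width * height * taps * bytes_per_texel;
	std::printf("%.1f MB of texture reads per pass\n", bytes_read / 1e6);
	// Prints ~215.7 MB (before cache effects), which is why cutting both the
	// sample count and the working resolution matters so much on mobile.
	return 0;
}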

They typically use TBDR GPUs, which means that all rendering takes place in special optimized on-chip tiles. As a side effect, reading memory back from the tile to VRAM is really slow, especially on Mali devices.

This commit uses a technique where you do a small blur while downsampling, and then another small blur while upsampling, to get really high-quality glow. While this doesn't reduce the renderpass count very much, it does reduce the texture-read bandwidth by almost 10 times. Overall, glow was more texture-read bound than memory-write bound, so this was a huge win.
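
A minimal host-side sketch of the pass ordering this describes is below; every name and signature in it is a hypothetical stand-in (not Godot's actual internals), and it omits the initial gather/threshold pass and the per-level weighting:

#include <vector>

enum Mode { MODE_GLOW_DOWNSAMPLE, MODE_GLOW_UPSAMPLE };
struct Texture { int width = 0, height = 0; };

// Stub standing in for "draw a fullscreen triangle with the blur shader".
void blit_pass(const Texture &src, const Texture *blend, Texture &dst, Mode mode) {
	(void)src; (void)blend; (void)dst; (void)mode;
}

// down[i] holds the downsampled chain, up[i] accumulates glow on the way back up.
void run_glow(std::vector<Texture> &down, std::vector<Texture> &up) {
	const int levels = (int)down.size();
	// Downsample chain: a small 4-tap blur at each step while halving resolution.
	for (int i = 1; i < levels; i++) {
		blit_pass(down[i - 1], nullptr, down[i], MODE_GLOW_DOWNSAMPLE);
	}
	// Upsample chain: blur the coarser level back up and blend the matching
	// downsampled level in, so the glow accumulates as we return to full size.
	for (int i = levels - 2; i >= 0; i--) {
		const Texture &src = (i == levels - 2) ? down[i + 1] : up[i + 1];
		blit_pass(src, &down[i], up[i], MODE_GLOW_UPSAMPLE);
	}
	// The tonemap pass then only needs a single fetch of up[0] to get the glow.
}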

A side effect of this new technique is that we can gather the glow as we upsample instead of gathering it in the final tonemap pass. Doing so significantly reduces the cost of the tonemap pass as well.
clayjohn 2025-09-01 14:43:37 -07:00
parent 084d5d407e
commit 2e59cb41f4
22 changed files with 1524 additions and 519 deletions


@ -43,18 +43,103 @@ layout(location = 0) in vec2 uv_interp;
layout(set = 0, binding = 0) uniform sampler2D source_color;
#ifdef GLOW_USE_AUTO_EXPOSURE
layout(set = 1, binding = 0) uniform sampler2D source_auto_exposure;
#ifdef MODE_GLOW_UPSAMPLE
// When upsampling, this is the original downsampled texture, not the blended upsampled texture.
layout(set = 1, binding = 0) uniform sampler2D blend_color;
layout(constant_id = 0) const bool use_debanding = false;
layout(constant_id = 1) const bool use_blend_color = false;
// From https://alex.vlachos.com/graphics/Alex_Vlachos_Advanced_VR_Rendering_GDC2015.pdf
// and https://www.shadertoy.com/view/MslGR8 (5th one starting from the bottom)
// NOTE: `frag_coord` is in pixels (i.e. not normalized UV).
// This dithering must be applied after encoding changes (linear/nonlinear) have been applied
// as the final step before quantization from floating point to integer values.
vec3 screen_space_dither(vec2 frag_coord, float bit_alignment_diviser) {
// Iestyn's RGB dither (7 asm instructions) from Portal 2 X360, slightly modified for VR.
// Removed the time component to avoid passing time into this shader.
vec3 dither = vec3(dot(vec2(171.0, 231.0), frag_coord));
dither.rgb = fract(dither.rgb / vec3(103.0, 71.0, 97.0));
// Subtract 0.5 to avoid slightly brightening the whole viewport.
// Use a dither strength of 100% rather than the 37.5% suggested by the original source.
return (dither.rgb - 0.5) / bit_alignment_diviser;
}
#endif
layout(location = 0) out vec4 frag_color;
#ifdef MODE_GLOW_DOWNSAMPLE
// https://www.shadertoy.com/view/mdsyDf
vec4 BloomDownKernel4(sampler2D Tex, vec2 uv0) {
vec2 RcpSrcTexRes = blur.source_pixel_size;
vec2 tc = (uv0 * 2.0 + 1.0) * RcpSrcTexRes;
float la = 1.0 / 4.0;
vec2 o = (0.5 + la) * RcpSrcTexRes;
vec4 c = vec4(0.0);
c += textureLod(Tex, tc + vec2(-1.0, -1.0) * o, 0.0) * 0.25;
c += textureLod(Tex, tc + vec2(1.0, -1.0) * o, 0.0) * 0.25;
c += textureLod(Tex, tc + vec2(-1.0, 1.0) * o, 0.0) * 0.25;
c += textureLod(Tex, tc + vec2(1.0, 1.0) * o, 0.0) * 0.25;
return c;
}
#endif
#ifdef MODE_GLOW_UPSAMPLE
// https://www.shadertoy.com/view/mdsyDf
vec4 BloomUpKernel4(sampler2D Tex, vec2 uv0) {
vec2 RcpSrcTexRes = blur.source_pixel_size;
vec2 uv = uv0 * 0.5 + 0.5;
vec2 uvI = floor(uv);
vec2 uvF = uv - uvI;
vec2 tc = uvI * RcpSrcTexRes.xy;
// optimal stop-band
float lw = 0.357386;
float la = 25.0 / 32.0; // 0.78125 ~ 0.779627;
float lb = 3.0 / 64.0; // 0.046875 ~ 0.0493871;
vec2 l = vec2(-1.5 + la, 0.5 + lb);
vec2 lx = uvF.x == 0.0 ? l.xy : -l.yx;
vec2 ly = uvF.y == 0.0 ? l.xy : -l.yx;
lx *= RcpSrcTexRes.xx;
ly *= RcpSrcTexRes.yy;
vec4 c00 = textureLod(Tex, tc + vec2(lx.x, ly.x), 0.0);
vec4 c10 = textureLod(Tex, tc + vec2(lx.y, ly.x), 0.0);
vec4 c01 = textureLod(Tex, tc + vec2(lx.x, ly.y), 0.0);
vec4 c11 = textureLod(Tex, tc + vec2(lx.y, ly.y), 0.0);
vec2 w = abs(uvF * 2.0 - lw);
vec4 cx0 = c00 * (1.0 - w.x) + (c10 * w.x);
vec4 cx1 = c01 * (1.0 - w.x) + (c11 * w.x);
vec4 cxy = cx0 * (1.0 - w.y) + (cx1 * w.y);
return cxy;
}
#endif // MODE_GLOW_UPSAMPLE
void main() {
// We do not apply our color scale for our mobile renderer here; we'll leave our colors at half brightness and apply the scale in the tonemap raster.
#ifdef MODE_MIPMAP
vec2 pix_size = blur.pixel_size;
vec2 pix_size = blur.dest_pixel_size;
vec4 color = texture(source_color, uv_interp + vec2(-0.5, -0.5) * pix_size);
color += texture(source_color, uv_interp + vec2(0.5, -0.5) * pix_size);
color += texture(source_color, uv_interp + vec2(0.5, 0.5) * pix_size);
@ -68,19 +153,19 @@ void main() {
// For Gaussian Blur we use 13 taps in a single pass instead of 12 taps over 2 passes.
// This minimizes the number of times we change framebuffers which is very important for mobile.
// Source: http://www.iryoku.com/next-generation-post-processing-in-call-of-duty-advanced-warfare
vec4 A = texture(source_color, uv_interp + blur.pixel_size * vec2(-1.0, -1.0));
vec4 B = texture(source_color, uv_interp + blur.pixel_size * vec2(0.0, -1.0));
vec4 C = texture(source_color, uv_interp + blur.pixel_size * vec2(1.0, -1.0));
vec4 D = texture(source_color, uv_interp + blur.pixel_size * vec2(-0.5, -0.5));
vec4 E = texture(source_color, uv_interp + blur.pixel_size * vec2(0.5, -0.5));
vec4 F = texture(source_color, uv_interp + blur.pixel_size * vec2(-1.0, 0.0));
vec4 A = texture(source_color, uv_interp + blur.dest_pixel_size * vec2(-1.0, -1.0));
vec4 B = texture(source_color, uv_interp + blur.dest_pixel_size * vec2(0.0, -1.0));
vec4 C = texture(source_color, uv_interp + blur.dest_pixel_size * vec2(1.0, -1.0));
vec4 D = texture(source_color, uv_interp + blur.dest_pixel_size * vec2(-0.5, -0.5));
vec4 E = texture(source_color, uv_interp + blur.dest_pixel_size * vec2(0.5, -0.5));
vec4 F = texture(source_color, uv_interp + blur.dest_pixel_size * vec2(-1.0, 0.0));
vec4 G = texture(source_color, uv_interp);
vec4 H = texture(source_color, uv_interp + blur.pixel_size * vec2(1.0, 0.0));
vec4 I = texture(source_color, uv_interp + blur.pixel_size * vec2(-0.5, 0.5));
vec4 J = texture(source_color, uv_interp + blur.pixel_size * vec2(0.5, 0.5));
vec4 K = texture(source_color, uv_interp + blur.pixel_size * vec2(-1.0, 1.0));
vec4 L = texture(source_color, uv_interp + blur.pixel_size * vec2(0.0, 1.0));
vec4 M = texture(source_color, uv_interp + blur.pixel_size * vec2(1.0, 1.0));
vec4 H = texture(source_color, uv_interp + blur.dest_pixel_size * vec2(1.0, 0.0));
vec4 I = texture(source_color, uv_interp + blur.dest_pixel_size * vec2(-0.5, 0.5));
vec4 J = texture(source_color, uv_interp + blur.dest_pixel_size * vec2(0.5, 0.5));
vec4 K = texture(source_color, uv_interp + blur.dest_pixel_size * vec2(-1.0, 1.0));
vec4 L = texture(source_color, uv_interp + blur.dest_pixel_size * vec2(0.0, 1.0));
vec4 M = texture(source_color, uv_interp + blur.dest_pixel_size * vec2(1.0, 1.0));
float base_weight = 0.5 / 4.0;
float lesser_weight = 0.125 / 4.0;
@ -92,67 +177,55 @@ void main() {
frag_color += (G + H + M + L) * lesser_weight;
#endif
#ifdef MODE_GAUSSIAN_GLOW
#ifdef MODE_GLOW_GATHER
// First step, go straight to quarter resolution.
// Don't apply blur, but include thresholding.
//Glow uses larger sigma 1 for a more rounded blur effect
vec2 block_pos = floor(gl_FragCoord.xy) * 4.0;
vec2 end = max(1.0 / blur.source_pixel_size - vec2(4.0), vec2(0.0));
block_pos = clamp(block_pos, vec2(0.0), end);
#define GLOW_ADD(m_ofs, m_mult) \
{ \
vec2 ofs = uv_interp + m_ofs * pix_size; \
vec4 c = texture(source_color, ofs) * m_mult; \
if (any(lessThan(ofs, vec2(0.0))) || any(greaterThan(ofs, vec2(1.0)))) { \
c *= 0.0; \
} \
color += c; \
// We skipped a level, so gather 16 closest samples now.
vec4 color = textureLod(source_color, (block_pos + vec2(1.0, 1.0)) * blur.source_pixel_size, 0.0);
color += textureLod(source_color, (block_pos + vec2(1.0, 3.0)) * blur.source_pixel_size, 0.0);
color += textureLod(source_color, (block_pos + vec2(3.0, 1.0)) * blur.source_pixel_size, 0.0);
color += textureLod(source_color, (block_pos + vec2(3.0, 3.0)) * blur.source_pixel_size, 0.0);
frag_color = color * 0.25;
// Apply strength a second time since it usually gets added at each level.
frag_color *= blur.glow_strength;
frag_color *= blur.glow_strength;
// In the first pass, bring the color back to the correct range, else we're applying the wrong threshold.
// In subsequent passes we can use it as is, as we'd just be undoing it right after.
frag_color *= blur.luminance_multiplier;
frag_color *= blur.glow_exposure;
float luminance = max(frag_color.r, max(frag_color.g, frag_color.b));
float feedback = max(smoothstep(blur.glow_hdr_threshold, blur.glow_hdr_threshold + blur.glow_hdr_scale, luminance), blur.glow_bloom);
frag_color = min(frag_color * feedback, vec4(blur.glow_luminance_cap)) / blur.luminance_multiplier;
#endif // MODE_GLOW_GATHER_WIDE
#ifdef MODE_GLOW_DOWNSAMPLE
// Regular downsample, apply a simple blur.
frag_color = BloomDownKernel4(source_color, floor(gl_FragCoord.xy));
frag_color *= blur.glow_strength;
#endif // MODE_GLOW_DOWNSAMPLE
#ifdef MODE_GLOW_UPSAMPLE
frag_color = BloomUpKernel4(source_color, floor(gl_FragCoord.xy)) * blur.glow_strength; // "glow_strength" here is actually the glow level. It is always 1.0, except for the first upsample where we need to apply the level to two textures at once.
if (use_blend_color) {
vec2 uv = floor(gl_FragCoord.xy) + 0.5;
frag_color += textureLod(blend_color, uv * blur.dest_pixel_size, 0.0) * blur.glow_level;
}
if (bool(blur.flags & FLAG_HORIZONTAL)) {
vec2 pix_size = blur.pixel_size;
pix_size *= 0.5; //reading from larger buffer, so use more samples
vec4 color = texture(source_color, uv_interp + vec2(0.0, 0.0) * pix_size) * 0.174938;
GLOW_ADD(vec2(1.0, 0.0), 0.165569);
GLOW_ADD(vec2(2.0, 0.0), 0.140367);
GLOW_ADD(vec2(3.0, 0.0), 0.106595);
GLOW_ADD(vec2(-1.0, 0.0), 0.165569);
GLOW_ADD(vec2(-2.0, 0.0), 0.140367);
GLOW_ADD(vec2(-3.0, 0.0), 0.106595);
// only do this in the horizontal pass, if we also do this in the vertical pass we're doubling up.
color *= blur.glow_strength;
frag_color = color;
} else {
vec2 pix_size = blur.pixel_size;
vec4 color = texture(source_color, uv_interp + vec2(0.0, 0.0) * pix_size) * 0.288713;
GLOW_ADD(vec2(0.0, 1.0), 0.233062);
GLOW_ADD(vec2(0.0, 2.0), 0.122581);
GLOW_ADD(vec2(0.0, -1.0), 0.233062);
GLOW_ADD(vec2(0.0, -2.0), 0.122581);
frag_color = color;
if (use_debanding) {
frag_color.rgb += screen_space_dither(gl_FragCoord.xy, 1023.0);
}
#undef GLOW_ADD
if (bool(blur.flags & FLAG_GLOW_FIRST_PASS)) {
// In the first pass, bring the color back to the correct range, else we're applying the wrong threshold.
// In subsequent passes we can use it as is, as we'd just be undoing it right after.
frag_color *= blur.luminance_multiplier;
#ifdef GLOW_USE_AUTO_EXPOSURE
frag_color /= texelFetch(source_auto_exposure, ivec2(0, 0), 0).r / blur.glow_auto_exposure_scale;
#endif
frag_color *= blur.glow_exposure;
float luminance = max(frag_color.r, max(frag_color.g, frag_color.b));
float feedback = max(smoothstep(blur.glow_hdr_threshold, blur.glow_hdr_threshold + blur.glow_hdr_scale, luminance), blur.glow_bloom);
frag_color = min(frag_color * feedback, vec4(blur.glow_luminance_cap)) / blur.luminance_multiplier;
}
#endif // MODE_GAUSSIAN_GLOW
#endif // MODE_GLOW_UPSAMPLE
#ifdef MODE_COPY
vec4 color = textureLod(source_color, uv_interp, 0.0);


@ -3,24 +3,22 @@
#define FLAG_GLOW_FIRST_PASS (1 << 2)
layout(push_constant, std430) uniform Blur {
vec2 pixel_size; // 08 - 08
uint flags; // 04 - 12
uint pad; // 04 - 16
vec2 dest_pixel_size; // 08 - 08
vec2 source_pixel_size; // 08 - 16
vec2 pad; // 08 - 24
uint flags; // 04 - 28
float glow_level; // 04 - 32
// Glow.
float glow_strength; // 04 - 20
float glow_bloom; // 04 - 24
float glow_hdr_threshold; // 04 - 28
float glow_hdr_scale; // 04 - 32
float glow_strength; // 04 - 36
float glow_bloom; // 04 - 40
float glow_hdr_threshold; // 04 - 44
float glow_hdr_scale; // 04 - 48
float glow_exposure; // 04 - 36
float glow_white; // 04 - 40
float glow_luminance_cap; // 04 - 44
float glow_auto_exposure_scale; // 04 - 48
float luminance_multiplier; // 04 - 52
float res1; // 04 - 56
float res2; // 04 - 60
float res3; // 04 - 64
float glow_exposure; // 04 - 52
float glow_white; // 04 - 56
float glow_luminance_cap; // 04 - 60
float luminance_multiplier; // 04 - 64
}
blur;
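
For reference, a C++ struct mirroring this 64-byte std430 push-constant block could look like the following sketch (offsets taken from the comments above; the engine's real host-side struct is not part of this diff):

#include <cstdint>

struct BlurPushConstant {
	float dest_pixel_size[2];   // offset  0
	float source_pixel_size[2]; // offset  8
	float pad[2];               // offset 16
	uint32_t flags;             // offset 24
	float glow_level;           // offset 28
	float glow_strength;        // offset 32
	float glow_bloom;           // offset 36
	float glow_hdr_threshold;   // offset 40
	float glow_hdr_scale;       // offset 44
	float glow_exposure;        // offset 48
	float glow_white;           // offset 52
	float glow_luminance_cap;   // offset 56
	float luminance_multiplier; // offset 60
};
static_assert(sizeof(BlurPushConstant) == 64, "Must match the shader's 64-byte push constant block.");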


@ -67,7 +67,7 @@ layout(location = 0) out vec4 out_color;
layout(push_constant, std430) uniform Params {
vec2 inv_size;
uint flags;
uint use_debanding;
float pad;
}
params;
@ -140,11 +140,8 @@ void main() {
out_color.rgb = linear_to_srgb(out_color.rgb);
out_color.a = texture(color_tex, tex_coord).a;
}
if (bool(params.flags & FLAG_USE_8_BIT_DEBANDING)) {
if (bool(params.use_debanding)) {
// Divide by 255 to align to 8-bit quantization.
out_color.rgb += screen_space_dither(gl_FragCoord.xy, 255.0);
} else if (bool(params.flags & FLAG_USE_10_BIT_DEBANDING)) {
// Divide by 1023 to align to 10-bit quantization.
out_color.rgb += screen_space_dither(gl_FragCoord.xy, 1023.0);
}
}


@ -38,21 +38,16 @@ void main() {
layout(location = 0) in vec2 uv_interp;
#ifdef SUBPASS
layout(input_attachment_index = 0, set = 0, binding = 0) uniform subpassInput input_color;
#elif defined(USE_MULTIVIEW)
layout(set = 0, binding = 0) uniform sampler2DArray source_color;
#ifdef USE_MULTIVIEW
#define SAMPLER_FORMAT sampler2DArray
#else
layout(set = 0, binding = 0) uniform sampler2D source_color;
#define SAMPLER_FORMAT sampler2D
#endif
layout(set = 0, binding = 0) uniform SAMPLER_FORMAT source_color;
layout(set = 1, binding = 0) uniform sampler2D source_auto_exposure;
#ifdef USE_MULTIVIEW
layout(set = 2, binding = 0) uniform sampler2DArray source_glow;
#else
layout(set = 2, binding = 0) uniform sampler2D source_glow;
#endif
layout(set = 2, binding = 1) uniform sampler2D glow_map;
layout(set = 2, binding = 0) uniform SAMPLER_FORMAT source_glow;
layout(set = 2, binding = 1) uniform sampler2D glow_map; // TODO needs multiview support
#ifdef USE_1D_LUT
layout(set = 3, binding = 0) uniform sampler2D source_color_correction;
@ -66,8 +61,7 @@ layout(set = 3, binding = 0) uniform sampler3D source_color_correction;
#define FLAG_USE_COLOR_CORRECTION (1 << 3)
#define FLAG_USE_FXAA (1 << 4)
#define FLAG_USE_8_BIT_DEBANDING (1 << 5)
#define FLAG_USE_10_BIT_DEBANDING (1 << 6)
#define FLAG_CONVERT_TO_SRGB (1 << 7)
#define FLAG_CONVERT_TO_SRGB (1 << 6)
layout(push_constant, std430) uniform Params {
vec3 bcs;
@ -93,111 +87,6 @@ params;
layout(location = 0) out vec4 frag_color;
#ifdef USE_GLOW_FILTER_BICUBIC
// w0, w1, w2, and w3 are the four cubic B-spline basis functions
float w0(float a) {
return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f);
}
float w1(float a) {
return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f);
}
float w2(float a) {
return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f);
}
float w3(float a) {
return (1.0f / 6.0f) * (a * a * a);
}
// g0 and g1 are the two amplitude functions
float g0(float a) {
return w0(a) + w1(a);
}
float g1(float a) {
return w2(a) + w3(a);
}
// h0 and h1 are the two offset functions
float h0(float a) {
return -1.0f + w1(a) / (w0(a) + w1(a));
}
float h1(float a) {
return 1.0f + w3(a) / (w2(a) + w3(a));
}
#ifdef USE_MULTIVIEW
vec4 texture2D_bicubic(sampler2DArray tex, vec2 uv, int p_lod) {
float lod = float(p_lod);
vec2 tex_size = vec2(params.glow_texture_size >> p_lod);
vec2 pixel_size = vec2(1.0f) / tex_size;
uv = uv * tex_size + vec2(0.5f);
vec2 iuv = floor(uv);
vec2 fuv = fract(uv);
float g0x = g0(fuv.x);
float g1x = g1(fuv.x);
float h0x = h0(fuv.x);
float h1x = h1(fuv.x);
float h0y = h0(fuv.y);
float h1y = h1(fuv.y);
vec3 p0 = vec3((vec2(iuv.x + h0x, iuv.y + h0y) - vec2(0.5f)) * pixel_size, ViewIndex);
vec3 p1 = vec3((vec2(iuv.x + h1x, iuv.y + h0y) - vec2(0.5f)) * pixel_size, ViewIndex);
vec3 p2 = vec3((vec2(iuv.x + h0x, iuv.y + h1y) - vec2(0.5f)) * pixel_size, ViewIndex);
vec3 p3 = vec3((vec2(iuv.x + h1x, iuv.y + h1y) - vec2(0.5f)) * pixel_size, ViewIndex);
return (g0(fuv.y) * (g0x * textureLod(tex, p0, lod) + g1x * textureLod(tex, p1, lod))) +
(g1(fuv.y) * (g0x * textureLod(tex, p2, lod) + g1x * textureLod(tex, p3, lod)));
}
#define GLOW_TEXTURE_SAMPLE(m_tex, m_uv, m_lod) texture2D_bicubic(m_tex, m_uv, m_lod)
#else // USE_MULTIVIEW
vec4 texture2D_bicubic(sampler2D tex, vec2 uv, int p_lod) {
float lod = float(p_lod);
vec2 tex_size = vec2(params.glow_texture_size >> p_lod);
vec2 pixel_size = vec2(1.0f) / tex_size;
uv = uv * tex_size + vec2(0.5f);
vec2 iuv = floor(uv);
vec2 fuv = fract(uv);
float g0x = g0(fuv.x);
float g1x = g1(fuv.x);
float h0x = h0(fuv.x);
float h1x = h1(fuv.x);
float h0y = h0(fuv.y);
float h1y = h1(fuv.y);
vec2 p0 = (vec2(iuv.x + h0x, iuv.y + h0y) - vec2(0.5f)) * pixel_size;
vec2 p1 = (vec2(iuv.x + h1x, iuv.y + h0y) - vec2(0.5f)) * pixel_size;
vec2 p2 = (vec2(iuv.x + h0x, iuv.y + h1y) - vec2(0.5f)) * pixel_size;
vec2 p3 = (vec2(iuv.x + h1x, iuv.y + h1y) - vec2(0.5f)) * pixel_size;
return (g0(fuv.y) * (g0x * textureLod(tex, p0, lod) + g1x * textureLod(tex, p1, lod))) +
(g1(fuv.y) * (g0x * textureLod(tex, p2, lod) + g1x * textureLod(tex, p3, lod)));
}
#define GLOW_TEXTURE_SAMPLE(m_tex, m_uv, m_lod) texture2D_bicubic(m_tex, m_uv, m_lod)
#endif // !USE_MULTIVIEW
#else // USE_GLOW_FILTER_BICUBIC
#ifdef USE_MULTIVIEW
#define GLOW_TEXTURE_SAMPLE(m_tex, m_uv, m_lod) textureLod(m_tex, vec3(m_uv, ViewIndex), float(m_lod))
#else // USE_MULTIVIEW
#define GLOW_TEXTURE_SAMPLE(m_tex, m_uv, m_lod) textureLod(m_tex, m_uv, float(m_lod))
#endif // !USE_MULTIVIEW
#endif // !USE_GLOW_FILTER_BICUBIC
// Based on Reinhard's extended formula, see equation 4 in https://doi.org/cjbgrt
vec3 tonemap_reinhard(vec3 color, float white) {
float white_squared = white * white;
@ -360,11 +249,113 @@ vec3 apply_tonemapping(vec3 color, float white) { // inputs are LINEAR
}
}
#ifdef USE_GLOW_FILTER_BICUBIC
// w0, w1, w2, and w3 are the four cubic B-spline basis functions
float w0(float a) {
return (1.0f / 6.0f) * (a * (a * (-a + 3.0f) - 3.0f) + 1.0f);
}
float w1(float a) {
return (1.0f / 6.0f) * (a * a * (3.0f * a - 6.0f) + 4.0f);
}
float w2(float a) {
return (1.0f / 6.0f) * (a * (a * (-3.0f * a + 3.0f) + 3.0f) + 1.0f);
}
float w3(float a) {
return (1.0f / 6.0f) * (a * a * a);
}
// g0 and g1 are the two amplitude functions
float g0(float a) {
return w0(a) + w1(a);
}
float g1(float a) {
return w2(a) + w3(a);
}
// h0 and h1 are the two offset functions
float h0(float a) {
return -1.0f + w1(a) / (w0(a) + w1(a));
}
float h1(float a) {
return 1.0f + w3(a) / (w2(a) + w3(a));
}
#ifdef USE_MULTIVIEW
vec3 gather_glow(sampler2DArray tex, vec2 uv) { // sample all selected glow levels, view is added to uv later
#else
vec3 gather_glow(sampler2D tex, vec2 uv) { // sample all selected glow levels
#endif // defined(USE_MULTIVIEW)
vec4 texture2D_bicubic(sampler2DArray tex, vec2 uv, int p_lod) {
float lod = float(p_lod);
vec2 tex_size = vec2(params.glow_texture_size >> p_lod);
vec2 pixel_size = vec2(1.0f) / tex_size;
uv = uv * tex_size + vec2(0.5f);
vec2 iuv = floor(uv);
vec2 fuv = fract(uv);
float g0x = g0(fuv.x);
float g1x = g1(fuv.x);
float h0x = h0(fuv.x);
float h1x = h1(fuv.x);
float h0y = h0(fuv.y);
float h1y = h1(fuv.y);
vec3 p0 = vec3((vec2(iuv.x + h0x, iuv.y + h0y) - vec2(0.5f)) * pixel_size, ViewIndex);
vec3 p1 = vec3((vec2(iuv.x + h1x, iuv.y + h0y) - vec2(0.5f)) * pixel_size, ViewIndex);
vec3 p2 = vec3((vec2(iuv.x + h0x, iuv.y + h1y) - vec2(0.5f)) * pixel_size, ViewIndex);
vec3 p3 = vec3((vec2(iuv.x + h1x, iuv.y + h1y) - vec2(0.5f)) * pixel_size, ViewIndex);
return (g0(fuv.y) * (g0x * textureLod(tex, p0, lod) + g1x * textureLod(tex, p1, lod))) +
(g1(fuv.y) * (g0x * textureLod(tex, p2, lod) + g1x * textureLod(tex, p3, lod)));
}
#define GLOW_TEXTURE_SAMPLE(m_tex, m_uv, m_lod) texture2D_bicubic(m_tex, m_uv, m_lod)
#else // USE_MULTIVIEW
vec4 texture2D_bicubic(sampler2D tex, vec2 uv, int p_lod) {
float lod = float(p_lod);
vec2 tex_size = vec2(params.glow_texture_size >> p_lod);
vec2 pixel_size = vec2(1.0f) / tex_size;
uv = uv * tex_size + vec2(0.5f);
vec2 iuv = floor(uv);
vec2 fuv = fract(uv);
float g0x = g0(fuv.x);
float g1x = g1(fuv.x);
float h0x = h0(fuv.x);
float h1x = h1(fuv.x);
float h0y = h0(fuv.y);
float h1y = h1(fuv.y);
vec2 p0 = (vec2(iuv.x + h0x, iuv.y + h0y) - vec2(0.5f)) * pixel_size;
vec2 p1 = (vec2(iuv.x + h1x, iuv.y + h0y) - vec2(0.5f)) * pixel_size;
vec2 p2 = (vec2(iuv.x + h0x, iuv.y + h1y) - vec2(0.5f)) * pixel_size;
vec2 p3 = (vec2(iuv.x + h1x, iuv.y + h1y) - vec2(0.5f)) * pixel_size;
return (g0(fuv.y) * (g0x * textureLod(tex, p0, lod) + g1x * textureLod(tex, p1, lod))) +
(g1(fuv.y) * (g0x * textureLod(tex, p2, lod) + g1x * textureLod(tex, p3, lod)));
}
#define GLOW_TEXTURE_SAMPLE(m_tex, m_uv, m_lod) texture2D_bicubic(m_tex, m_uv, m_lod)
#endif // !USE_MULTIVIEW
#else // USE_GLOW_FILTER_BICUBIC
#ifdef USE_MULTIVIEW
#define GLOW_TEXTURE_SAMPLE(m_tex, m_uv, m_lod) textureLod(m_tex, vec3(m_uv, ViewIndex), float(m_lod))
#else // USE_MULTIVIEW
#define GLOW_TEXTURE_SAMPLE(m_tex, m_uv, m_lod) textureLod(m_tex, m_uv, float(m_lod))
#endif // !USE_MULTIVIEW
#endif // !USE_GLOW_FILTER_BICUBIC
vec3 gather_glow(SAMPLER_FORMAT tex, vec2 uv) { // sample all selected glow levels
vec3 glow = vec3(0.0f);
if (params.glow_levels[0] > 0.0001) {
@ -461,8 +452,6 @@ vec3 apply_color_correction(vec3 color) {
}
#endif
#ifndef SUBPASS
// FXAA 3.11 compact, Ported from https://github.com/kosua20/Rendu/blob/master/resources/common/shaders/screens/fxaa.frag
///////////////////////////////////////////////////////////////////////////////////
// MIT License
@ -831,7 +820,6 @@ vec3 do_fxaa(vec3 color, float exposure, vec2 uv_interp) {
#endif
}
#endif // !SUBPASS
// From https://alex.vlachos.com/graphics/Alex_Vlachos_Advanced_VR_Rendering_GDC2015.pdf
// and https://www.shadertoy.com/view/MslGR8 (5th one starting from the bottom)
@ -850,15 +838,7 @@ vec3 screen_space_dither(vec2 frag_coord, float bit_alignment_diviser) {
}
void main() {
#ifdef SUBPASS
// SUBPASS and USE_MULTIVIEW can be combined but in that case we're already reading from the correct layer
#ifdef USE_MULTIVIEW
// In order to ensure the `SpvCapabilityMultiView` is included in the SPIR-V capabilities, gl_ViewIndex must
// be read in the shader. Without this, transpilation to Metal fails to include the multi-view variant.
uint vi = ViewIndex;
#endif
vec4 color = subpassLoad(input_color);
#elif defined(USE_MULTIVIEW)
vec4 color = textureLod(source_color, vec3(uv_interp, ViewIndex), 0.0f);
#else
vec4 color = textureLod(source_color, uv_interp, 0.0f);
@ -869,17 +849,13 @@ void main() {
float exposure = params.exposure;
#ifndef SUBPASS
if (bool(params.flags & FLAG_USE_AUTO_EXPOSURE)) {
exposure *= 1.0 / (texelFetch(source_auto_exposure, ivec2(0, 0), 0).r * params.luminance_multiplier / params.auto_exposure_scale);
}
#endif
color.rgb *= exposure;
// Single-pass FXAA and pre-tonemap glow.
#ifndef SUBPASS
if (bool(params.flags & FLAG_USE_FXAA)) {
// FXAA must be performed before glow to preserve the "bleed" effect of glow.
color.rgb = do_fxaa(color.rgb, exposure, uv_interp);
@ -900,15 +876,13 @@ void main() {
color.rgb = apply_glow(color.rgb, glow, params.white);
}
}
#endif
// Tonemap to lower dynamic range.
color.rgb = apply_tonemapping(color.rgb, params.white);
// Additional effects.
// Post-tonemap glow.
#ifndef SUBPASS
if (bool(params.flags & FLAG_USE_GLOW) && params.glow_mode == GLOW_MODE_SOFTLIGHT) {
// Apply soft light after tonemapping to mitigate the issue of discontinuity
// at 1.0 and higher. This makes the issue only appear with HDR output that
@ -921,7 +895,8 @@ void main() {
glow = apply_tonemapping(glow, params.white);
color.rgb = apply_glow(color.rgb, glow, params.white);
}
#endif
// Additional effects.
if (bool(params.flags & FLAG_USE_BCS)) {
// Apply brightness:
@ -964,9 +939,6 @@ void main() {
if (bool(params.flags & FLAG_USE_8_BIT_DEBANDING)) {
// Divide by 255 to align to 8-bit quantization.
color.rgb += screen_space_dither(gl_FragCoord.xy, 255.0);
} else if (bool(params.flags & FLAG_USE_10_BIT_DEBANDING)) {
// Divide by 1023 to align to 10-bit quantization.
color.rgb += screen_space_dither(gl_FragCoord.xy, 1023.0);
}
frag_color = color;


@ -0,0 +1,818 @@
#[vertex]
#version 450
#VERSION_DEFINES
layout(location = 0) out vec2 uv_interp;
void main() {
// old code, ARM driver bug on Mali-GXXx GPUs and Vulkan API 1.3.xxx
// https://github.com/godotengine/godot/pull/92817#issuecomment-2168625982
//vec2 base_arr[3] = vec2[](vec2(-1.0, -1.0), vec2(-1.0, 3.0), vec2(3.0, -1.0));
//gl_Position = vec4(base_arr[gl_VertexIndex], 0.0, 1.0);
//uv_interp = clamp(gl_Position.xy, vec2(0.0, 0.0), vec2(1.0, 1.0)) * 2.0; // saturate(x) * 2.0
vec2 vertex_base;
if (gl_VertexIndex == 0) {
vertex_base = vec2(-1.0, -1.0);
} else if (gl_VertexIndex == 1) {
vertex_base = vec2(-1.0, 3.0);
} else {
vertex_base = vec2(3.0, -1.0);
}
gl_Position = vec4(vertex_base, 0.0, 1.0);
uv_interp = clamp(vertex_base, vec2(0.0, 0.0), vec2(1.0, 1.0)) * 2.0; // saturate(x) * 2.0
}
#[fragment]
#version 450
#VERSION_DEFINES
#ifdef USE_MULTIVIEW
#extension GL_EXT_multiview : enable
#define ViewIndex gl_ViewIndex
#endif //USE_MULTIVIEW
layout(location = 0) in vec2 uv_interp;
#ifdef USE_MULTIVIEW
#define SAMPLER_FORMAT sampler2DArray
#else
#define SAMPLER_FORMAT sampler2D
#endif
#ifdef SUBPASS
layout(input_attachment_index = 0, set = 0, binding = 0) uniform subpassInput input_color;
#else
layout(set = 0, binding = 0) uniform SAMPLER_FORMAT source_color;
#endif
layout(set = 1, binding = 0) uniform SAMPLER_FORMAT source_glow;
layout(set = 1, binding = 1) uniform sampler2D glow_map;
#ifdef USE_1D_LUT
layout(set = 2, binding = 0) uniform sampler2D source_color_correction;
#else
layout(set = 2, binding = 0) uniform sampler3D source_color_correction;
#endif
layout(constant_id = 0) const bool use_bcs = false;
layout(constant_id = 1) const bool use_glow = false;
layout(constant_id = 2) const bool use_glow_map = false;
layout(constant_id = 3) const bool use_color_correction = false;
layout(constant_id = 4) const bool use_fxaa = false;
layout(constant_id = 5) const bool deband_8_bit = false;
layout(constant_id = 6) const bool deband_10_bit = false;
layout(constant_id = 7) const bool convert_to_srgb = false;
layout(constant_id = 8) const bool tonemapper_linear = false;
layout(constant_id = 9) const bool tonemapper_reinhard = false;
layout(constant_id = 10) const bool tonemapper_filmic = false;
layout(constant_id = 11) const bool tonemapper_aces = false;
layout(constant_id = 12) const bool tonemapper_agx = false;
layout(constant_id = 13) const bool glow_mode_add = false;
layout(constant_id = 14) const bool glow_mode_screen = false;
layout(constant_id = 15) const bool glow_mode_softlight = false;
layout(constant_id = 16) const bool glow_mode_replace = false;
layout(constant_id = 17) const bool glow_mode_mix = false;
layout(push_constant, std430) uniform Params {
vec3 bcs;
float luminance_multiplier;
vec2 src_pixel_size;
vec2 dest_pixel_size;
float glow_intensity;
float glow_map_strength;
float exposure;
float white;
}
params;
layout(location = 0) out vec4 frag_color;
// Based on Reinhard's extended formula, see equation 4 in https://doi.org/cjbgrt
vec3 tonemap_reinhard(vec3 color, float white) {
float white_squared = white * white;
vec3 white_squared_color = white_squared * color;
// Equivalent to color * (1 + color / white_squared) / (1 + color)
return (white_squared_color + color * color) / (white_squared_color + white_squared);
}
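As a quick check of the equivalence claimed in the comment above, clearing the fraction gives:

color * (1 + color / white^2) / (1 + color)
    = color * (white^2 + color) / (white^2 * (1 + color))
    = (white^2 * color + color * color) / (white^2 * color + white^2)

which is exactly the expression returned, with white_squared_color = white^2 * color.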
vec3 tonemap_filmic(vec3 color, float white) {
// exposure bias: input scale (color *= bias, white *= bias) to make the brightness consistent with other tonemappers
// also useful to scale the input to the range that the tonemapper is designed for (some require very high input values)
// has no effect on the curve's general shape or visual properties
const float exposure_bias = 2.0f;
const float A = 0.22f * exposure_bias * exposure_bias; // bias baked into constants for performance
const float B = 0.30f * exposure_bias;
const float C = 0.10f;
const float D = 0.20f;
const float E = 0.01f;
const float F = 0.30f;
vec3 color_tonemapped = ((color * (A * color + C * B) + D * E) / (color * (A * color + B) + D * F)) - E / F;
float white_tonemapped = ((white * (A * white + C * B) + D * E) / (white * (A * white + B) + D * F)) - E / F;
return color_tonemapped / white_tonemapped;
}
// Adapted from https://github.com/TheRealMJP/BakingLab/blob/master/BakingLab/ACES.hlsl
// (MIT License).
vec3 tonemap_aces(vec3 color, float white) {
const float exposure_bias = 1.8f;
const float A = 0.0245786f;
const float B = 0.000090537f;
const float C = 0.983729f;
const float D = 0.432951f;
const float E = 0.238081f;
// Exposure bias baked into transform to save shader instructions. Equivalent to `color *= exposure_bias`
const mat3 rgb_to_rrt = mat3(
vec3(0.59719f * exposure_bias, 0.35458f * exposure_bias, 0.04823f * exposure_bias),
vec3(0.07600f * exposure_bias, 0.90834f * exposure_bias, 0.01566f * exposure_bias),
vec3(0.02840f * exposure_bias, 0.13383f * exposure_bias, 0.83777f * exposure_bias));
const mat3 odt_to_rgb = mat3(
vec3(1.60475f, -0.53108f, -0.07367f),
vec3(-0.10208f, 1.10813f, -0.00605f),
vec3(-0.00327f, -0.07276f, 1.07602f));
color *= rgb_to_rrt;
vec3 color_tonemapped = (color * (color + A) - B) / (color * (C * color + D) + E);
color_tonemapped *= odt_to_rgb;
white *= exposure_bias;
float white_tonemapped = (white * (white + A) - B) / (white * (C * white + D) + E);
return color_tonemapped / white_tonemapped;
}
// Polynomial approximation of EaryChow's AgX sigmoid curve.
// x must be within the range [0.0, 1.0]
vec3 agx_contrast_approx(vec3 x) {
// Generated with Excel trendline
// Input data: Generated using python sigmoid with EaryChow's configuration and 57 steps
// Additional padding values were added to give correct intersections at 0.0 and 1.0
// 6th order, intercept of 0.0 to remove an operation and ensure intersection at 0.0
vec3 x2 = x * x;
vec3 x4 = x2 * x2;
return 0.021 * x + 4.0111 * x2 - 25.682 * x2 * x + 70.359 * x4 - 74.778 * x4 * x + 27.069 * x4 * x2;
}
// This is an approximation and simplification of EaryChow's AgX implementation that is used by Blender.
// This code is based off of the script that generates the AgX_Base_sRGB.cube LUT that Blender uses.
// Source: https://github.com/EaryChow/AgX_LUT_Gen/blob/main/AgXBasesRGB.py
vec3 tonemap_agx(vec3 color) {
// Combined linear sRGB to linear Rec 2020 and Blender AgX inset matrices:
const mat3 srgb_to_rec2020_agx_inset_matrix = mat3(
0.54490813676363087053, 0.14044005884001287035, 0.088827411851915368603,
0.37377945959812267119, 0.75410959864013760045, 0.17887712465043811023,
0.081384976686407536266, 0.10543358536857773485, 0.73224999956948382528);
// Combined inverse AgX outset matrix and linear Rec 2020 to linear sRGB matrices.
const mat3 agx_outset_rec2020_to_srgb_matrix = mat3(
1.9645509602733325934, -0.29932243390911083839, -0.16436833806080403409,
-0.85585845117807513559, 1.3264510741502356555, -0.23822464068860595117,
-0.10886710826831608324, -0.027084020983874825605, 1.402665347143271889);
// LOG2_MIN = -10.0
// LOG2_MAX = +6.5
// MIDDLE_GRAY = 0.18
const float min_ev = -12.4739311883324; // log2(pow(2, LOG2_MIN) * MIDDLE_GRAY)
const float max_ev = 4.02606881166759; // log2(pow(2, LOG2_MAX) * MIDDLE_GRAY)
// Large negative values in one channel and large positive values in other
// channels can result in a colour that appears darker and more saturated than
// desired after passing it through the inset matrix. For this reason, it is
// best to prevent negative input values.
// This is done before the Rec. 2020 transform to allow the Rec. 2020
// transform to be combined with the AgX inset matrix. This results in a loss
// of color information that could be correctly interpreted within the
// Rec. 2020 color space as positive RGB values, but it is less common for Godot
// to provide this function with negative sRGB values and therefore not worth
// the performance cost of an additional matrix multiplication.
// A value of 2e-10 intentionally introduces insignificant error to prevent
// log2(0.0) after the inset matrix is applied; color will be >= 1e-10 after
// the matrix transform.
color = max(color, 2e-10);
// Do AGX in rec2020 to match Blender and then apply inset matrix.
color = srgb_to_rec2020_agx_inset_matrix * color;
// Log2 space encoding.
// Must be clamped because agx_contrast_approx may not work
// well with values outside of the range [0.0, 1.0]
color = clamp(log2(color), min_ev, max_ev);
color = (color - min_ev) / (max_ev - min_ev);
// Apply sigmoid function approximation.
color = agx_contrast_approx(color);
// Convert back to linear before applying outset matrix.
color = pow(color, vec3(2.4));
// Apply outset to make the result more chroma-laden and then go back to linear sRGB.
color = agx_outset_rec2020_to_srgb_matrix * color;
// Blender's lusRGB.compensate_low_side is too complex for this shader, so
// simply return the color, even if it has negative components. These negative
// components may be useful for subsequent color adjustments.
return color;
}
vec3 linear_to_srgb(vec3 color) {
// Clamping is not strictly necessary for floating point nonlinear sRGB encoding,
// but many cases that call this function need the result clamped.
color = clamp(color, vec3(0.0), vec3(1.0));
const vec3 a = vec3(0.055f);
return mix((vec3(1.0f) + a) * pow(color.rgb, vec3(1.0f / 2.4f)) - a, 12.92f * color.rgb, lessThan(color.rgb, vec3(0.0031308f)));
}
vec3 srgb_to_linear(vec3 color) {
const vec3 a = vec3(0.055f);
return mix(pow((color.rgb + a) * (1.0f / (vec3(1.0f) + a)), vec3(2.4f)), color.rgb * (1.0f / 12.92f), lessThan(color.rgb, vec3(0.04045f)));
}
vec3 apply_tonemapping(vec3 color, float white) { // inputs are LINEAR
// Ensure color values passed to tonemappers are positive.
// They can be negative in the case of negative lights, which leads to undesired behavior.
if (tonemapper_linear) {
return color;
} else if (tonemapper_reinhard) {
return tonemap_reinhard(max(vec3(0.0f), color), white);
} else if (tonemapper_filmic) {
return tonemap_filmic(max(vec3(0.0f), color), white);
} else if (tonemapper_aces) {
return tonemap_aces(max(vec3(0.0f), color), white);
} else { // FLAG_TONEMAPPER_AGX
return tonemap_agx(color);
}
}
#ifdef USE_MULTIVIEW
vec3 gather_glow() {
vec2 uv = gl_FragCoord.xy * params.dest_pixel_size;
return textureLod(source_glow, vec3(uv, ViewIndex), 0.0).rgb;
}
#else
vec3 gather_glow() {
vec2 uv = gl_FragCoord.xy * params.dest_pixel_size;
return textureLod(source_glow, uv, 0.0).rgb;
}
#endif // !USE_MULTIVIEW
// Applies glow using the selected blending mode. Does not handle the mix blend mode.
vec3 apply_glow(vec3 color, vec3 glow, float white) {
if (glow_mode_add) {
return color + glow;
} else if (glow_mode_screen) {
// Glow cannot be above 1.0 after normalizing and should be non-negative
// to produce expected results. It is possible that glow can be negative
// if negative lights were used in the scene.
// We clamp to white because glow will be normalized to this range.
// Note: white cannot be smaller than the maximum output value.
glow.rgb = clamp(glow.rgb, 0.0, white);
// Normalize to white range.
//glow.rgb /= white;
//color.rgb /= white;
//color.rgb = (color.rgb + glow.rgb) - (color.rgb * glow.rgb);
// Expand back to original range.
//color.rgb *= white;
// The following is a mathematically simplified version of the above.
color.rgb = color.rgb + glow.rgb - (color.rgb * glow.rgb / white);
return color;
} else if (glow_mode_softlight) {
// Glow cannot be above 1.0 and should be non-negative to produce
// expected results. It is possible that glow can be negative
// if negative lights were used in the scene.
// Note: This approach causes a discontinuity with scene values
// at 1.0, but because this glow should have its strongest influence
// anchored at 0.25 there is no way around this.
glow.rgb = clamp(glow.rgb, 0.0, 1.0);
color.r = color.r > 1.0 ? color.r : color.r + glow.r * ((color.r <= 0.25f ? ((16.0f * color.r - 12.0f) * color.r + 4.0f) * color.r : sqrt(color.r)) - color.r);
color.g = color.g > 1.0 ? color.g : color.g + glow.g * ((color.g <= 0.25f ? ((16.0f * color.g - 12.0f) * color.g + 4.0f) * color.g : sqrt(color.g)) - color.g);
color.b = color.b > 1.0 ? color.b : color.b + glow.b * ((color.b <= 0.25f ? ((16.0f * color.b - 12.0f) * color.b + 4.0f) * color.b : sqrt(color.b)) - color.b);
return color;
} else { //replace
return glow;
}
}
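As a quick algebraic check of the commented-out screen-blend derivation above (nothing engine-specific), expanding and multiplying back by white gives:

((color / white) + (glow / white) - (color / white) * (glow / white)) * white
    = color + glow - color * glow / white

which matches color.rgb + glow.rgb - (color.rgb * glow.rgb / white).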
#ifdef USE_1D_LUT
vec3 apply_color_correction(vec3 color) {
color.r = texture(source_color_correction, vec2(color.r, 0.0f)).r;
color.g = texture(source_color_correction, vec2(color.g, 0.0f)).g;
color.b = texture(source_color_correction, vec2(color.b, 0.0f)).b;
return color;
}
#else
vec3 apply_color_correction(vec3 color) {
return textureLod(source_color_correction, color, 0.0).rgb;
}
#endif
#ifndef SUBPASS
// FXAA 3.11 compact, Ported from https://github.com/kosua20/Rendu/blob/master/resources/common/shaders/screens/fxaa.frag
///////////////////////////////////////////////////////////////////////////////////
// MIT License
//
// Copyright (c) 2017 Simon Rodriguez
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
///////////////////////////////////////////////////////////////////////////////////
// Nvidia Original FXAA 3.11 License
//----------------------------------------------------------------------------------
// File: es3-kepler\FXAA/FXAA3_11.h
// SDK Version: v3.00
// Email: gameworks@nvidia.com
// Site: http://developer.nvidia.com/
//
// Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//----------------------------------------------------------------------------------
//
// NVIDIA FXAA 3.11 by TIMOTHY LOTTES
//
//----------------------------------------------------------------------------------
float QUALITY(float q) {
return (q < 5 ? 1.0 : (q > 5 ? (q < 10 ? 2.0 : (q < 11 ? 4.0 : 8.0)) : 1.5));
}
float rgb2luma(vec3 rgb) {
return sqrt(dot(rgb, vec3(0.299, 0.587, 0.114)));
}
vec3 do_fxaa(vec3 color, float exposure, vec2 uv_interp) {
const float EDGE_THRESHOLD_MIN = 0.0312;
const float EDGE_THRESHOLD_MAX = 0.125;
const int ITERATIONS = 12;
const float SUBPIXEL_QUALITY = 0.75;
#ifdef USE_MULTIVIEW
float lumaUp = rgb2luma(textureLodOffset(source_color, vec3(uv_interp, ViewIndex), 0.0, ivec2(0, 1)).xyz * exposure * params.luminance_multiplier);
float lumaDown = rgb2luma(textureLodOffset(source_color, vec3(uv_interp, ViewIndex), 0.0, ivec2(0, -1)).xyz * exposure * params.luminance_multiplier);
float lumaLeft = rgb2luma(textureLodOffset(source_color, vec3(uv_interp, ViewIndex), 0.0, ivec2(-1, 0)).xyz * exposure * params.luminance_multiplier);
float lumaRight = rgb2luma(textureLodOffset(source_color, vec3(uv_interp, ViewIndex), 0.0, ivec2(1, 0)).xyz * exposure * params.luminance_multiplier);
float lumaCenter = rgb2luma(color);
float lumaMin = min(lumaCenter, min(min(lumaUp, lumaDown), min(lumaLeft, lumaRight)));
float lumaMax = max(lumaCenter, max(max(lumaUp, lumaDown), max(lumaLeft, lumaRight)));
float lumaRange = lumaMax - lumaMin;
if (lumaRange < max(EDGE_THRESHOLD_MIN, lumaMax * EDGE_THRESHOLD_MAX)) {
return color;
}
float lumaDownLeft = rgb2luma(textureLodOffset(source_color, vec3(uv_interp, ViewIndex), 0.0, ivec2(-1, -1)).xyz * exposure * params.luminance_multiplier);
float lumaUpRight = rgb2luma(textureLodOffset(source_color, vec3(uv_interp, ViewIndex), 0.0, ivec2(1, 1)).xyz * exposure * params.luminance_multiplier);
float lumaUpLeft = rgb2luma(textureLodOffset(source_color, vec3(uv_interp, ViewIndex), 0.0, ivec2(-1, 1)).xyz * exposure * params.luminance_multiplier);
float lumaDownRight = rgb2luma(textureLodOffset(source_color, vec3(uv_interp, ViewIndex), 0.0, ivec2(1, -1)).xyz * exposure * params.luminance_multiplier);
float lumaDownUp = lumaDown + lumaUp;
float lumaLeftRight = lumaLeft + lumaRight;
float lumaLeftCorners = lumaDownLeft + lumaUpLeft;
float lumaDownCorners = lumaDownLeft + lumaDownRight;
float lumaRightCorners = lumaDownRight + lumaUpRight;
float lumaUpCorners = lumaUpRight + lumaUpLeft;
float edgeHorizontal = abs(-2.0 * lumaLeft + lumaLeftCorners) + abs(-2.0 * lumaCenter + lumaDownUp) * 2.0 + abs(-2.0 * lumaRight + lumaRightCorners);
float edgeVertical = abs(-2.0 * lumaUp + lumaUpCorners) + abs(-2.0 * lumaCenter + lumaLeftRight) * 2.0 + abs(-2.0 * lumaDown + lumaDownCorners);
bool isHorizontal = (edgeHorizontal >= edgeVertical);
float stepLength = isHorizontal ? params.src_pixel_size.y : params.src_pixel_size.x;
float luma1 = isHorizontal ? lumaDown : lumaLeft;
float luma2 = isHorizontal ? lumaUp : lumaRight;
float gradient1 = luma1 - lumaCenter;
float gradient2 = luma2 - lumaCenter;
bool is1Steepest = abs(gradient1) >= abs(gradient2);
float gradientScaled = 0.25 * max(abs(gradient1), abs(gradient2));
float lumaLocalAverage = 0.0;
if (is1Steepest) {
stepLength = -stepLength;
lumaLocalAverage = 0.5 * (luma1 + lumaCenter);
} else {
lumaLocalAverage = 0.5 * (luma2 + lumaCenter);
}
vec2 currentUv = uv_interp;
if (isHorizontal) {
currentUv.y += stepLength * 0.5;
} else {
currentUv.x += stepLength * 0.5;
}
vec2 offset = isHorizontal ? vec2(params.src_pixel_size.x, 0.0) : vec2(0.0, params.src_pixel_size.y);
vec3 uv1 = vec3(currentUv - offset * QUALITY(0), ViewIndex);
vec3 uv2 = vec3(currentUv + offset * QUALITY(0), ViewIndex);
float lumaEnd1 = rgb2luma(textureLod(source_color, uv1, 0.0).xyz * exposure * params.luminance_multiplier);
float lumaEnd2 = rgb2luma(textureLod(source_color, uv2, 0.0).xyz * exposure * params.luminance_multiplier);
lumaEnd1 -= lumaLocalAverage;
lumaEnd2 -= lumaLocalAverage;
bool reached1 = abs(lumaEnd1) >= gradientScaled;
bool reached2 = abs(lumaEnd2) >= gradientScaled;
bool reachedBoth = reached1 && reached2;
if (!reached1) {
uv1 -= vec3(offset * QUALITY(1), 0.0);
}
if (!reached2) {
uv2 += vec3(offset * QUALITY(1), 0.0);
}
if (!reachedBoth) {
for (int i = 2; i < ITERATIONS; i++) {
if (!reached1) {
lumaEnd1 = rgb2luma(textureLod(source_color, uv1, 0.0).xyz * exposure * params.luminance_multiplier);
lumaEnd1 = lumaEnd1 - lumaLocalAverage;
}
if (!reached2) {
lumaEnd2 = rgb2luma(textureLod(source_color, uv2, 0.0).xyz * exposure * params.luminance_multiplier);
lumaEnd2 = lumaEnd2 - lumaLocalAverage;
}
reached1 = abs(lumaEnd1) >= gradientScaled;
reached2 = abs(lumaEnd2) >= gradientScaled;
reachedBoth = reached1 && reached2;
if (!reached1) {
uv1 -= vec3(offset * QUALITY(i), 0.0);
}
if (!reached2) {
uv2 += vec3(offset * QUALITY(i), 0.0);
}
if (reachedBoth) {
break;
}
}
}
float distance1 = isHorizontal ? (uv_interp.x - uv1.x) : (uv_interp.y - uv1.y);
float distance2 = isHorizontal ? (uv2.x - uv_interp.x) : (uv2.y - uv_interp.y);
bool isDirection1 = distance1 < distance2;
float distanceFinal = min(distance1, distance2);
float edgeThickness = (distance1 + distance2);
bool isLumaCenterSmaller = lumaCenter < lumaLocalAverage;
bool correctVariation1 = (lumaEnd1 < 0.0) != isLumaCenterSmaller;
bool correctVariation2 = (lumaEnd2 < 0.0) != isLumaCenterSmaller;
bool correctVariation = isDirection1 ? correctVariation1 : correctVariation2;
float pixelOffset = -distanceFinal / edgeThickness + 0.5;
float finalOffset = correctVariation ? pixelOffset : 0.0;
float lumaAverage = (1.0 / 12.0) * (2.0 * (lumaDownUp + lumaLeftRight) + lumaLeftCorners + lumaRightCorners);
float subPixelOffset1 = clamp(abs(lumaAverage - lumaCenter) / lumaRange, 0.0, 1.0);
float subPixelOffset2 = (-2.0 * subPixelOffset1 + 3.0) * subPixelOffset1 * subPixelOffset1;
float subPixelOffsetFinal = subPixelOffset2 * subPixelOffset2 * SUBPIXEL_QUALITY;
finalOffset = max(finalOffset, subPixelOffsetFinal);
vec3 finalUv = vec3(uv_interp, ViewIndex);
if (isHorizontal) {
finalUv.y += finalOffset * stepLength;
} else {
finalUv.x += finalOffset * stepLength;
}
vec3 finalColor = textureLod(source_color, finalUv, 0.0).xyz * exposure * params.luminance_multiplier;
return finalColor;
#else
float lumaUp = rgb2luma(textureLodOffset(source_color, uv_interp, 0.0, ivec2(0, 1)).xyz * exposure * params.luminance_multiplier);
float lumaDown = rgb2luma(textureLodOffset(source_color, uv_interp, 0.0, ivec2(0, -1)).xyz * exposure * params.luminance_multiplier);
float lumaLeft = rgb2luma(textureLodOffset(source_color, uv_interp, 0.0, ivec2(-1, 0)).xyz * exposure * params.luminance_multiplier);
float lumaRight = rgb2luma(textureLodOffset(source_color, uv_interp, 0.0, ivec2(1, 0)).xyz * exposure * params.luminance_multiplier);
float lumaCenter = rgb2luma(color);
float lumaMin = min(lumaCenter, min(min(lumaUp, lumaDown), min(lumaLeft, lumaRight)));
float lumaMax = max(lumaCenter, max(max(lumaUp, lumaDown), max(lumaLeft, lumaRight)));
float lumaRange = lumaMax - lumaMin;
if (lumaRange < max(EDGE_THRESHOLD_MIN, lumaMax * EDGE_THRESHOLD_MAX)) {
return color;
}
float lumaDownLeft = rgb2luma(textureLodOffset(source_color, uv_interp, 0.0, ivec2(-1, -1)).xyz * exposure * params.luminance_multiplier);
float lumaUpRight = rgb2luma(textureLodOffset(source_color, uv_interp, 0.0, ivec2(1, 1)).xyz * exposure * params.luminance_multiplier);
float lumaUpLeft = rgb2luma(textureLodOffset(source_color, uv_interp, 0.0, ivec2(-1, 1)).xyz * exposure * params.luminance_multiplier);
float lumaDownRight = rgb2luma(textureLodOffset(source_color, uv_interp, 0.0, ivec2(1, -1)).xyz * exposure * params.luminance_multiplier);
float lumaDownUp = lumaDown + lumaUp;
float lumaLeftRight = lumaLeft + lumaRight;
float lumaLeftCorners = lumaDownLeft + lumaUpLeft;
float lumaDownCorners = lumaDownLeft + lumaDownRight;
float lumaRightCorners = lumaDownRight + lumaUpRight;
float lumaUpCorners = lumaUpRight + lumaUpLeft;
float edgeHorizontal = abs(-2.0 * lumaLeft + lumaLeftCorners) + abs(-2.0 * lumaCenter + lumaDownUp) * 2.0 + abs(-2.0 * lumaRight + lumaRightCorners);
float edgeVertical = abs(-2.0 * lumaUp + lumaUpCorners) + abs(-2.0 * lumaCenter + lumaLeftRight) * 2.0 + abs(-2.0 * lumaDown + lumaDownCorners);
bool isHorizontal = (edgeHorizontal >= edgeVertical);
float stepLength = isHorizontal ? params.src_pixel_size.y : params.src_pixel_size.x;
float luma1 = isHorizontal ? lumaDown : lumaLeft;
float luma2 = isHorizontal ? lumaUp : lumaRight;
float gradient1 = luma1 - lumaCenter;
float gradient2 = luma2 - lumaCenter;
bool is1Steepest = abs(gradient1) >= abs(gradient2);
float gradientScaled = 0.25 * max(abs(gradient1), abs(gradient2));
float lumaLocalAverage = 0.0;
if (is1Steepest) {
stepLength = -stepLength;
lumaLocalAverage = 0.5 * (luma1 + lumaCenter);
} else {
lumaLocalAverage = 0.5 * (luma2 + lumaCenter);
}
vec2 currentUv = uv_interp;
if (isHorizontal) {
currentUv.y += stepLength * 0.5;
} else {
currentUv.x += stepLength * 0.5;
}
vec2 offset = isHorizontal ? vec2(params.src_pixel_size.x, 0.0) : vec2(0.0, params.src_pixel_size.y);
vec2 uv1 = currentUv - offset * QUALITY(0);
vec2 uv2 = currentUv + offset * QUALITY(0);
float lumaEnd1 = rgb2luma(textureLod(source_color, uv1, 0.0).xyz * exposure * params.luminance_multiplier);
float lumaEnd2 = rgb2luma(textureLod(source_color, uv2, 0.0).xyz * exposure * params.luminance_multiplier);
lumaEnd1 -= lumaLocalAverage;
lumaEnd2 -= lumaLocalAverage;
bool reached1 = abs(lumaEnd1) >= gradientScaled;
bool reached2 = abs(lumaEnd2) >= gradientScaled;
bool reachedBoth = reached1 && reached2;
if (!reached1) {
uv1 -= offset * QUALITY(1);
}
if (!reached2) {
uv2 += offset * QUALITY(1);
}
if (!reachedBoth) {
for (int i = 2; i < ITERATIONS; i++) {
if (!reached1) {
lumaEnd1 = rgb2luma(textureLod(source_color, uv1, 0.0).xyz * exposure * params.luminance_multiplier);
lumaEnd1 = lumaEnd1 - lumaLocalAverage;
}
if (!reached2) {
lumaEnd2 = rgb2luma(textureLod(source_color, uv2, 0.0).xyz * exposure * params.luminance_multiplier);
lumaEnd2 = lumaEnd2 - lumaLocalAverage;
}
reached1 = abs(lumaEnd1) >= gradientScaled;
reached2 = abs(lumaEnd2) >= gradientScaled;
reachedBoth = reached1 && reached2;
if (!reached1) {
uv1 -= offset * QUALITY(i);
}
if (!reached2) {
uv2 += offset * QUALITY(i);
}
if (reachedBoth) {
break;
}
}
}
float distance1 = isHorizontal ? (uv_interp.x - uv1.x) : (uv_interp.y - uv1.y);
float distance2 = isHorizontal ? (uv2.x - uv_interp.x) : (uv2.y - uv_interp.y);
bool isDirection1 = distance1 < distance2;
float distanceFinal = min(distance1, distance2);
float edgeThickness = (distance1 + distance2);
bool isLumaCenterSmaller = lumaCenter < lumaLocalAverage;
bool correctVariation1 = (lumaEnd1 < 0.0) != isLumaCenterSmaller;
bool correctVariation2 = (lumaEnd2 < 0.0) != isLumaCenterSmaller;
bool correctVariation = isDirection1 ? correctVariation1 : correctVariation2;
float pixelOffset = -distanceFinal / edgeThickness + 0.5;
float finalOffset = correctVariation ? pixelOffset : 0.0;
float lumaAverage = (1.0 / 12.0) * (2.0 * (lumaDownUp + lumaLeftRight) + lumaLeftCorners + lumaRightCorners);
float subPixelOffset1 = clamp(abs(lumaAverage - lumaCenter) / lumaRange, 0.0, 1.0);
float subPixelOffset2 = (-2.0 * subPixelOffset1 + 3.0) * subPixelOffset1 * subPixelOffset1;
float subPixelOffsetFinal = subPixelOffset2 * subPixelOffset2 * SUBPIXEL_QUALITY;
finalOffset = max(finalOffset, subPixelOffsetFinal);
vec2 finalUv = uv_interp;
if (isHorizontal) {
finalUv.y += finalOffset * stepLength;
} else {
finalUv.x += finalOffset * stepLength;
}
vec3 finalColor = textureLod(source_color, finalUv, 0.0).xyz * exposure * params.luminance_multiplier;
return finalColor;
#endif
}
#endif // !SUBPASS
// From https://alex.vlachos.com/graphics/Alex_Vlachos_Advanced_VR_Rendering_GDC2015.pdf
// and https://www.shadertoy.com/view/MslGR8 (5th one starting from the bottom)
// NOTE: `frag_coord` is in pixels (i.e. not normalized UV).
// This dithering must be applied after encoding changes (linear/nonlinear) have been applied
// as the final step before quantization from floating point to integer values.
vec3 screen_space_dither(vec2 frag_coord, float bit_alignment_diviser) {
// Iestyn's RGB dither (7 asm instructions) from Portal 2 X360, slightly modified for VR.
// Removed the time component to avoid passing time into this shader.
vec3 dither = vec3(dot(vec2(171.0, 231.0), frag_coord));
dither.rgb = fract(dither.rgb / vec3(103.0, 71.0, 97.0));
// Subtract 0.5 to avoid slightly brightening the whole viewport.
// Use a dither strength of 100% rather than the 37.5% suggested by the original source.
return (dither.rgb - 0.5) / bit_alignment_diviser;
}
void main() {
#ifdef SUBPASS
// SUBPASS and USE_MULTIVIEW can be combined but in that case we're already reading from the correct layer
#ifdef USE_MULTIVIEW
// In order to ensure the `SpvCapabilityMultiView` is included in the SPIR-V capabilities, gl_ViewIndex must
// be read in the shader. Without this, transpilation to Metal fails to include the multi-view variant.
uint vi = ViewIndex;
#endif
vec4 color = subpassLoad(input_color);
#elif defined(USE_MULTIVIEW)
vec4 color = textureLod(source_color, vec3(uv_interp, ViewIndex), 0.0f);
#else
vec4 color = textureLod(source_color, uv_interp, 0.0f);
#endif
color.rgb *= params.luminance_multiplier;
// Exposure
color.rgb *= params.exposure;
// Early Tonemap & SRGB Conversion
#ifndef SUBPASS
if (use_fxaa) {
// FXAA must be performed before glow to preserve the "bleed" effect of glow.
color.rgb = do_fxaa(color.rgb, params.exposure, uv_interp);
}
if (use_glow && !glow_mode_softlight) {
vec3 glow = gather_glow() * params.luminance_multiplier * params.glow_intensity;
if (use_glow_map) {
glow = mix(glow, texture(glow_map, uv_interp).rgb * glow, params.glow_map_strength);
}
if (glow_mode_mix) {
color.rgb = color.rgb * (1.0 - params.glow_intensity) + glow;
} else {
color.rgb = apply_glow(color.rgb, glow, params.white);
}
}
#endif
color.rgb = apply_tonemapping(color.rgb, params.white);
#ifndef SUBPASS
// Glow
if (use_glow && glow_mode_softlight) {
// Apply soft light after tonemapping to mitigate the issue of discontinuity
// at 1.0 and higher. This makes the issue only appear with HDR output that
// can exceed a 1.0 output value.
vec3 glow = gather_glow() * params.glow_intensity * params.luminance_multiplier;
if (use_glow_map) {
glow = mix(glow, texture(glow_map, uv_interp).rgb * glow, params.glow_map_strength);
}
glow = apply_tonemapping(glow, params.white);
color.rgb = apply_glow(color.rgb, glow, params.white);
}
#endif
// Additional effects
if (use_bcs) {
// Apply brightness:
// Apply to relative luminance. This ensures that the hue and saturation of
// colors is not affected by the adjustment, but requires the multiplication
// to be performed on linear-encoded values.
color.rgb = color.rgb * params.bcs.x;
color.rgb = linear_to_srgb(color.rgb);
// Apply contrast:
// By applying contrast to RGB values that are perceptually uniform (nonlinear),
// the darkest values are not hard-clipped as badly, which produces a
// higher quality contrast adjustment and maintains compatibility with
// existing projects.
color.rgb = mix(vec3(0.5), color.rgb, params.bcs.y);
// Apply saturation:
// By applying saturation adjustment to nonlinear sRGB-encoded values with
// even weights, the perceived brightness of blues is affected, but this
// maintains compatibility with existing projects.
color.rgb = mix(vec3(dot(vec3(1.0), color.rgb) * (1.0 / 3.0)), color.rgb, params.bcs.z);
if (use_color_correction) {
color.rgb = clamp(color.rgb, vec3(0.0), vec3(1.0));
color.rgb = apply_color_correction(color.rgb);
// When using color correction and convert_to_srgb is false, there
// is no need to convert back to linear because the color correction
// texture sampling does this for us.
} else if (!convert_to_srgb) {
color.rgb = srgb_to_linear(color.rgb);
}
} else if (convert_to_srgb) {
color.rgb = linear_to_srgb(color.rgb); // Regular linear -> SRGB conversion.
}
// Debanding should be done at the end of tonemapping, but before writing to the LDR buffer.
// Otherwise, we're adding noise to an already-quantized image.
if (deband_8_bit) {
// Divide by 255 to align to 8-bit quantization.
color.rgb += screen_space_dither(gl_FragCoord.xy, 255.0);
} else if (deband_10_bit) {
// Divide by 1023 to align to 10-bit quantization.
color.rgb += screen_space_dither(gl_FragCoord.xy, 1023.0);
}
frag_color = color;
}