Merge pull request #106493 from DarioSamo/mobile-scs-permutations

Reduce amount of permutations in mobile shader.
This commit is contained in:
Thaddeus Crews 2025-05-20 11:37:26 -05:00
commit a12e9d5c31
No known key found for this signature in database
GPG key ID: 8C6E5FEB5FC03CCC
5 changed files with 116 additions and 76 deletions

View file

@ -1050,7 +1050,7 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
{ {
base_specialization.use_directional_soft_shadows = p_render_data->directional_light_count > 0 ? p_render_data->directional_light_soft_shadows : false; base_specialization.use_directional_soft_shadows = p_render_data->directional_light_count > 0 ? p_render_data->directional_light_soft_shadows : false;
base_specialization.directional_lights = p_render_data->directional_light_count; base_specialization.directional_lights = SceneShaderForwardMobile::shader_count_for(p_render_data->directional_light_count);
base_specialization.directional_light_blend_splits = light_storage->get_directional_light_blend_splits(p_render_data->directional_light_count); base_specialization.directional_light_blend_splits = light_storage->get_directional_light_blend_splits(p_render_data->directional_light_count);
if (!is_environment(p_render_data->environment) || !environment_get_fog_enabled(p_render_data->environment)) { if (!is_environment(p_render_data->environment) || !environment_get_fog_enabled(p_render_data->environment)) {
@ -2214,10 +2214,10 @@ void RenderForwardMobile::_render_list_template(RenderingDevice::DrawListID p_dr
} else { } else {
pipeline_specialization.use_light_projector = inst->use_projector; pipeline_specialization.use_light_projector = inst->use_projector;
pipeline_specialization.use_light_soft_shadows = inst->use_soft_shadow; pipeline_specialization.use_light_soft_shadows = inst->use_soft_shadow;
pipeline_specialization.omni_lights = inst->omni_light_count; pipeline_specialization.omni_lights = SceneShaderForwardMobile::shader_count_for(inst->omni_light_count);
pipeline_specialization.spot_lights = inst->spot_light_count; pipeline_specialization.spot_lights = SceneShaderForwardMobile::shader_count_for(inst->spot_light_count);
pipeline_specialization.reflection_probes = inst->reflection_probe_count; pipeline_specialization.reflection_probes = SceneShaderForwardMobile::shader_count_for(inst->reflection_probe_count);
pipeline_specialization.decals = inst->decals_count; pipeline_specialization.decals = inst->decals_count > 0;
#ifdef DEBUG_ENABLED #ifdef DEBUG_ENABLED
if (unlikely(get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_LIGHTING)) { if (unlikely(get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_LIGHTING)) {

View file

@ -249,7 +249,6 @@ void SceneShaderForwardMobile::ShaderData::_create_pipeline(PipelineKey p_pipeli
"SPEC PACKED #0:", p_pipeline_key.shader_specialization.packed_0, "SPEC PACKED #0:", p_pipeline_key.shader_specialization.packed_0,
"SPEC PACKED #1:", p_pipeline_key.shader_specialization.packed_1, "SPEC PACKED #1:", p_pipeline_key.shader_specialization.packed_1,
"SPEC PACKED #2:", p_pipeline_key.shader_specialization.packed_2, "SPEC PACKED #2:", p_pipeline_key.shader_specialization.packed_2,
"SPEC PACKED #3:", p_pipeline_key.shader_specialization.packed_3,
"RENDER PASS:", p_pipeline_key.render_pass, "RENDER PASS:", p_pipeline_key.render_pass,
"WIREFRAME:", p_pipeline_key.wireframe); "WIREFRAME:", p_pipeline_key.wireframe);
#endif #endif
@ -340,12 +339,7 @@ void SceneShaderForwardMobile::ShaderData::_create_pipeline(PipelineKey p_pipeli
specialization_constants.push_back(sc); specialization_constants.push_back(sc);
sc.constant_id = 2; sc.constant_id = 2;
sc.int_value = p_pipeline_key.shader_specialization.packed_2; sc.float_value = p_pipeline_key.shader_specialization.packed_2;
sc.type = RD::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_INT;
specialization_constants.push_back(sc);
sc.constant_id = 3;
sc.float_value = p_pipeline_key.shader_specialization.packed_3;
sc.type = RD::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT; sc.type = RD::PIPELINE_SPECIALIZATION_CONSTANT_TYPE_FLOAT;
specialization_constants.push_back(sc); specialization_constants.push_back(sc);

View file

@ -56,6 +56,22 @@ public:
SHADER_VERSION_MAX SHADER_VERSION_MAX
}; };
// Coarse bucket for an item count (none / exactly one / more than one).
// shader_count_for() maps a raw count onto these values, and the shader side
// mirrors them with SHADER_COUNT_* defines equal to 0, 1 and 2, so the
// declaration order of the enumerators must not change.
enum ShaderCount {
// Count is zero.
SHADER_COUNT_NONE,
// Count is exactly one.
SHADER_COUNT_SINGLE,
// Count is two or more.
SHADER_COUNT_MULTIPLE
};
// Buckets an exact item count into a ShaderCount value:
// 0 -> SHADER_COUNT_NONE, 1 -> SHADER_COUNT_SINGLE, anything larger ->
// SHADER_COUNT_MULTIPLE.
_FORCE_INLINE_ static ShaderCount shader_count_for(uint32_t p_count) {
	switch (p_count) {
		case 0:
			return SHADER_COUNT_NONE;
		case 1:
			return SHADER_COUNT_SINGLE;
		default:
			// Every count above one shares the same bucket.
			return SHADER_COUNT_MULTIPLE;
	}
}
struct ShaderSpecialization { struct ShaderSpecialization {
union { union {
uint32_t packed_0; uint32_t packed_0;
@ -91,25 +107,18 @@ public:
struct { struct {
uint32_t directional_soft_shadow_samples : 6; uint32_t directional_soft_shadow_samples : 6;
uint32_t directional_penumbra_shadow_samples : 6; uint32_t directional_penumbra_shadow_samples : 6;
uint32_t omni_lights : 4; uint32_t omni_lights : 2;
uint32_t spot_lights : 4; uint32_t spot_lights : 2;
uint32_t reflection_probes : 4; uint32_t reflection_probes : 2;
uint32_t directional_lights : 4; uint32_t directional_lights : 2;
uint32_t decals : 4; uint32_t decals : 1;
};
};
union {
uint32_t packed_2;
struct {
uint32_t directional_light_blend_splits : 8; uint32_t directional_light_blend_splits : 8;
uint32_t padding_1 : 24; uint32_t padding_1 : 3;
}; };
}; };
union { union {
float packed_3; float packed_2;
float luminance_multiplier; float luminance_multiplier;
}; };
}; };
@ -122,10 +131,6 @@ public:
uint32_t cull_mode : 2; uint32_t cull_mode : 2;
}; };
}; };
uint32_t padding_1;
uint32_t padding_2;
uint32_t padding_3;
}; };
struct ShaderData : public RendererRD::MaterialStorage::ShaderData { struct ShaderData : public RendererRD::MaterialStorage::ShaderData {
@ -172,8 +177,7 @@ public:
h = hash_murmur3_one_32(primitive_type, h); h = hash_murmur3_one_32(primitive_type, h);
h = hash_murmur3_one_32(shader_specialization.packed_0, h); h = hash_murmur3_one_32(shader_specialization.packed_0, h);
h = hash_murmur3_one_32(shader_specialization.packed_1, h); h = hash_murmur3_one_32(shader_specialization.packed_1, h);
h = hash_murmur3_one_32(shader_specialization.packed_2, h); h = hash_murmur3_one_float(shader_specialization.packed_2, h);
h = hash_murmur3_one_float(shader_specialization.packed_3, h);
h = hash_murmur3_one_32(version, h); h = hash_murmur3_one_32(version, h);
h = hash_murmur3_one_32(render_pass, h); h = hash_murmur3_one_32(render_pass, h);
h = hash_murmur3_one_32(wireframe, h); h = hash_murmur3_one_32(wireframe, h);

View file

@ -456,24 +456,35 @@ void main() {
diffuse_light_interp = vec4(0.0); diffuse_light_interp = vec4(0.0);
specular_light_interp = vec4(0.0); specular_light_interp = vec4(0.0);
uint omni_light_count = sc_omni_lights(8);
uvec2 omni_light_indices = instances.data[draw_call.instance_index].omni_lights; uvec2 omni_light_indices = instances.data[draw_call.instance_index].omni_lights;
for (uint i = 0; i < sc_omni_lights(); i++) { for (uint i = 0; i < omni_light_count; i++) {
uint light_index = (i > 3) ? ((omni_light_indices.y >> ((i - 4) * 8)) & 0xFF) : ((omni_light_indices.x >> (i * 8)) & 0xFF); uint light_index = (i > 3) ? ((omni_light_indices.y >> ((i - 4) * 8)) & 0xFF) : ((omni_light_indices.x >> (i * 8)) & 0xFF);
if (i > 0 && light_index == 0xFF) {
break;
}
light_process_omni_vertex(light_index, vertex, view, normal_interp, roughness, diffuse_light_interp.rgb, specular_light_interp.rgb); light_process_omni_vertex(light_index, vertex, view, normal_interp, roughness, diffuse_light_interp.rgb, specular_light_interp.rgb);
} }
uint spot_light_count = sc_spot_lights(8);
uvec2 spot_light_indices = instances.data[draw_call.instance_index].spot_lights; uvec2 spot_light_indices = instances.data[draw_call.instance_index].spot_lights;
for (uint i = 0; i < sc_spot_lights(); i++) { for (uint i = 0; i < spot_light_count; i++) {
uint light_index = (i > 3) ? ((spot_light_indices.y >> ((i - 4) * 8)) & 0xFF) : ((spot_light_indices.x >> (i * 8)) & 0xFF); uint light_index = (i > 3) ? ((spot_light_indices.y >> ((i - 4) * 8)) & 0xFF) : ((spot_light_indices.x >> (i * 8)) & 0xFF);
if (i > 0 && light_index == 0xFF) {
break;
}
light_process_spot_vertex(light_index, vertex, view, normal_interp, roughness, diffuse_light_interp.rgb, specular_light_interp.rgb); light_process_spot_vertex(light_index, vertex, view, normal_interp, roughness, diffuse_light_interp.rgb, specular_light_interp.rgb);
} }
if (sc_directional_lights() > 0) { uint directional_lights_count = sc_directional_lights(scene_data.directional_light_count);
if (directional_lights_count > 0) {
// We process the first directional light separately as it may have shadows. // We process the first directional light separately as it may have shadows.
vec3 directional_diffuse = vec3(0.0); vec3 directional_diffuse = vec3(0.0);
vec3 directional_specular = vec3(0.0); vec3 directional_specular = vec3(0.0);
for (uint i = 0; i < sc_directional_lights(); i++) { for (uint i = 0; i < directional_lights_count; i++) {
if (!bool(directional_lights.data[i].mask & instances.data[draw_call.instance_index].layer_mask)) { if (!bool(directional_lights.data[i].mask & instances.data[draw_call.instance_index].layer_mask)) {
continue; // Not masked, skip. continue; // Not masked, skip.
} }
@ -729,6 +740,8 @@ layout(set = MATERIAL_UNIFORM_SET, binding = 0, std140) uniform MaterialUniforms
#GLOBALS #GLOBALS
#define scene_data scene_data_block.data
/* clang-format on */ /* clang-format on */
#ifdef MODE_RENDER_DEPTH #ifdef MODE_RENDER_DEPTH
@ -799,7 +812,8 @@ vec4 fog_process(vec3 vertex) {
float sun_total = 0.0; float sun_total = 0.0;
vec3 view = normalize(vertex); vec3 view = normalize(vertex);
for (uint i = 0; i < sc_directional_lights(); i++) { uint directional_lights_count = sc_directional_lights(scene_data.directional_light_count);
for (uint i = 0; i < directional_lights_count; i++) {
vec3 light_color = directional_lights.data[i].color * directional_lights.data[i].energy; vec3 light_color = directional_lights.data[i].color * directional_lights.data[i].energy;
float light_amount = pow(max(dot(view, directional_lights.data[i].direction), 0.0), 8.0); float light_amount = pow(max(dot(view, directional_lights.data[i].direction), 0.0), 8.0);
fog_color += light_color * light_amount * scene_data_block.data.fog_sun_scatter; fog_color += light_color * light_amount * scene_data_block.data.fog_sun_scatter;
@ -831,8 +845,6 @@ vec4 fog_process(vec3 vertex) {
#endif //!MODE_RENDER DEPTH #endif //!MODE_RENDER DEPTH
#define scene_data scene_data_block.data
void main() { void main() {
#ifdef UBERSHADER #ifdef UBERSHADER
bool front_facing = gl_FrontFacing; bool front_facing = gl_FrontFacing;
@ -1129,9 +1141,13 @@ void main() {
vec3 vertex_ddx = dFdx(vertex); vec3 vertex_ddx = dFdx(vertex);
vec3 vertex_ddy = dFdy(vertex); vec3 vertex_ddy = dFdy(vertex);
uint decal_count = sc_decals(8);
uvec2 decal_indices = instances.data[draw_call.instance_index].decals; uvec2 decal_indices = instances.data[draw_call.instance_index].decals;
for (uint i = 0; i < sc_decals(); i++) { for (uint i = 0; i < decal_count; i++) {
uint decal_index = (i > 3) ? ((decal_indices.y >> ((i - 4) * 8)) & 0xFF) : ((decal_indices.x >> (i * 8)) & 0xFF); uint decal_index = (i > 3) ? ((decal_indices.y >> ((i - 4) * 8)) & 0xFF) : ((decal_indices.x >> (i * 8)) & 0xFF);
if (decal_index == 0xFF) {
break;
}
vec3 uv_local = (decals.data[decal_index].xform * vec4(vertex, 1.0)).xyz; vec3 uv_local = (decals.data[decal_index].xform * vec4(vertex, 1.0)).xyz;
if (any(lessThan(uv_local, vec3(0.0, -1.0, 0.0))) || any(greaterThan(uv_local, vec3(1.0)))) { if (any(lessThan(uv_local, vec3(0.0, -1.0, 0.0))) || any(greaterThan(uv_local, vec3(1.0)))) {
@ -1405,7 +1421,8 @@ void main() {
// skipping ssao, do we remove ssao totally? // skipping ssao, do we remove ssao totally?
if (sc_reflection_probes() > 0) { uint reflection_probe_count = sc_reflection_probes(8);
if (reflection_probe_count > 0) {
vec4 reflection_accum = vec4(0.0, 0.0, 0.0, 0.0); vec4 reflection_accum = vec4(0.0, 0.0, 0.0, 0.0);
vec4 ambient_accum = vec4(0.0, 0.0, 0.0, 0.0); vec4 ambient_accum = vec4(0.0, 0.0, 0.0, 0.0);
@ -1423,8 +1440,11 @@ void main() {
ref_vec = mix(ref_vec, bent_normal, roughness * roughness * roughness * roughness); ref_vec = mix(ref_vec, bent_normal, roughness * roughness * roughness * roughness);
uvec2 reflection_indices = instances.data[draw_call.instance_index].reflection_probes; uvec2 reflection_indices = instances.data[draw_call.instance_index].reflection_probes;
for (uint i = 0; i < sc_reflection_probes(); i++) { for (uint i = 0; i < reflection_probe_count; i++) {
uint reflection_index = (i > 3) ? ((reflection_indices.y >> ((i - 4) * 8)) & 0xFF) : ((reflection_indices.x >> (i * 8)) & 0xFF); uint reflection_index = (i > 3) ? ((reflection_indices.y >> ((i - 4) * 8)) & 0xFF) : ((reflection_indices.x >> (i * 8)) & 0xFF);
if (reflection_index == 0xFF) {
break;
}
if (reflection_accum.a >= 1.0 && ambient_accum.a >= 1.0) { if (reflection_accum.a >= 1.0 && ambient_accum.a >= 1.0) {
break; break;
@ -1519,7 +1539,8 @@ void main() {
direct_specular_light += specular_light_interp.rgb * f0; direct_specular_light += specular_light_interp.rgb * f0;
#endif #endif
if (sc_directional_lights() > 0) { uint directional_lights_count = sc_directional_lights(scene_data.directional_light_count);
if (directional_lights_count > 0) {
#ifndef SHADOWS_DISABLED #ifndef SHADOWS_DISABLED
// Do shadow and lighting in two passes to reduce register pressure // Do shadow and lighting in two passes to reduce register pressure
uint shadow0 = 0; uint shadow0 = 0;
@ -1554,7 +1575,7 @@ void main() {
// Only process the first light's shadow for vertex lighting. // Only process the first light's shadow for vertex lighting.
for (uint i = 0; i < 1; i++) { for (uint i = 0; i < 1; i++) {
#else #else
for (uint i = 0; i < sc_directional_lights(); i++) { for (uint i = 0; i < directional_lights_count; i++) {
#endif #endif
if (!bool(directional_lights.data[i].mask & instances.data[draw_call.instance_index].layer_mask)) { if (!bool(directional_lights.data[i].mask & instances.data[draw_call.instance_index].layer_mask)) {
continue; //not masked continue; //not masked
@ -1696,7 +1717,8 @@ void main() {
#endif // SHADOWS_DISABLED #endif // SHADOWS_DISABLED
#ifndef USE_VERTEX_LIGHTING #ifndef USE_VERTEX_LIGHTING
for (uint i = 0; i < sc_directional_lights(); i++) { uint directional_lights_count = sc_directional_lights(scene_data.directional_light_count);
for (uint i = 0; i < directional_lights_count; i++) {
if (!bool(directional_lights.data[i].mask & instances.data[draw_call.instance_index].layer_mask)) { if (!bool(directional_lights.data[i].mask & instances.data[draw_call.instance_index].layer_mask)) {
continue; //not masked continue; //not masked
} }
@ -1767,9 +1789,14 @@ void main() {
} //directional light } //directional light
#ifndef USE_VERTEX_LIGHTING #ifndef USE_VERTEX_LIGHTING
uint omni_light_count = sc_omni_lights(8);
uvec2 omni_indices = instances.data[draw_call.instance_index].omni_lights; uvec2 omni_indices = instances.data[draw_call.instance_index].omni_lights;
for (uint i = 0; i < sc_omni_lights(); i++) { for (uint i = 0; i < omni_light_count; i++) {
uint light_index = (i > 3) ? ((omni_indices.y >> ((i - 4) * 8)) & 0xFF) : ((omni_indices.x >> (i * 8)) & 0xFF); uint light_index = (i > 3) ? ((omni_indices.y >> ((i - 4) * 8)) & 0xFF) : ((omni_indices.x >> (i * 8)) & 0xFF);
if (i > 0 && light_index == 0xFF) {
break;
}
light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, scene_data.taa_frame_count, albedo, alpha, screen_uv, vec3(1.0), light_process_omni(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, scene_data.taa_frame_count, albedo, alpha, screen_uv, vec3(1.0),
#ifdef LIGHT_BACKLIGHT_USED #ifdef LIGHT_BACKLIGHT_USED
backlight, backlight,
@ -1795,9 +1822,14 @@ void main() {
diffuse_light, direct_specular_light); diffuse_light, direct_specular_light);
} }
uint spot_light_count = sc_spot_lights(8);
uvec2 spot_indices = instances.data[draw_call.instance_index].spot_lights; uvec2 spot_indices = instances.data[draw_call.instance_index].spot_lights;
for (uint i = 0; i < sc_spot_lights(); i++) { for (uint i = 0; i < spot_light_count; i++) {
uint light_index = (i > 3) ? ((spot_indices.y >> ((i - 4) * 8)) & 0xFF) : ((spot_indices.x >> (i * 8)) & 0xFF); uint light_index = (i > 3) ? ((spot_indices.y >> ((i - 4) * 8)) & 0xFF) : ((spot_indices.x >> (i * 8)) & 0xFF);
if (i > 0 && light_index == 0xFF) {
break;
}
light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, scene_data.taa_frame_count, albedo, alpha, screen_uv, vec3(1.0), light_process_spot(light_index, vertex, view, normal, vertex_ddx, vertex_ddy, f0, orms, scene_data.taa_frame_count, albedo, alpha, screen_uv, vec3(1.0),
#ifdef LIGHT_BACKLIGHT_USED #ifdef LIGHT_BACKLIGHT_USED
backlight, backlight,

View file

@ -23,12 +23,8 @@ layout(push_constant, std430) uniform DrawCall {
#ifdef UBERSHADER #ifdef UBERSHADER
uint sc_packed_0; uint sc_packed_0;
uint sc_packed_1; uint sc_packed_1;
uint sc_packed_2; float sc_packed_2;
float sc_packed_3;
uint uc_packed_0; uint uc_packed_0;
uint uc_padding_1;
uint uc_padding_2;
uint uc_padding_3;
#endif #endif
} }
draw_call; draw_call;
@ -50,14 +46,10 @@ uint sc_packed_1() {
return draw_call.sc_packed_1; return draw_call.sc_packed_1;
} }
uint sc_packed_2() { float sc_packed_2() {
return draw_call.sc_packed_2; return draw_call.sc_packed_2;
} }
float sc_packed_3() {
return draw_call.sc_packed_3;
}
uint uc_cull_mode() { uint uc_cull_mode() {
return (draw_call.uc_packed_0 >> 0) & 3U; return (draw_call.uc_packed_0 >> 0) & 3U;
} }
@ -67,8 +59,7 @@ uint uc_cull_mode() {
// Pull the constants from the pipeline's specialization constants. // Pull the constants from the pipeline's specialization constants.
layout(constant_id = 0) const uint pso_sc_packed_0 = 0; layout(constant_id = 0) const uint pso_sc_packed_0 = 0;
layout(constant_id = 1) const uint pso_sc_packed_1 = 0; layout(constant_id = 1) const uint pso_sc_packed_1 = 0;
layout(constant_id = 2) const uint pso_sc_packed_2 = 0; layout(constant_id = 2) const float pso_sc_packed_2 = 2.0;
layout(constant_id = 3) const float pso_sc_packed_3 = 2.0;
uint sc_packed_0() { uint sc_packed_0() {
return pso_sc_packed_0; return pso_sc_packed_0;
@ -78,14 +69,10 @@ uint sc_packed_1() {
return pso_sc_packed_1; return pso_sc_packed_1;
} }
uint sc_packed_2() { float sc_packed_2() {
return pso_sc_packed_2; return pso_sc_packed_2;
} }
float sc_packed_3() {
return pso_sc_packed_3;
}
#endif #endif
bool sc_use_light_projector() { bool sc_use_light_projector() {
@ -176,32 +163,55 @@ uint sc_directional_penumbra_shadow_samples() {
return (sc_packed_1() >> 6) & 63U; return (sc_packed_1() >> 6) & 63U;
} }
uint sc_omni_lights() { #define SHADER_COUNT_NONE 0
return (sc_packed_1() >> 12) & 15U; #define SHADER_COUNT_SINGLE 1
#define SHADER_COUNT_MULTIPLE 2
// Expands a packed SHADER_COUNT_* option back into a loop count.
//   option: one of SHADER_COUNT_NONE / SHADER_COUNT_SINGLE / SHADER_COUNT_MULTIPLE.
//   bound:  count to use when the option is SHADER_COUNT_MULTIPLE.
// Returns 0, 1 or `bound` respectively.
uint option_to_count(uint option, uint bound) {
	switch (option) {
		case SHADER_COUNT_NONE:
			return 0;
		case SHADER_COUNT_SINGLE:
			return 1;
		case SHADER_COUNT_MULTIPLE:
			return bound;
	}

	// The callers extract `option` from a 2-bit field, so a fourth value can be
	// encoded even though no case handles it. Return a defined result instead of
	// falling off the end of a non-void function (undefined value in GLSL);
	// zero makes the callers' loops run no iterations.
	return 0;
}
uint sc_spot_lights() { uint sc_omni_lights(uint bound) {
return (sc_packed_1() >> 16) & 15U; uint option = (sc_packed_1() >> 12) & 3U;
return option_to_count(option, bound);
} }
uint sc_reflection_probes() { uint sc_spot_lights(uint bound) {
return (sc_packed_1() >> 20) & 15U; uint option = (sc_packed_1() >> 14) & 3U;
return option_to_count(option, bound);
} }
uint sc_directional_lights() { uint sc_reflection_probes(uint bound) {
return (sc_packed_1() >> 24) & 15U; uint option = (sc_packed_1() >> 16) & 3U;
return option_to_count(option, bound);
} }
uint sc_decals() { uint sc_directional_lights(uint bound) {
return (sc_packed_1() >> 28) & 15U; uint option = (sc_packed_1() >> 18) & 3U;
return option_to_count(option, bound);
}
uint sc_decals(uint bound) {
if (((sc_packed_1() >> 20) & 1U) != 0) {
return bound;
} else {
return 0;
}
} }
bool sc_directional_light_blend_split(uint i) { bool sc_directional_light_blend_split(uint i) {
return ((sc_packed_2() >> i) & 1U) != 0; return ((sc_packed_1() >> (21 + i)) & 1U) != 0;
} }
float sc_luminance_multiplier() { float sc_luminance_multiplier() {
return sc_packed_3(); return sc_packed_2();
} }
/* Set 0: Base Pass (never changes) */ /* Set 0: Base Pass (never changes) */