Optimize vertex shader using mat3x4 to reduce bandwidth, load/store operations and ALUs

This commit is contained in:
clayjohn 2025-06-23 23:06:11 -07:00
parent 9283328fe7
commit 14b60f2264
11 changed files with 211 additions and 138 deletions

View file

@ -789,19 +789,22 @@ void RenderForwardClustered::_fill_instance_data(RenderListType p_render_list, i
SceneState::InstanceData &instance_data = scene_state.instance_data[p_render_list][i + p_offset];
if (likely(inst->store_transform_cache)) {
RendererRD::MaterialStorage::store_transform(inst->transform, instance_data.transform);
RendererRD::MaterialStorage::store_transform(inst->prev_transform, instance_data.prev_transform);
RendererRD::MaterialStorage::store_transform_transposed_3x4(inst->transform, instance_data.transform);
RendererRD::MaterialStorage::store_transform_transposed_3x4(inst->prev_transform, instance_data.prev_transform);
#ifdef REAL_T_IS_DOUBLE
// Split the origin into two components, the float approximation and the missing precision.
// In the shader we will combine these back together to restore the lost precision.
RendererRD::MaterialStorage::split_double(inst->transform.origin.x, &instance_data.transform[12], &instance_data.transform[3]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.y, &instance_data.transform[13], &instance_data.transform[7]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.z, &instance_data.transform[14], &instance_data.transform[11]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.x, &instance_data.transform[12], &instance_data.model_precision[0]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.y, &instance_data.transform[13], &instance_data.model_precision[1]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.z, &instance_data.transform[14], &instance_data.model_precision[2]);
RendererRD::MaterialStorage::split_double(inst->prev_transform.origin.x, &instance_data.prev_transform[12], &instance_data.prev_model_precision[0]);
RendererRD::MaterialStorage::split_double(inst->prev_transform.origin.y, &instance_data.prev_transform[13], &instance_data.prev_model_precision[1]);
RendererRD::MaterialStorage::split_double(inst->prev_transform.origin.z, &instance_data.prev_transform[14], &instance_data.prev_model_precision[2]);
#endif
} else {
RendererRD::MaterialStorage::store_transform(Transform3D(), instance_data.transform);
RendererRD::MaterialStorage::store_transform(Transform3D(), instance_data.prev_transform);
RendererRD::MaterialStorage::store_transform_transposed_3x4(Transform3D(), instance_data.transform);
RendererRD::MaterialStorage::store_transform_transposed_3x4(Transform3D(), instance_data.prev_transform);
}
instance_data.flags = inst->flags_cache;

View file

@ -322,16 +322,20 @@ private:
};
struct InstanceData {
float transform[16];
float prev_transform[16];
float transform[12];
float compressed_aabb_position[4];
float compressed_aabb_size[4];
float uv_scale[4];
uint32_t flags;
uint32_t instance_uniforms_ofs; //base offset in global buffer for instance variables
uint32_t gi_offset; //GI information when using lightmapping (VCT or lightmap index)
uint32_t layer_mask;
float prev_transform[12];
float lightmap_uv_scale[4];
float compressed_aabb_position[4];
float compressed_aabb_size[4];
float uv_scale[4];
#ifdef REAL_T_IS_DOUBLE
float model_precision[4];
float prev_model_precision[4];
#endif
// These setters allow us to copy the data over with a single operation when using floats.
inline void set_lightmap_uv_scale(const Rect2 &p_rect) {

View file

@ -1935,19 +1935,22 @@ void RenderForwardMobile::_fill_instance_data(RenderListType p_render_list, uint
}
if (inst->store_transform_cache) {
RendererRD::MaterialStorage::store_transform(inst->transform, instance_data.transform);
RendererRD::MaterialStorage::store_transform(inst->prev_transform, instance_data.prev_transform);
RendererRD::MaterialStorage::store_transform_transposed_3x4(inst->transform, instance_data.transform);
RendererRD::MaterialStorage::store_transform_transposed_3x4(inst->prev_transform, instance_data.prev_transform);
#ifdef REAL_T_IS_DOUBLE
// Split the origin into two components, the float approximation and the missing precision.
// In the shader we will combine these back together to restore the lost precision.
RendererRD::MaterialStorage::split_double(inst->transform.origin.x, &instance_data.transform[12], &instance_data.transform[3]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.y, &instance_data.transform[13], &instance_data.transform[7]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.z, &instance_data.transform[14], &instance_data.transform[11]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.x, &instance_data.transform[12], &instance_data.model_precision[0]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.y, &instance_data.transform[13], &instance_data.model_precision[1]);
RendererRD::MaterialStorage::split_double(inst->transform.origin.z, &instance_data.transform[14], &instance_data.model_precision[2]);
RendererRD::MaterialStorage::split_double(inst->prev_transform.origin.x, &instance_data.prev_transform[12], &instance_data.prev_model_precision[0]);
RendererRD::MaterialStorage::split_double(inst->prev_transform.origin.y, &instance_data.prev_transform[13], &instance_data.prev_model_precision[1]);
RendererRD::MaterialStorage::split_double(inst->prev_transform.origin.z, &instance_data.prev_transform[14], &instance_data.prev_model_precision[2]);
#endif
} else {
RendererRD::MaterialStorage::store_transform(Transform3D(), instance_data.transform);
RendererRD::MaterialStorage::store_transform(Transform3D(), instance_data.prev_transform);
RendererRD::MaterialStorage::store_transform_transposed_3x4(Transform3D(), instance_data.transform);
RendererRD::MaterialStorage::store_transform_transposed_3x4(Transform3D(), instance_data.prev_transform);
}
instance_data.flags = inst->flags_cache;

View file

@ -209,20 +209,24 @@ private:
};
struct InstanceData {
float transform[16];
float prev_transform[16];
float transform[12];
float compressed_aabb_position[4];
float compressed_aabb_size[4];
float uv_scale[4];
uint32_t flags;
uint32_t instance_uniforms_ofs; // Base offset in global buffer for instance variables.
uint32_t gi_offset; // GI information when using lightmapping (VCT or lightmap index).
uint32_t layer_mask;
float prev_transform[12];
float lightmap_uv_scale[4]; // Doubles as uv_offset when needed.
uint32_t reflection_probes[2]; // Packed reflection probes.
uint32_t omni_lights[2]; // Packed omni lights.
uint32_t spot_lights[2]; // Packed spot lights.
uint32_t decals[2]; // Packed decals.
float compressed_aabb_position[4];
float compressed_aabb_size[4];
float uv_scale[4];
#ifdef REAL_T_IS_DOUBLE
float model_precision[4];
float prev_model_precision[4];
#endif
// These setters allow us to copy the data over with a single operation when using floats.
inline void set_lightmap_uv_scale(const Rect2 &p_rect) {

View file

@ -224,23 +224,28 @@ void vertex_shader(vec3 vertex_input,
in vec3 tangent_input,
in vec3 binormal_input,
#endif
in uint instance_index, in uint multimesh_offset, in SceneData scene_data, in mat4 model_matrix, out vec4 screen_pos) {
in uint instance_index, in uint multimesh_offset, in SceneData scene_data, in mat3x4 in_model_matrix,
#ifdef USE_DOUBLE_PRECISION
in vec3 model_precision,
#endif
out vec4 screen_pos) {
vec4 instance_custom = vec4(0.0);
#if defined(COLOR_USED)
color_interp = color_attrib;
#endif
mat4 inv_view_matrix = scene_data.inv_view_matrix;
mat4 inv_view_matrix = transpose(mat4(scene_data.inv_view_matrix[0],
scene_data.inv_view_matrix[1],
scene_data.inv_view_matrix[2],
vec4(0.0, 0.0, 0.0, 1.0)));
mat4 model_matrix = transpose(mat4(in_model_matrix[0],
in_model_matrix[1],
in_model_matrix[2],
vec4(0.0, 0.0, 0.0, 1.0)));
#ifdef USE_DOUBLE_PRECISION
vec3 model_precision = vec3(model_matrix[0][3], model_matrix[1][3], model_matrix[2][3]);
model_matrix[0][3] = 0.0;
model_matrix[1][3] = 0.0;
model_matrix[2][3] = 0.0;
vec3 view_precision = vec3(inv_view_matrix[0][3], inv_view_matrix[1][3], inv_view_matrix[2][3]);
inv_view_matrix[0][3] = 0.0;
inv_view_matrix[1][3] = 0.0;
inv_view_matrix[2][3] = 0.0;
vec3 view_precision = scene_data.inv_view_precision.xyz;
#endif
mat3 model_normal_matrix;
@ -404,8 +409,13 @@ void vertex_shader(vec3 vertex_input,
float roughness_highp = 1.0;
mat4 read_view_matrix = transpose(mat4(scene_data.view_matrix[0],
scene_data.view_matrix[1],
scene_data.view_matrix[2],
vec4(0.0, 0.0, 0.0, 1.0)));
#ifdef USE_DOUBLE_PRECISION
mat4 modelview = scene_data.view_matrix * model_matrix;
mat4 modelview = read_view_matrix * model_matrix;
// We separate the basis from the origin because the basis is fine with single-precision floats.
// Then we combine the translations from the model matrix and the view matrix using emulated doubles.
@ -420,13 +430,12 @@ void vertex_shader(vec3 vertex_input,
// Overwrite the translation part of modelview with improved precision.
vec3 temp_precision; // Will be ignored.
modelview[3].xyz = double_add_vec3(model_origin, model_precision, scene_data.inv_view_matrix[3].xyz, view_precision, temp_precision);
modelview[3].xyz = mat3(scene_data.view_matrix) * modelview[3].xyz;
modelview[3].xyz = double_add_vec3(model_origin, model_precision, inv_view_matrix[3].xyz, view_precision, temp_precision);
modelview[3].xyz = mat3(read_view_matrix) * modelview[3].xyz;
#else
mat4 modelview = scene_data.view_matrix * model_matrix;
mat4 modelview = read_view_matrix * model_matrix;
#endif
mat3 modelview_normal = mat3(scene_data.view_matrix) * model_normal_matrix;
mat4 read_view_matrix = scene_data.view_matrix;
mat3 modelview_normal = mat3(read_view_matrix) * model_normal_matrix;
vec2 read_viewport_size = scene_data.viewport_size;
{
@ -457,14 +466,14 @@ void vertex_shader(vec3 vertex_input,
//using world coordinates
#if !defined(SKIP_TRANSFORM_USED) && defined(VERTEX_WORLD_COORDS_USED)
vertex = (scene_data.view_matrix * vec4(vertex, 1.0)).xyz;
vertex = (read_view_matrix * vec4(vertex, 1.0)).xyz;
#ifdef NORMAL_USED
normal = (scene_data.view_matrix * vec4(normal, 0.0)).xyz;
normal = (read_view_matrix * vec4(normal, 0.0)).xyz;
#endif
#ifdef TANGENT_USED
binormal = (scene_data.view_matrix * vec4(binormal, 0.0)).xyz;
tangent = (scene_data.view_matrix * vec4(tangent, 0.0)).xyz;
binormal = (read_view_matrix * vec4(binormal, 0.0)).xyz;
tangent = (read_view_matrix * vec4(tangent, 0.0)).xyz;
#endif
#endif
@ -736,8 +745,6 @@ void main() {
instance_index_interp = instance_index;
mat4 model_matrix = instances.data[instance_index].transform;
#ifdef MOTION_VECTORS
// Previous vertex.
vec3 prev_vertex;
@ -773,7 +780,11 @@ void main() {
prev_tangent,
prev_binormal,
#endif
instance_index, draw_call.multimesh_motion_vectors_previous_offset, scene_data_block.prev_data, instances.data[instance_index].prev_transform, prev_screen_position);
instance_index, draw_call.multimesh_motion_vectors_previous_offset, scene_data_block.prev_data, instances.data[instance_index].prev_transform,
#ifdef USE_DOUBLE_PRECISION
instances.data[instance_index].prev_model_precision.xyz,
#endif
prev_screen_position);
#else
// Unused output.
vec4 screen_position;
@ -812,7 +823,12 @@ void main() {
tangent,
binormal,
#endif
instance_index, draw_call.multimesh_motion_vectors_current_offset, scene_data_block.data, model_matrix, screen_position);
instance_index, draw_call.multimesh_motion_vectors_current_offset, scene_data_block.data, instances.data[instance_index].transform,
#ifdef USE_DOUBLE_PRECISION
instances.data[instance_index].model_precision.xyz,
#endif
screen_position);
}
#[fragment]
@ -1085,7 +1101,12 @@ vec4 fog_process(vec3 vertex) {
}
if (abs(scene_data_block.data.fog_height_density) >= 0.0001) {
float y = (scene_data_block.data.inv_view_matrix * vec4(vertex, 1.0)).y;
mat4 inv_view_matrix = transpose(mat4(scene_data_block.data.inv_view_matrix[0],
scene_data_block.data.inv_view_matrix[1],
scene_data_block.data.inv_view_matrix[2],
vec4(0.0, 0.0, 0.0, 1.0)));
float y = (inv_view_matrix * vec4(vertex, 1.0)).y;
float y_dist = y - scene_data_block.data.fog_height;
@ -1244,16 +1265,14 @@ void fragment_shader(in SceneData scene_data) {
vec2 alpha_texture_coordinate = vec2(0.0, 0.0);
#endif // ALPHA_ANTIALIASING_EDGE_USED
mat4 inv_view_matrix = scene_data.inv_view_matrix;
mat4 read_model_matrix = instances.data[instance_index].transform;
#ifdef USE_DOUBLE_PRECISION
read_model_matrix[0][3] = 0.0;
read_model_matrix[1][3] = 0.0;
read_model_matrix[2][3] = 0.0;
inv_view_matrix[0][3] = 0.0;
inv_view_matrix[1][3] = 0.0;
inv_view_matrix[2][3] = 0.0;
#endif
mat4 inv_view_matrix = transpose(mat4(scene_data.inv_view_matrix[0],
scene_data.inv_view_matrix[1],
scene_data.inv_view_matrix[2],
vec4(0.0, 0.0, 0.0, 1.0)));
mat4 read_model_matrix = transpose(mat4(instances.data[instance_index].transform[0],
instances.data[instance_index].transform[1],
instances.data[instance_index].transform[2],
vec4(0.0, 0.0, 0.0, 1.0)));
#ifdef LIGHT_VERTEX_USED
vec3 light_vertex = vertex;
@ -1266,7 +1285,10 @@ void fragment_shader(in SceneData scene_data) {
model_normal_matrix = mat3(read_model_matrix);
}
mat4 read_view_matrix = scene_data.view_matrix;
mat4 read_view_matrix = transpose(mat4(scene_data.view_matrix[0],
scene_data.view_matrix[1],
scene_data.view_matrix[2],
vec4(0.0, 0.0, 0.0, 1.0)));
vec2 read_viewport_size = scene_data.viewport_size;
{
@ -1701,7 +1723,7 @@ void fragment_shader(in SceneData scene_data) {
uint index = instances.data[instance_index].gi_offset;
// The world normal.
vec3 wnormal = mat3(scene_data.inv_view_matrix) * indirect_normal;
vec3 wnormal = mat3(inv_view_matrix) * indirect_normal;
// The SH coefficients used for evaluating diffuse data from SH probes.
const float c[5] = float[](
@ -1771,9 +1793,9 @@ void fragment_shader(in SceneData scene_data) {
if (sc_use_forward_gi() && bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_SDFGI)) { //has lightmap capture
//make vertex orientation the world one, but still align to camera
vec3 cam_pos = mat3(scene_data.inv_view_matrix) * vertex;
vec3 cam_normal = mat3(scene_data.inv_view_matrix) * indirect_normal;
vec3 cam_reflection = mat3(scene_data.inv_view_matrix) * reflect(-view, indirect_normal);
vec3 cam_pos = mat3(inv_view_matrix) * vertex;
vec3 cam_normal = mat3(inv_view_matrix) * indirect_normal;
vec3 cam_reflection = mat3(inv_view_matrix) * reflect(-view, indirect_normal);
//apply y-mult
cam_pos.y *= sdfgi.y_mult;
@ -1843,9 +1865,9 @@ void fragment_shader(in SceneData scene_data) {
if (sc_use_forward_gi() && bool(instances.data[instance_index].flags & INSTANCE_FLAGS_USE_VOXEL_GI)) { // process voxel_gi_instances
uint index1 = instances.data[instance_index].gi_offset & 0xFFFF;
// Make vertex orientation the world one, but still align to camera.
vec3 cam_pos = mat3(scene_data.inv_view_matrix) * vertex;
vec3 cam_normal = mat3(scene_data.inv_view_matrix) * indirect_normal;
vec3 ref_vec = mat3(scene_data.inv_view_matrix) * normalize(reflect(-view, indirect_normal));
vec3 cam_pos = mat3(inv_view_matrix) * vertex;
vec3 cam_normal = mat3(inv_view_matrix) * indirect_normal;
vec3 ref_vec = mat3(inv_view_matrix) * normalize(reflect(-view, indirect_normal));
//find arbitrary tangent and bitangent, then build a matrix
vec3 v0 = abs(cam_normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0);
@ -2676,7 +2698,7 @@ void fragment_shader(in SceneData scene_data) {
vec3(0, -1, 0),
vec3(0, 0, -1));
vec3 cam_normal = mat3(scene_data.inv_view_matrix) * normalize(normal_interp);
vec3 cam_normal = mat3(inv_view_matrix) * normalize(normal_interp);
float closest_dist = -1e20;

View file

@ -313,16 +313,20 @@ implementation_data_block;
#define implementation_data implementation_data_block.data
struct InstanceData {
mat4 transform;
mat4 prev_transform;
mat3x4 transform;
vec4 compressed_aabb_position_pad; // Only .xyz is used. .w is padding.
vec4 compressed_aabb_size_pad; // Only .xyz is used. .w is padding.
vec4 uv_scale;
uint flags;
uint instance_uniforms_ofs; //base offset in global buffer for instance variables
uint gi_offset; //GI information when using lightmapping (VCT or lightmap index)
uint layer_mask;
mat3x4 prev_transform;
vec4 lightmap_uv_scale;
vec4 compressed_aabb_position_pad; // Only .xyz is used. .w is padding.
vec4 compressed_aabb_size_pad; // Only .xyz is used. .w is padding.
vec4 uv_scale;
#ifdef USE_DOUBLE_PRECISION
vec4 model_precision;
vec4 prev_model_precision;
#endif
};
layout(set = 1, binding = 2, std430) buffer restrict readonly InstanceDataBuffer {

View file

@ -251,7 +251,11 @@ void vertex_shader(in vec3 vertex,
in vec3 tangent_highp,
in vec3 binormal_highp,
#endif
in uint instance_index, in uint multimesh_offset, in mat4 model_matrix,
in uint instance_index, in uint multimesh_offset, in mat3x4 in_model_matrix,
#ifdef USE_DOUBLE_PRECISION
in vec3 model_precision,
in vec3 view_precision,
#endif
#ifdef MODE_DUAL_PARABOLOID
in float dual_paraboloid_side,
in float z_far,
@ -264,8 +268,8 @@ void vertex_shader(in vec3 vertex,
#ifdef USE_MULTIVIEW
in vec4 scene_eye_offset,
#endif
in mat4 view_matrix,
in mat4 inv_view_matrix,
in mat3x4 in_view_matrix,
in mat3x4 in_inv_view_matrix,
in vec2 viewport_size,
in uint scene_directional_light_count,
out vec4 screen_position_output) {
@ -274,16 +278,15 @@ void vertex_shader(in vec3 vertex,
vec4 color_highp = color_attrib;
#endif
#ifdef USE_DOUBLE_PRECISION
vec3 model_precision = vec3(model_matrix[0][3], model_matrix[1][3], model_matrix[2][3]);
model_matrix[0][3] = 0.0;
model_matrix[1][3] = 0.0;
model_matrix[2][3] = 0.0;
vec3 view_precision = vec3(inv_view_matrix[0][3], inv_view_matrix[1][3], inv_view_matrix[2][3]);
inv_view_matrix[0][3] = 0.0;
inv_view_matrix[1][3] = 0.0;
inv_view_matrix[2][3] = 0.0;
#endif
mat4 inv_view_matrix = transpose(mat4(in_inv_view_matrix[0],
in_inv_view_matrix[1],
in_inv_view_matrix[2],
vec4(0.0, 0.0, 0.0, 1.0)));
mat4 model_matrix = transpose(mat4(in_model_matrix[0],
in_model_matrix[1],
in_model_matrix[2],
vec4(0.0, 0.0, 0.0, 1.0)));
mat3 model_normal_matrix;
if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) {
@ -432,8 +435,13 @@ void vertex_shader(in vec3 vertex,
float roughness_highp = 1.0;
mat4 read_view_matrix = transpose(mat4(in_view_matrix[0],
in_view_matrix[1],
in_view_matrix[2],
vec4(0.0, 0.0, 0.0, 1.0)));
#ifdef USE_DOUBLE_PRECISION
mat4 modelview = view_matrix * model_matrix;
mat4 modelview = read_view_matrix * model_matrix;
// We separate the basis from the origin because the basis is fine with single-precision floats.
// Then we combine the translations from the model matrix and the view matrix using emulated doubles.
@ -449,13 +457,12 @@ void vertex_shader(in vec3 vertex,
// Overwrite the translation part of modelview with improved precision.
vec3 temp_precision; // Will be ignored.
modelview[3].xyz = double_add_vec3(model_origin, model_precision, inv_view_matrix[3].xyz, view_precision, temp_precision);
modelview[3].xyz = mat3(view_matrix) * modelview[3].xyz;
modelview[3].xyz = mat3(read_view_matrix) * modelview[3].xyz;
#else
mat4 modelview = view_matrix * model_matrix;
mat4 modelview = read_view_matrix * model_matrix;
#endif
mat3 modelview_normal = mat3(view_matrix) * model_normal_matrix;
mat4 read_view_matrix = view_matrix;
vec2 read_viewport_size = viewport_size;
mat3 modelview_normal = mat3(read_view_matrix) * model_normal_matrix;
vec2 read_viewport_size = scene_data.viewport_size;
{
#CODE : VERTEX
@ -486,14 +493,14 @@ void vertex_shader(in vec3 vertex,
//using world coordinates
#if !defined(SKIP_TRANSFORM_USED) && defined(VERTEX_WORLD_COORDS_USED)
vertex = (view_matrix * vec4(vertex, 1.0)).xyz;
vertex = (read_view_matrix * vec4(vertex, 1.0)).xyz;
#ifdef NORMAL_USED
normal_highp = (view_matrix * vec4(normal_highp, 0.0)).xyz;
normal_highp = (read_view_matrix * vec4(normal_highp, 0.0)).xyz;
#endif
#if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(BENT_NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED)
binormal_highp = (view_matrix * vec4(binormal_highp, 0.0)).xyz;
tangent_highp = (view_matrix * vec4(tangent_highp, 0.0)).xyz;
binormal_highp = (read_view_matrix * vec4(binormal_highp, 0.0)).xyz;
tangent_highp = (read_view_matrix * vec4(tangent_highp, 0.0)).xyz;
#endif
#endif
@ -694,6 +701,11 @@ void main() {
prev_binormal,
#endif
draw_call.instance_index, draw_call.multimesh_motion_vectors_previous_offset, instances.data[draw_call.instance_index].prev_transform,
#ifdef USE_DOUBLE_PRECISION
instances.data[draw_call.instance_index].prev_model_precision.xyz,
scene_data_block.prev_data.inv_view_precision,
#endif
#ifdef MODE_DUAL_PARABOLOID
scene_data_block.prev_data.dual_paraboloid_side,
scene_data_block.prev_data.z_far,
@ -751,6 +763,10 @@ void main() {
binormal,
#endif
draw_call.instance_index, draw_call.multimesh_motion_vectors_current_offset, instances.data[draw_call.instance_index].transform,
#ifdef USE_DOUBLE_PRECISION
instances.data[draw_call.instance_index].model_precision.xyz,
scene_data_block.data.inv_view_precision,
#endif
#ifdef MODE_DUAL_PARABOLOID
scene_data_block.data.dual_paraboloid_side,
scene_data_block.data.z_far,
@ -1034,7 +1050,12 @@ hvec4 fog_process(vec3 vertex) {
}
if (sc_use_fog_height_density()) {
float y = (scene_data_block.data.inv_view_matrix * vec4(vertex, 1.0)).y;
mat4 inv_view_matrix = transpose(mat4(scene_data_block.data.inv_view_matrix[0],
scene_data_block.data.inv_view_matrix[1],
scene_data_block.data.inv_view_matrix[2],
vec4(0.0, 0.0, 0.0, 1.0)));
float y = (inv_view_matrix * vec4(vertex, 1.0)).y;
float y_dist = y - scene_data_block.data.fog_height;
@ -1164,16 +1185,14 @@ void main() {
vec2 alpha_texture_coordinate = vec2(0.0, 0.0);
#endif // ALPHA_ANTIALIASING_EDGE_USED
mat4 inv_view_matrix = scene_data.inv_view_matrix;
mat4 read_model_matrix = instances.data[draw_call.instance_index].transform;
#ifdef USE_DOUBLE_PRECISION
read_model_matrix[0][3] = 0.0;
read_model_matrix[1][3] = 0.0;
read_model_matrix[2][3] = 0.0;
inv_view_matrix[0][3] = 0.0;
inv_view_matrix[1][3] = 0.0;
inv_view_matrix[2][3] = 0.0;
#endif
mat4 inv_view_matrix = transpose(mat4(scene_data.inv_view_matrix[0],
scene_data.inv_view_matrix[1],
scene_data.inv_view_matrix[2],
vec4(0.0, 0.0, 0.0, 1.0)));
mat4 read_model_matrix = transpose(mat4(instances.data[draw_call.instance_index].transform[0],
instances.data[draw_call.instance_index].transform[1],
instances.data[draw_call.instance_index].transform[2],
vec4(0.0, 0.0, 0.0, 1.0)));
#ifdef LIGHT_VERTEX_USED
vec3 light_vertex = vertex;
@ -1186,7 +1205,10 @@ void main() {
model_normal_matrix = mat3(read_model_matrix);
}
mat4 read_view_matrix = scene_data.view_matrix;
mat4 read_view_matrix = transpose(mat4(scene_data.view_matrix[0],
scene_data.view_matrix[1],
scene_data.view_matrix[2],
vec4(0.0, 0.0, 0.0, 1.0)));
vec2 read_viewport_size = scene_data.viewport_size;
{
@ -1617,7 +1639,7 @@ void main() {
uint index = instances.data[draw_call.instance_index].gi_offset;
// The world normal.
hvec3 wnormal = hmat3(scene_data.inv_view_matrix) * indirect_normal;
hvec3 wnormal = hmat3(inv_view_matrix) * indirect_normal;
// The SH coefficients used for evaluating diffuse data from SH probes.
const half c[5] = half[](

View file

@ -303,22 +303,25 @@ layout(set = 1, binding = 0, std140) uniform SceneDataBlock {
scene_data_block;
struct InstanceData {
highp mat4 transform; // 64 - 64
highp mat4 prev_transform;
uint flags; // 04 - 68
uint instance_uniforms_ofs; // Base offset in global buffer for instance variables. // 04 - 72
uint gi_offset; // GI information when using lightmapping (VCT or lightmap index). // 04 - 76
uint layer_mask; // 04 - 80
vec4 lightmap_uv_scale; // 16 - 96 Doubles as uv_offset when needed.
highp mat3x4 transform;
vec4 compressed_aabb_position_pad; // Only .xyz is used. .w is padding.
vec4 compressed_aabb_size_pad; // Only .xyz is used. .w is padding.
vec4 uv_scale;
uint flags;
uint instance_uniforms_ofs; // Base offset in global buffer for instance variables.
uint gi_offset; // GI information when using lightmapping (VCT or lightmap index).
uint layer_mask;
highp mat3x4 prev_transform;
uvec2 reflection_probes; // 08 - 104
uvec2 omni_lights; // 08 - 112
uvec2 spot_lights; // 08 - 120
uvec2 decals; // 08 - 128
vec4 compressed_aabb_position_pad; // 16 - 144 // Only .xyz is used. .w is padding.
vec4 compressed_aabb_size_pad; // 16 - 160 // Only .xyz is used. .w is padding.
vec4 uv_scale; // 16 - 176
vec4 lightmap_uv_scale; // Doubles as uv_offset when needed.
uvec2 reflection_probes;
uvec2 omni_lights;
uvec2 spot_lights;
uvec2 decals;
#ifdef USE_DOUBLE_PRECISION
vec4 model_precision;
vec4 prev_model_precision;
#endif
};
layout(set = 1, binding = 1, std430) buffer restrict readonly InstanceDataBuffer {

View file

@ -15,8 +15,12 @@
struct SceneData {
mat4 projection_matrix;
mat4 inv_projection_matrix;
mat4 inv_view_matrix;
mat4 view_matrix;
mat3x4 inv_view_matrix;
mat3x4 view_matrix;
#ifdef USE_DOUBLE_PRECISION
vec4 inv_view_precision;
#endif
// only used for multiview
mat4 projection_matrix_view[MAX_VIEWS];

View file

@ -87,13 +87,13 @@ void RenderSceneDataRD::update_ubo(RID p_uniform_buffer, RS::ViewportDebugDraw p
//store camera into ubo
RendererRD::MaterialStorage::store_camera(projection, ubo.projection_matrix);
RendererRD::MaterialStorage::store_camera(projection.inverse(), ubo.inv_projection_matrix);
RendererRD::MaterialStorage::store_transform(cam_transform, ubo.inv_view_matrix);
RendererRD::MaterialStorage::store_transform(cam_transform.affine_inverse(), ubo.view_matrix);
RendererRD::MaterialStorage::store_transform_transposed_3x4(cam_transform, ubo.inv_view_matrix);
RendererRD::MaterialStorage::store_transform_transposed_3x4(cam_transform.affine_inverse(), ubo.view_matrix);
#ifdef REAL_T_IS_DOUBLE
RendererRD::MaterialStorage::split_double(-cam_transform.origin.x, &ubo.inv_view_matrix[12], &ubo.inv_view_matrix[3]);
RendererRD::MaterialStorage::split_double(-cam_transform.origin.y, &ubo.inv_view_matrix[13], &ubo.inv_view_matrix[7]);
RendererRD::MaterialStorage::split_double(-cam_transform.origin.z, &ubo.inv_view_matrix[14], &ubo.inv_view_matrix[11]);
RendererRD::MaterialStorage::split_double(-cam_transform.origin.x, &ubo.inv_view_matrix[12], &ubo.inv_view_precision[0]);
RendererRD::MaterialStorage::split_double(-cam_transform.origin.y, &ubo.inv_view_matrix[13], &ubo.inv_view_precision[1]);
RendererRD::MaterialStorage::split_double(-cam_transform.origin.z, &ubo.inv_view_matrix[14], &ubo.inv_view_precision[2]);
#endif
for (uint32_t v = 0; v < view_count; v++) {
@ -266,13 +266,13 @@ void RenderSceneDataRD::update_ubo(RID p_uniform_buffer, RS::ViewportDebugDraw p
//store camera into ubo
RendererRD::MaterialStorage::store_camera(prev_projection, prev_ubo.projection_matrix);
RendererRD::MaterialStorage::store_camera(prev_projection.inverse(), prev_ubo.inv_projection_matrix);
RendererRD::MaterialStorage::store_transform(prev_cam_transform, prev_ubo.inv_view_matrix);
RendererRD::MaterialStorage::store_transform(prev_cam_transform.affine_inverse(), prev_ubo.view_matrix);
RendererRD::MaterialStorage::store_transform_transposed_3x4(prev_cam_transform, prev_ubo.inv_view_matrix);
RendererRD::MaterialStorage::store_transform_transposed_3x4(prev_cam_transform.affine_inverse(), prev_ubo.view_matrix);
#ifdef REAL_T_IS_DOUBLE
RendererRD::MaterialStorage::split_double(-prev_cam_transform.origin.x, &prev_ubo.inv_view_matrix[12], &prev_ubo.inv_view_matrix[3]);
RendererRD::MaterialStorage::split_double(-prev_cam_transform.origin.y, &prev_ubo.inv_view_matrix[13], &prev_ubo.inv_view_matrix[7]);
RendererRD::MaterialStorage::split_double(-prev_cam_transform.origin.z, &prev_ubo.inv_view_matrix[14], &prev_ubo.inv_view_matrix[11]);
RendererRD::MaterialStorage::split_double(-prev_cam_transform.origin.x, &prev_ubo.inv_view_matrix[12], &prev_ubo.inv_view_precision[0]);
RendererRD::MaterialStorage::split_double(-prev_cam_transform.origin.y, &prev_ubo.inv_view_matrix[13], &prev_ubo.inv_view_precision[1]);
RendererRD::MaterialStorage::split_double(-prev_cam_transform.origin.z, &prev_ubo.inv_view_matrix[14], &prev_ubo.inv_view_precision[2]);
#endif
for (uint32_t v = 0; v < view_count; v++) {

View file

@ -113,8 +113,12 @@ private:
struct UBO {
float projection_matrix[16];
float inv_projection_matrix[16];
float inv_view_matrix[16];
float view_matrix[16];
float inv_view_matrix[12];
float view_matrix[12];
#ifdef REAL_T_IS_DOUBLE
float inv_view_precision[4];
#endif
float projection_matrix_view[RendererSceneRender::MAX_RENDER_VIEWS][16];
float inv_projection_matrix_view[RendererSceneRender::MAX_RENDER_VIEWS][16];