diff --git a/drivers/gles3/rasterizer_canvas_gles3.cpp b/drivers/gles3/rasterizer_canvas_gles3.cpp index bafd693a7aa..816b313116e 100644 --- a/drivers/gles3/rasterizer_canvas_gles3.cpp +++ b/drivers/gles3/rasterizer_canvas_gles3.cpp @@ -1468,9 +1468,9 @@ void RasterizerCanvasGLES3::_render_batch(Light *p_lights, uint32_t p_index, Ren uint64_t vertex_input_mask = state.canvas_instance_batches[p_index].vertex_input_mask; if (mesh_instance.is_valid()) { - mesh_storage->mesh_instance_surface_get_vertex_arrays_and_format(mesh_instance, j, vertex_input_mask, vertex_array_gl); + mesh_storage->mesh_instance_surface_get_vertex_arrays_and_format(mesh_instance, j, vertex_input_mask, false, vertex_array_gl); } else { - mesh_storage->mesh_surface_get_vertex_arrays_and_format(surface, vertex_input_mask, vertex_array_gl); + mesh_storage->mesh_surface_get_vertex_arrays_and_format(surface, vertex_input_mask, false, vertex_array_gl); } index_array_gl = mesh_storage->mesh_surface_get_index_buffer(surface, 0); diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp index 31eacc991ca..f82614998ec 100644 --- a/drivers/gles3/rasterizer_scene_gles3.cpp +++ b/drivers/gles3/rasterizer_scene_gles3.cpp @@ -1476,147 +1476,147 @@ void RasterizerSceneGLES3::_setup_environment(const RenderDataGLES3 *p_render_da correction.set_depth_correction(p_flip_y, true, false); Projection projection = correction * p_render_data->cam_projection; //store camera into ubo - GLES3::MaterialStorage::store_camera(projection, scene_state.ubo.projection_matrix); - GLES3::MaterialStorage::store_camera(projection.inverse(), scene_state.ubo.inv_projection_matrix); - GLES3::MaterialStorage::store_transform(p_render_data->cam_transform, scene_state.ubo.inv_view_matrix); - GLES3::MaterialStorage::store_transform(p_render_data->inv_cam_transform, scene_state.ubo.view_matrix); - GLES3::MaterialStorage::store_transform(p_render_data->main_cam_transform, 
scene_state.ubo.main_cam_inv_view_matrix); - scene_state.ubo.camera_visible_layers = p_render_data->camera_visible_layers; + GLES3::MaterialStorage::store_camera(projection, scene_state.data.projection_matrix); + GLES3::MaterialStorage::store_camera(projection.inverse(), scene_state.data.inv_projection_matrix); + GLES3::MaterialStorage::store_transform(p_render_data->cam_transform, scene_state.data.inv_view_matrix); + GLES3::MaterialStorage::store_transform(p_render_data->inv_cam_transform, scene_state.data.view_matrix); + GLES3::MaterialStorage::store_transform(p_render_data->main_cam_transform, scene_state.data.main_cam_inv_view_matrix); + scene_state.data.camera_visible_layers = p_render_data->camera_visible_layers; if (p_render_data->view_count > 1) { for (uint32_t v = 0; v < p_render_data->view_count; v++) { projection = correction * p_render_data->view_projection[v]; - GLES3::MaterialStorage::store_camera(projection, scene_state.multiview_ubo.projection_matrix_view[v]); - GLES3::MaterialStorage::store_camera(projection.inverse(), scene_state.multiview_ubo.inv_projection_matrix_view[v]); + GLES3::MaterialStorage::store_camera(projection, scene_state.multiview_data.projection_matrix_view[v]); + GLES3::MaterialStorage::store_camera(projection.inverse(), scene_state.multiview_data.inv_projection_matrix_view[v]); - scene_state.multiview_ubo.eye_offset[v][0] = p_render_data->view_eye_offset[v].x; - scene_state.multiview_ubo.eye_offset[v][1] = p_render_data->view_eye_offset[v].y; - scene_state.multiview_ubo.eye_offset[v][2] = p_render_data->view_eye_offset[v].z; - scene_state.multiview_ubo.eye_offset[v][3] = 0.0; + scene_state.multiview_data.eye_offset[v][0] = p_render_data->view_eye_offset[v].x; + scene_state.multiview_data.eye_offset[v][1] = p_render_data->view_eye_offset[v].y; + scene_state.multiview_data.eye_offset[v][2] = p_render_data->view_eye_offset[v].z; + scene_state.multiview_data.eye_offset[v][3] = 0.0; } } // Only render the lights without shadows in 
the base pass. - scene_state.ubo.directional_light_count = p_render_data->directional_light_count - p_render_data->directional_shadow_count; + scene_state.data.directional_light_count = p_render_data->directional_light_count - p_render_data->directional_shadow_count; - scene_state.ubo.z_far = p_render_data->z_far; - scene_state.ubo.z_near = p_render_data->z_near; + scene_state.data.z_far = p_render_data->z_far; + scene_state.data.z_near = p_render_data->z_near; - scene_state.ubo.viewport_size[0] = p_screen_size.x; - scene_state.ubo.viewport_size[1] = p_screen_size.y; + scene_state.data.viewport_size[0] = p_screen_size.x; + scene_state.data.viewport_size[1] = p_screen_size.y; Size2 screen_pixel_size = Vector2(1.0, 1.0) / Size2(p_screen_size); - scene_state.ubo.screen_pixel_size[0] = screen_pixel_size.x; - scene_state.ubo.screen_pixel_size[1] = screen_pixel_size.y; + scene_state.data.screen_pixel_size[0] = screen_pixel_size.x; + scene_state.data.screen_pixel_size[1] = screen_pixel_size.y; - scene_state.ubo.luminance_multiplier = p_render_data->luminance_multiplier; + scene_state.data.luminance_multiplier = p_render_data->luminance_multiplier; - scene_state.ubo.shadow_bias = p_shadow_bias; - scene_state.ubo.pancake_shadows = p_pancake_shadows; + scene_state.data.shadow_bias = p_shadow_bias; + scene_state.data.pancake_shadows = p_pancake_shadows; //time global variables - scene_state.ubo.time = time; + scene_state.data.time = time; if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_UNSHADED) { - scene_state.ubo.use_ambient_light = true; - scene_state.ubo.ambient_light_color_energy[0] = 1; - scene_state.ubo.ambient_light_color_energy[1] = 1; - scene_state.ubo.ambient_light_color_energy[2] = 1; - scene_state.ubo.ambient_light_color_energy[3] = 1.0; - scene_state.ubo.use_ambient_cubemap = false; - scene_state.ubo.use_reflection_cubemap = false; + scene_state.data.use_ambient_light = true; + scene_state.data.ambient_light_color_energy[0] = 1; + 
scene_state.data.ambient_light_color_energy[1] = 1; + scene_state.data.ambient_light_color_energy[2] = 1; + scene_state.data.ambient_light_color_energy[3] = 1.0; + scene_state.data.use_ambient_cubemap = false; + scene_state.data.use_reflection_cubemap = false; } else if (is_environment(p_render_data->environment)) { RS::EnvironmentBG env_bg = environment_get_background(p_render_data->environment); RS::EnvironmentAmbientSource ambient_src = environment_get_ambient_source(p_render_data->environment); float bg_energy_multiplier = environment_get_bg_energy_multiplier(p_render_data->environment); - scene_state.ubo.ambient_light_color_energy[3] = bg_energy_multiplier; + scene_state.data.ambient_light_color_energy[3] = bg_energy_multiplier; - scene_state.ubo.ambient_color_sky_mix = environment_get_ambient_sky_contribution(p_render_data->environment); + scene_state.data.ambient_color_sky_mix = environment_get_ambient_sky_contribution(p_render_data->environment); //ambient if (ambient_src == RS::ENV_AMBIENT_SOURCE_BG && (env_bg == RS::ENV_BG_CLEAR_COLOR || env_bg == RS::ENV_BG_COLOR)) { Color color = env_bg == RS::ENV_BG_CLEAR_COLOR ? 
p_default_bg_color : environment_get_bg_color(p_render_data->environment); color = color.srgb_to_linear(); - scene_state.ubo.ambient_light_color_energy[0] = color.r * bg_energy_multiplier; - scene_state.ubo.ambient_light_color_energy[1] = color.g * bg_energy_multiplier; - scene_state.ubo.ambient_light_color_energy[2] = color.b * bg_energy_multiplier; - scene_state.ubo.use_ambient_light = true; - scene_state.ubo.use_ambient_cubemap = false; + scene_state.data.ambient_light_color_energy[0] = color.r * bg_energy_multiplier; + scene_state.data.ambient_light_color_energy[1] = color.g * bg_energy_multiplier; + scene_state.data.ambient_light_color_energy[2] = color.b * bg_energy_multiplier; + scene_state.data.use_ambient_light = true; + scene_state.data.use_ambient_cubemap = false; } else { float energy = environment_get_ambient_light_energy(p_render_data->environment); Color color = environment_get_ambient_light(p_render_data->environment); color = color.srgb_to_linear(); - scene_state.ubo.ambient_light_color_energy[0] = color.r * energy; - scene_state.ubo.ambient_light_color_energy[1] = color.g * energy; - scene_state.ubo.ambient_light_color_energy[2] = color.b * energy; + scene_state.data.ambient_light_color_energy[0] = color.r * energy; + scene_state.data.ambient_light_color_energy[1] = color.g * energy; + scene_state.data.ambient_light_color_energy[2] = color.b * energy; Basis sky_transform = environment_get_sky_orientation(p_render_data->environment); sky_transform = sky_transform.inverse() * p_render_data->cam_transform.basis; - GLES3::MaterialStorage::store_transform_3x3(sky_transform, scene_state.ubo.radiance_inverse_xform); - scene_state.ubo.use_ambient_cubemap = (ambient_src == RS::ENV_AMBIENT_SOURCE_BG && env_bg == RS::ENV_BG_SKY) || ambient_src == RS::ENV_AMBIENT_SOURCE_SKY; - scene_state.ubo.use_ambient_light = scene_state.ubo.use_ambient_cubemap || ambient_src == RS::ENV_AMBIENT_SOURCE_COLOR; + GLES3::MaterialStorage::store_transform_3x3(sky_transform, 
scene_state.data.radiance_inverse_xform); + scene_state.data.use_ambient_cubemap = (ambient_src == RS::ENV_AMBIENT_SOURCE_BG && env_bg == RS::ENV_BG_SKY) || ambient_src == RS::ENV_AMBIENT_SOURCE_SKY; + scene_state.data.use_ambient_light = scene_state.data.use_ambient_cubemap || ambient_src == RS::ENV_AMBIENT_SOURCE_COLOR; } //specular RS::EnvironmentReflectionSource ref_src = environment_get_reflection_source(p_render_data->environment); if ((ref_src == RS::ENV_REFLECTION_SOURCE_BG && env_bg == RS::ENV_BG_SKY) || ref_src == RS::ENV_REFLECTION_SOURCE_SKY) { - scene_state.ubo.use_reflection_cubemap = true; + scene_state.data.use_reflection_cubemap = true; } else { - scene_state.ubo.use_reflection_cubemap = false; + scene_state.data.use_reflection_cubemap = false; } - scene_state.ubo.fog_enabled = environment_get_fog_enabled(p_render_data->environment); - scene_state.ubo.fog_mode = environment_get_fog_mode(p_render_data->environment); - scene_state.ubo.fog_density = environment_get_fog_density(p_render_data->environment); - scene_state.ubo.fog_height = environment_get_fog_height(p_render_data->environment); - scene_state.ubo.fog_depth_curve = environment_get_fog_depth_curve(p_render_data->environment); - scene_state.ubo.fog_depth_end = environment_get_fog_depth_end(p_render_data->environment) > 0.0 ? 
environment_get_fog_depth_end(p_render_data->environment) : scene_state.ubo.z_far; - scene_state.ubo.fog_depth_begin = MIN(environment_get_fog_depth_begin(p_render_data->environment), scene_state.ubo.fog_depth_end - 0.001); - scene_state.ubo.fog_height_density = environment_get_fog_height_density(p_render_data->environment); - scene_state.ubo.fog_aerial_perspective = environment_get_fog_aerial_perspective(p_render_data->environment); + scene_state.data.fog_enabled = environment_get_fog_enabled(p_render_data->environment); + scene_state.data.fog_mode = environment_get_fog_mode(p_render_data->environment); + scene_state.data.fog_density = environment_get_fog_density(p_render_data->environment); + scene_state.data.fog_height = environment_get_fog_height(p_render_data->environment); + scene_state.data.fog_depth_curve = environment_get_fog_depth_curve(p_render_data->environment); + scene_state.data.fog_depth_end = environment_get_fog_depth_end(p_render_data->environment) > 0.0 ? environment_get_fog_depth_end(p_render_data->environment) : scene_state.data.z_far; + scene_state.data.fog_depth_begin = MIN(environment_get_fog_depth_begin(p_render_data->environment), scene_state.data.fog_depth_end - 0.001); + scene_state.data.fog_height_density = environment_get_fog_height_density(p_render_data->environment); + scene_state.data.fog_aerial_perspective = environment_get_fog_aerial_perspective(p_render_data->environment); Color fog_color = environment_get_fog_light_color(p_render_data->environment).srgb_to_linear(); float fog_energy = environment_get_fog_light_energy(p_render_data->environment); - scene_state.ubo.fog_light_color[0] = fog_color.r * fog_energy; - scene_state.ubo.fog_light_color[1] = fog_color.g * fog_energy; - scene_state.ubo.fog_light_color[2] = fog_color.b * fog_energy; + scene_state.data.fog_light_color[0] = fog_color.r * fog_energy; + scene_state.data.fog_light_color[1] = fog_color.g * fog_energy; + scene_state.data.fog_light_color[2] = fog_color.b * 
fog_energy; - scene_state.ubo.fog_sun_scatter = environment_get_fog_sun_scatter(p_render_data->environment); + scene_state.data.fog_sun_scatter = environment_get_fog_sun_scatter(p_render_data->environment); } else { } if (p_render_data->camera_attributes.is_valid()) { - scene_state.ubo.emissive_exposure_normalization = RSG::camera_attributes->camera_attributes_get_exposure_normalization_factor(p_render_data->camera_attributes); - scene_state.ubo.IBL_exposure_normalization = 1.0; + scene_state.data.emissive_exposure_normalization = RSG::camera_attributes->camera_attributes_get_exposure_normalization_factor(p_render_data->camera_attributes); + scene_state.data.IBL_exposure_normalization = 1.0; if (is_environment(p_render_data->environment)) { RID sky_rid = environment_get_sky(p_render_data->environment); if (sky_rid.is_valid()) { float current_exposure = RSG::camera_attributes->camera_attributes_get_exposure_normalization_factor(p_render_data->camera_attributes) * environment_get_bg_intensity(p_render_data->environment); - scene_state.ubo.IBL_exposure_normalization = current_exposure / MAX(0.001, sky_get_baked_exposure(sky_rid)); + scene_state.data.IBL_exposure_normalization = current_exposure / MAX(0.001, sky_get_baked_exposure(sky_rid)); } } - } else if (scene_state.ubo.emissive_exposure_normalization > 0.0) { + } else if (scene_state.data.emissive_exposure_normalization > 0.0) { // This branch is triggered when using render_material(). // Emissive is set outside the function, so don't set it. // IBL isn't used don't set it. 
} else { - scene_state.ubo.emissive_exposure_normalization = 1.0; - scene_state.ubo.IBL_exposure_normalization = 1.0; + scene_state.data.emissive_exposure_normalization = 1.0; + scene_state.data.IBL_exposure_normalization = 1.0; } if (scene_state.ubo_buffer == 0) { glGenBuffers(1, &scene_state.ubo_buffer); glBindBufferBase(GL_UNIFORM_BUFFER, SCENE_DATA_UNIFORM_LOCATION, scene_state.ubo_buffer); - GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_UNIFORM_BUFFER, scene_state.ubo_buffer, sizeof(SceneState::UBO), &scene_state.ubo, GL_STREAM_DRAW, "Scene state UBO"); + GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_UNIFORM_BUFFER, scene_state.ubo_buffer, sizeof(SceneState::UBO) * 2, &scene_state.data, GL_STREAM_DRAW, "Scene state UBO"); glBindBuffer(GL_UNIFORM_BUFFER, 0); } else { glBindBufferBase(GL_UNIFORM_BUFFER, SCENE_DATA_UNIFORM_LOCATION, scene_state.ubo_buffer); - glBufferData(GL_UNIFORM_BUFFER, sizeof(SceneState::UBO), &scene_state.ubo, GL_STREAM_DRAW); + glBufferData(GL_UNIFORM_BUFFER, sizeof(SceneState::UBO) * 2, &scene_state.data, GL_STREAM_DRAW); } glBindBuffer(GL_UNIFORM_BUFFER, 0); @@ -1625,10 +1625,10 @@ void RasterizerSceneGLES3::_setup_environment(const RenderDataGLES3 *p_render_da if (scene_state.multiview_buffer == 0) { glGenBuffers(1, &scene_state.multiview_buffer); glBindBufferBase(GL_UNIFORM_BUFFER, SCENE_MULTIVIEW_UNIFORM_LOCATION, scene_state.multiview_buffer); - GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_UNIFORM_BUFFER, scene_state.multiview_buffer, sizeof(SceneState::MultiviewUBO), &scene_state.multiview_ubo, GL_STREAM_DRAW, "Multiview UBO"); + GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_UNIFORM_BUFFER, scene_state.multiview_buffer, sizeof(SceneState::MultiviewUBO) * 2, &scene_state.multiview_data, GL_STREAM_DRAW, "Multiview UBO"); } else { glBindBufferBase(GL_UNIFORM_BUFFER, SCENE_MULTIVIEW_UNIFORM_LOCATION, scene_state.multiview_buffer); - glBufferData(GL_UNIFORM_BUFFER, 
sizeof(SceneState::MultiviewUBO), &scene_state.multiview_ubo, GL_STREAM_DRAW); + glBufferData(GL_UNIFORM_BUFFER, sizeof(SceneState::MultiviewUBO) * 2, &scene_state.multiview_data, GL_STREAM_DRAW); } glBindBuffer(GL_UNIFORM_BUFFER, 0); @@ -2382,7 +2382,7 @@ void RasterizerSceneGLES3::render_scene(const Ref &p_render_ glBindBuffer(GL_UNIFORM_BUFFER, 0); - scene_state.ubo.emissive_exposure_normalization = -1.0; // Use default exposure normalization. + scene_state.data.emissive_exposure_normalization = -1.0; // Use default exposure normalization. bool flip_y = !is_reflection_probe; @@ -2493,6 +2493,46 @@ void RasterizerSceneGLES3::render_scene(const Ref &p_render_ } } + scene_state.reset_gl_state(); + + GLuint motion_vectors_fbo = rt->overridden.velocity_fbo; + if (motion_vectors_fbo != 0 && GLES3::Config::get_singleton()->max_vertex_attribs >= 22) { + RENDER_TIMESTAMP("Motion Vectors Pass"); + glBindFramebuffer(GL_FRAMEBUFFER, motion_vectors_fbo); + + Size2i motion_vectors_target_size = rt->velocity_target_size; + glViewport(0, 0, motion_vectors_target_size.x, motion_vectors_target_size.y); + + if (!scene_state.is_prev_data_stored) { + scene_state.prev_data = scene_state.data; + scene_state.prev_multiview_data = scene_state.multiview_data; + scene_state.is_prev_data_stored = true; + } + + scene_state.enable_gl_depth_test(true); + scene_state.enable_gl_depth_draw(true); + scene_state.enable_gl_blend(false); + glDepthFunc(GL_GEQUAL); + scene_state.enable_gl_scissor_test(false); + + glColorMask(1, 1, 1, 1); + RasterizerGLES3::clear_depth(0.0); + glClearColor(0.0, 0.0, 0.0, 0.0); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + GLuint db = GL_COLOR_ATTACHMENT0; + glDrawBuffers(1, &db); + + uint64_t spec_constant = SceneShaderGLES3::DISABLE_FOG | SceneShaderGLES3::DISABLE_LIGHT_DIRECTIONAL | + SceneShaderGLES3::DISABLE_LIGHTMAP | SceneShaderGLES3::DISABLE_LIGHT_OMNI | + SceneShaderGLES3::DISABLE_LIGHT_SPOT; + + RenderListParameters 
render_list_params(render_list[RENDER_LIST_OPAQUE].elements.ptr(), render_list[RENDER_LIST_OPAQUE].elements.size(), reverse_cull, spec_constant, use_wireframe); + _render_list_template(&render_list_params, &render_data, 0, render_list[RENDER_LIST_OPAQUE].elements.size()); + + scene_state.prev_data = scene_state.data; + scene_state.prev_multiview_data = scene_state.multiview_data; + } + GLuint fbo = 0; if (is_reflection_probe && GLES3::LightStorage::get_singleton()->reflection_probe_has_atlas_index(render_data.reflection_probe)) { fbo = GLES3::LightStorage::get_singleton()->reflection_probe_instance_get_framebuffer(render_data.reflection_probe, render_data.reflection_probe_pass); @@ -2504,8 +2544,6 @@ void RasterizerSceneGLES3::render_scene(const Ref &p_render_ glBindFramebuffer(GL_FRAMEBUFFER, fbo); glViewport(0, 0, rb->internal_size.x, rb->internal_size.y); - scene_state.reset_gl_state(); - // Do depth prepass if it's explicitly enabled bool use_depth_prepass = config->use_depth_prepass; @@ -2990,6 +3028,8 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, } else if constexpr (p_pass_mode == PASS_MODE_DEPTH || p_pass_mode == PASS_MODE_SHADOW) { shader_variant = SceneShaderGLES3::MODE_DEPTH; + } else if constexpr (p_pass_mode == PASS_MODE_MOTION_VECTORS) { + base_spec_constants |= SceneShaderGLES3::RENDER_MOTION_VECTORS; } if (p_render_data->view_count > 1) { @@ -2997,8 +3037,8 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, } bool should_request_redraw = false; - if constexpr (p_pass_mode != PASS_MODE_DEPTH) { - // Don't count elements during depth pre-pass to match the RD renderers. + if constexpr (p_pass_mode != PASS_MODE_DEPTH && p_pass_mode != PASS_MODE_MOTION_VECTORS) { + // Don't count elements during depth pre-pass or motion vector pass to match the RD renderers. 
if (p_render_data->render_info) { p_render_data->render_info->info[RS::VIEWPORT_RENDER_INFO_TYPE_VISIBLE][RS::VIEWPORT_RENDER_INFO_OBJECTS_IN_FRAME] += p_to_element - p_from_element; } @@ -3059,7 +3099,7 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, if constexpr (p_pass_mode != PASS_MODE_SHADOW) { if (shader->depth_draw == GLES3::SceneShaderData::DEPTH_DRAW_OPAQUE) { - scene_state.enable_gl_depth_draw((p_pass_mode == PASS_MODE_COLOR && !GLES3::Config::get_singleton()->use_depth_prepass) || p_pass_mode == PASS_MODE_DEPTH); + scene_state.enable_gl_depth_draw((p_pass_mode == PASS_MODE_COLOR && !GLES3::Config::get_singleton()->use_depth_prepass) || p_pass_mode == PASS_MODE_DEPTH || p_pass_mode == PASS_MODE_MOTION_VECTORS); } else { scene_state.enable_gl_depth_draw(shader->depth_draw == GLES3::SceneShaderData::DEPTH_DRAW_ALWAYS); } @@ -3075,9 +3115,9 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, */ for (int32_t pass = 0; pass < MAX(1, int32_t(inst->light_passes.size() + p_render_data->directional_shadow_count)); pass++) { - if constexpr (p_pass_mode == PASS_MODE_DEPTH || p_pass_mode == PASS_MODE_SHADOW) { + if constexpr (p_pass_mode == PASS_MODE_DEPTH || p_pass_mode == PASS_MODE_SHADOW || p_pass_mode == PASS_MODE_MOTION_VECTORS) { if (pass > 0) { - // Don't render shadow passes when doing depth or shadow pass. + // Don't render shadow passes when doing depth, shadow, or motion vector pass. break; } } @@ -3232,9 +3272,9 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, // Skeleton and blend shapes. 
if (surf->owner->mesh_instance.is_valid()) { - mesh_storage->mesh_instance_surface_get_vertex_arrays_and_format(surf->owner->mesh_instance, surf->surface_index, vertex_input_mask, vertex_array_gl); + mesh_storage->mesh_instance_surface_get_vertex_arrays_and_format(surf->owner->mesh_instance, surf->surface_index, vertex_input_mask, p_pass_mode == PASS_MODE_MOTION_VECTORS, vertex_array_gl); } else { - mesh_storage->mesh_surface_get_vertex_arrays_and_format(mesh_surface, vertex_input_mask, vertex_array_gl); + mesh_storage->mesh_surface_get_vertex_arrays_and_format(mesh_surface, vertex_input_mask, p_pass_mode == PASS_MODE_MOTION_VECTORS, vertex_array_gl); } index_array_gl = mesh_storage->mesh_surface_get_index_buffer(mesh_surface, surf->lod_index); @@ -3420,7 +3460,7 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, } float opaque_prepass_threshold = 0.0; - if constexpr (p_pass_mode == PASS_MODE_DEPTH) { + if constexpr (p_pass_mode == PASS_MODE_DEPTH || p_pass_mode == PASS_MODE_MOTION_VECTORS) { opaque_prepass_threshold = 0.99; } else if constexpr (p_pass_mode == PASS_MODE_SHADOW) { opaque_prepass_threshold = 0.1; @@ -3605,6 +3645,16 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, } } + if constexpr (p_pass_mode == PASS_MODE_MOTION_VECTORS) { + if (unlikely(!inst->is_prev_transform_stored)) { + inst->prev_transform = world_transform; + inst->is_prev_transform_stored = true; + } + + material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::PREV_WORLD_TRANSFORM, inst->prev_transform, shader->version, instance_variant, spec_constants); + inst->prev_transform = world_transform; + } + material_storage->shaders.scene_shader.version_set_uniform(SceneShaderGLES3::WORLD_TRANSFORM, world_transform, shader->version, instance_variant, spec_constants); { GLES3::Mesh::Surface *s = reinterpret_cast(surf->surface); @@ -3639,8 +3689,8 @@ void 
RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, count = count * 2; } - if constexpr (p_pass_mode != PASS_MODE_DEPTH) { - // Don't count draw calls during depth pre-pass to match the RD renderers. + if constexpr (p_pass_mode != PASS_MODE_DEPTH && p_pass_mode != PASS_MODE_MOTION_VECTORS) { + // Don't count draw calls during depth pre-pass or motion vector pass to match the RD renderers. if (p_render_data->render_info) { p_render_data->render_info->info[RS::VIEWPORT_RENDER_INFO_TYPE_VISIBLE][RS::VIEWPORT_RENDER_INFO_DRAW_CALLS_IN_FRAME]++; } @@ -3665,32 +3715,40 @@ void RasterizerSceneGLES3::_render_list_template(RenderListParameters *p_params, break; } - glBindBuffer(GL_ARRAY_BUFFER, instance_buffer); + bool uses_format_2d = inst->flags_cache & INSTANCE_DATA_FLAG_MULTIMESH_FORMAT_2D; + bool has_color_or_custom_data = (inst->flags_cache & INSTANCE_DATA_FLAG_MULTIMESH_HAS_COLOR) || (inst->flags_cache & INSTANCE_DATA_FLAG_MULTIMESH_HAS_CUSTOM_DATA); + // Current data multimesh vertex attrib data begins at index 12. 
+ mesh_storage->multimesh_vertex_attrib_setup(instance_buffer, stride, uses_format_2d, has_color_or_custom_data, 12); - glEnableVertexAttribArray(12); - glVertexAttribPointer(12, 4, GL_FLOAT, GL_FALSE, stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(0)); - glVertexAttribDivisor(12, 1); - glEnableVertexAttribArray(13); - glVertexAttribPointer(13, 4, GL_FLOAT, GL_FALSE, stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(sizeof(float) * 4)); - glVertexAttribDivisor(13, 1); - if (!(inst->flags_cache & INSTANCE_DATA_FLAG_MULTIMESH_FORMAT_2D)) { - glEnableVertexAttribArray(14); - glVertexAttribPointer(14, 4, GL_FLOAT, GL_FALSE, stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(sizeof(float) * 8)); - glVertexAttribDivisor(14, 1); - } + if (p_pass_mode == PASS_MODE_MOTION_VECTORS) { + GLuint prev_instance_buffer = 0; + if (inst->flags_cache & INSTANCE_DATA_FLAG_PARTICLES) { + prev_instance_buffer = particles_storage->particles_get_prev_gl_buffer(inst->data->base); + } else { + prev_instance_buffer = mesh_storage->multimesh_get_prev_gl_buffer(inst->data->base); + } - if ((inst->flags_cache & INSTANCE_DATA_FLAG_MULTIMESH_HAS_COLOR) || (inst->flags_cache & INSTANCE_DATA_FLAG_MULTIMESH_HAS_CUSTOM_DATA)) { - uint32_t color_custom_offset = inst->flags_cache & INSTANCE_DATA_FLAG_MULTIMESH_FORMAT_2D ? 8 : 12; - glEnableVertexAttribArray(15); - glVertexAttribIPointer(15, 4, GL_UNSIGNED_INT, stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(color_custom_offset * sizeof(float))); - glVertexAttribDivisor(15, 1); - } else { - // Set all default instance color and custom data values to 1.0 or 0.0 using a compressed format. 
- uint16_t zero = Math::make_half_float(0.0f); - uint16_t one = Math::make_half_float(1.0f); - GLuint default_color = (uint32_t(one) << 16) | one; - GLuint default_custom = (uint32_t(zero) << 16) | zero; - glVertexAttribI4ui(15, default_color, default_color, default_custom, default_custom); + if (prev_instance_buffer == 0) { + break; + } + + GLuint secondary_instance_buffer = 0; + if (inst->flags_cache & INSTANCE_DATA_FLAG_PARTICLES) { + if (particles_storage->particles_get_last_change(inst->data->base) == RSG::rasterizer->get_frame_number()) { + secondary_instance_buffer = prev_instance_buffer; + } else { + secondary_instance_buffer = instance_buffer; + } + } else { + if (mesh_storage->multimesh_get_last_change(inst->data->base) == RSG::rasterizer->get_frame_number()) { + secondary_instance_buffer = prev_instance_buffer; + } else { + secondary_instance_buffer = instance_buffer; + } + } + + // Previous data multimesh vertex attrib data begins at index 18. + mesh_storage->multimesh_vertex_attrib_setup(secondary_instance_buffer, stride, uses_format_2d, has_color_or_custom_data, 18); } if (use_wireframe) { @@ -3814,7 +3872,7 @@ void RasterizerSceneGLES3::_render_uv2(const PagedArray 0.5 ? 1.0 : -1.0; // 0.5 does not exist in UNORM16, so values are either greater or smaller. angle = abs(angle * 2.0 - 1.0) * M_PI; // 0.5 is basically zero, allowing to encode both signs reliably. 
vec3 axis = normal; @@ -548,29 +611,29 @@ void main() { #endif #if defined(COLOR_USED) - color_interp = color_attrib; + color_interp = color_attrib_input; #ifdef USE_INSTANCING vec4 instance_color; - instance_color.xy = unpackHalf2x16(instance_color_custom_data.x); - instance_color.zw = unpackHalf2x16(instance_color_custom_data.y); + instance_color.xy = unpackHalf2x16(instance_color_custom_data_input.x); + instance_color.zw = unpackHalf2x16(instance_color_custom_data_input.y); color_interp *= instance_color; #endif #endif #if defined(UV_USED) - uv_interp = uv_attrib; + uv_interp = uv_attrib_input; #endif #if defined(UV2_USED) || defined(USE_LIGHTMAP) - uv2_interp = uv2_attrib; + uv2_interp = uv2_attrib_input; #endif - if (uv_scale != vec4(0.0)) { // Compression enabled + if (uv_scale_input != vec4(0.0)) { // Compression enabled #ifdef UV_USED - uv_interp = (uv_interp - 0.5) * uv_scale.xy; + uv_interp = (uv_interp - 0.5) * uv_scale_input.xy; #endif #if defined(UV2_USED) || defined(USE_LIGHTMAP) - uv2_interp = (uv2_interp - 0.5) * uv_scale.zw; + uv2_interp = (uv2_interp - 0.5) * uv_scale_input.zw; #endif } @@ -578,20 +641,15 @@ void main() { highp vec4 position; #endif -#ifdef USE_MULTIVIEW - mat4 projection_matrix = multiview_data.projection_matrix_view[ViewIndex]; - mat4 inv_projection_matrix = multiview_data.inv_projection_matrix_view[ViewIndex]; - vec3 eye_offset = multiview_data.eye_offset[ViewIndex].xyz; -#else - mat4 projection_matrix = scene_data.projection_matrix; - mat4 inv_projection_matrix = scene_data.inv_projection_matrix; - vec3 eye_offset = vec3(0.0, 0.0, 0.0); -#endif //USE_MULTIVIEW +#ifndef USE_MULTIVIEW + mat4 projection_matrix = scene_data_input.projection_matrix; + mat4 inv_projection_matrix = scene_data_input.inv_projection_matrix; +#endif //!USE_MULTIVIEW #ifdef USE_INSTANCING vec4 instance_custom; - instance_custom.xy = unpackHalf2x16(instance_color_custom_data.z); - instance_custom.zw = unpackHalf2x16(instance_color_custom_data.w); + 
instance_custom.xy = unpackHalf2x16(instance_color_custom_data_input.z); + instance_custom.zw = unpackHalf2x16(instance_color_custom_data_input.w); #else vec4 instance_custom = vec4(0.0); #endif @@ -619,8 +677,8 @@ void main() { float roughness = 1.0; - highp mat4 modelview = scene_data.view_matrix * model_matrix; - highp mat3 modelview_normal = mat3(scene_data.view_matrix) * model_normal_matrix; + highp mat4 modelview = scene_data_input.view_matrix * model_matrix; + highp mat3 modelview_normal = mat3(scene_data_input.view_matrix) * model_normal_matrix; float point_size = 1.0; @@ -648,14 +706,14 @@ void main() { // Using world coordinates #if !defined(SKIP_TRANSFORM_USED) && defined(VERTEX_WORLD_COORDS_USED) - vertex = (scene_data.view_matrix * vec4(vertex, 1.0)).xyz; + vertex = (scene_data_input.view_matrix * vec4(vertex, 1.0)).xyz; #ifdef NORMAL_USED - normal = (scene_data.view_matrix * vec4(normal, 0.0)).xyz; + normal = (scene_data_input.view_matrix * vec4(normal, 0.0)).xyz; #endif #if defined(TANGENT_USED) || defined(NORMAL_MAP_USED) || defined(LIGHT_ANISOTROPY_USED) - binormal = (scene_data.view_matrix * vec4(binormal, 0.0)).xyz; - tangent = (scene_data.view_matrix * vec4(tangent, 0.0)).xyz; + binormal = (scene_data_input.view_matrix * vec4(binormal, 0.0)).xyz; + tangent = (scene_data_input.view_matrix * vec4(tangent, 0.0)).xyz; #endif #endif @@ -672,6 +730,7 @@ void main() { binormal_interp = normalize(binormal); #endif +#ifndef RENDER_MOTION_VECTORS // Calculate shadows. #ifdef USE_ADDITIVE_LIGHTING #if defined(ADDITIVE_OMNI) || defined(ADDITIVE_SPOT) @@ -711,15 +770,16 @@ void main() { #endif // USE_ADDITIVE_LIGHTING #if defined(RENDER_SHADOWS) && !defined(RENDER_SHADOWS_LINEAR) - // This is an optimized version of normalize(vertex_interp) * scene_data.shadow_bias / length(vertex_interp). + // This is an optimized version of normalize(vertex_interp) * scene_data_input.shadow_bias / length(vertex_interp). 
float light_length_sq = dot(vertex_interp, vertex_interp); - vertex_interp += vertex_interp * scene_data.shadow_bias / light_length_sq; + vertex_interp += vertex_interp * scene_data_input.shadow_bias / light_length_sq; #endif +#endif // RENDER_MOTION_VECTORS #if defined(OVERRIDE_POSITION) - gl_Position = position; + clip_position_output = position; #else - gl_Position = projection_matrix * vec4(vertex_interp, 1.0); + clip_position_output = projection_matrix * vec4(vertex_interp, 1.0); #endif #if !defined(RENDER_SHADOWS) && !defined(RENDER_SHADOWS_LINEAR) @@ -730,17 +790,18 @@ void main() { #ifdef RENDER_MATERIAL vec2 uv_dest_attrib; - if (uv_scale != vec4(0.0)) { - uv_dest_attrib = (uv2_attrib.xy - 0.5) * uv_scale.zw; + if (uv_scale_input != vec4(0.0)) { + uv_dest_attrib = (uv2_attrib_input.xy - 0.5) * uv_scale_input.zw; } else { - uv_dest_attrib = uv2_attrib.xy; + uv_dest_attrib = uv2_attrib_input.xy; } - gl_Position.xy = (uv_dest_attrib + uv_offset) * 2.0 - 1.0; - gl_Position.z = 0.00001; - gl_Position.w = 1.0; + clip_position_output.xy = (uv_dest_attrib + uv_offset) * 2.0 - 1.0; + clip_position_output.z = 0.00001; + clip_position_output.w = 1.0; #endif +#ifndef RENDER_MOTION_VECTORS #ifdef USE_VERTEX_LIGHTING #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) #ifdef USE_MULTIVIEW @@ -752,7 +813,7 @@ void main() { specular_light_interp = vec3(0.0); #ifdef BASE_PASS #ifndef DISABLE_LIGHT_DIRECTIONAL - for (uint i = uint(0); i < scene_data.directional_light_count; i++) { + for (uint i = uint(0); i < scene_data_input.directional_light_count; i++) { #if defined(USE_LIGHTMAP) && !defined(DISABLE_LIGHTMAP) if (directional_lights[i].bake_mode == LIGHT_BAKE_STATIC) { continue; @@ -803,7 +864,97 @@ void main() { #endif // USE_ADDITIVE_LIGHTING #endif // !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) #endif // USE_VERTEX_LIGHTING +#endif // RENDER_MOTION_VECTORS } + +void main() { +#if defined(RENDER_MOTION_VECTORS) + +#ifdef USE_INSTANCING + // Check for 
inactive particle instances. + highp vec4 input_instance_xform0; + highp vec4 input_instance_xform1; + highp vec4 input_instance_xform2; + highp uvec4 input_instance_color_custom_data; + if (prev_instance_xform0.xyz == vec3(0.0, 0.0, 0.0)) { + input_instance_xform0 = instance_xform0; + input_instance_xform1 = instance_xform1; + input_instance_xform2 = instance_xform2; + input_instance_color_custom_data = instance_color_custom_data; + } else { + input_instance_xform0 = prev_instance_xform0; + input_instance_xform1 = prev_instance_xform1; + input_instance_xform2 = prev_instance_xform2; + input_instance_color_custom_data = prev_instance_color_custom_data; + } +#endif + + vertex_shader(prev_vertex_attrib, + compressed_aabb_size, + compressed_aabb_position, + prev_world_transform, + model_flags, + scene_data_block.prev_data, +#ifdef USE_INSTANCING + input_instance_xform0, input_instance_xform1, input_instance_xform2, + input_instance_color_custom_data, +#endif +#ifdef NORMAL_USED + prev_normal_attrib, +#endif +#if defined(COLOR_USED) + color_attrib, +#endif +#if defined(UV_USED) + uv_attrib, +#endif +#if defined(UV2_USED) || defined(USE_LIGHTMAP) + uv2_attrib, +#endif +#ifdef USE_MULTIVIEW + multiview_data_block.prev_data.projection_matrix_view[ViewIndex], + multiview_data_block.prev_data.inv_projection_matrix_view[ViewIndex], + multiview_data_block.prev_data.eye_offset[ViewIndex].xyz, +#endif + uv_scale, + prev_clip_position); +#else + vec4 clip_position; +#endif // defined(RENDER_MOTION_VECTORS) + + vertex_shader(vertex_angle_attrib, + compressed_aabb_size, + compressed_aabb_position, + world_transform, + model_flags, + scene_data_block.data, +#ifdef USE_INSTANCING + instance_xform0, instance_xform1, instance_xform2, + instance_color_custom_data, +#endif +#ifdef NORMAL_USED + axis_tangent_attrib, +#endif +#if defined(COLOR_USED) + color_attrib, +#endif +#if defined(UV_USED) + uv_attrib, +#endif +#if defined(UV2_USED) || defined(USE_LIGHTMAP) + uv2_attrib, +#endif 
+#ifdef USE_MULTIVIEW + multiview_data_block.data.projection_matrix_view[ViewIndex], + multiview_data_block.data.inv_projection_matrix_view[ViewIndex], + multiview_data_block.data.eye_offset[ViewIndex].xyz, +#endif + uv_scale, + clip_position); + + gl_Position = clip_position; +} + /* clang-format off */ #[fragment] @@ -859,6 +1010,12 @@ void main() { /* Varyings */ +#if defined(RENDER_MOTION_VECTORS) +in highp vec4 clip_position; +in highp vec4 prev_clip_position; +#endif + +#ifndef RENDER_MOTION_VECTORS #if defined(COLOR_USED) in vec4 color_interp; #endif @@ -958,7 +1115,7 @@ layout(std140) uniform MaterialUniforms { // ubo:3 #endif -layout(std140) uniform SceneData { // ubo:2 +struct SceneData { highp mat4 projection_matrix; highp mat4 inv_projection_matrix; highp mat4 inv_view_matrix; @@ -1006,16 +1163,26 @@ layout(std140) uniform SceneData { // ubo:2 float luminance_multiplier; uint camera_visible_layers; bool pancake_shadows; +}; + +layout(std140) uniform SceneDataBlock { // ubo:2 + SceneData data; + SceneData prev_data; } -scene_data; +scene_data_block; #ifdef USE_MULTIVIEW -layout(std140) uniform MultiviewData { // ubo:8 +struct MultiviewData { highp mat4 projection_matrix_view[MAX_VIEWS]; highp mat4 inv_projection_matrix_view[MAX_VIEWS]; highp vec4 eye_offset[MAX_VIEWS]; +}; + +layout(std140) uniform MultiviewDataBlock { // ubo:8 + MultiviewData data; + MultiviewData prev_data; } -multiview_data; +multiview_data_block; #endif uniform highp mat4 world_transform; @@ -1249,6 +1416,7 @@ ivec2 multiview_uv(ivec2 uv) { #endif uniform mediump float opaque_prepass_threshold; +#endif // !RENDER_MOTION_VECTORS #if defined(RENDER_MATERIAL) layout(location = 0) out vec4 albedo_output_buffer; @@ -1257,8 +1425,13 @@ layout(location = 2) out vec4 orm_output_buffer; layout(location = 3) out vec4 emission_output_buffer; #else // !RENDER_MATERIAL + +#ifndef RENDER_MOTION_VECTORS // Normal color rendering. 
layout(location = 0) out vec4 frag_color; +#else +layout(location = 0) out vec4 motion_vectors; +#endif // !RENDER_MOTION_VECTORS #endif // !RENDER_MATERIAL @@ -1268,6 +1441,7 @@ layout(location = 0) out vec4 frag_color; /* clang-format on */ +#ifndef RENDER_MOTION_VECTORS vec3 F0(float metallic, float specular, vec3 albedo) { float dielectric = 0.16 * specular * specular; // use albedo * metallic as colored specular reflectance at 0 angle for metallic materials; @@ -1330,8 +1504,8 @@ void light_compute(vec3 N, vec3 L, vec3 V, float A, vec3 light_color, bool is_di // light is written by the light shader highp mat4 model_matrix = world_transform; - mat4 projection_matrix = scene_data.projection_matrix; - mat4 inv_projection_matrix = scene_data.inv_projection_matrix; + mat4 projection_matrix = scene_data_block.data.projection_matrix; + mat4 inv_projection_matrix = scene_data_block.data.inv_projection_matrix; vec3 normal = N; vec3 light = L; @@ -1579,32 +1753,32 @@ void light_process_spot(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 f #endif // !USE_VERTEX_LIGHTING vec4 fog_process(vec3 vertex) { - vec3 fog_color = scene_data.fog_light_color; + vec3 fog_color = scene_data_block.data.fog_light_color; #ifdef USE_RADIANCE_MAP /* - if (scene_data.fog_aerial_perspective > 0.0) { + if (scene_data_block.data.fog_aerial_perspective > 0.0) { vec3 sky_fog_color = vec3(0.0); - vec3 cube_view = scene_data.radiance_inverse_xform * vertex; + vec3 cube_view = scene_data_block.data.radiance_inverse_xform * vertex; // mip_level always reads from the second mipmap and higher so the fog is always slightly blurred - float mip_level = mix(1.0 / MAX_ROUGHNESS_LOD, 1.0, 1.0 - (abs(vertex.z) - scene_data.z_near) / (scene_data.z_far - scene_data.z_near)); + float mip_level = mix(1.0 / MAX_ROUGHNESS_LOD, 1.0, 1.0 - (abs(vertex.z) - scene_data_block.data.z_near) / (scene_data_block.data.z_far - scene_data_block.data.z_near)); sky_fog_color = textureLod(radiance_map, cube_view, 
mip_level * RADIANCE_MAX_LOD).rgb; - fog_color = mix(fog_color, sky_fog_color, scene_data.fog_aerial_perspective); + fog_color = mix(fog_color, sky_fog_color, scene_data_block.data.fog_aerial_perspective); } */ #endif #ifndef DISABLE_LIGHT_DIRECTIONAL - if (scene_data.fog_sun_scatter > 0.001) { + if (scene_data_block.data.fog_sun_scatter > 0.001) { vec4 sun_scatter = vec4(0.0); float sun_total = 0.0; vec3 view = normalize(vertex); - for (uint i = uint(0); i < scene_data.directional_light_count; i++) { + for (uint i = uint(0); i < scene_data_block.data.directional_light_count; i++) { vec3 light_color = directional_lights[i].color * directional_lights[i].energy; float light_amount = pow(max(dot(view, directional_lights[i].direction), 0.0), 8.0); - fog_color += light_color * light_amount * scene_data.fog_sun_scatter; + fog_color += light_color * light_amount * scene_data_block.data.fog_sun_scatter; } } #endif // !DISABLE_LIGHT_DIRECTIONAL @@ -1612,18 +1786,18 @@ vec4 fog_process(vec3 vertex) { float fog_amount = 0.0; #ifdef USE_DEPTH_FOG - float fog_z = smoothstep(scene_data.fog_depth_begin, scene_data.fog_depth_end, length(vertex)); - fog_amount = pow(fog_z, scene_data.fog_depth_curve) * scene_data.fog_density; + float fog_z = smoothstep(scene_data_block.data.fog_depth_begin, scene_data_block.data.fog_depth_end, length(vertex)); + fog_amount = pow(fog_z, scene_data_block.data.fog_depth_curve) * scene_data_block.data.fog_density; #else - fog_amount = 1.0 - exp(min(0.0, -length(vertex) * scene_data.fog_density)); + fog_amount = 1.0 - exp(min(0.0, -length(vertex) * scene_data_block.data.fog_density)); #endif // USE_DEPTH_FOG - if (abs(scene_data.fog_height_density) >= 0.0001) { - float y = (scene_data.inv_view_matrix * vec4(vertex, 1.0)).y; + if (abs(scene_data_block.data.fog_height_density) >= 0.0001) { + float y = (scene_data_block.data.inv_view_matrix * vec4(vertex, 1.0)).y; - float y_dist = y - scene_data.fog_height; + float y_dist = y - 
scene_data_block.data.fog_height; - float vfog_amount = 1.0 - exp(min(0.0, y_dist * scene_data.fog_height_density)); + float vfog_amount = 1.0 - exp(min(0.0, y_dist * scene_data_block.data.fog_height_density)); fog_amount = max(vfog_amount, fog_amount); } @@ -1781,20 +1955,22 @@ vec4 textureArray_bicubic(sampler2DArray tex, vec3 uv, vec2 texture_size) { (g1(fuv.y) * (g0x * texture(tex, vec3(p2, uv.z)) + g1x * texture(tex, vec3(p3, uv.z)))); } #endif //LIGHTMAP_BICUBIC_FILTER +#endif // RENDER_MOTION_VECTORS void main() { +#ifndef RENDER_MOTION_VECTORS //lay out everything, whatever is unused is optimized away anyway vec3 vertex = vertex_interp; #ifdef USE_MULTIVIEW - vec3 eye_offset = multiview_data.eye_offset[ViewIndex].xyz; + vec3 eye_offset = multiview_data_block.data.eye_offset[ViewIndex].xyz; vec3 view = -normalize(vertex_interp - eye_offset); - mat4 projection_matrix = multiview_data.projection_matrix_view[ViewIndex]; - mat4 inv_projection_matrix = multiview_data.inv_projection_matrix_view[ViewIndex]; + mat4 projection_matrix = multiview_data_block.data.projection_matrix_view[ViewIndex]; + mat4 inv_projection_matrix = multiview_data_block.data.inv_projection_matrix_view[ViewIndex]; #else vec3 eye_offset = vec3(0.0, 0.0, 0.0); vec3 view = -normalize(vertex_interp); - mat4 projection_matrix = scene_data.projection_matrix; - mat4 inv_projection_matrix = scene_data.inv_projection_matrix; + mat4 projection_matrix = scene_data_block.data.projection_matrix; + mat4 inv_projection_matrix = scene_data_block.data.inv_projection_matrix; #endif highp mat4 model_matrix = world_transform; vec3 albedo = vec3(1.0); @@ -1870,7 +2046,7 @@ void main() { float normal_map_depth = 1.0; - vec2 screen_uv = gl_FragCoord.xy * scene_data.screen_pixel_size; + vec2 screen_uv = gl_FragCoord.xy * scene_data_block.data.screen_pixel_size; float sss_strength = 0.0; @@ -1976,7 +2152,7 @@ void main() { // fog must be processed as early as possible and then packed. 
// to maximize VGPR usage - if (scene_data.fog_enabled) { + if (scene_data_block.data.fog_enabled) { fog = fog_process(vertex); } #endif // !DISABLE_FOG @@ -2005,7 +2181,7 @@ void main() { vec3 F = f0 + (max(vec3(1.0 - roughness), f0) - f0) * pow(1.0 - ndotv, 5.0); #ifdef USE_RADIANCE_MAP - if (scene_data.use_reflection_cubemap) { + if (scene_data_block.data.use_reflection_cubemap) { #ifdef LIGHT_ANISOTROPY_USED // https://google.github.io/filament/Filament.html#lighting/imagebasedlights/anisotropy vec3 anisotropic_direction = anisotropy >= 0.0 ? binormal : tangent; @@ -2018,11 +2194,11 @@ void main() { #endif ref_vec = mix(ref_vec, normal, roughness * roughness); float horizon = min(1.0 + dot(ref_vec, normal), 1.0); - ref_vec = scene_data.radiance_inverse_xform * ref_vec; + ref_vec = scene_data_block.data.radiance_inverse_xform * ref_vec; specular_light = textureLod(radiance_map, ref_vec, sqrt(roughness) * RADIANCE_MAX_LOD).rgb; specular_light = srgb_to_linear(specular_light); specular_light *= horizon * horizon; - specular_light *= scene_data.ambient_light_color_energy.a; + specular_light *= scene_data_block.data.ambient_light_color_energy.a; } #endif // USE_RADIANCE_MAP @@ -2058,21 +2234,21 @@ void main() { #if !defined(USE_LIGHTMAP) && !defined(USE_LIGHTMAP_CAPTURE) //lightmap overrides everything - if (scene_data.use_ambient_light) { - ambient_light = scene_data.ambient_light_color_energy.rgb; + if (scene_data_block.data.use_ambient_light) { + ambient_light = scene_data_block.data.ambient_light_color_energy.rgb; #ifdef USE_RADIANCE_MAP - if (scene_data.use_ambient_cubemap) { - vec3 ambient_dir = scene_data.radiance_inverse_xform * normal; + if (scene_data_block.data.use_ambient_cubemap) { + vec3 ambient_dir = scene_data_block.data.radiance_inverse_xform * normal; vec3 cubemap_ambient = textureLod(radiance_map, ambient_dir, RADIANCE_MAX_LOD).rgb; cubemap_ambient = srgb_to_linear(cubemap_ambient); - ambient_light = mix(ambient_light, cubemap_ambient * 
scene_data.ambient_light_color_energy.a, scene_data.ambient_color_sky_mix); + ambient_light = mix(ambient_light, cubemap_ambient * scene_data_block.data.ambient_light_color_energy.a, scene_data_block.data.ambient_color_sky_mix); } #endif // USE_RADIANCE_MAP #ifndef DISABLE_REFLECTION_PROBE if (ambient_accum.a > 0.0) { - ambient_light = mix(ambient_light, (ambient_accum.rgb / ambient_accum.a) * scene_data.ambient_light_color_energy.a, scene_data.ambient_color_sky_mix); + ambient_light = mix(ambient_light, (ambient_accum.rgb / ambient_accum.a) * scene_data_block.data.ambient_light_color_energy.a, scene_data_block.data.ambient_color_sky_mix); } #endif // DISABLE_REFLECTION_PROBE } @@ -2086,7 +2262,7 @@ void main() { #ifdef USE_LIGHTMAP_CAPTURE { // The world normal. - vec3 wnormal = mat3(scene_data.inv_view_matrix) * normal; + vec3 wnormal = mat3(scene_data_block.data.inv_view_matrix) * normal; // The SH coefficients used for evaluating diffuse data from SH probes. const float c0 = 0.886227; // l0 sqrt(1.0/(4.0*PI)) * PI @@ -2104,7 +2280,7 @@ void main() { c3 * lightmap_captures[6].rgb * (3.0 * wnormal.z * wnormal.z - 1.0) + c2 * lightmap_captures[7].rgb * wnormal.x * wnormal.z + c4 * lightmap_captures[8].rgb * (wnormal.x * wnormal.x - wnormal.y * wnormal.y)) * - scene_data.IBL_exposure_normalization; + scene_data_block.data.IBL_exposure_normalization; } #else #ifdef USE_LIGHTMAP @@ -2192,7 +2368,7 @@ void main() { #else #ifndef DISABLE_LIGHT_DIRECTIONAL - for (uint i = uint(0); i < scene_data.directional_light_count; i++) { + for (uint i = uint(0); i < scene_data_block.data.directional_light_count; i++) { #if defined(USE_LIGHTMAP) && !defined(DISABLE_LIGHTMAP) if (directional_lights[i].bake_mode == LIGHT_BAKE_STATIC) { continue; @@ -2299,7 +2475,7 @@ void main() { #ifdef MODE_RENDER_DEPTH #ifdef RENDER_SHADOWS_LINEAR // Linearize the depth buffer if rendering cubemap shadows. 
- gl_FragDepth = (scene_data.z_far - (length(vertex) + scene_data.shadow_bias)) / scene_data.z_far; + gl_FragDepth = (scene_data_block.data.z_far - (length(vertex) + scene_data_block.data.shadow_bias)) / scene_data_block.data.z_far; #endif // Nothing happens, so a tree-ssa optimizer will result in no fragment shader :) @@ -2618,7 +2794,7 @@ void main() { frag_color.rgb += additive_light_color; #endif // USE_ADDITIVE_LIGHTING - frag_color.rgb *= scene_data.luminance_multiplier; + frag_color.rgb *= scene_data_block.data.luminance_multiplier; #endif // !RENDER_MATERIAL #endif // !MODE_RENDER_DEPTH @@ -2626,4 +2802,14 @@ void main() { #ifdef PREMUL_ALPHA_USED frag_color.rgb *= premul_alpha; #endif // PREMUL_ALPHA_USED +#endif // !RENDER_MOTION_VECTORS + +#if defined(RENDER_MOTION_VECTORS) + // These motion vectors are in NDC space (as opposed to screen space) to fit the OpenXR XR_FB_space_warp specification. + // https://registry.khronos.org/OpenXR/specs/1.0/html/xrspec.html#XR_FB_space_warp + + vec3 ndc = clip_position.xyz / clip_position.w; + vec3 prev_ndc = prev_clip_position.xyz / prev_clip_position.w; + motion_vectors = vec4(ndc - prev_ndc, 0.0); +#endif // RENDER_MOTION_VECTORS } diff --git a/drivers/gles3/storage/config.cpp b/drivers/gles3/storage/config.cpp index e6eae95538d..a9d8e4e2883 100644 --- a/drivers/gles3/storage/config.cpp +++ b/drivers/gles3/storage/config.cpp @@ -112,6 +112,7 @@ Config::Config() { glGetIntegerv(GL_MAX_TEXTURE_IMAGE_UNITS, &max_texture_image_units); glGetIntegerv(GL_MAX_TEXTURE_SIZE, &max_texture_size); glGetIntegerv(GL_MAX_VIEWPORT_DIMS, max_viewport_size); + glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_vertex_attribs); glGetInteger64v(GL_MAX_UNIFORM_BLOCK_SIZE, &max_uniform_buffer_size); GLint max_vertex_output; glGetIntegerv(GL_MAX_VERTEX_OUTPUT_COMPONENTS, &max_vertex_output); diff --git a/drivers/gles3/storage/config.h b/drivers/gles3/storage/config.h index 81142594359..4ddfba3c0e0 100644 --- a/drivers/gles3/storage/config.h +++ 
b/drivers/gles3/storage/config.h @@ -60,6 +60,7 @@ public: GLint max_texture_image_units = 0; GLint max_texture_size = 0; GLint max_viewport_size[2] = { 0, 0 }; + GLint max_vertex_attribs = 0; GLint64 max_uniform_buffer_size = 0; uint32_t max_shader_varyings = 0; @@ -113,7 +114,13 @@ public: PFNGLFRAMEBUFFERTEXTURE2DMULTISAMPLEEXTPROC eglFramebufferTexture2DMultisampleEXT = nullptr; PFNGLFRAMEBUFFERTEXTUREMULTISAMPLEMULTIVIEWOVRPROC eglFramebufferTextureMultisampleMultiviewOVR = nullptr; PFNEGLIMAGETARGETTEXTURE2DOESPROC eglEGLImageTargetTexture2DOES = nullptr; -#endif + +#define glFramebufferTextureMultiviewOVR GLES3::Config::get_singleton()->eglFramebufferTextureMultiviewOVR +#define glTexStorage3DMultisample GLES3::Config::get_singleton()->eglTexStorage3DMultisample +#define glFramebufferTexture2DMultisampleEXT GLES3::Config::get_singleton()->eglFramebufferTexture2DMultisampleEXT +#define glFramebufferTextureMultisampleMultiviewOVR GLES3::Config::get_singleton()->eglFramebufferTextureMultisampleMultiviewOVR +#define glEGLImageTargetTexture2DOES GLES3::Config::get_singleton()->eglEGLImageTargetTexture2DOES +#endif // ANDROID_ENABLED static Config *get_singleton() { return singleton; } diff --git a/drivers/gles3/storage/material_storage.cpp b/drivers/gles3/storage/material_storage.cpp index 17eca5a85b5..87244fcc20c 100644 --- a/drivers/gles3/storage/material_storage.cpp +++ b/drivers/gles3/storage/material_storage.cpp @@ -1244,13 +1244,13 @@ MaterialStorage::MaterialStorage() { actions.renames["MODEL_MATRIX"] = "model_matrix"; actions.renames["MODEL_NORMAL_MATRIX"] = "model_normal_matrix"; - actions.renames["VIEW_MATRIX"] = "scene_data.view_matrix"; - actions.renames["INV_VIEW_MATRIX"] = "scene_data.inv_view_matrix"; + actions.renames["VIEW_MATRIX"] = "scene_data_block.data.view_matrix"; + actions.renames["INV_VIEW_MATRIX"] = "scene_data_block.data.inv_view_matrix"; actions.renames["PROJECTION_MATRIX"] = "projection_matrix"; 
actions.renames["INV_PROJECTION_MATRIX"] = "inv_projection_matrix"; actions.renames["MODELVIEW_MATRIX"] = "modelview"; actions.renames["MODELVIEW_NORMAL_MATRIX"] = "modelview_normal"; - actions.renames["MAIN_CAM_INV_VIEW_MATRIX"] = "scene_data.main_cam_inv_view_matrix"; + actions.renames["MAIN_CAM_INV_VIEW_MATRIX"] = "scene_data_block.data.main_cam_inv_view_matrix"; actions.renames["VERTEX"] = "vertex"; actions.renames["NORMAL"] = "normal"; @@ -1272,15 +1272,15 @@ MaterialStorage::MaterialStorage() { //builtins - actions.renames["TIME"] = "scene_data.time"; - actions.renames["EXPOSURE"] = "(1.0 / scene_data.emissive_exposure_normalization)"; + actions.renames["TIME"] = "scene_data_block.data.time"; + actions.renames["EXPOSURE"] = "(1.0 / scene_data_block.data.emissive_exposure_normalization)"; actions.renames["PI"] = String::num(Math::PI); actions.renames["TAU"] = String::num(Math::TAU); actions.renames["E"] = String::num(Math::E); actions.renames["OUTPUT_IS_SRGB"] = "SHADER_IS_SRGB"; actions.renames["CLIP_SPACE_FAR"] = "SHADER_SPACE_FAR"; actions.renames["IN_SHADOW_PASS"] = "IN_SHADOW_PASS"; - actions.renames["VIEWPORT_SIZE"] = "scene_data.viewport_size"; + actions.renames["VIEWPORT_SIZE"] = "scene_data_block.data.viewport_size"; actions.renames["FRAGCOORD"] = "gl_FragCoord"; actions.renames["FRONT_FACING"] = "gl_FrontFacing"; @@ -1323,10 +1323,10 @@ MaterialStorage::MaterialStorage() { actions.renames["LIGHT_VERTEX"] = "light_vertex"; actions.renames["NODE_POSITION_WORLD"] = "model_matrix[3].xyz"; - actions.renames["CAMERA_POSITION_WORLD"] = "scene_data.inv_view_matrix[3].xyz"; - actions.renames["CAMERA_DIRECTION_WORLD"] = "scene_data.inv_view_matrix[2].xyz"; - actions.renames["CAMERA_VISIBLE_LAYERS"] = "scene_data.camera_visible_layers"; - actions.renames["NODE_POSITION_VIEW"] = "(scene_data.view_matrix * model_matrix)[3].xyz"; + actions.renames["CAMERA_POSITION_WORLD"] = "scene_data_block.data.inv_view_matrix[3].xyz"; + actions.renames["CAMERA_DIRECTION_WORLD"] 
= "scene_data_block.data.inv_view_matrix[2].xyz"; + actions.renames["CAMERA_VISIBLE_LAYERS"] = "scene_data_block.data.camera_visible_layers"; + actions.renames["NODE_POSITION_VIEW"] = "(scene_data_block.data.view_matrix * model_matrix)[3].xyz"; actions.renames["VIEW_INDEX"] = "ViewIndex"; actions.renames["VIEW_MONO_LEFT"] = "uint(0)"; diff --git a/drivers/gles3/storage/mesh_storage.cpp b/drivers/gles3/storage/mesh_storage.cpp index 2d9b37192e0..8b901f74cc3 100644 --- a/drivers/gles3/storage/mesh_storage.cpp +++ b/drivers/gles3/storage/mesh_storage.cpp @@ -872,7 +872,7 @@ void MeshStorage::mesh_clear(RID p_mesh) { } } -void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::Version &v, Mesh::Surface *s, uint64_t p_input_mask, MeshInstance::Surface *mis) { +void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::Version &v, Mesh::Surface *s, uint64_t p_input_mask, bool p_uses_motion_vectors, MeshInstance::Surface *mis, int p_current_vertex_buffer, int p_prev_vertex_buffer) { Mesh::Surface::Attrib attribs[RS::ARRAY_MAX]; int position_stride = 0; // Vertex position only. @@ -1024,7 +1024,7 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V if (i <= RS::ARRAY_TANGENT) { attribs[i].stride = (i == RS::ARRAY_VERTEX) ? 
position_stride : normal_tangent_stride; if (mis) { - glBindBuffer(GL_ARRAY_BUFFER, mis->vertex_buffer); + glBindBuffer(GL_ARRAY_BUFFER, mis->vertex_buffers[p_current_vertex_buffer]); } else { glBindBuffer(GL_ARRAY_BUFFER, s->vertex_buffer); } @@ -1044,12 +1044,28 @@ void MeshStorage::_mesh_surface_generate_version_for_input_mask(Mesh::Surface::V glEnableVertexAttribArray(i); } + if (p_uses_motion_vectors) { + for (int i = 0; i < RS::ARRAY_TANGENT; i++) { + if (mis) { + glBindBuffer(GL_ARRAY_BUFFER, mis->vertex_buffers[mis->prev_vertex_buffer]); + } else { + glBindBuffer(GL_ARRAY_BUFFER, s->vertex_buffer); + } + + glVertexAttribPointer(i + 16, attribs[i].size, attribs[i].type, attribs[i].normalized, attribs[i].stride, CAST_INT_TO_UCHAR_PTR(attribs[i].offset)); + glEnableVertexAttribArray(i + 16); + } + } + // Do not bind index here as we want to switch between index buffers for LOD glBindVertexArray(0); glBindBuffer(GL_ARRAY_BUFFER, 0); v.input_mask = p_input_mask; + v.uses_motion_vectors = p_uses_motion_vectors; + v.current_vertex_buffer = p_current_vertex_buffer; + v.prev_vertex_buffer = p_prev_vertex_buffer; } void MeshStorage::mesh_surface_remove(RID p_mesh, int p_surface) { @@ -1189,16 +1205,17 @@ void MeshStorage::_mesh_instance_add_surface(MeshInstance *mi, Mesh *mesh, uint3 int buffer_size = s.vertex_stride_cache * mesh->surfaces[p_surface]->vertex_count; - // Buffer to be used for rendering. Final output of skeleton and blend shapes. - glGenBuffers(1, &s.vertex_buffer); - glBindBuffer(GL_ARRAY_BUFFER, s.vertex_buffer); - GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, s.vertex_buffer, buffer_size, nullptr, GL_DYNAMIC_DRAW, "MeshInstance vertex buffer"); + // First buffer to be used for rendering. Final output of skeleton and blend shapes. + // If motion vectors are enabled, a second buffer will be created on demand, and they'll be swapped every frame. 
+ glGenBuffers(1, &s.vertex_buffers[0]); + glBindBuffer(GL_ARRAY_BUFFER, s.vertex_buffers[0]); + GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, s.vertex_buffers[0], buffer_size, nullptr, GL_DYNAMIC_DRAW, "MeshInstance vertex buffer"); if (mesh->blend_shape_count > 0) { // Ping-Pong buffers for processing blendshapes. - glGenBuffers(2, s.vertex_buffers); + glGenBuffers(2, s.blend_shape_vertex_buffers); for (uint32_t i = 0; i < 2; i++) { - glBindBuffer(GL_ARRAY_BUFFER, s.vertex_buffers[i]); - GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, s.vertex_buffers[i], buffer_size, nullptr, GL_DYNAMIC_DRAW, "MeshInstance process buffer[" + itos(i) + "]"); + glBindBuffer(GL_ARRAY_BUFFER, s.blend_shape_vertex_buffers[i]); + GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, s.blend_shape_vertex_buffers[i], buffer_size, nullptr, GL_DYNAMIC_DRAW, "MeshInstance process buffer[" + itos(i) + "]"); } } glBindBuffer(GL_ARRAY_BUFFER, 0); //unbind @@ -1226,9 +1243,11 @@ void MeshStorage::_mesh_instance_remove_surface(MeshInstance *mi, int p_surface) surface.vertex_buffers[1] = 0; } - if (surface.vertex_buffer != 0) { - GLES3::Utilities::get_singleton()->buffer_free_data(surface.vertex_buffer); - surface.vertex_buffer = 0; + for (int i = 0; i < 2; i++) { + if (surface.vertex_buffers[i] != 0) { + GLES3::Utilities::get_singleton()->buffer_free_data(surface.vertex_buffers[i]); + surface.vertex_buffers[i] = 0; + } } mi->surfaces.remove_at(p_surface); @@ -1268,7 +1287,7 @@ void MeshStorage::mesh_instance_set_canvas_item_transform(RID p_mesh_instance, c } void MeshStorage::_blend_shape_bind_mesh_instance_buffer(MeshInstance *p_mi, uint32_t p_surface) { - glBindBuffer(GL_ARRAY_BUFFER, p_mi->surfaces[p_surface].vertex_buffers[0]); + glBindBuffer(GL_ARRAY_BUFFER, p_mi->surfaces[p_surface].blend_shape_vertex_buffers[0]); if ((p_mi->surfaces[p_surface].format_cache & (1ULL << RS::ARRAY_VERTEX))) { 
glEnableVertexAttribArray(RS::ARRAY_VERTEX); @@ -1310,7 +1329,7 @@ void MeshStorage::_compute_skeleton(MeshInstance *p_mi, Skeleton *p_sk, uint32_t glVertexAttribPointer(RS::ARRAY_WEIGHTS, 4, GL_UNSIGNED_SHORT, GL_TRUE, skin_stride, CAST_INT_TO_UCHAR_PTR(4 * sizeof(uint16_t))); } - glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, p_mi->surfaces[p_surface].vertex_buffer); + glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, p_mi->surfaces[p_surface].vertex_buffers[p_mi->surfaces[p_surface].current_vertex_buffer]); glActiveTexture(GL_TEXTURE0); glBindTexture(GL_TEXTURE_2D, p_sk->transforms_texture); @@ -1337,6 +1356,36 @@ void MeshStorage::update_mesh_instances() { while (dirty_mesh_instance_arrays.first()) { MeshInstance *mi = dirty_mesh_instance_arrays.first()->self(); + bool uses_motion_vectors = RSG::viewport->get_num_viewports_with_motion_vectors() > 0; + int frame = RSG::rasterizer->get_frame_number(); + if (uses_motion_vectors) { + for (uint32_t i = 0; i < mi->surfaces.size(); i++) { + mi->surfaces[i].prev_vertex_buffer = mi->surfaces[i].current_vertex_buffer; + + if (frame - mi->surfaces[i].last_change == 1) { + // Previous buffer's data can only be one frame old to be able to use motion vectors. + uint32_t new_buffer_index = mi->surfaces[i].current_vertex_buffer ^ 1; + + if (mi->surfaces[i].vertex_buffers[new_buffer_index] == 0) { + // Create the new vertex buffer on demand where the result for the current frame will be stored. + GLuint new_vertex_buffer = 0; + GLES3::Mesh::Surface *surface = mi->mesh->surfaces[i]; + int buffer_size = mi->surfaces[i].vertex_stride_cache * surface->vertex_count; + glGenBuffers(1, &new_vertex_buffer); + glBindBuffer(GL_ARRAY_BUFFER, new_vertex_buffer); + GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, new_vertex_buffer, buffer_size, nullptr, (surface->format & RS::ARRAY_FLAG_USE_DYNAMIC_UPDATE) ? 
GL_DYNAMIC_DRAW : GL_STATIC_DRAW, "Secondary mesh vertex buffer"); + glBindBuffer(GL_ARRAY_BUFFER, 0); + + mi->surfaces[i].vertex_buffers[new_buffer_index] = new_vertex_buffer; + } + + mi->surfaces[i].current_vertex_buffer = new_buffer_index; + } + + mi->surfaces[i].last_change = frame; + } + } + Skeleton *sk = skeleton_owner.get_or_null(mi->skeleton); // Precompute base weight if using blend shapes. @@ -1348,7 +1397,7 @@ void MeshStorage::update_mesh_instances() { } for (uint32_t i = 0; i < mi->surfaces.size(); i++) { - if (mi->surfaces[i].vertex_buffer == 0) { + if (mi->surfaces[i].vertex_buffers[mi->surfaces[i].current_vertex_buffer] == 0) { continue; } @@ -1383,9 +1432,9 @@ void MeshStorage::update_mesh_instances() { GLuint vertex_array_gl = 0; uint64_t mask = RS::ARRAY_FORMAT_VERTEX | RS::ARRAY_FORMAT_NORMAL | RS::ARRAY_FORMAT_VERTEX; uint64_t format = mi->mesh->surfaces[i]->format & mask; // Format should only have vertex, normal, tangent (as necessary). - mesh_surface_get_vertex_arrays_and_format(mi->mesh->surfaces[i], format, vertex_array_gl); + mesh_surface_get_vertex_arrays_and_format(mi->mesh->surfaces[i], format, false, vertex_array_gl); glBindVertexArray(vertex_array_gl); - glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, mi->surfaces[i].vertex_buffers[0]); + glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, mi->surfaces[i].blend_shape_vertex_buffers[0]); glBeginTransformFeedback(GL_POINTS); glDrawArrays(GL_POINTS, 0, mi->mesh->surfaces[i]->vertex_count); glEndTransformFeedback(); @@ -1407,15 +1456,17 @@ void MeshStorage::update_mesh_instances() { skeleton_shader.shader.version_set_uniform(SkeletonShaderGLES3::BLEND_WEIGHT, weight, skeleton_shader.shader_version, variant, specialization); skeleton_shader.shader.version_set_uniform(SkeletonShaderGLES3::BLEND_SHAPE_COUNT, float(mi->mesh->blend_shape_count), skeleton_shader.shader_version, variant, specialization); + // Ensure the skeleton shader outputs to the correct (current) VBO. 
+ glBindVertexArray(mi->mesh->surfaces[i]->blend_shapes[bs].vertex_array); _blend_shape_bind_mesh_instance_buffer(mi, i); - glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, mi->surfaces[i].vertex_buffers[1]); + glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, mi->surfaces[i].blend_shape_vertex_buffers[1]); glBeginTransformFeedback(GL_POINTS); glDrawArrays(GL_POINTS, 0, mi->mesh->surfaces[i]->vertex_count); glEndTransformFeedback(); - SWAP(mi->surfaces[i].vertex_buffers[0], mi->surfaces[i].vertex_buffers[1]); + SWAP(mi->surfaces[i].blend_shape_vertex_buffers[0], mi->surfaces[i].blend_shape_vertex_buffers[1]); } uint32_t bs = mi->mesh->blend_shape_count - 1; @@ -1451,7 +1502,7 @@ void MeshStorage::update_mesh_instances() { can_use_skeleton = false; } else { // Do last blendshape by itself and prepare vertex data for use by the renderer. - glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, mi->surfaces[i].vertex_buffer); + glBindBufferBase(GL_TRANSFORM_FEEDBACK_BUFFER, 0, mi->surfaces[i].vertex_buffers[mi->surfaces[i].current_vertex_buffer]); glBeginTransformFeedback(GL_POINTS); glDrawArrays(GL_POINTS, 0, mi->mesh->surfaces[i]->vertex_count); @@ -1497,7 +1548,7 @@ void MeshStorage::update_mesh_instances() { GLuint vertex_array_gl = 0; uint64_t mask = RS::ARRAY_FORMAT_VERTEX | RS::ARRAY_FORMAT_NORMAL | RS::ARRAY_FORMAT_VERTEX; uint64_t format = mi->mesh->surfaces[i]->format & mask; // Format should only have vertex, normal, tangent (as necessary). 
- mesh_surface_get_vertex_arrays_and_format(mi->mesh->surfaces[i], format, vertex_array_gl); + mesh_surface_get_vertex_arrays_and_format(mi->mesh->surfaces[i], format, false, vertex_array_gl); glBindVertexArray(vertex_array_gl); _compute_skeleton(mi, sk, i); } @@ -1541,9 +1592,11 @@ void MeshStorage::_multimesh_allocate_data(RID p_multimesh, int p_instances, RS: return; } - if (multimesh->buffer) { - GLES3::Utilities::get_singleton()->buffer_free_data(multimesh->buffer); - multimesh->buffer = 0; + for (int i = 0; i < 2; i++) { + if (multimesh->buffer[i] != 0) { + GLES3::Utilities::get_singleton()->buffer_free_data(multimesh->buffer[i]); + multimesh->buffer[i] = 0; + } } if (multimesh->data_cache_dirty_regions) { @@ -1571,9 +1624,9 @@ void MeshStorage::_multimesh_allocate_data(RID p_multimesh, int p_instances, RS: multimesh->visible_instances = MIN(multimesh->visible_instances, multimesh->instances); if (multimesh->instances) { - glGenBuffers(1, &multimesh->buffer); - glBindBuffer(GL_ARRAY_BUFFER, multimesh->buffer); - GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, multimesh->buffer, multimesh->instances * multimesh->stride_cache * sizeof(float), nullptr, GL_STATIC_DRAW, "MultiMesh buffer"); + glGenBuffers(1, &multimesh->buffer[0]); + glBindBuffer(GL_ARRAY_BUFFER, multimesh->buffer[0]); + GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, multimesh->buffer[0], multimesh->instances * multimesh->stride_cache * sizeof(float), nullptr, GL_STATIC_DRAW, "MultiMesh buffer"); glBindBuffer(GL_ARRAY_BUFFER, 0); } @@ -1604,7 +1657,7 @@ void MeshStorage::_multimesh_set_mesh(RID p_multimesh, RID p_mesh) { } else if (multimesh->instances) { // Need to re-create AABB. Unfortunately, calling this has a penalty. 
if (multimesh->buffer_set) { - Vector buffer = Utilities::buffer_get_data(GL_ARRAY_BUFFER, multimesh->buffer, multimesh->instances * multimesh->stride_cache * sizeof(float)); + Vector buffer = Utilities::buffer_get_data(GL_ARRAY_BUFFER, multimesh->buffer[multimesh->current_buffer], multimesh->instances * multimesh->stride_cache * sizeof(float)); const uint8_t *r = buffer.ptr(); const float *data = (const float *)r; _multimesh_re_create_aabb(multimesh, data, multimesh->instances); @@ -1628,7 +1681,7 @@ void MeshStorage::_multimesh_make_local(MultiMesh *multimesh) const { float *w = multimesh->data_cache.ptrw(); if (multimesh->buffer_set) { - Vector buffer = Utilities::buffer_get_data(GL_ARRAY_BUFFER, multimesh->buffer, multimesh->instances * multimesh->stride_cache * sizeof(float)); + Vector buffer = Utilities::buffer_get_data(GL_ARRAY_BUFFER, multimesh->buffer[multimesh->current_buffer], multimesh->instances * multimesh->stride_cache * sizeof(float)); { const uint8_t *r = buffer.ptr(); @@ -1971,6 +2024,10 @@ void MeshStorage::_multimesh_set_buffer(RID p_multimesh, const Vector &p_ MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL(multimesh); + // Assign data to previous buffer if motion vectors are used, that data will be made current in _update_dirty_multimeshes(). + bool uses_motion_vectors = RSG::viewport->get_num_viewports_with_motion_vectors() > 0; + int buffer_index = uses_motion_vectors ? multimesh->prev_buffer : multimesh->current_buffer; + if (multimesh->uses_colors || multimesh->uses_custom_data) { // Color and custom need to be packed so copy buffer to data_cache and pack. 
@@ -2016,7 +2073,7 @@ void MeshStorage::_multimesh_set_buffer(RID p_multimesh, const Vector &p_ multimesh->data_cache.resize(multimesh->instances * (int)multimesh->stride_cache); const float *r = multimesh->data_cache.ptr(); - glBindBuffer(GL_ARRAY_BUFFER, multimesh->buffer); + glBindBuffer(GL_ARRAY_BUFFER, multimesh->buffer[buffer_index]); glBufferData(GL_ARRAY_BUFFER, multimesh->data_cache.size() * sizeof(float), r, GL_STATIC_DRAW); glBindBuffer(GL_ARRAY_BUFFER, 0); @@ -2029,7 +2086,7 @@ void MeshStorage::_multimesh_set_buffer(RID p_multimesh, const Vector &p_ // Only Transform is being used, so we can upload directly. ERR_FAIL_COND(p_buffer.size() != (multimesh->instances * (int)multimesh->stride_cache)); const float *r = p_buffer.ptr(); - glBindBuffer(GL_ARRAY_BUFFER, multimesh->buffer); + glBindBuffer(GL_ARRAY_BUFFER, multimesh->buffer[buffer_index]); glBufferData(GL_ARRAY_BUFFER, p_buffer.size() * sizeof(float), r, GL_STATIC_DRAW); glBindBuffer(GL_ARRAY_BUFFER, 0); } @@ -2068,14 +2125,14 @@ Vector MeshStorage::_multimesh_get_buffer(RID p_multimesh) const { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL_V(multimesh, Vector()); Vector ret; - if (multimesh->buffer == 0 || multimesh->instances == 0) { + if (multimesh->buffer[multimesh->current_buffer] == 0 || multimesh->instances == 0) { return Vector(); } else if (multimesh->data_cache.size()) { ret = multimesh->data_cache; } else { // Buffer not cached, so fetch from GPU memory. This can be a stalling operation, avoid whenever possible. 
- Vector buffer = Utilities::buffer_get_data(GL_ARRAY_BUFFER, multimesh->buffer, multimesh->instances * multimesh->stride_cache * sizeof(float)); + Vector buffer = Utilities::buffer_get_data(GL_ARRAY_BUFFER, multimesh->buffer[multimesh->current_buffer], multimesh->instances * multimesh->stride_cache * sizeof(float)); ret.resize(multimesh->instances * multimesh->stride_cache); { float *w = ret.ptrw(); @@ -2175,53 +2232,27 @@ void MeshStorage::_update_dirty_multimeshes() { while (multimesh_dirty_list) { MultiMesh *multimesh = multimesh_dirty_list; - if (multimesh->data_cache.size()) { //may have been cleared, so only process if it exists - const float *data = multimesh->data_cache.ptr(); + bool uses_motion_vectors = RSG::viewport->get_num_viewports_with_motion_vectors() > 0; + if (uses_motion_vectors) { + multimesh->prev_buffer = multimesh->current_buffer; + uint32_t new_buffer_index = multimesh->current_buffer ^ 1; - uint32_t visible_instances = multimesh->visible_instances >= 0 ? multimesh->visible_instances : multimesh->instances; - - if (multimesh->data_cache_used_dirty_regions) { - uint32_t data_cache_dirty_region_count = Math::division_round_up(multimesh->instances, (int)MULTIMESH_DIRTY_REGION_SIZE); - uint32_t visible_region_count = visible_instances == 0 ? 0 : Math::division_round_up(visible_instances, (uint32_t)MULTIMESH_DIRTY_REGION_SIZE); - - GLint region_size = multimesh->stride_cache * MULTIMESH_DIRTY_REGION_SIZE * sizeof(float); - - if (multimesh->data_cache_used_dirty_regions > 32 || multimesh->data_cache_used_dirty_regions > visible_region_count / 2) { - // If there too many dirty regions, or represent the majority of regions, just copy all, else transfer cost piles up too much - glBindBuffer(GL_ARRAY_BUFFER, multimesh->buffer); - glBufferSubData(GL_ARRAY_BUFFER, 0, MIN(visible_region_count * region_size, multimesh->instances * multimesh->stride_cache * sizeof(float)), data); - glBindBuffer(GL_ARRAY_BUFFER, 0); - } else { - // Not that many regions? 
update them all - // TODO: profile the performance cost on low end - glBindBuffer(GL_ARRAY_BUFFER, multimesh->buffer); - for (uint32_t i = 0; i < visible_region_count; i++) { - if (multimesh->data_cache_dirty_regions[i]) { - GLint offset = i * region_size; - GLint size = multimesh->stride_cache * (uint32_t)multimesh->instances * (uint32_t)sizeof(float); - uint32_t region_start_index = multimesh->stride_cache * MULTIMESH_DIRTY_REGION_SIZE * i; - glBufferSubData(GL_ARRAY_BUFFER, offset, MIN(region_size, size - offset), &data[region_start_index]); - } - } - glBindBuffer(GL_ARRAY_BUFFER, 0); - } - - for (uint32_t i = 0; i < data_cache_dirty_region_count; i++) { - multimesh->data_cache_dirty_regions[i] = false; - } - - multimesh->data_cache_used_dirty_regions = 0; + // Generate secondary buffer if it doesn't exist. + if (multimesh->buffer[new_buffer_index] == 0 && multimesh->instances) { + GLuint new_buffer = 0; + glGenBuffers(1, &new_buffer); + glBindBuffer(GL_ARRAY_BUFFER, new_buffer); + GLES3::Utilities::get_singleton()->buffer_allocate_data(GL_ARRAY_BUFFER, new_buffer, multimesh->instances * multimesh->stride_cache * sizeof(float), nullptr, GL_STATIC_DRAW, "MultiMesh secondary buffer"); + glBindBuffer(GL_ARRAY_BUFFER, 0); + multimesh->buffer[new_buffer_index] = new_buffer; } - if (multimesh->aabb_dirty && multimesh->mesh.is_valid()) { - multimesh->aabb_dirty = false; - if (multimesh->custom_aabb == AABB()) { - _multimesh_re_create_aabb(multimesh, data, visible_instances); - multimesh->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_AABB); - } - } + multimesh->current_buffer = new_buffer_index; + multimesh->last_change = RSG::rasterizer->get_frame_number(); } + _update_dirty_multimesh(multimesh, uses_motion_vectors); + multimesh_dirty_list = multimesh->dirty_list; multimesh->dirty_list = nullptr; @@ -2231,6 +2262,86 @@ void MeshStorage::_update_dirty_multimeshes() { multimesh_dirty_list = nullptr; } +void MeshStorage::_update_dirty_multimesh(MultiMesh 
*p_multimesh, bool p_uses_motion_vectors) { + if (p_multimesh->data_cache.size()) { // May have been cleared, so only process if it exists. + const float *data = p_multimesh->data_cache.ptr(); + + uint32_t visible_instances = p_multimesh->visible_instances >= 0 ? p_multimesh->visible_instances : p_multimesh->instances; + + if (p_multimesh->data_cache_used_dirty_regions) { + uint32_t data_cache_dirty_region_count = Math::division_round_up(p_multimesh->instances, (int)MULTIMESH_DIRTY_REGION_SIZE); + uint32_t visible_region_count = visible_instances == 0 ? 0 : Math::division_round_up(visible_instances, (uint32_t)MULTIMESH_DIRTY_REGION_SIZE); + + GLint region_size = p_multimesh->stride_cache * MULTIMESH_DIRTY_REGION_SIZE * sizeof(float); + + if (p_multimesh->data_cache_used_dirty_regions > 32 || p_multimesh->data_cache_used_dirty_regions > visible_region_count / 2 || p_uses_motion_vectors) { + // If there are too many dirty regions, the dirty regions represent the majority of visible regions, or motion vectors are used: + // Just copy all, else transfer cost piles up too much. + glBindBuffer(GL_ARRAY_BUFFER, p_multimesh->buffer[p_multimesh->current_buffer]); + glBufferSubData(GL_ARRAY_BUFFER, 0, MIN(visible_region_count * region_size, p_multimesh->instances * p_multimesh->stride_cache * sizeof(float)), data); + glBindBuffer(GL_ARRAY_BUFFER, 0); + } else { + // Not that many regions? Update them all. 
+ // TODO: profile the performance cost on low end + glBindBuffer(GL_ARRAY_BUFFER, p_multimesh->buffer[p_multimesh->current_buffer]); + for (uint32_t i = 0; i < visible_region_count; i++) { + if (p_multimesh->data_cache_dirty_regions[i]) { + GLint offset = i * region_size; + GLint size = p_multimesh->stride_cache * (uint32_t)p_multimesh->instances * (uint32_t)sizeof(float); + uint32_t region_start_index = p_multimesh->stride_cache * MULTIMESH_DIRTY_REGION_SIZE * i; + glBufferSubData(GL_ARRAY_BUFFER, offset, MIN(region_size, size - offset), &data[region_start_index]); + } + } + glBindBuffer(GL_ARRAY_BUFFER, 0); + } + + for (uint32_t i = 0; i < data_cache_dirty_region_count; i++) { + p_multimesh->data_cache_dirty_regions[i] = false; + } + + p_multimesh->data_cache_used_dirty_regions = 0; + } + + if (p_multimesh->aabb_dirty && p_multimesh->mesh.is_valid()) { + p_multimesh->aabb_dirty = false; + if (p_multimesh->custom_aabb == AABB()) { + _multimesh_re_create_aabb(p_multimesh, data, visible_instances); + p_multimesh->dependency.changed_notify(Dependency::DEPENDENCY_CHANGED_AABB); + } + } + } +} + +void GLES3::MeshStorage::multimesh_vertex_attrib_setup(GLuint p_instance_buffer, uint32_t p_stride, bool p_uses_format_2d, bool p_has_color_or_custom_data, int p_attrib_base_index) { + glBindBuffer(GL_ARRAY_BUFFER, p_instance_buffer); + + glEnableVertexAttribArray(p_attrib_base_index + 0); + glVertexAttribPointer(p_attrib_base_index + 0, 4, GL_FLOAT, GL_FALSE, p_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(0)); + glVertexAttribDivisor(p_attrib_base_index + 0, 1); + glEnableVertexAttribArray(p_attrib_base_index + 1); + glVertexAttribPointer(p_attrib_base_index + 1, 4, GL_FLOAT, GL_FALSE, p_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(sizeof(float) * 4)); + glVertexAttribDivisor(p_attrib_base_index + 1, 1); + if (!p_uses_format_2d) { + glEnableVertexAttribArray(p_attrib_base_index + 2); + glVertexAttribPointer(p_attrib_base_index + 2, 4, GL_FLOAT, GL_FALSE, p_stride * 
sizeof(float), CAST_INT_TO_UCHAR_PTR(sizeof(float) * 8)); + glVertexAttribDivisor(p_attrib_base_index + 2, 1); + } + + if (p_has_color_or_custom_data) { + uint32_t color_custom_offset = p_uses_format_2d ? 8 : 12; + glEnableVertexAttribArray(p_attrib_base_index + 3); + glVertexAttribIPointer(p_attrib_base_index + 3, 4, GL_UNSIGNED_INT, p_stride * sizeof(float), CAST_INT_TO_UCHAR_PTR(color_custom_offset * sizeof(float))); + glVertexAttribDivisor(p_attrib_base_index + 3, 1); + } else { + // Set all default instance color and custom data values to 1.0 or 0.0 using a compressed format. + uint16_t zero = Math::make_half_float(0.0f); + uint16_t one = Math::make_half_float(1.0f); + GLuint default_color = (uint32_t(one) << 16) | one; + GLuint default_custom = (uint32_t(zero) << 16) | zero; + glVertexAttribI4ui(p_attrib_base_index + 3, default_color, default_color, default_custom, default_custom); + } +} + /* SKELETON API */ RID MeshStorage::skeleton_allocate() { diff --git a/drivers/gles3/storage/mesh_storage.h b/drivers/gles3/storage/mesh_storage.h index 06684e22e60..52074c94cc8 100644 --- a/drivers/gles3/storage/mesh_storage.h +++ b/drivers/gles3/storage/mesh_storage.h @@ -36,6 +36,7 @@ #include "core/templates/rid_owner.h" #include "core/templates/self_list.h" #include "drivers/gles3/shaders/skeleton.glsl.gen.h" +#include "servers/rendering/rendering_server_globals.h" #include "servers/rendering/storage/mesh_storage.h" #include "servers/rendering/storage/utilities.h" @@ -70,6 +71,9 @@ struct Mesh { // Cache vertex arrays so they can be created struct Version { uint32_t input_mask = 0; + bool uses_motion_vectors = false; + uint32_t current_vertex_buffer = 0; + uint32_t prev_vertex_buffer = 0; GLuint vertex_array = 0; Attrib attribs[RS::ARRAY_MAX]; @@ -152,9 +156,9 @@ struct MeshInstance { Mesh *mesh = nullptr; RID skeleton; struct Surface { - GLuint vertex_buffers[2] = { 0, 0 }; + GLuint blend_shape_vertex_buffers[2] = { 0, 0 }; GLuint vertex_arrays[2] = { 0, 0 }; - 
GLuint vertex_buffer = 0; + GLuint vertex_buffers[2] = { 0, 0 }; int vertex_stride_cache = 0; int vertex_size_cache = 0; int vertex_normal_offset_cache = 0; @@ -163,6 +167,11 @@ struct MeshInstance { Mesh::Surface::Version *versions = nullptr; //allocated on demand uint32_t version_count = 0; + + bool uses_motion_vectors = false; + int current_vertex_buffer = 0; + int prev_vertex_buffer = 0; + uint64_t last_change = 0; }; LocalVector surfaces; LocalVector blend_weights; @@ -199,7 +208,10 @@ struct MultiMesh { bool *data_cache_dirty_regions = nullptr; uint32_t data_cache_used_dirty_regions = 0; - GLuint buffer = 0; + GLuint buffer[2] = { 0, 0 }; + int current_buffer = 0; + int prev_buffer = 0; + uint64_t last_change = 0; bool dirty = false; MultiMesh *dirty_list = nullptr; @@ -239,7 +251,7 @@ private: mutable RID_Owner mesh_owner; - void _mesh_surface_generate_version_for_input_mask(Mesh::Surface::Version &v, Mesh::Surface *s, uint64_t p_input_mask, MeshInstance::Surface *mis = nullptr); + void _mesh_surface_generate_version_for_input_mask(Mesh::Surface::Version &v, Mesh::Surface *s, uint64_t p_input_mask, bool p_uses_motion_vectors, MeshInstance::Surface *mis = nullptr, int p_current_vertex_buffer = 0, int p_prev_vertex_buffer = 0); void _mesh_surface_clear(Mesh *mesh, int p_surface); /* Mesh Instance API */ @@ -418,7 +430,7 @@ public: } // Use this to cache Vertex Array Objects so they are only generated once - _FORCE_INLINE_ void mesh_surface_get_vertex_arrays_and_format(void *p_surface, uint64_t p_input_mask, GLuint &r_vertex_array_gl) { + _FORCE_INLINE_ void mesh_surface_get_vertex_arrays_and_format(void *p_surface, uint64_t p_input_mask, bool p_uses_motion_vectors, GLuint &r_vertex_array_gl) { Mesh::Surface *s = reinterpret_cast(p_surface); s->version_lock.lock(); @@ -426,7 +438,7 @@ public: // There will never be more than 3 or 4 versions, so iterating is the fastest way. 
for (uint32_t i = 0; i < s->version_count; i++) { - if (s->versions[i].input_mask != p_input_mask) { + if (s->versions[i].input_mask != p_input_mask || s->versions[i].uses_motion_vectors != p_uses_motion_vectors) { continue; } // We have this version, hooray. @@ -439,7 +451,7 @@ public: s->version_count++; s->versions = (Mesh::Surface::Version *)memrealloc(s->versions, sizeof(Mesh::Surface::Version) * s->version_count); - _mesh_surface_generate_version_for_input_mask(s->versions[version], s, p_input_mask); + _mesh_surface_generate_version_for_input_mask(s->versions[version], s, p_input_mask, p_uses_motion_vectors); r_vertex_array_gl = s->versions[version].vertex_array; @@ -461,7 +473,7 @@ public: // TODO: considering hashing versions with multimesh buffer RID. // Doing so would allow us to avoid specifying multimesh buffer pointers every frame and may improve performance. - _FORCE_INLINE_ void mesh_instance_surface_get_vertex_arrays_and_format(RID p_mesh_instance, uint32_t p_surface_index, uint64_t p_input_mask, GLuint &r_vertex_array_gl) { + _FORCE_INLINE_ void mesh_instance_surface_get_vertex_arrays_and_format(RID p_mesh_instance, uint32_t p_surface_index, uint64_t p_input_mask, bool p_uses_motion_vectors, GLuint &r_vertex_array_gl) { MeshInstance *mi = mesh_instance_owner.get_or_null(p_mesh_instance); ERR_FAIL_NULL(mi); Mesh *mesh = mi->mesh; @@ -470,14 +482,24 @@ public: MeshInstance::Surface *mis = &mi->surfaces[p_surface_index]; Mesh::Surface *s = mesh->surfaces[p_surface_index]; + uint32_t current_buffer = mis->current_vertex_buffer; + + // Using the previous buffer is only allowed if the surface was updated this frame and motion vectors are required. + uint32_t previous_buffer = p_uses_motion_vectors && (RSG::rasterizer->get_frame_number() == mis->last_change) ? 
mis->prev_vertex_buffer : current_buffer; + s->version_lock.lock(); //there will never be more than, at much, 3 or 4 versions, so iterating is the fastest way for (uint32_t i = 0; i < mis->version_count; i++) { - if (mis->versions[i].input_mask != p_input_mask) { + if (mis->versions[i].input_mask != p_input_mask || mis->versions[i].uses_motion_vectors != p_uses_motion_vectors) { continue; } + + if (mis->versions[i].current_vertex_buffer != current_buffer || mis->versions[i].prev_vertex_buffer != previous_buffer) { + continue; + } + //we have this version, hooray r_vertex_array_gl = mis->versions[i].vertex_array; s->version_lock.unlock(); @@ -488,7 +510,7 @@ public: mis->version_count++; mis->versions = (Mesh::Surface::Version *)memrealloc(mis->versions, sizeof(Mesh::Surface::Version) * mis->version_count); - _mesh_surface_generate_version_for_input_mask(mis->versions[version], s, p_input_mask, mis); + _mesh_surface_generate_version_for_input_mask(mis->versions[version], s, p_input_mask, p_uses_motion_vectors, mis, current_buffer, previous_buffer); r_vertex_array_gl = mis->versions[version].vertex_array; @@ -532,6 +554,9 @@ public: virtual MultiMeshInterpolator *_multimesh_get_interpolator(RID p_multimesh) const override; void _update_dirty_multimeshes(); + void _update_dirty_multimesh(MultiMesh *p_multimesh, bool p_uses_motion_vectors); + + void multimesh_vertex_attrib_setup(GLuint p_instance_buffer, uint32_t p_stride, bool p_uses_format_2d, bool p_has_color_or_custom_data, int p_attrib_base_index); _FORCE_INLINE_ RS::MultimeshTransformFormat multimesh_get_transform_format(RID p_multimesh) const { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); @@ -563,7 +588,17 @@ public: _FORCE_INLINE_ GLuint multimesh_get_gl_buffer(RID p_multimesh) const { MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); ERR_FAIL_NULL_V(multimesh, 0); - return multimesh->buffer; + return multimesh->buffer[multimesh->current_buffer]; + } + + _FORCE_INLINE_ GLuint 
multimesh_get_prev_gl_buffer(RID p_multimesh) const { + MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); + return multimesh->buffer[multimesh->prev_buffer]; + } + + _FORCE_INLINE_ uint64_t multimesh_get_last_change(RID p_multimesh) const { + MultiMesh *multimesh = multimesh_owner.get_or_null(p_multimesh); + return multimesh->last_change; } _FORCE_INLINE_ uint32_t multimesh_get_stride(RID p_multimesh) const { diff --git a/drivers/gles3/storage/particles_storage.cpp b/drivers/gles3/storage/particles_storage.cpp index bcdd0bee9b5..4f3b13aad7f 100644 --- a/drivers/gles3/storage/particles_storage.cpp +++ b/drivers/gles3/storage/particles_storage.cpp @@ -1169,6 +1169,7 @@ void ParticlesStorage::update_particles() { } SWAP(particles->front_instance_buffer, particles->back_instance_buffer); + particles->last_change = RSG::rasterizer->get_frame_number(); // At the end of update, the back_buffer contains the most up-to-date-information to read from. diff --git a/drivers/gles3/storage/particles_storage.h b/drivers/gles3/storage/particles_storage.h index 662592cea4b..60449faa678 100644 --- a/drivers/gles3/storage/particles_storage.h +++ b/drivers/gles3/storage/particles_storage.h @@ -195,6 +195,8 @@ private: GLuint back_process_buffer = 0; // Transform + color + custom data + userdata + velocity + flags. Only needed for processing. GLuint back_instance_buffer = 0; // Transform + color + custom data. In packed format needed for rendering. 
+ uint64_t last_change = 0; + uint32_t instance_buffer_size_cache = 0; uint32_t instance_buffer_stride_cache = 0; uint32_t num_attrib_arrays_cache = 0; @@ -397,6 +399,20 @@ public: return particles->back_instance_buffer; } + _FORCE_INLINE_ GLuint particles_get_prev_gl_buffer(RID p_particles) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_NULL_V(particles, 0); + + return particles->front_instance_buffer; + } + + _FORCE_INLINE_ uint64_t particles_get_last_change(RID p_particles) { + Particles *particles = particles_owner.get_or_null(p_particles); + ERR_FAIL_NULL_V(particles, 0); + + return particles->last_change; + } + _FORCE_INLINE_ bool particles_has_collision(RID p_particles) { Particles *particles = particles_owner.get_or_null(p_particles); ERR_FAIL_NULL_V(particles, false); diff --git a/drivers/gles3/storage/render_scene_buffers_gles3.cpp b/drivers/gles3/storage/render_scene_buffers_gles3.cpp index 1ad5dc6bcba..017beefb488 100644 --- a/drivers/gles3/storage/render_scene_buffers_gles3.cpp +++ b/drivers/gles3/storage/render_scene_buffers_gles3.cpp @@ -35,13 +35,6 @@ #include "texture_storage.h" #include "utilities.h" -#ifdef ANDROID_ENABLED -#define glFramebufferTextureMultiviewOVR GLES3::Config::get_singleton()->eglFramebufferTextureMultiviewOVR -#define glTexStorage3DMultisample GLES3::Config::get_singleton()->eglTexStorage3DMultisample -#define glFramebufferTexture2DMultisampleEXT GLES3::Config::get_singleton()->eglFramebufferTexture2DMultisampleEXT -#define glFramebufferTextureMultisampleMultiviewOVR GLES3::Config::get_singleton()->eglFramebufferTextureMultisampleMultiviewOVR -#endif // ANDROID_ENABLED - // Will only be defined if GLES 3.2 headers are included #ifndef GL_TEXTURE_2D_MULTISAMPLE_ARRAY #define GL_TEXTURE_2D_MULTISAMPLE_ARRAY 0x9102 diff --git a/drivers/gles3/storage/texture_storage.cpp b/drivers/gles3/storage/texture_storage.cpp index 2d730a84464..216e60bd3d2 100644 --- a/drivers/gles3/storage/texture_storage.cpp 
+++ b/drivers/gles3/storage/texture_storage.cpp @@ -37,10 +37,6 @@ #include "config.h" #include "utilities.h" -#ifdef ANDROID_ENABLED -#define glFramebufferTextureMultiviewOVR GLES3::Config::get_singleton()->eglFramebufferTextureMultiviewOVR -#endif - using namespace GLES3; TextureStorage *TextureStorage::singleton = nullptr; @@ -2265,7 +2261,7 @@ AABB TextureStorage::decal_get_aabb(RID p_decal) const { GLuint TextureStorage::system_fbo = 0; -void TextureStorage::_update_render_target(RenderTarget *rt) { +void TextureStorage::_update_render_target_color(RenderTarget *rt) { // do not allocate a render target with no size if (rt->size.x <= 0 || rt->size.y <= 0) { return; @@ -2437,6 +2433,60 @@ void TextureStorage::_update_render_target(RenderTarget *rt) { glBindFramebuffer(GL_FRAMEBUFFER, system_fbo); } +void TextureStorage::_update_render_target_velocity(RenderTarget *rt) { + GLuint new_velocity_fbo; + glGenFramebuffers(1, &new_velocity_fbo); + glBindFramebuffer(GL_FRAMEBUFFER, new_velocity_fbo); + + uint32_t view_count = rt->view_count; + GLuint texture_target = view_count > 1 ? 
GL_TEXTURE_2D_ARRAY : GL_TEXTURE_2D; + + GLuint velocity_texture_id = texture_get_texid(rt->overridden.velocity); + glBindTexture(texture_target, velocity_texture_id); + glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + +#ifndef IOS_ENABLED + if (view_count > 1) { + glFramebufferTextureMultiviewOVR(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, velocity_texture_id, 0, 0, view_count); + } else { +#else + { +#endif + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, velocity_texture_id, 0); + } + + GLuint velocity_depth_texture_id = texture_get_texid(rt->overridden.velocity_depth); + glBindTexture(texture_target, velocity_depth_texture_id); + glTexParameteri(texture_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + glTexParameteri(texture_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + glTexParameteri(texture_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(texture_target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + +#ifndef IOS_ENABLED + if (view_count > 1) { + glFramebufferTextureMultiviewOVR(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, velocity_depth_texture_id, 0, 0, view_count); + } else { +#else + { +#endif + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, velocity_depth_texture_id, 0); + } + + GLenum status = glCheckFramebufferStatus(GL_FRAMEBUFFER); + if (status != GL_FRAMEBUFFER_COMPLETE) { + glDeleteFramebuffers(1, &new_velocity_fbo); + WARN_PRINT(vformat("Could not create motion vector render target, status: %s.", GLES3::TextureStorage::get_singleton()->get_framebuffer_error(status))); + } else { + rt->overridden.velocity_fbo = new_velocity_fbo; + } + + glBindTexture(texture_target, 0); + glBindFramebuffer(GL_FRAMEBUFFER, system_fbo); // Restore the platform default framebuffer (not necessarily object 0), matching _update_render_target_color(). +} + void
TextureStorage::_create_render_target_backbuffer(RenderTarget *rt) { ERR_FAIL_COND_MSG(rt->backbuffer_fbo != 0, "Cannot allocate RenderTarget backbuffer: already initialized."); ERR_FAIL_COND(rt->direct_to_screen); @@ -2578,6 +2628,12 @@ void TextureStorage::_clear_render_target(RenderTarget *rt) { return; } + for (KeyValue &E : rt->overridden.velocity_fbo_cache) { + glDeleteFramebuffers(1, &E.value); + } + rt->overridden.velocity_fbo_cache.clear(); + rt->overridden.velocity_fbo = 0; + // Dispose of the cached fbo's and the allocated textures for (KeyValue &E : rt->overridden.fbo_cache) { glDeleteTextures(E.value.allocated_textures.size(), E.value.allocated_textures.ptr()); @@ -2630,7 +2686,6 @@ void TextureStorage::_clear_render_target(RenderTarget *rt) { } rt->depth = 0; - rt->overridden.velocity = RID(); rt->overridden.is_overridden = false; if (rt->backbuffer_fbo != 0) { @@ -2659,7 +2714,7 @@ RID TextureStorage::render_target_create() { t.is_render_target = true; render_target.texture = texture_owner.make_rid(t); - _update_render_target(&render_target); + _update_render_target_color(&render_target); return render_target_owner.make_rid(render_target); } @@ -2708,7 +2763,7 @@ void TextureStorage::render_target_set_size(RID p_render_target, int p_width, in rt->size = Size2i(p_width, p_height); rt->view_count = p_view_count; - _update_render_target(rt); + _update_render_target_color(rt); } // TODO: convert to Size2i internally @@ -2727,9 +2782,10 @@ void TextureStorage::render_target_set_override(RID p_render_target, RID p_color // Remember what our current color output is. 
RID was_color_texture = render_target_get_texture(p_render_target); - rt->overridden.velocity = p_velocity_texture; + bool create_new_color_fbo = true; + bool create_new_velocity_fbo = true; - if (rt->overridden.color == p_color_texture && rt->overridden.depth == p_depth_texture) { + if (rt->overridden.color == p_color_texture && rt->overridden.depth == p_depth_texture && rt->overridden.velocity == p_velocity_texture && rt->overridden.velocity_depth == p_velocity_depth_texture) { return; } @@ -2740,8 +2796,8 @@ void TextureStorage::render_target_set_override(RID p_render_target, RID p_color } _clear_render_target(rt); - _update_render_target(rt); - return; + _update_render_target_color(rt); + create_new_color_fbo = false; } if (!rt->overridden.is_overridden) { @@ -2751,6 +2807,8 @@ void TextureStorage::render_target_set_override(RID p_render_target, RID p_color rt->overridden.color = p_color_texture; rt->overridden.depth = p_depth_texture; rt->overridden.depth_has_stencil = p_depth_texture.is_null(); + rt->overridden.velocity = p_velocity_texture; + rt->overridden.velocity_depth = p_velocity_depth_texture; rt->overridden.is_overridden = true; // Update to our new color output. 
@@ -2771,25 +2829,52 @@ void TextureStorage::render_target_set_override(RID p_render_target, RID p_color rt->depth_has_stencil = cache->get().depth_has_stencil; rt->size = cache->get().size; rt->texture = p_color_texture; - return; + create_new_color_fbo = false; } - _update_render_target(rt); + uint32_t velocity_hash_key = hash_murmur3_one_64(p_velocity_texture.get_id()); + velocity_hash_key = hash_murmur3_one_64(p_velocity_depth_texture.get_id(), velocity_hash_key); + velocity_hash_key = hash_fmix32(velocity_hash_key); - RenderTarget::RTOverridden::FBOCacheEntry new_entry; - new_entry.fbo = rt->fbo; - new_entry.color = rt->color; - new_entry.depth = rt->depth; - new_entry.depth_has_stencil = rt->depth_has_stencil; - new_entry.size = rt->size; - // Keep track of any textures we had to allocate because they weren't overridden. - if (p_color_texture.is_null()) { - new_entry.allocated_textures.push_back(rt->color); + RBMap::Element *fbo = rt->overridden.velocity_fbo_cache.find(velocity_hash_key); + if (fbo != nullptr) { + rt->overridden.velocity_fbo = fbo->get(); + create_new_velocity_fbo = false; } - if (p_depth_texture.is_null()) { - new_entry.allocated_textures.push_back(rt->depth); + + if (p_velocity_texture.is_null()) { + for (KeyValue &E : rt->overridden.velocity_fbo_cache) { + glDeleteFramebuffers(1, &E.value); + } + + rt->overridden.velocity_fbo_cache.clear(); + rt->overridden.velocity_fbo = 0; + create_new_velocity_fbo = false; + } + + if (create_new_color_fbo) { + _update_render_target_color(rt); + + RenderTarget::RTOverridden::FBOCacheEntry new_entry; + new_entry.fbo = rt->fbo; + new_entry.color = rt->color; + new_entry.depth = rt->depth; + new_entry.depth_has_stencil = rt->depth_has_stencil; // Read back on cache hits above, so it must be stored with the entry (the field has no default). + new_entry.size = rt->size; + // Keep track of any textures we had to allocate because they weren't overridden.
+ if (p_color_texture.is_null()) { + new_entry.allocated_textures.push_back(rt->color); + } + if (p_depth_texture.is_null()) { + new_entry.allocated_textures.push_back(rt->depth); + } + rt->overridden.fbo_cache.insert(hash_key, new_entry); + } + + if (create_new_velocity_fbo) { + _update_render_target_velocity(rt); + rt->overridden.velocity_fbo_cache.insert(velocity_hash_key, rt->overridden.velocity_fbo); } - rt->overridden.fbo_cache.insert(hash_key, new_entry); } RID TextureStorage::render_target_get_override_color(RID p_render_target) const { @@ -2813,6 +2897,13 @@ RID TextureStorage::render_target_get_override_velocity(RID p_render_target) con return rt->overridden.velocity; } +RID TextureStorage::render_target_get_override_velocity_depth(RID p_render_target) const { + RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_NULL_V(rt, RID()); + + return rt->overridden.velocity_depth; +} + void TextureStorage::render_target_set_render_region(RID p_render_target, const Rect2i &p_render_region) { RenderTarget *rt = render_target_owner.get_or_null(p_render_target); ERR_FAIL_NULL(rt); @@ -2838,6 +2929,20 @@ RID TextureStorage::render_target_get_texture(RID p_render_target) { return rt->texture; } +void TextureStorage::render_target_set_velocity_target_size(RID p_render_target, const Size2i &p_target_size) { + RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_NULL(rt); + + rt->velocity_target_size = p_target_size; +} + +Size2i TextureStorage::render_target_get_velocity_target_size(RID p_render_target) const { + RenderTarget *rt = render_target_owner.get_or_null(p_render_target); + ERR_FAIL_NULL_V(rt, Size2i(0, 0)); + + return rt->velocity_target_size; +} + void TextureStorage::render_target_set_transparent(RID p_render_target, bool p_transparent) { RenderTarget *rt = render_target_owner.get_or_null(p_render_target); ERR_FAIL_NULL(rt); @@ -2846,7 +2951,7 @@ void TextureStorage::render_target_set_transparent(RID 
p_render_target, bool p_t if (rt->overridden.color.is_null()) { _clear_render_target(rt); - _update_render_target(rt); + _update_render_target_color(rt); } } @@ -2872,8 +2977,9 @@ void TextureStorage::render_target_set_direct_to_screen(RID p_render_target, boo rt->overridden.color = RID(); rt->overridden.depth = RID(); rt->overridden.velocity = RID(); + rt->overridden.velocity_depth = RID(); } - _update_render_target(rt); + _update_render_target_color(rt); } bool TextureStorage::render_target_get_direct_to_screen(RID p_render_target) const { @@ -2909,7 +3015,7 @@ void TextureStorage::render_target_set_msaa(RID p_render_target, RS::ViewportMSA _clear_render_target(rt); rt->msaa = p_msaa; - _update_render_target(rt); + _update_render_target_color(rt); } RS::ViewportMSAA TextureStorage::render_target_get_msaa(RID p_render_target) const { @@ -2929,7 +3035,7 @@ void TextureStorage::render_target_set_use_hdr(RID p_render_target, bool p_use_h _clear_render_target(rt); rt->hdr = p_use_hdr_2d; - _update_render_target(rt); + _update_render_target_color(rt); } bool TextureStorage::render_target_is_using_hdr(RID p_render_target) const { diff --git a/drivers/gles3/storage/texture_storage.h b/drivers/gles3/storage/texture_storage.h index 9c1aa1f63c8..009ea6d313f 100644 --- a/drivers/gles3/storage/texture_storage.h +++ b/drivers/gles3/storage/texture_storage.h @@ -359,6 +359,8 @@ struct RenderTarget { GLuint backbuffer_depth = 0; bool depth_has_stencil = true; + Size2i velocity_target_size; + bool hdr = false; // For Compatibility this effects both 2D and 3D rendering! 
GLuint color_internal_format = GL_RGBA8; GLuint color_format = GL_RGBA; @@ -390,6 +392,7 @@ struct RenderTarget { RID color; RID depth; RID velocity; + RID velocity_depth; struct FBOCacheEntry { GLuint fbo; @@ -400,6 +403,9 @@ struct RenderTarget { bool depth_has_stencil; }; RBMap fbo_cache; + + GLuint velocity_fbo = 0; + RBMap velocity_fbo_cache; } overridden; RID texture; @@ -464,7 +470,8 @@ private: mutable RID_Owner render_target_owner; void _clear_render_target(RenderTarget *rt); - void _update_render_target(RenderTarget *rt); + void _update_render_target_color(RenderTarget *rt); + void _update_render_target_velocity(RenderTarget *rt); void _create_render_target_backbuffer(RenderTarget *rt); void _render_target_allocate_sdf(RenderTarget *rt); void _render_target_clear_sdf(RenderTarget *rt); @@ -707,15 +714,15 @@ public: virtual RID render_target_get_override_color(RID p_render_target) const override; virtual RID render_target_get_override_depth(RID p_render_target) const override; virtual RID render_target_get_override_velocity(RID p_render_target) const override; - virtual RID render_target_get_override_velocity_depth(RID p_render_target) const override { return RID(); } + virtual RID render_target_get_override_velocity_depth(RID p_render_target) const override; virtual void render_target_set_render_region(RID p_render_target, const Rect2i &p_render_region) override; virtual Rect2i render_target_get_render_region(RID p_render_target) const override; virtual RID render_target_get_texture(RID p_render_target) override; - virtual void render_target_set_velocity_target_size(RID p_render_target, const Size2i &p_target_size) override {} - virtual Size2i render_target_get_velocity_target_size(RID p_render_target) const override { return Size2i(); } + virtual void render_target_set_velocity_target_size(RID p_render_target, const Size2i &p_target_size) override; + virtual Size2i render_target_get_velocity_target_size(RID p_render_target) const override; void 
bind_framebuffer(GLuint framebuffer) { glBindFramebuffer(GL_FRAMEBUFFER, framebuffer);