mirror of
https://github.com/godotengine/godot.git
synced 2025-10-19 16:03:29 +00:00
Merge pull request #111652 from DarioSamo/opaque-list-key
Organize render surface sorting key for optimizing API performance.
This commit is contained in:
commit
710a6e0303
5 changed files with 44 additions and 12 deletions
|
@ -844,7 +844,7 @@ void RenderForwardClustered::_fill_instance_data(RenderListType p_render_list, i
|
||||||
|
|
||||||
RenderElementInfo &element_info = rl->element_info[p_offset + i];
|
RenderElementInfo &element_info = rl->element_info[p_offset + i];
|
||||||
|
|
||||||
element_info.value = uint32_t((surface->sort.sort_key2 & 0x0FFF00000000) >> 32u);
|
element_info.value = uint32_t(surface->sort.sort_key1 & 0xFFF);
|
||||||
|
|
||||||
if (cant_repeat) {
|
if (cant_repeat) {
|
||||||
prev_surface = nullptr;
|
prev_surface = nullptr;
|
||||||
|
@ -4075,7 +4075,8 @@ void RenderForwardClustered::_geometry_instance_add_surface_with_material(Geomet
|
||||||
sdcache->sort.sort_key2 = 0;
|
sdcache->sort.sort_key2 = 0;
|
||||||
|
|
||||||
sdcache->sort.surface_index = p_surface;
|
sdcache->sort.surface_index = p_surface;
|
||||||
sdcache->sort.material_id = p_material_id;
|
sdcache->sort.material_id_hi = (p_material_id & 0xFF000000) >> 24;
|
||||||
|
sdcache->sort.material_id_lo = (p_material_id & 0x00FFFFFF);
|
||||||
sdcache->sort.shader_id = p_shader_id;
|
sdcache->sort.shader_id = p_shader_id;
|
||||||
sdcache->sort.geometry_id = p_mesh.get_local_index(); //only meshes can repeat anyway
|
sdcache->sort.geometry_id = p_mesh.get_local_index(); //only meshes can repeat anyway
|
||||||
sdcache->sort.uses_forward_gi = ginstance->can_sdfgi;
|
sdcache->sort.uses_forward_gi = ginstance->can_sdfgi;
|
||||||
|
|
|
@ -498,17 +498,23 @@ private:
|
||||||
uint64_t sort_key2;
|
uint64_t sort_key2;
|
||||||
};
|
};
|
||||||
struct {
|
struct {
|
||||||
uint64_t geometry_id : 32;
|
// The LOD index and flag bits below must stay grouped together at the start of the key, as RenderElementInfo takes its value by masking sort_key1 directly.
|
||||||
uint64_t material_id : 32;
|
|
||||||
|
|
||||||
uint64_t shader_id : 32;
|
|
||||||
uint64_t lod_index : 8;
|
uint64_t lod_index : 8;
|
||||||
uint64_t uses_softshadow : 1;
|
uint64_t uses_softshadow : 1;
|
||||||
uint64_t uses_projector : 1;
|
uint64_t uses_projector : 1;
|
||||||
uint64_t uses_forward_gi : 1;
|
uint64_t uses_forward_gi : 1;
|
||||||
uint64_t uses_lightmap : 1;
|
uint64_t uses_lightmap : 1;
|
||||||
|
|
||||||
|
// Sorted based on optimal order for respecting priority and reducing the amount of rebinding of shaders, materials,
|
||||||
|
// and geometry. The current order was found to be optimal in large projects. If you wish to measure
|
||||||
|
// differences, refer to RenderingDeviceGraph and the methods available to print statistics for draw lists.
|
||||||
uint64_t depth_layer : 4;
|
uint64_t depth_layer : 4;
|
||||||
uint64_t surface_index : 8;
|
uint64_t surface_index : 8;
|
||||||
|
uint64_t geometry_id : 32;
|
||||||
|
uint64_t material_id_hi : 8;
|
||||||
|
|
||||||
|
uint64_t material_id_lo : 24;
|
||||||
|
uint64_t shader_id : 32;
|
||||||
uint64_t priority : 8;
|
uint64_t priority : 8;
|
||||||
};
|
};
|
||||||
} sort;
|
} sort;
|
||||||
|
|
|
@ -1981,7 +1981,7 @@ void RenderForwardMobile::_fill_instance_data(RenderListType p_render_list, uint
|
||||||
RenderElementInfo &element_info = rl->element_info[p_offset + i];
|
RenderElementInfo &element_info = rl->element_info[p_offset + i];
|
||||||
|
|
||||||
// Sets lod_index and uses_lightmap at once.
|
// Sets lod_index and uses_lightmap at once.
|
||||||
element_info.value = uint32_t((surface->sort.sort_key2 & 0x01FF00000000) >> 32u);
|
element_info.value = uint32_t(surface->sort.sort_key1 & 0x1FF);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (p_update_buffer) {
|
if (p_update_buffer) {
|
||||||
|
@ -2764,7 +2764,8 @@ void RenderForwardMobile::_geometry_instance_add_surface_with_material(GeometryI
|
||||||
sdcache->sort.sort_key2 = 0;
|
sdcache->sort.sort_key2 = 0;
|
||||||
|
|
||||||
sdcache->sort.surface_index = p_surface;
|
sdcache->sort.surface_index = p_surface;
|
||||||
sdcache->sort.material_id = p_material_id;
|
sdcache->sort.material_id_hi = (p_material_id & 0xFF000000) >> 24;
|
||||||
|
sdcache->sort.material_id_lo = (p_material_id & 0x00FFFFFF);
|
||||||
sdcache->sort.shader_id = p_shader_id;
|
sdcache->sort.shader_id = p_shader_id;
|
||||||
sdcache->sort.geometry_id = p_mesh.get_local_index();
|
sdcache->sort.geometry_id = p_mesh.get_local_index();
|
||||||
sdcache->sort.priority = p_material->priority;
|
sdcache->sort.priority = p_material->priority;
|
||||||
|
|
|
@ -478,15 +478,21 @@ protected:
|
||||||
uint64_t sort_key2;
|
uint64_t sort_key2;
|
||||||
};
|
};
|
||||||
struct {
|
struct {
|
||||||
uint64_t geometry_id : 32;
|
// The LOD index and lightmap bit below must stay grouped together at the start of the key, as RenderElementInfo takes its value by masking sort_key1 directly.
|
||||||
uint64_t material_id : 32;
|
|
||||||
|
|
||||||
uint64_t shader_id : 32;
|
|
||||||
uint64_t lod_index : 8;
|
uint64_t lod_index : 8;
|
||||||
uint64_t uses_lightmap : 1;
|
uint64_t uses_lightmap : 1;
|
||||||
uint64_t pad : 3;
|
uint64_t pad : 3;
|
||||||
|
|
||||||
|
// Sorted based on optimal order for respecting priority and reducing the amount of rebinding of shaders, materials,
|
||||||
|
// and geometry. The current order was found to be optimal in large projects. If you wish to measure
|
||||||
|
// differences, refer to RenderingDeviceGraph and the methods available to print statistics for draw lists.
|
||||||
uint64_t depth_layer : 4;
|
uint64_t depth_layer : 4;
|
||||||
uint64_t surface_index : 8;
|
uint64_t surface_index : 8;
|
||||||
|
uint64_t geometry_id : 32;
|
||||||
|
uint64_t material_id_hi : 8;
|
||||||
|
|
||||||
|
uint64_t material_id_lo : 24;
|
||||||
|
uint64_t shader_id : 32;
|
||||||
uint64_t priority : 8;
|
uint64_t priority : 8;
|
||||||
};
|
};
|
||||||
} sort;
|
} sort;
|
||||||
|
|
|
@ -35,6 +35,9 @@
|
||||||
#define PRINT_RESOURCE_TRACKER_TOTAL 0
|
#define PRINT_RESOURCE_TRACKER_TOTAL 0
|
||||||
#define PRINT_COMMAND_RECORDING 0
|
#define PRINT_COMMAND_RECORDING 0
|
||||||
|
|
||||||
|
// Prints the total number of bytes used for draw lists in a frame.
|
||||||
|
#define PRINT_DRAW_LIST_STATS 0
|
||||||
|
|
||||||
RenderingDeviceGraph::RenderingDeviceGraph() {
|
RenderingDeviceGraph::RenderingDeviceGraph() {
|
||||||
driver_honors_barriers = false;
|
driver_honors_barriers = false;
|
||||||
driver_clears_with_copy_engine = false;
|
driver_clears_with_copy_engine = false;
|
||||||
|
@ -835,7 +838,15 @@ void RenderingDeviceGraph::_get_draw_list_render_pass_and_framebuffer(const Reco
|
||||||
r_framebuffer = it->value.framebuffer;
|
r_framebuffer = it->value.framebuffer;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if PRINT_DRAW_LIST_STATS
|
||||||
|
static uint32_t draw_list_total_size = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
void RenderingDeviceGraph::_run_draw_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) {
|
void RenderingDeviceGraph::_run_draw_list_command(RDD::CommandBufferID p_command_buffer, const uint8_t *p_instruction_data, uint32_t p_instruction_data_size) {
|
||||||
|
#if PRINT_DRAW_LIST_STATS
|
||||||
|
draw_list_total_size += p_instruction_data_size;
|
||||||
|
#endif
|
||||||
|
|
||||||
uint32_t instruction_data_cursor = 0;
|
uint32_t instruction_data_cursor = 0;
|
||||||
while (instruction_data_cursor < p_instruction_data_size) {
|
while (instruction_data_cursor < p_instruction_data_size) {
|
||||||
DEV_ASSERT((instruction_data_cursor + sizeof(DrawListInstruction)) <= p_instruction_data_size);
|
DEV_ASSERT((instruction_data_cursor + sizeof(DrawListInstruction)) <= p_instruction_data_size);
|
||||||
|
@ -2366,6 +2377,10 @@ void RenderingDeviceGraph::end(bool p_reorder_commands, bool p_full_barriers, RD
|
||||||
workarounds_state.draw_list_found = false;
|
workarounds_state.draw_list_found = false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if PRINT_DRAW_LIST_STATS
|
||||||
|
draw_list_total_size = 0;
|
||||||
|
#endif
|
||||||
|
|
||||||
if (p_reorder_commands) {
|
if (p_reorder_commands) {
|
||||||
#if PRINT_RENDER_GRAPH
|
#if PRINT_RENDER_GRAPH
|
||||||
print_line("BEFORE SORT");
|
print_line("BEFORE SORT");
|
||||||
|
@ -2416,6 +2431,9 @@ void RenderingDeviceGraph::end(bool p_reorder_commands, bool p_full_barriers, RD
|
||||||
|
|
||||||
_run_label_command_change(r_command_buffer, -1, -1, false, false, nullptr, 0, current_label_index, current_label_level);
|
_run_label_command_change(r_command_buffer, -1, -1, false, false, nullptr, 0, current_label_index, current_label_level);
|
||||||
|
|
||||||
|
#if PRINT_DRAW_LIST_STATS
|
||||||
|
print_line(vformat("Draw list %d bytes", draw_list_total_size));
|
||||||
|
#endif
|
||||||
#if PRINT_COMMAND_RECORDING
|
#if PRINT_COMMAND_RECORDING
|
||||||
print_line(vformat("Recorded %d commands", command_count));
|
print_line(vformat("Recorded %d commands", command_count));
|
||||||
#endif
|
#endif
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue