2019-06-22 19:34:26 +03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								/**************************************************************************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*  rendering_device.h                                                    */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**************************************************************************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*                         This file is part of:                          */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*                             GODOT ENGINE                               */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*                        https://godotengine.org                         */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**************************************************************************/  
						 
					
						
							
								
									
										
										
										
											2020-02-11 14:01:43 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */  
						 
					
						
							
								
									
										
										
										
											2019-06-22 19:34:26 +03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								/*                                                                        */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Permission is hereby granted, free of charge, to any person obtaining  */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* a copy of this software and associated documentation files (the        */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* "Software"), to deal in the Software without restriction, including    */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* without limitation the rights to use, copy, modify, merge, publish,    */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* distribute, sublicense, and/or sell copies of the Software, and to     */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* permit persons to whom the Software is furnished to do so, subject to  */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* the following conditions:                                              */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*                                                                        */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* The above copyright notice and this permission notice shall be         */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* included in all copies or substantial portions of the Software.        */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*                                                                        */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**************************************************************************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								# ifndef RENDERING_DEVICE_H 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# define RENDERING_DEVICE_H 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-11-07 19:33:38 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# include  "core/object/class_db.h" 
  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# include  "core/object/worker_thread_pool.h" 
  
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# include  "core/os/condition_variable.h" 
  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# include  "core/os/thread_safe.h" 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# include  "core/templates/local_vector.h" 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# include  "core/templates/oa_hash_map.h" 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# include  "core/templates/rid_owner.h" 
  
						 
					
						
							
								
									
										
										
										
											2020-11-07 19:33:38 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# include  "core/variant/typed_array.h" 
  
						 
					
						
							
								
									
										
										
										
											2020-03-03 22:51:12 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# include  "servers/display_server.h" 
  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# include  "servers/rendering/rendering_device_commons.h" 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# include  "servers/rendering/rendering_device_driver.h" 
  
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# include  "servers/rendering/rendering_device_graph.h" 
  
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class  RDTextureFormat ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class  RDTextureView ;  
						 
					
						
							
								
									
										
										
										
											2020-04-21 12:16:45 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class  RDAttachmentFormat ;  
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class  RDSamplerState ;  
						 
					
						
							
								
									
										
										
										
											2020-04-21 12:16:45 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class  RDVertexAttribute ;  
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class  RDShaderSource ;  
						 
					
						
							
								
									
										
										
											
												Implement Binary Shader Compilation
* Added an extra stage before compiling shader, which is generating a binary blob.
* On Vulkan, this allows caching the SPIRV reflection information, which is expensive to parse.
* On other (future) RenderingDevices, it allows caching converted binary data, such as DXIL or MSL.
This PR makes the shader cache include the reflection information, hence editor startup times are significantly improved.
I tested this well and it appears to work, and I added a lot of consistency checks, but because it includes writing and reading binary information, rare bugs may pop up, so be aware.
There was not much of a choice for storing the reflection information, given shaders can be a lot, take a lot of space and take time to parse.
											 
										 
										
											2021-07-25 11:22:55 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class  RDShaderSPIRV ;  
						 
					
						
							
								
									
										
										
										
											2022-08-31 19:24:04 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class  RDUniform ;  
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class  RDPipelineRasterizationState ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class  RDPipelineMultisampleState ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class  RDPipelineDepthStencilState ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								class  RDPipelineColorBlendState ;  
						 
					
						
							
								
									
										
										
										
											2021-06-24 10:58:36 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class  RDFramebufferPass ;  
						 
					
						
							
								
									
										
										
										
											2021-07-09 16:48:28 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class  RDPipelineSpecializationConstant ;  
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								class  RenderingDevice  :  public  RenderingDeviceCommons  {  
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									GDCLASS ( RenderingDevice ,  Object ) 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_THREAD_SAFE_CLASS_ 
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								private :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Identifier of a thread, stored per RenderingDevice instance.
									// NOTE(review): presumably the thread that performs rendering for this
									// device — confirm where it is assigned.
									Thread::ID render_thread_id;
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2019-07-28 19:58:32 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								public :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Language of the shader source text handed to the shader compile and
									// cache callbacks declared in this class. Values are spelled out so the
									// numeric encoding is visible at a glance.
									enum ShaderLanguage {
										SHADER_LANGUAGE_GLSL = 0,
										SHADER_LANGUAGE_HLSL = 1
									};
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Handle types for draw lists and compute lists. Both are plain signed
									// 64-bit integers, so negative values are representable.
									typedef int64_t DrawListID;
									typedef int64_t ComputeListID;
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-02-11 22:33:54 +11:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Callback signature: given a rendering device, returns a String.
									// This is the type of the static `get_spirv_cache_key_function` member.
									typedef String (*ShaderSPIRVGetCacheKeyFunction)(const RenderingDevice *p_render_device);

									// Callback signature: receives a shader stage, the source code, its
									// language, an out-pointer for an error message (`r_error`) and the
									// rendering device; returns a byte vector.
									// This is the type of the static `compile_to_spirv_function` member.
									typedef Vector<uint8_t> (*ShaderCompileToSPIRVFunction)(ShaderStage p_stage, const String &p_source_code, ShaderLanguage p_language, String *r_error, const RenderingDevice *p_render_device);
							 
						 
					
						
							
								
									
										
										
										
											2020-02-17 18:06:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Callback signature: receives a shader stage, the source code and its
									// language, and returns a byte vector. This is the type of the static
									// `cache_function` member.
									typedef  Vector < uint8_t >  ( * ShaderCacheFunction ) ( ShaderStage  p_stage ,  const  String  & p_source_code ,  ShaderLanguage  p_language ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-07-28 19:58:32 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-08-05 11:59:58 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Callback signature taking one untyped pointer and returning nothing.
									// NOTE(review): presumably the pointer is caller-supplied user data —
									// confirm at the site where the callback is registered.
									typedef  void  ( * InvalidationCallback ) ( void  * ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2019-07-28 19:58:32 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								private :  
						 
					
						
							
								
									
										
										
											
												Implement Binary Shader Compilation
* Added an extra stage before compiling shader, which is generating a binary blob.
* On Vulkan, this allows caching the SPIRV reflection information, which is expensive to parse.
* On other (future) RenderingDevices, it allows caching converted binary data, such as DXIL or MSL.
This PR makes the shader cache include the reflection information, hence editor startup times are significantly improved.
I tested this well and it appears to work, and I added a lot of consistency checks, but because it includes writing and reading binary information, rare bugs may pop up, so be aware.
There was not much of a choice for storing the reflection information, given shaders can be a lot, take a lot of space and take time to parse.
											 
										 
										
											2021-07-25 11:22:55 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Static (shared by all instances) pointer to a
									// ShaderCompileToSPIRVFunction callback.
									static  ShaderCompileToSPIRVFunction  compile_to_spirv_function ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-07-28 19:58:32 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Static (shared by all instances) pointer to a ShaderCacheFunction
									// callback.
									static  ShaderCacheFunction  cache_function ; 
							 
						 
					
						
							
								
									
										
										
											
												Implement Binary Shader Compilation
* Added an extra stage before compiling shader, which is generating a binary blob.
* On Vulkan, this allows caching the SPIRV reflection information, which is expensive to parse.
* On other (future) RenderingDevices, it allows caching converted binary data, such as DXIL or MSL.
This PR makes the shader cache include the reflection information, hence editor startup times are significantly improved.
I tested this well and it appears to work, and I added a lot of consistency checks, but because it includes writing and reading binary information, rare bugs may pop up, so be aware.
There was not much of a choice for storing the reflection information, given shaders can be a lot, take a lot of space and take time to parse.
											 
										 
										
											2021-07-25 11:22:55 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Static (shared by all instances) pointer to a
									// ShaderSPIRVGetCacheKeyFunction callback.
									static  ShaderSPIRVGetCacheKeyFunction  get_spirv_cache_key_function ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-15 23:45:24 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Static pointer to a RenderingDevice instance.
									// NOTE(review): presumably the main/global device — confirm where it is
									// assigned and cleared.
									static  RenderingDevice  * singleton ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-19 17:03:19 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Rendering context driver used by this device; null by default.
									RenderingContextDriver *context = nullptr;
									// Rendering device driver used by this device; null by default.
									RenderingDeviceDriver *driver = nullptr;
									// Device record of the nested type RenderingContextDriver::Device.
									RenderingContextDriver::Device device;
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-08-09 23:53:40 -07:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Per-instance state flags; both start out false.
									// NOTE(review): exact meaning is defined by the implementation file —
									// confirm there before relying on either flag.
									bool local_device_processing = false;
									bool is_main_instance = false;
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								protected :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Static hook declared in the protected section, taking no arguments.
									// NOTE(review): presumably called by the GDCLASS machinery to register
									// this class's methods — confirm against the class registration code.
									static  void  _bind_methods ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-08-02 14:45:44 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								#ifndef DISABLE_DEPRECATED
									// Declarations below exist only while DISABLE_DEPRECATED is not defined.

									// Compatibility variant of shader creation from bytecode that takes only
									// the shader binary and returns the resulting RID.
									RID _shader_create_from_bytecode_bind_compat_79606(const Vector<uint8_t> &p_shader_binary);

									// Static hook for the compatibility bindings.
									// NOTE(review): presumably registers the `_bind_compat_*` methods above —
									// confirm in the implementation file.
									static void _bind_compatibility_methods();
								#endif
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									/***************************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** ID INFRASTRUCTURE ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/***************************/ 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								public :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//base numeric ID for all types
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Sentinel for "no valid format ID". It is negative, so it can never
									// collide with a non-negative ID.
									enum {
										INVALID_FORMAT_ID = -1
									};
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Categories of numeric ID, followed by the shift/mask constants that go
									// with them.
									// NOTE(review): presumably the category is stored in the bits at and above
									// ID_BASE_SHIFT of a 64-bit ID — confirm against the implementation.
									enum IDType {
										ID_TYPE_FRAMEBUFFER_FORMAT = 0,
										ID_TYPE_VERTEX_FORMAT = 1,
										ID_TYPE_DRAW_LIST = 2,
										// NOTE(review): value 3 is skipped; presumably reserved or retired —
										// confirm before renumbering.
										ID_TYPE_COMPUTE_LIST = 4,
										ID_TYPE_MAX, // One past ID_TYPE_COMPUTE_LIST (5).
										ID_BASE_SHIFT = 58, // 5 bits for ID types.
										// NOTE(review): this evaluates to 57 (0b111001), which is not a
										// contiguous low-bit mask such as (1 << ID_BASE_SHIFT) - 1. Left
										// unchanged because its users are not visible here.
										ID_MASK = (ID_BASE_SHIFT - 1),
									};
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								private :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// For each ID, the set of IDs that depend on it.
									HashMap<RID, HashSet<RID>> dependency_map;
									// The same relation stored in the opposite direction.
									HashMap<RID, HashSet<RID>> reverse_dependency_map;

									// Takes an ID and the ID it depends on.
									// NOTE(review): presumably records the relation in both maps above —
									// confirm in the implementation file.
									void _add_dependency(RID p_id, RID p_depends_on);
									// Takes the ID whose dependencies are to be freed.
									// NOTE(review): whether dependents are freed recursively is not visible
									// from this declaration — confirm in the implementation file.
									void _free_dependencies(RID p_id);
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								private :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/***************************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** BUFFER MANAGEMENT ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/***************************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// These are temporary buffers on CPU memory that hold
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// the information until the CPU fetches it and places it
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// either on GPU buffers, or images (textures). It ensures
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// updates are properly synchronized with whatever the
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// GPU is doing.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// The logic here is as follows: only 3 of these
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// blocks are created at the beginning (one per frame);
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// they can each belong to a frame (assigned to current when
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// used) and they can only be reused after the same frame is
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// recycled.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// When the CPU needs to allocate more than what is available,
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// more of these buffers are created. If a limit is reached,
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// then a fence will ensure we wait until blocks allocated
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// in previous frames are processed. If that fails, then
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// another fence will ensure everything pending for the current
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// frame is processed (effectively stalling).
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// See the comments in the code to understand better how it works.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// One block of the CPU-side staging memory described in the comment above.
									// Each block pairs a driver buffer handle with per-block bookkeeping.
									struct  StagingBufferBlock  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Driver-level handle (RDD::BufferID) of the buffer backing this block.
										RDD : : BufferID  driver_id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// NOTE(review): presumably the frame this block was last assigned to (a block is only
										// reused once that frame is recycled) — confirm against the implementation.
										uint64_t  frame_used  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// NOTE(review): presumably how much of the block is already occupied, in bytes — confirm.
										uint32_t  fill_amount  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// The list of staging blocks (see StagingBufferBlock).
									Vector < StagingBufferBlock >  staging_buffer_blocks ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// NOTE(review): presumably the index into staging_buffer_blocks of the block currently in use — confirm.
									int  staging_buffer_current  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// NOTE(review): presumably the size of each staging block — confirm the unit (bytes?) and where it is configured.
									uint32_t  staging_buffer_block_size  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// NOTE(review): presumably the limit on total staging memory referred to by "If a limit is reached" above — confirm.
									uint64_t  staging_buffer_max_size  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// NOTE(review): presumably set once the staging buffer has been used in the current frame — confirm where it is reset.
									bool  staging_buffer_used  =  false ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Synchronization action associated with a staging allocation. A value of this type is
									// passed by reference to _staging_buffer_allocate() (r_required_action) and by value to
									// _staging_buffer_execute_required_action().
									enum  StagingRequiredAction  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// NOTE(review): presumably means no extra synchronization is needed — confirm.
										STAGING_REQUIRED_ACTION_NONE , 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// NOTE(review): presumably the "everything pending for the current frame is processed
										// (effectively stalling)" case from the staging comment above — confirm.
										STAGING_REQUIRED_ACTION_FLUSH_AND_STALL_ALL , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// NOTE(review): presumably the case where blocks allocated in previous frames must be
										// waited for — confirm.
										STAGING_REQUIRED_ACTION_STALL_PREVIOUS 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Staging allocation entry point; returns an Error code.
									// Takes the requested amount and the required alignment. r_alloc_offset, r_alloc_size and
									// r_required_action are non-const references (NOTE(review): presumably outputs describing the
									// allocation and the StagingRequiredAction the caller must execute — confirm).
									// p_can_segment defaults to true (NOTE(review): presumably lets the call hand back less than
									// p_amount so the caller uploads in segments — confirm).
									Error  _staging_buffer_allocate ( uint32_t  p_amount ,  uint32_t  p_required_align ,  uint32_t  & r_alloc_offset ,  uint32_t  & r_alloc_size ,  StagingRequiredAction  & r_required_action ,  bool  p_can_segment  =  true ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Takes a StagingRequiredAction by value.
									// NOTE(review): presumably performs the flush/stall that _staging_buffer_allocate() requested — confirm.
									void  _staging_buffer_execute_required_action ( StagingRequiredAction  p_required_action ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// NOTE(review): presumably creates a new StagingBufferBlock and inserts it into staging_buffer_blocks — confirm.
									// Returns an Error code.
									Error  _insert_staging_block ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Bookkeeping record for a buffer resource. This is the element type of the
									// RID_Owner<Buffer, true> members declared in this class (uniform, storage and texture buffers).
									struct  Buffer  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Driver-level handle (RDD::BufferID) of the buffer.
										RDD : : BufferID  driver_id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// NOTE(review): presumably the buffer size in bytes — confirm.
										uint32_t  size  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Bit field of RDD::BufferUsageBits flags for this buffer.
										BitField < RDD : : BufferUsageBits >  usage ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// Pointer to an RDG::ResourceTracker; null by default.
										// NOTE(review): presumably how the rendering graph tracks this buffer's usage — confirm ownership/lifetime.
										RDG : : ResourceTracker  * draw_tracker  =  nullptr ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// Defaults to -1. NOTE(review): presumably -1 means no transfer worker is associated with this buffer — confirm.
										int32_t  transfer_worker_index  =  - 1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// NOTE(review): presumably identifies the operation submitted on that transfer worker — confirm.
										uint64_t  transfer_worker_operation  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Returns a pointer to the Buffer record for the given RID.
									// NOTE(review): presumably searches the buffer RID_Owner members and returns nullptr when the RID is unknown — confirm.
									Buffer  * _get_buffer_from_owner ( RID  p_buffer ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Takes a Buffer record plus a data pointer and its size; p_required_align defaults to 32.
									// NOTE(review): presumably uploads p_data_size bytes from p_data as the buffer's initial contents — confirm.
									// Returns an Error code.
									Error  _buffer_initialize ( Buffer  * p_buffer ,  const  uint8_t  * p_data ,  size_t  p_data_size ,  uint32_t  p_required_align  =  32 ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// NOTE(review): presumably refreshes perf_report_text from the gpu_copy_count / copy_bytes_count counters — confirm.
									void  update_perf_report ( ) ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when working around driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (especially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
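									// When true, multiple consecutive uniform sets are bound with a single batched command
									// instead of one command per set. Setting it to false enforces the old behavior (useful for debugging).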
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									bool  descriptor_set_batching  =  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// When true, the final draw call that copies our offscreen result into the Swapchain is put into its
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// own cmd buffer, so that the whole rendering can start early instead of having to wait for the
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// swapchain semaphore to be signaled (which causes bubbles).
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									bool  split_swapchain_into_its_own_cmd_buffer  =  true ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// NOTE(review): presumably counts GPU copy operations for the performance report — confirm where it is incremented and reset.
									uint32_t  gpu_copy_count  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// NOTE(review): presumably accumulates the number of bytes copied for the performance report — confirm where it is reset.
									uint32_t  copy_bytes_count  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// NOTE(review): presumably the text produced by update_perf_report() — confirm.
									String  perf_report_text ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// RID owner holding the Buffer records of uniform buffers.
									RID_Owner < Buffer ,  true >  uniform_buffer_owner ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// RID owner holding the Buffer records of storage buffers.
									RID_Owner < Buffer ,  true >  storage_buffer_owner ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// RID owner holding the Buffer records of texture buffers.
									RID_Owner < Buffer ,  true >  texture_buffer_owner ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-08-04 18:40:39 +10:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								public :  
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Copies between two buffers identified by RID; returns an Error code.
									// NOTE(review): by the parameter names, copies p_size bytes from p_src_offset in p_src_buffer to
									// p_dst_offset in p_dst_buffer — confirm units and bounds checking in the implementation.
									Error  buffer_copy ( RID  p_src_buffer ,  RID  p_dst_buffer ,  uint32_t  p_src_offset ,  uint32_t  p_dst_offset ,  uint32_t  p_size ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Updates a buffer from caller-provided memory; returns an Error code.
									// NOTE(review): by the parameter names, writes p_size bytes from p_data at p_offset — confirm that
									// p_data must point to at least p_size readable bytes.
									Error  buffer_update ( RID  p_buffer ,  uint32_t  p_offset ,  uint32_t  p_size ,  const  void  * p_data ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Clears a region of a buffer; returns an Error code.
									// NOTE(review): presumably zero-fills p_size bytes starting at p_offset — confirm the fill value and any alignment requirements.
									Error  buffer_clear ( RID  p_buffer ,  uint32_t  p_offset ,  uint32_t  p_size ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Returns buffer contents as a byte vector. p_offset and p_size both default to 0
									// (NOTE(review): presumably p_size == 0 means "read to the end of the buffer" — confirm).
									Vector < uint8_t >  buffer_get_data ( RID  p_buffer ,  uint32_t  p_offset  =  0 ,  uint32_t  p_size  =  0 ) ;  // This causes a stall; only use it to retrieve large buffers for saving.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/*****************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** TEXTURE ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/*****************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// In modern APIs, the concept of textures may not exist;
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// instead there is the image (the memory, pretty much),
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// the view (how the memory is interpreted) and the
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// sampler (how it's sampled from the shader).
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Texture here includes the first two stages, but
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// it's possible to create textures sharing the image
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// but with different views. The main use case for this
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// is textures that can be read as both SRGB/Linear,
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// or slices of a texture (a mipmap, a layer, a 3D slice)
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// for a framebuffer to render into it.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									struct  Texture  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-04-24 14:30:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										struct  SharedFallback  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  revision  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											RDD : : TextureID  texture ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											RDG : : ResourceTracker  * texture_tracker  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											RDD : : BufferID  buffer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											RDG : : ResourceTracker  * buffer_tracker  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											bool  raw_reinterpretation  =  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										RDD : : TextureID  driver_id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										TextureType  type  =  TEXTURE_TYPE_MAX ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										DataFormat  format  =  DATA_FORMAT_MAX ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										TextureSamples  samples  =  TEXTURE_SAMPLES_MAX ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										TextureSliceType  slice_type  =  TEXTURE_SLICE_MAX ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										Rect2i  slice_rect ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										uint32_t  width  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  height  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  depth  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  layers  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  mipmaps  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  usage_flags  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  base_mipmap  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  base_layer  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										Vector < DataFormat >  allowed_shared_formats ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										bool  is_resolve_buffer  =  false ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-24 16:01:00 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										bool  is_discardable  =  false ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										bool  has_initial_data  =  false ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										BitField < RDD : : TextureAspectBits >  read_aspect_flags ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										BitField < RDD : : TextureAspectBits >  barrier_aspect_flags ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										bool  bound  =  false ;  // Bound to framebuffer.
 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										RID  owner ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										RDG : : ResourceTracker  * draw_tracker  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										HashMap < Rect2i ,  RDG : : ResourceTracker  * >  slice_trackers ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-04-24 14:30:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										SharedFallback  * shared_fallback  =  nullptr ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										int32_t  transfer_worker_index  =  - 1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint64_t  transfer_worker_operation  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Builds a subresource range from this texture's own fields:
										// the barrier aspect flags, the mipmap span (base_mipmap, mipmaps)
										// and the layer span (base_layer, layers).
										RDD::TextureSubresourceRange barrier_range() const {
											RDD::TextureSubresourceRange range;
											// Layer span.
											range.base_layer = base_layer;
											range.layer_count = layers;
											// Mipmap span.
											range.base_mipmap = base_mipmap;
											range.mipmap_count = mipmaps;
											// Aspects come from the barrier flags, not the read flags.
											range.aspect = barrier_aspect_flags;
											return range;
										}
							 
						 
					
						
							
								
									
										
										
										
											2024-04-24 14:30:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Returns a TextureFormat filled in from the properties stored
										// on this texture. Only the fields assigned below are set; any
										// other TextureFormat member keeps its default value.
										TextureFormat texture_format() const {
											TextureFormat result;
											// Kind of texture and pixel layout.
											result.texture_type = type;
											result.format = format;
											result.samples = samples;
											// Dimensions.
											result.width = width;
											result.height = height;
											result.depth = depth;
											result.array_layers = layers;
											result.mipmaps = mipmaps;
											// Usage and sharing information.
											result.usage_bits = usage_flags;
											result.shareable_formats = allowed_shared_formats;
											// Boolean attributes.
											result.is_resolve_buffer = is_resolve_buffer;
											result.is_discardable = is_discardable;
											return result;
										}
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RID_Owner < Texture ,  true >  texture_owner ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									uint32_t  texture_upload_region_size_px  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Vector < uint8_t >  _texture_get_data ( Texture  * tex ,  uint32_t  p_layer ,  bool  p_2d  =  false ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									uint32_t  _texture_layer_count ( Texture  * p_texture )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  _texture_alignment ( Texture  * p_texture )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Error  _texture_initialize ( RID  p_texture ,  uint32_t  p_layer ,  const  Vector < uint8_t >  & p_data ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-04-24 14:30:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  _texture_check_shared_fallback ( Texture  * p_texture ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  _texture_update_shared_fallback ( RID  p_texture_rid ,  Texture  * p_texture ,  bool  p_for_writing ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  _texture_free_shared_fallback ( Texture  * p_texture ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  _texture_copy_shared ( RID  p_src_texture_rid ,  Texture  * p_src_texture ,  RID  p_dst_texture_rid ,  Texture  * p_dst_texture ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  _texture_create_reinterpret_buffer ( Texture  * p_texture ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								public :  
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									struct  TextureView  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										DataFormat  format_override  =  DATA_FORMAT_MAX ;  // // Means, use same as format.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										TextureSwizzle  swizzle_r  =  TEXTURE_SWIZZLE_R ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										TextureSwizzle  swizzle_g  =  TEXTURE_SWIZZLE_G ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										TextureSwizzle  swizzle_b  =  TEXTURE_SWIZZLE_B ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										TextureSwizzle  swizzle_a  =  TEXTURE_SWIZZLE_A ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										bool  operator = = ( const  TextureView  & p_other )  const  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( format_override  ! =  p_other . format_override )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-09-22 18:38:02 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												return  false ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											}  else  if  ( swizzle_r  ! =  p_other . swizzle_r )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-09-22 18:38:02 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												return  false ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											}  else  if  ( swizzle_g  ! =  p_other . swizzle_g )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-09-22 18:38:02 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												return  false ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											}  else  if  ( swizzle_b  ! =  p_other . swizzle_b )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-09-22 18:38:02 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												return  false ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											}  else  if  ( swizzle_a  ! =  p_other . swizzle_a )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-09-22 18:38:02 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												return  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												return  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RID  texture_create ( const  TextureFormat  & p_format ,  const  TextureView  & p_view ,  const  Vector < Vector < uint8_t > >  & p_data  =  Vector < Vector < uint8_t > > ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RID  texture_create_shared ( const  TextureView  & p_view ,  RID  p_with_texture ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RID  texture_create_from_extension ( TextureType  p_type ,  DataFormat  p_format ,  TextureSamples  p_samples ,  BitField < RenderingDevice : : TextureUsageBits >  p_usage ,  uint64_t  p_image ,  uint64_t  p_width ,  uint64_t  p_height ,  uint64_t  p_depth ,  uint64_t  p_layers ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RID  texture_create_shared_from_slice ( const  TextureView  & p_view ,  RID  p_with_texture ,  uint32_t  p_layer ,  uint32_t  p_mipmap ,  uint32_t  p_mipmaps  =  1 ,  TextureSliceType  p_slice_type  =  TEXTURE_SLICE_2D ,  uint32_t  p_layers  =  0 ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									Error  texture_update ( RID  p_texture ,  uint32_t  p_layer ,  const  Vector < uint8_t >  & p_data ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									Vector < uint8_t >  texture_get_data ( RID  p_texture ,  uint32_t  p_layer ) ;  // CPU textures will return immediately, while GPU textures will most likely force a flush
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									bool  texture_is_format_supported_for_usage ( DataFormat  p_format ,  BitField < TextureUsageBits >  p_usage )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									bool  texture_is_shared ( RID  p_texture ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									bool  texture_is_valid ( RID  p_texture ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									TextureFormat  texture_get_format ( RID  p_texture ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Size2i  texture_size ( RID  p_texture ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# ifndef DISABLE_DEPRECATED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint64_t  texture_get_native_handle ( RID  p_texture ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2019-08-26 17:43:58 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									Error  texture_copy ( RID  p_from_texture ,  RID  p_to_texture ,  const  Vector3  & p_from ,  const  Vector3  & p_to ,  const  Vector3  & p_size ,  uint32_t  p_src_mipmap ,  uint32_t  p_dst_mipmap ,  uint32_t  p_src_layer ,  uint32_t  p_dst_layer ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Error  texture_clear ( RID  p_texture ,  const  Color  & p_color ,  uint32_t  p_base_mipmap ,  uint32_t  p_mipmaps ,  uint32_t  p_base_layer ,  uint32_t  p_layers ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Error  texture_resolve_multisample ( RID  p_from_texture ,  RID  p_to_texture ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-07-10 17:44:55 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-10-24 16:01:00 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  texture_set_discardable ( RID  p_texture ,  bool  p_discardable ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									bool  texture_is_discardable ( RID  p_texture ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-10-03 17:39:08 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									/*********************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** FRAMEBUFFER ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/*********************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// In modern APIs, framebuffers generally work similarly to how they
									// do in OpenGL, with the exception that the "format"
									// (RDD::RenderPassID) is not dynamic and must be more or less the
									// same as the one used for the render pipelines.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									struct  AttachmentFormat  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-05-24 15:07:22 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										enum  :   uint32_t  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											UNUSED_ATTACHMENT  =  0xFFFFFFFF 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										DataFormat  format ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										TextureSamples  samples ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  usage_flags ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-07-12 10:12:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										AttachmentFormat ( )  { 
							 
						 
					
						
							
								
									
										
										
										
											2019-07-27 10:23:24 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											format  =  DATA_FORMAT_R8G8B8A8_UNORM ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											samples  =  TEXTURE_SAMPLES_1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											usage_flags  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-07-12 10:12:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-06-24 10:58:36 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Attachment references used by one pass of a framebuffer.
									// NOTE(review): the int32_t values are presumably indices into the
									// framebuffer's attachment list — confirm against the implementation.
									struct FramebufferPass {
										Vector<int32_t> color_attachments;
										Vector<int32_t> input_attachments;
										Vector<int32_t> resolve_attachments;
										Vector<int32_t> preserve_attachments;
										// Both single attachments default to ATTACHMENT_UNUSED.
										int32_t depth_attachment = ATTACHMENT_UNUSED;
										// Density map for VRS, only used if supported.
										int32_t vrs_attachment = ATTACHMENT_UNUSED;
									};
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									typedef  int64_t  FramebufferFormatID ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								private :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									struct  FramebufferFormatKey  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										Vector < AttachmentFormat >  attachments ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										Vector < FramebufferPass >  passes ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  view_count  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										bool  operator < ( const  FramebufferFormatKey  & p_key )  const  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( view_count  ! =  p_key . view_count )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												return  view_count  <  p_key . view_count ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											uint32_t  pass_size  =  passes . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  key_pass_size  =  p_key . passes . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( pass_size  ! =  key_pass_size )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												return  pass_size  <  key_pass_size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  FramebufferPass  * pass_ptr  =  passes . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  FramebufferPass  * key_pass_ptr  =  p_key . passes . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											for  ( uint32_t  i  =  0 ;  i  <  pass_size ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												{  // Compare color attachments.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													uint32_t  attachment_size  =  pass_ptr [ i ] . color_attachments . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													uint32_t  key_attachment_size  =  key_pass_ptr [ i ] . color_attachments . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													if  ( attachment_size  ! =  key_attachment_size )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														return  attachment_size  <  key_attachment_size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													const  int32_t  * pass_attachment_ptr  =  pass_ptr [ i ] . color_attachments . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													const  int32_t  * key_pass_attachment_ptr  =  key_pass_ptr [ i ] . color_attachments . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													for  ( uint32_t  j  =  0 ;  j  <  attachment_size ;  j + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														if  ( pass_attachment_ptr [ j ]  ! =  key_pass_attachment_ptr [ j ] )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
															return  pass_attachment_ptr [ j ]  <  key_pass_attachment_ptr [ j ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												{  // Compare input attachments.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													uint32_t  attachment_size  =  pass_ptr [ i ] . input_attachments . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													uint32_t  key_attachment_size  =  key_pass_ptr [ i ] . input_attachments . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													if  ( attachment_size  ! =  key_attachment_size )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														return  attachment_size  <  key_attachment_size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													const  int32_t  * pass_attachment_ptr  =  pass_ptr [ i ] . input_attachments . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													const  int32_t  * key_pass_attachment_ptr  =  key_pass_ptr [ i ] . input_attachments . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													for  ( uint32_t  j  =  0 ;  j  <  attachment_size ;  j + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														if  ( pass_attachment_ptr [ j ]  ! =  key_pass_attachment_ptr [ j ] )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
															return  pass_attachment_ptr [ j ]  <  key_pass_attachment_ptr [ j ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												{  // Compare resolve attachments.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													uint32_t  attachment_size  =  pass_ptr [ i ] . resolve_attachments . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													uint32_t  key_attachment_size  =  key_pass_ptr [ i ] . resolve_attachments . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													if  ( attachment_size  ! =  key_attachment_size )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														return  attachment_size  <  key_attachment_size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													const  int32_t  * pass_attachment_ptr  =  pass_ptr [ i ] . resolve_attachments . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													const  int32_t  * key_pass_attachment_ptr  =  key_pass_ptr [ i ] . resolve_attachments . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													for  ( uint32_t  j  =  0 ;  j  <  attachment_size ;  j + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														if  ( pass_attachment_ptr [ j ]  ! =  key_pass_attachment_ptr [ j ] )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
															return  pass_attachment_ptr [ j ]  <  key_pass_attachment_ptr [ j ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												{  // Compare preserve attachments.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													uint32_t  attachment_size  =  pass_ptr [ i ] . preserve_attachments . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													uint32_t  key_attachment_size  =  key_pass_ptr [ i ] . preserve_attachments . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													if  ( attachment_size  ! =  key_attachment_size )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														return  attachment_size  <  key_attachment_size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													const  int32_t  * pass_attachment_ptr  =  pass_ptr [ i ] . preserve_attachments . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													const  int32_t  * key_pass_attachment_ptr  =  key_pass_ptr [ i ] . preserve_attachments . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													for  ( uint32_t  j  =  0 ;  j  <  attachment_size ;  j + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														if  ( pass_attachment_ptr [ j ]  ! =  key_pass_attachment_ptr [ j ] )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
															return  pass_attachment_ptr [ j ]  <  key_pass_attachment_ptr [ j ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												if  ( pass_ptr [ i ] . depth_attachment  ! =  key_pass_ptr [ i ] . depth_attachment )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													return  pass_ptr [ i ] . depth_attachment  <  key_pass_ptr [ i ] . depth_attachment ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											int  as  =  attachments . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											int  bs  =  p_key . attachments . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( as  ! =  bs )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												return  as  <  bs ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											const  AttachmentFormat  * af_a  =  attachments . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  AttachmentFormat  * af_b  =  p_key . attachments . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											for  ( int  i  =  0 ;  i  <  as ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												const  AttachmentFormat  & a  =  af_a [ i ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												const  AttachmentFormat  & b  =  af_b [ i ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												if  ( a . format  ! =  b . format )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													return  a . format  <  b . format ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												if  ( a . samples  ! =  b . samples )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													return  a . samples  <  b . samples ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												if  ( a . usage_flags  ! =  b . usage_flags )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													return  a . usage_flags  <  b . usage_flags ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											return  false ;  // Equal.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-10-24 16:01:00 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									static  RDD : : RenderPassID  _render_pass_create ( RenderingDeviceDriver  * p_driver ,  const  Vector < AttachmentFormat >  & p_attachments ,  const  Vector < FramebufferPass >  & p_passes ,  VectorView < RDD : : AttachmentLoadOp >  p_load_ops ,  VectorView < RDD : : AttachmentStoreOp >  p_store_ops ,  uint32_t  p_view_count  =  1 ,  Vector < TextureSamples >  * r_samples  =  nullptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									static  RDD : : RenderPassID  _render_pass_create_from_graph ( RenderingDeviceDriver  * p_driver ,  VectorView < RDD : : AttachmentLoadOp >  p_load_ops ,  VectorView < RDD : : AttachmentStoreOp >  p_store_ops ,  void  * p_user_data ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// This is a cache and it's never freed, it ensures
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// IDs for a given format are always unique.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RBMap < FramebufferFormatKey ,  FramebufferFormatID >  framebuffer_format_cache ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Per-format data, stored in framebuffer_formats under its FramebufferFormatID.
									struct FramebufferFormat {
										// Map element (FramebufferFormatKey -> FramebufferFormatID) associated with this format.
										// Initialized to null so a default-constructed entry never carries an indeterminate pointer.
										const RBMap<FramebufferFormatKey, FramebufferFormatID>::Element *E = nullptr;
										RDD::RenderPassID render_pass; // Here for constructing shaders, never used, see section (7.2. Render Pass Compatibility from Vulkan spec).
										Vector<TextureSamples> pass_samples;
										uint32_t view_count = 1; // Number of views.
									};
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									HashMap < FramebufferFormatID ,  FramebufferFormat >  framebuffer_formats ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									struct  Framebuffer  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-24 16:01:00 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										RenderingDevice  * rendering_device  =  nullptr ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										FramebufferFormatID  format_id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  storage_mask  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										Vector < RID >  texture_ids ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										InvalidationCallback  invalidated_callback  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										void  * invalidated_callback_userdata  =  nullptr ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-24 16:01:00 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										RDG : : FramebufferCache  * framebuffer_cache  =  nullptr ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										Size2  size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  view_count ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RID_Owner < Framebuffer ,  true >  framebuffer_owner ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								public :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// This ID is guaranteed to be unique for the same formats; it does not need to be freed.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									FramebufferFormatID  framebuffer_format_create ( const  Vector < AttachmentFormat >  & p_format ,  uint32_t  p_view_count  =  1 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									FramebufferFormatID  framebuffer_format_create_multipass ( const  Vector < AttachmentFormat >  & p_attachments ,  const  Vector < FramebufferPass >  & p_passes ,  uint32_t  p_view_count  =  1 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									FramebufferFormatID  framebuffer_format_create_empty ( TextureSamples  p_samples  =  TEXTURE_SAMPLES_1 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									TextureSamples  framebuffer_format_get_texture_samples ( FramebufferFormatID  p_format ,  uint32_t  p_pass  =  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RID  framebuffer_create ( const  Vector < RID >  & p_texture_attachments ,  FramebufferFormatID  p_format_check  =  INVALID_ID ,  uint32_t  p_view_count  =  1 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RID  framebuffer_create_multipass ( const  Vector < RID >  & p_texture_attachments ,  const  Vector < FramebufferPass >  & p_passes ,  FramebufferFormatID  p_format_check  =  INVALID_ID ,  uint32_t  p_view_count  =  1 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RID  framebuffer_create_empty ( const  Size2i  & p_size ,  TextureSamples  p_samples  =  TEXTURE_SAMPLES_1 ,  FramebufferFormatID  p_format_check  =  INVALID_ID ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									bool  framebuffer_is_valid ( RID  p_framebuffer )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  framebuffer_set_invalidation_callback ( RID  p_framebuffer ,  InvalidationCallback  p_callback ,  void  * p_userdata ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									FramebufferFormatID  framebuffer_get_format ( RID  p_framebuffer ) ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers require that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozens/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when working around driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (especially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); // ! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I discussed this with Dario and he is aware of it.**
Secondary cmd buffers are currently disabled due to other issues (they're
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot
practically never renders directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!** Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									Size2  framebuffer_get_size ( RID  p_framebuffer ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/*****************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** SAMPLER ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/*****************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								private :  
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RID_Owner < RDD : : SamplerID ,  true >  sampler_owner ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								public :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RID  sampler_create ( const  SamplerState  & p_state ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									bool  sampler_is_format_supported_for_filter ( DataFormat  p_format ,  SamplerFilter  p_sampler_filter )  const ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**********************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** VERTEX ARRAY ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**********************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									typedef  int64_t  VertexFormatID ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								private :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Vertex buffers in Vulkan are similar to how
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// they work in OpenGL, except that instead of
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// an attribute index, there is a buffer binding
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// index (for binding the buffers in real-time)
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// and a location index (what is used in the shader).
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// This mapping is done here internally, and it's not
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// exposed.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RID_Owner < Buffer ,  true >  vertex_buffer_owner ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									struct  VertexDescriptionKey  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										Vector < VertexAttribute >  vertex_formats ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										bool  operator = = ( const  VertexDescriptionKey  & p_key )  const  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											int  vdc  =  vertex_formats . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											int  vdck  =  p_key . vertex_formats . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( vdc  ! =  vdck )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												return  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												const  VertexAttribute  * a_ptr  =  vertex_formats . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												const  VertexAttribute  * b_ptr  =  p_key . vertex_formats . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												for  ( int  i  =  0 ;  i  <  vdc ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													const  VertexAttribute  & a  =  a_ptr [ i ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													const  VertexAttribute  & b  =  b_ptr [ i ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													if  ( a . location  ! =  b . location )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														return  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													if  ( a . offset  ! =  b . offset )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														return  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													if  ( a . format  ! =  b . format )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														return  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													if  ( a . stride  ! =  b . stride )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														return  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													if  ( a . frequency  ! =  b . frequency )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														return  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												return  true ;  // They are equal.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  hash ( )  const  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											int  vdc  =  vertex_formats . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  h  =  hash_murmur3_one_32 ( vdc ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  VertexAttribute  * ptr  =  vertex_formats . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											for  ( int  i  =  0 ;  i  <  vdc ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												const  VertexAttribute  & vd  =  ptr [ i ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												h  =  hash_murmur3_one_32 ( vd . location ,  h ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												h  =  hash_murmur3_one_32 ( vd . offset ,  h ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												h  =  hash_murmur3_one_32 ( vd . format ,  h ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												h  =  hash_murmur3_one_32 ( vd . stride ,  h ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												h  =  hash_murmur3_one_32 ( vd . frequency ,  h ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  hash_fmix32 ( h ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Hasher passed as the third template argument of the `vertex_format_cache`
									// HashMap; it simply forwards to VertexDescriptionKey::hash().
									struct VertexDescriptionHash {
										static _FORCE_INLINE_ uint32_t hash(const VertexDescriptionKey &p_key) {
											const uint32_t key_hash = p_key.hash();
											return key_hash;
										}
									};
							 
						 
					
						
							
								
									
										
										
										
											2019-06-10 14:12:24 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// This is a cache and it's never freed, it ensures that
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// the ID used for a specific format always remains the same.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									HashMap < VertexDescriptionKey ,  VertexFormatID ,  VertexDescriptionHash >  vertex_format_cache ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Value type of the `vertex_formats` map (keyed by VertexFormatID): pairs an
									// attribute list with a driver-side vertex format ID.
									struct VertexDescriptionCache {
										Vector<VertexAttribute> vertex_formats;
										RDD::VertexFormatID driver_id;
									};
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									HashMap < VertexFormatID ,  VertexDescriptionCache >  vertex_formats ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									struct  VertexArray  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										RID  buffer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VertexFormatID  description ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										int  vertex_count  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  max_instances_allowed  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										Vector < RDD : : BufferID >  buffers ;  // Not owned, just referenced.
 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										Vector < RDG : : ResourceTracker  * >  draw_trackers ;  // Not owned, just referenced.
 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										Vector < uint64_t >  offsets ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										Vector < int32_t >  transfer_worker_indices ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										Vector < uint64_t >  transfer_worker_operations ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										HashSet < RID >  untracked_buffers ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RID_Owner < VertexArray ,  true >  vertex_array_owner ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Record type stored in `index_buffer_owner`; extends Buffer with
									// index-specific fields.
									struct IndexBuffer : public Buffer {
										uint32_t max_index = 0; // Used for validation.
										uint32_t index_count = 0;
										IndexBufferFormat format = INDEX_BUFFER_FORMAT_UINT16;
										bool supports_restart_indices = false;
									};
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RID_Owner < IndexBuffer ,  true >  index_buffer_owner ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									struct  IndexArray  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  max_index  =  0 ;  // Remember the maximum index here too, for validation.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										RDD : : BufferID  driver_id ;  // Not owned, inherited from index buffer.
 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										RDG : : ResourceTracker  * draw_tracker  =  nullptr ;  // Not owned, inherited from index buffer.
 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										uint32_t  offset  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  indices  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										IndexBufferFormat  format  =  INDEX_BUFFER_FORMAT_UINT16 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										bool  supports_restart_indices  =  false ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										int32_t  transfer_worker_index  =  - 1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint64_t  transfer_worker_operation  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RID_Owner < IndexArray ,  true >  index_array_owner ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								public :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RID  vertex_buffer_create ( uint32_t  p_size_bytes ,  const  Vector < uint8_t >  & p_data  =  Vector < uint8_t > ( ) ,  bool  p_use_as_storage  =  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// This ID is guaranteed to be unique for the same formats; it does not need to be freed.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VertexFormatID  vertex_format_create ( const  Vector < VertexAttribute >  & p_vertex_descriptions ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RID  vertex_array_create ( uint32_t  p_vertex_count ,  VertexFormatID  p_vertex_format ,  const  Vector < RID >  & p_src_buffers ,  const  Vector < uint64_t >  & p_offsets  =  Vector < uint64_t > ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RID  index_buffer_create ( uint32_t  p_size_indices ,  IndexBufferFormat  p_format ,  const  Vector < uint8_t >  & p_data  =  Vector < uint8_t > ( ) ,  bool  p_use_restart_indices  =  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RID  index_array_create ( RID  p_index_buffer ,  uint32_t  p_index_offset ,  uint32_t  p_index_count ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/****************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** SHADER ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/****************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Some APIs (e.g., Vulkan) specify a really complex behavior for the application
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// in order to tell when descriptor sets need to be re-bound (or not).
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// "When binding a descriptor set (see Descriptor Set Binding) to set
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//  number N, if the previously bound descriptor sets for sets zero
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//  through N-1 were all bound using compatible pipeline layouts,
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//  then performing this binding does not disturb any of the lower numbered sets.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//  If, additionally, the previous bound descriptor set for set N was
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//  bound using a pipeline layout compatible for set N, then the bindings
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//  in sets numbered greater than N are also not disturbed."
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// As a result, we need to figure out quickly when something is no longer "compatible".
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// in order to avoid costly rebinds.
 
							 
						 
					
						
							
								
									
										
										
										
											2021-03-22 21:04:55 +11:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								private :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Ordered wrapper around a list of shader uniforms.
									// Provides the strict "less than" comparison needed to use a
									// uniform list as a key in an ordered container.
									struct UniformSetFormat {
										Vector<ShaderUniform> uniforms;

										// Ordering rule:
										//  1. A format with fewer uniforms sorts before one with more.
										//  2. With equal counts, the first pair of uniforms that differs
										//     (according to ShaderUniform's own operator<) decides.
										//  3. If no pair differs, neither format is less than the other.
										_FORCE_INLINE_ bool operator<(const UniformSetFormat &p_other) const {
											const auto own_count = uniforms.size();
											const auto other_count = p_other.uniforms.size();
											if (own_count != other_count) {
												return own_count < other_count;
											}

											for (int idx = 0; idx < own_count; idx++) {
												const ShaderUniform &lhs = uniforms[idx];
												const ShaderUniform &rhs = p_other.uniforms[idx];
												if (lhs < rhs) {
													return true;
												}
												if (rhs < lhs) {
													return false;
												}
											}

											// Every element compared equivalent.
											return false;
										}
									};
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Always grows, never shrinks, ensuring unique IDs, but we assume
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// the number of formats will never be a problem, as the number of shaders
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// in a game is limited.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RBMap < UniformSetFormat ,  uint32_t >  uniform_set_format_cache ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Shaders in Vulkan are just pretty much
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// precompiled blocks of SPIR-V bytecode. They
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// are most likely not really compiled to host
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// assembly until a pipeline is created.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// When supplying the shaders, this implementation
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// will use the reflection abilities of glslang to
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// understand and cache everything required to
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// create and use the descriptor sets (Vulkan's
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// biggest pain).
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Additionally, hashes are created for every set
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// to do quick validation and ensure the user
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// does not submit something invalid.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									struct  Shader  :  public  ShaderDescription  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										String  name ;  // Used for debug.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										RDD : : ShaderID  driver_id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  layout_hash  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										BitField < RDD : : PipelineStageBits >  stage_bits ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										Vector < uint32_t >  set_formats ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-02-11 22:33:54 +11:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									String  _shader_uniform_debug ( RID  p_shader ,  int  p_set  =  - 1 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RID_Owner < Shader ,  true >  shader_owner ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# ifndef DISABLE_DEPRECATED 
  
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								public :  
						 
					
						
							
								
									
										
										
										
											2024-09-26 11:26:17 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Barrier bit flags.
									// Bit positions do not follow declaration order: FRAGMENT uses
									// bit 3 while COMPUTE and TRANSFER use bits 1 and 2.
									enum BarrierMask {
										BARRIER_MASK_VERTEX = 1 << 0, // 1
										BARRIER_MASK_FRAGMENT = 1 << 3, // 8
										BARRIER_MASK_COMPUTE = 1 << 1, // 2
										BARRIER_MASK_TRANSFER = 1 << 2, // 4

										// Combination of the vertex and fragment bits.
										BARRIER_MASK_RASTER = BARRIER_MASK_VERTEX | BARRIER_MASK_FRAGMENT, // 9
										// All flags set (bits 0 through 14).
										BARRIER_MASK_ALL_BARRIERS = 0x7FFF,
										// Bit 15; does not overlap BARRIER_MASK_ALL_BARRIERS.
										BARRIER_MASK_NO_BARRIER = 0x8000,
									};
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-10-24 16:01:00 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Initial action selector.
									// Three distinct values plus a MAX sentinel, followed by additional
									// names that alias one of the three values.
									enum InitialAction {
										INITIAL_ACTION_LOAD = 0,
										INITIAL_ACTION_CLEAR = 1,
										INITIAL_ACTION_DISCARD = 2,
										INITIAL_ACTION_MAX = 3, // Count of distinct values above.

										// Aliases of INITIAL_ACTION_LOAD.
										INITIAL_ACTION_KEEP = INITIAL_ACTION_LOAD,
										INITIAL_ACTION_CONTINUE = INITIAL_ACTION_LOAD,

										// Aliases of INITIAL_ACTION_CLEAR.
										INITIAL_ACTION_CLEAR_REGION = INITIAL_ACTION_CLEAR,
										INITIAL_ACTION_CLEAR_REGION_CONTINUE = INITIAL_ACTION_CLEAR,

										// Alias of INITIAL_ACTION_DISCARD.
										INITIAL_ACTION_DROP = INITIAL_ACTION_DISCARD,
									};
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Final action selector.
									// Two distinct values plus a MAX sentinel, followed by additional
									// names that both alias FINAL_ACTION_STORE.
									enum FinalAction {
										FINAL_ACTION_STORE = 0,
										FINAL_ACTION_DISCARD = 1,
										FINAL_ACTION_MAX = 2, // Count of distinct values above.

										// Aliases of FINAL_ACTION_STORE.
										FINAL_ACTION_READ = FINAL_ACTION_STORE,
										FINAL_ACTION_CONTINUE = FINAL_ACTION_STORE,
									};
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  barrier ( BitField < BarrierMask >  p_from  =  BARRIER_MASK_ALL_BARRIERS ,  BitField < BarrierMask >  p_to  =  BARRIER_MASK_ALL_BARRIERS ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  full_barrier ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  draw_command_insert_label ( String  p_label_name ,  const  Color  & p_color  =  Color ( 1 ,  1 ,  1 ,  1 ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Error  draw_list_begin_split ( RID  p_framebuffer ,  uint32_t  p_splits ,  DrawListID  * r_split_ids ,  InitialAction  p_initial_color_action ,  FinalAction  p_final_color_action ,  InitialAction  p_initial_depth_action ,  FinalAction  p_final_depth_action ,  const  Vector < Color >  & p_clear_color_values  =  Vector < Color > ( ) ,  float  p_clear_depth  =  1.0 ,  uint32_t  p_clear_stencil  =  0 ,  const  Rect2  & p_region  =  Rect2 ( ) ,  const  Vector < RID >  & p_storage_textures  =  Vector < RID > ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Error  draw_list_switch_to_next_pass_split ( uint32_t  p_splits ,  DrawListID  * r_split_ids ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Vector < int64_t >  _draw_list_begin_split ( RID  p_framebuffer ,  uint32_t  p_splits ,  InitialAction  p_initial_color_action ,  FinalAction  p_final_color_action ,  InitialAction  p_initial_depth_action ,  FinalAction  p_final_depth_action ,  const  Vector < Color >  & p_clear_color_values  =  Vector < Color > ( ) ,  float  p_clear_depth  =  1.0 ,  uint32_t  p_clear_stencil  =  0 ,  const  Rect2  & p_region  =  Rect2 ( ) ,  const  TypedArray < RID >  & p_storage_textures  =  TypedArray < RID > ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Vector < int64_t >  _draw_list_switch_to_next_pass_split ( uint32_t  p_splits ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								private :  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  _draw_list_end_bind_compat_81356 ( BitField < BarrierMask >  p_post_barrier ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  _compute_list_end_bind_compat_81356 ( BitField < BarrierMask >  p_post_barrier ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  _barrier_bind_compat_81356 ( BitField < BarrierMask >  p_from ,  BitField < BarrierMask >  p_to ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  _draw_list_end_bind_compat_84976 ( BitField < BarrierMask >  p_post_barrier ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  _compute_list_end_bind_compat_84976 ( BitField < BarrierMask >  p_post_barrier ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									InitialAction  _convert_initial_action_84976 ( InitialAction  p_old_initial_action ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									FinalAction  _convert_final_action_84976 ( FinalAction  p_old_final_action ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DrawListID  _draw_list_begin_bind_compat_84976 ( RID  p_framebuffer ,  InitialAction  p_initial_color_action ,  FinalAction  p_final_color_action ,  InitialAction  p_initial_depth_action ,  FinalAction  p_final_depth_action ,  const  Vector < Color >  & p_clear_color_values ,  float  p_clear_depth ,  uint32_t  p_clear_stencil ,  const  Rect2  & p_region ,  const  TypedArray < RID >  & p_storage_textures ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ComputeListID  _compute_list_begin_bind_compat_84976 ( bool  p_allow_draw_overlap ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Error  _buffer_update_bind_compat_84976 ( RID  p_buffer ,  uint32_t  p_offset ,  uint32_t  p_size ,  const  Vector < uint8_t >  & p_data ,  BitField < BarrierMask >  p_post_barrier ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Error  _buffer_clear_bind_compat_84976 ( RID  p_buffer ,  uint32_t  p_offset ,  uint32_t  p_size ,  BitField < BarrierMask >  p_post_barrier ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Error  _texture_update_bind_compat_84976 ( RID  p_texture ,  uint32_t  p_layer ,  const  Vector < uint8_t >  & p_data ,  BitField < BarrierMask >  p_post_barrier ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Error  _texture_copy_bind_compat_84976 ( RID  p_from_texture ,  RID  p_to_texture ,  const  Vector3  & p_from ,  const  Vector3  & p_to ,  const  Vector3  & p_size ,  uint32_t  p_src_mipmap ,  uint32_t  p_dst_mipmap ,  uint32_t  p_src_layer ,  uint32_t  p_dst_layer ,  BitField < BarrierMask >  p_post_barrier ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Error  _texture_clear_bind_compat_84976 ( RID  p_texture ,  const  Color  & p_color ,  uint32_t  p_base_mipmap ,  uint32_t  p_mipmaps ,  uint32_t  p_base_layer ,  uint32_t  p_layers ,  BitField < BarrierMask >  p_post_barrier ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Error  _texture_resolve_multisample_bind_compat_84976 ( RID  p_from_texture ,  RID  p_to_texture ,  BitField < BarrierMask >  p_post_barrier ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									FramebufferFormatID  _screen_get_framebuffer_format_bind_compat_87340 ( )  const ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-10-24 16:01:00 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									DrawListID  _draw_list_begin_bind_compat_90993 ( RID  p_framebuffer ,  InitialAction  p_initial_color_action ,  FinalAction  p_final_color_action ,  InitialAction  p_initial_depth_action ,  FinalAction  p_final_depth_action ,  const  Vector < Color >  & p_clear_color_values ,  float  p_clear_depth ,  uint32_t  p_clear_stencil ,  const  Rect2  & p_region ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DrawListID  _draw_list_begin_bind_compat_98670 ( RID  p_framebuffer ,  InitialAction  p_initial_color_action ,  FinalAction  p_final_color_action ,  InitialAction  p_initial_depth_action ,  FinalAction  p_final_depth_action ,  const  Vector < Color >  & p_clear_color_values ,  float  p_clear_depth ,  uint32_t  p_clear_stencil ,  const  Rect2  & p_region ,  uint32_t  p_breadcrumb ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								public :  
						 
					
						
							
								
									
										
										
										
											2024-05-17 09:55:42 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Returns the `context` member as-is; no null check is performed here.
									RenderingContextDriver *get_context_driver() const {
										return context;
									}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Forwards to `driver->get_capabilities()` and returns its result by
									// const reference. `driver` is dereferenced without a null check.
									const RDD::Capabilities &get_device_capabilities() const {
										return driver->get_capabilities();
									}
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									bool  has_feature ( const  Features  p_feature )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Vector < uint8_t >  shader_compile_spirv_from_source ( ShaderStage  p_stage ,  const  String  & p_source_code ,  ShaderLanguage  p_language  =  SHADER_LANGUAGE_GLSL ,  String  * r_error  =  nullptr ,  bool  p_allow_cache  =  true ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									String  shader_get_spirv_cache_key ( )  const ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
											
												Implement Binary Shader Compilation
* Added an extra stage before compiling shader, which is generating a binary blob.
* On Vulkan, this allows caching the SPIRV reflection information, which is expensive to parse.
* On other (future) RenderingDevices, it allows caching converted binary data, such as DXIL or MSL.
This PR makes the shader cache include the reflection information, hence editor startup times are significantly improved.
I tested this well and it appears to work, and I added a lot of consistency checks, but because it includes writing and reading binary information, rare bugs may pop up, so be aware.
There was not much of a choice for storing the reflection information, given shaders can be a lot, take a lot of space and take time to parse.
											 
										 
										
											2021-07-25 11:22:55 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									static  void  shader_set_compile_to_spirv_function ( ShaderCompileToSPIRVFunction  p_function ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									static  void  shader_set_spirv_cache_function ( ShaderCacheFunction  p_function ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Static setter that takes a ShaderSPIRVGetCacheKeyFunction.
									// NOTE(review): presumably registers the callback that produces the
									// SPIR-V cache key - confirm against the implementation.
									static  void  shader_set_get_cache_key_function ( ShaderSPIRVGetCacheKeyFunction  p_function ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-07-28 19:58:32 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Const accessor returning a String.
									// NOTE(review): presumably the key used to identify cached shader
									// binaries - confirm against the implementation.
									String  shader_get_binary_cache_key ( )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Takes a vector of ShaderStageSPIRVData plus an optional shader name
									// and returns a byte vector.
									// NOTE(review): presumably the result is the binary blob accepted by
									// shader_create_from_bytecode() - confirm against the implementation.
									Vector < uint8_t >  shader_compile_binary_from_spirv ( const  Vector < ShaderStageSPIRVData >  & p_spirv ,  const  String  & p_shader_name  =  " " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-07-28 19:58:32 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Takes a vector of ShaderStageSPIRVData plus an optional shader name
									// and returns an RID.
									// NOTE(review): presumably the RID identifies the newly created shader
									// and is invalid on failure - confirm against the implementation.
									RID  shader_create_from_spirv ( const  Vector < ShaderStageSPIRVData >  & p_spirv ,  const  String  & p_shader_name  =  " " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Takes a shader binary as a byte vector and returns an RID.
									// p_placeholder is optional and defaults to a default-constructed RID.
									// NOTE(review): presumably a placeholder obtained from
									// shader_create_placeholder() is filled in and reused when supplied -
									// confirm against the implementation.
									RID  shader_create_from_bytecode ( const  Vector < uint8_t >  & p_shader_binary ,  RID  p_placeholder  =  RID ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Takes no arguments and returns an RID.
									// NOTE(review): presumably reserves a shader RID that can later be
									// passed as p_placeholder to shader_create_from_bytecode() - confirm.
									RID  shader_create_placeholder ( ) ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers require that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozens/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when working around driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (especially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I discussed this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Takes the RID of a shader; returns nothing.
									// NOTE(review): presumably releases the per-shader SPIR-V module data
									// once it is no longer needed, leaving the shader itself usable -
									// confirm against the implementation.
									void  shader_destroy_modules ( RID  p_shader ) ; 
							 
						 
					
						
							
								
									
										
										
											
												Implement Binary Shader Compilation
* Added an extra stage before compiling shader, which is generating a binary blob.
* On Vulkan, this allows caching the SPIRV reflection information, which is expensive to parse.
* On other (future) RenderingDevices, it allows caching converted binary data, such as DXIL or MSL.
This PR makes the shader cache include the reflection information, hence editor startup times are significantly improved.
I tested this well and it appears to work, and I added a lot of consistency checks, but because it includes writing and reading binary information, rare bugs may pop up, so be aware.
There was not much of a choice for storing the reflection information, given that shaders can be numerous, take a lot of space, and take time to parse.
											 
										 
										
											2021-07-25 11:22:55 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Takes the RID of a shader and returns a 64-bit unsigned value.
									// NOTE(review): presumably a bitmask with one bit per vertex input
									// attribute location used by the shader - confirm.
									uint64_t  shader_get_vertex_input_attribute_mask ( RID  p_shader ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/******************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** UNIFORMS ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/******************/ 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Const accessor returning a String.
									// NOTE(review): presumably a human-readable performance summary; the
									// exact contents are not visible from this declaration - confirm.
									String  get_perf_report ( )  const ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-06-25 10:33:28 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Usage flags for storage buffers. storage_buffer_create() accepts these
									// as a BitField<StorageBufferUsage>, so values are meant to be combined
									// as bits. Only one flag is declared here:
									// STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT (value 1).
									// NOTE(review): presumably that flag marks the buffer as usable as the
									// argument source for indirect compute dispatches - confirm.
									enum  StorageBufferUsage  { 
							 
						 
					
						
							
								
									
										
										
										
											2022-12-15 13:27:57 +03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT  =  1 , 
							 
						 
					
						
							
								
									
										
										
										
											2020-06-25 10:33:28 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers require that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozens/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when working around driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (especially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I discussed this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									/*****************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** BUFFERS ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/*****************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Takes a size in bytes and optional initial contents (defaults to an
									// empty byte vector) and returns an RID.
									// NOTE(review): presumably p_data, when non-empty, must match
									// p_size_bytes - confirm against the implementation.
									RID  uniform_buffer_create ( uint32_t  p_size_bytes ,  const  Vector < uint8_t >  & p_data  =  Vector < uint8_t > ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Takes a size, optional initial contents (defaults to an empty byte
									// vector) and optional StorageBufferUsage flags (defaults to 0, i.e. no
									// flags set) and returns an RID.
									// NOTE(review): presumably p_size is in bytes and p_data, when
									// non-empty, must match it - confirm against the implementation.
									RID  storage_buffer_create ( uint32_t  p_size ,  const  Vector < uint8_t >  & p_data  =  Vector < uint8_t > ( ) ,  BitField < StorageBufferUsage >  p_usage  =  0 ) ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers require that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozens/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when working around driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (especially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I discussed this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Declares the texture-buffer factory: takes an element count, a data format and optional initial bytes
									// (`p_data` defaults to an empty vector) and returns an RID. The implementation is not visible here.
									// NOTE(review): presumably `p_size_elements` counts `p_format` elements rather than bytes — confirm.
									RID  texture_buffer_create ( uint32_t  p_size_elements ,  DataFormat  p_format ,  const  Vector < uint8_t >  & p_data  =  Vector < uint8_t > ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									struct  Uniform  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// Kind of resource this uniform binds; defaults to UNIFORM_TYPE_IMAGE.
										UniformType  uniform_type  =  UNIFORM_TYPE_IMAGE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  binding  =  0 ;  // Binding index as specified in shader.
 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers require that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozens/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when working around driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (especially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low number of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I discussed this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// This flag specifies that this is an immutable sampler to be set when creating pipeline layout.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										bool  immutable_sampler  =  false ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-03-06 12:57:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									private : 
							 
						 
					
						
							
								
									
										
										
										
											2022-03-31 14:06:10 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// In most cases only one ID is provided per binding, so avoid allocating memory unnecessarily for performance.
 
							 
						 
					
						
							
								
									
										
										
										
											2022-03-06 12:57:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										RID  id ;  // If only one is provided, this is used.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										Vector < RID >  ids ;  // If multiple ones are provided, this is used instead.
 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-03-06 12:57:09 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									public : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Number of resource IDs held: 1 while the single `id` slot is valid,
										// otherwise the current length of `ids`.
										_FORCE_INLINE_ uint32_t get_id_count() const {
											if (id.is_valid()) {
												return 1;
											}
											return ids.size();
										}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Returns the resource ID stored at position `p_idx`.
										// While the single `id` slot is valid, only index 0 is accepted: any other
										// index trips ERR_FAIL_COND_V with a default-constructed RID as the fallback
										// value. Otherwise the lookup is forwarded to `ids` with no extra check here.
										_FORCE_INLINE_ RID get_id(uint32_t p_idx) const {
											if (id.is_valid()) {
												ERR_FAIL_COND_V(p_idx != 0, RID());
												return id;
											} else {
												return ids[p_idx];
											}
										}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Overwrites the resource ID stored at position `p_idx` with `p_id`.
										// While the single `id` slot is valid, only index 0 is accepted (guarded by
										// ERR_FAIL_COND) and the slot itself is replaced. Otherwise the write goes
										// to `ids` at `p_idx`; no bounds check is performed in this method.
										_FORCE_INLINE_ void set_id(uint32_t p_idx, RID p_id) {
											if (id.is_valid()) {
												ERR_FAIL_COND(p_idx != 0);
												id = p_id;
											} else {
												ids.write[p_idx] = p_id;
											}
										}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Appends `p_id` to the stored resource IDs.
										// The first ID lands in the single `id` slot. When a second one arrives,
										// the existing `id` and the new one are both pushed into `ids` and `id` is
										// reset, so every later append goes straight to `ids`.
										_FORCE_INLINE_ void append_id(RID p_id) {
											if (ids.is_empty()) {
												if (id == RID()) {
													id = p_id;
												} else {
													ids.push_back(id);
													ids.push_back(p_id);
													id = RID();
												}
											} else {
												ids.push_back(p_id);
											}
										}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Drops every stored resource ID: empties `ids` and resets the single `id` slot.
										_FORCE_INLINE_ void clear_ids() {
											ids.clear();
											id = RID();
										}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Builds a uniform of kind `p_type` at binding `p_binding` holding the single resource `p_id`.
										_FORCE_INLINE_ Uniform(UniformType p_type, int p_binding, RID p_id) :
												uniform_type(p_type),
												binding(p_binding),
												id(p_id) {
										}
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Builds a uniform of kind `p_type` at binding `p_binding` holding the resource list `p_ids`.
										_FORCE_INLINE_ Uniform(UniformType p_type, int p_binding, const Vector<RID> &p_ids) :
												uniform_type(p_type),
												binding(p_binding),
												ids(p_ids) {
										}
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// Default constructor: every field keeps its in-class default initializer.
										_FORCE_INLINE_  Uniform ( )  =  default ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers require that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozens/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when working around driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (especially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low number of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I discussed this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// PipelineImmutableSampler is an alias of Uniform; it is the element type of the
									// p_immutable_samplers argument of shader_create_from_bytecode_with_samplers().
									typedef  Uniform  PipelineImmutableSampler ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Takes a shader binary (raw bytes) and returns a RID.
									// p_placeholder defaults to a null RID() and p_immutable_samplers defaults to an empty vector.
									// NOTE(review): presumably creates the shader from the binary, baking the given samplers in as immutable — confirm in the .cpp.
									RID  shader_create_from_bytecode_with_samplers ( const  Vector < uint8_t >  & p_shader_binary ,  RID  p_placeholder  =  RID ( ) ,  const  Vector < PipelineImmutableSampler >  & p_immutable_samplers  =  Vector < PipelineImmutableSampler > ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								private :  
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Class-private compile-time constant (16).
									// NOTE(review): presumably the upper bound on uniform set indices tracked per shader/list — confirm against usages.
									static  const  uint32_t  MAX_UNIFORM_SETS  =  16 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Class-private compile-time constant (128).
									// NOTE(review): presumably the maximum push constant size in bytes — confirm against usages.
									static  const  uint32_t  MAX_PUSH_CONSTANT_SIZE  =  128 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// This structure contains the descriptor set. They _need_ to be allocated
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// for a shader (and will be erased when this shader is erased), but should
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// work for other shaders as long as the hash matches. This covers using
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// them in shader variants.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Keep also in mind that you can share buffers between descriptor sets, so
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// the above restriction is not too serious.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									struct  UniformSet  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  format  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										RID  shader_id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  shader_set  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										RDD : : UniformSetID  driver_id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										struct  AttachableTexture  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  bind  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											RID  texture ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-04-24 14:30:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										struct  SharedTexture  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  writing  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											RID  texture ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										LocalVector < AttachableTexture >  attachable_textures ;  // Used for validation.
 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										Vector < RDG : : ResourceTracker  * >  draw_trackers ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										Vector < RDG : : ResourceUsage >  draw_trackers_usage ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										HashMap < RID ,  RDG : : ResourceUsage >  untracked_usage ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-04-24 14:30:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										LocalVector < SharedTexture >  shared_textures_to_update ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										InvalidationCallback  invalidated_callback  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										void  * invalidated_callback_userdata  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// RID_Owner instance holding the UniformSet objects.
									// NOTE(review): the second template argument (true) presumably selects the thread-safe variant — confirm against RID_Owner.
									RID_Owner < UniformSet ,  true >  uniform_set_owner ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-09 16:48:28 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-04-24 14:30:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Private helper operating on the given UniformSet.
									// NOTE(review): presumably processes its shared_textures_to_update entries — confirm in the .cpp.
									void  _uniform_set_update_shared ( UniformSet  * p_uniform_set ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								public :  
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers require that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozens/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when working around driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact was
very low.
But not anymore (especially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low number of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I discussed this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									/** Bake a set of uniforms that can be bound at runtime with the given shader.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									 *  @ remark 				Setting  p_linear_pool  =  true  while  keeping  the  RID  around  for  longer  than  the  current  frame  will  result  in  undefined  behavior . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									 *  @ param  p_uniforms 	The  uniforms  to  bake  into  a  set . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									 *  @ param  p_shader 		The  shader  you  intend  to  bind  these  uniforms  with . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									 *  @ param  p_set_index 	The  set .  Should  be  in  range  [ 0 ;  4 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									 * 						The  value  4  comes  from  physical_device_properties . limits . maxBoundDescriptorSets .  Vulkan  only  guarantees  maxBoundDescriptorSets  > =  4  ( = =  4  is  very  common  on  Mobile ) . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									 *  @ param  p_linear_pool 	If  you  call  this  function  every  frame  ( and  free  the  returned  RID  within  the  same  frame ! ) ,  set  it  to  true  for  better  performance . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									 * 						If  you  plan  on  keeping  the  return  value  around  for  more  than  one  frame  ( e . g .  Sets  that  are  created  once  and  reused  forever )  you  MUST  set  it  to  false . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									 *  @ return 				Baked  descriptor  set . 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									 */ 
							 
						 
					
						
							
								
									
										
										
										
											2024-07-14 22:13:57 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									template  < typename  Collection > 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers require that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozens/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when working around driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact was
very low.
But not anymore (especially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low number of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I discussed this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Creates a uniform set from p_uniforms for set index p_shader_set of p_shader and returns its RID.
									// p_linear_pool (default false): per the linear descriptor pool change notes, linear pools are
									// reset as a whole, so this must only be true for sets created and freed within the same frame,
									// never for sets that are kept around. NOTE(review): confirm against the implementation.
									RID  uniform_set_create ( const  Collection  & p_uniforms ,  RID  p_shader ,  uint32_t  p_shader_set ,  bool  p_linear_pool  =  false ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Reports whether p_uniform_set is a valid uniform set RID.
									// NOTE(review): presumably "valid" means still registered with this device — confirm in the .cpp.
									bool  uniform_set_is_valid ( RID  p_uniform_set ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Associates p_callback and its opaque p_userdata pointer with p_uniform_set.
									// NOTE(review): presumably invoked when the set becomes invalid — confirm when and on which thread.
									void  uniform_set_set_invalidation_callback ( RID  p_uniform_set ,  InvalidationCallback  p_callback ,  void  * p_userdata ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers require that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozens/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when working around driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact was
very low.
But not anymore (especially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low number of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I discussed this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Reports whether uniform sets can use linear descriptor pools; does not modify the object (const).
									// NOTE(review): presumably reflects the linear_descriptor_pools_enabled toggle — confirm.
									bool  uniform_sets_have_linear_pools ( )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									/*******************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** PIPELINES ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/*******************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Render pipeline contains ALL the
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// information required for drawing.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// This includes all the rasterizer state
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// as well as shader used, framebuffer format,
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// etc.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// While the pipeline is just a single object
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// (VkPipeline) a lot of values are also saved
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// here to do validation (vulkan does none by
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// default) and warn the user if something
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// was not supplied as intended.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								private :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									struct  RenderPipeline  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Cached values for validation.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# ifdef DEBUG_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										struct  Validation  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											FramebufferFormatID  framebuffer_format ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  render_pass  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  dynamic_state  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VertexFormatID  vertex_format ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											bool  uses_restart_indices  =  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  primitive_minimum  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  primitive_divisor  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										}  validation ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Actual pipeline.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										RID  shader ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										RDD : : ShaderID  shader_driver_id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  shader_layout_hash  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										Vector < uint32_t >  set_formats ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										RDD : : PipelineID  driver_id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										BitField < RDD : : PipelineStageBits >  stage_bits ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										uint32_t  push_constant_size  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-09 16:48:28 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// RID-addressed storage for all RenderPipeline records.
									// NOTE(review): the `true` template argument presumably selects the thread-safe owner — confirm in RID_Owner.
									RID_Owner < RenderPipeline ,  true >  render_pipeline_owner ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Pipeline cache toggle; starts disabled.
									bool  pipeline_cache_enabled  =  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Size of the pipeline cache, initially 0.
									// NOTE(review): presumably the byte size as of the last load/save — confirm.
									size_t  pipeline_cache_size  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Path of the pipeline cache file.
									String  pipeline_cache_file_path ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Worker thread pool task ID associated with saving the pipeline cache;
									// holds INVALID_TASK_ID until a task is assigned.
									WorkerThreadPool : : TaskID  pipeline_cache_save_task  =  WorkerThreadPool : : INVALID_TASK_ID ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Returns pipeline cache data as raw bytes.
									// NOTE(review): presumably read from pipeline_cache_file_path — confirm, including what is returned on failure.
									Vector < uint8_t >  _load_pipeline_cache ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Updates the pipeline cache.
									// p_closing (default false): NOTE(review) presumably marks the final update at shutdown — confirm.
									void  _update_pipeline_cache ( bool  p_closing  =  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Static helper that saves the pipeline cache; p_data is an opaque pointer.
									// NOTE(review): presumably the task body behind pipeline_cache_save_task — confirm what p_data points to.
									static  void  _save_pipeline_cache ( void  * p_data ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Per-pipeline record for a compute pipeline: the shader it was built from,
									// the driver pipeline handle, and the shader's push constant size and
									// local group size (all zero until filled in).
									struct ComputePipeline {
										RID shader;
										RDD::ShaderID shader_driver_id;
										uint32_t shader_layout_hash = 0;
										Vector<uint32_t> set_formats;
										RDD::PipelineID driver_id;
										uint32_t push_constant_size = 0;
										uint32_t local_group_size[3] = { 0, 0, 0 };
									};
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// RID-addressed storage for all ComputePipeline records.
									// NOTE(review): the `true` template argument presumably selects the thread-safe owner — confirm in RID_Owner.
									RID_Owner < ComputePipeline ,  true >  compute_pipeline_owner ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								public :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Creates a render pipeline for p_shader from the given framebuffer format, vertex format,
									// primitive type and rasterization / multisample / depth-stencil / blend state, and returns its RID.
									// Optional arguments: dynamic state flags (default 0), target render pass index (default 0)
									// and specialization constants (default empty).
									// NOTE(review): the value returned on failure is not visible here — confirm in the .cpp.
									RID  render_pipeline_create ( RID  p_shader ,  FramebufferFormatID  p_framebuffer_format ,  VertexFormatID  p_vertex_format ,  RenderPrimitive  p_render_primitive ,  const  PipelineRasterizationState  & p_rasterization_state ,  const  PipelineMultisampleState  & p_multisample_state ,  const  PipelineDepthStencilState  & p_depth_stencil_state ,  const  PipelineColorBlendState  & p_blend_state ,  BitField < PipelineDynamicStateFlags >  p_dynamic_state_flags  =  0 ,  uint32_t  p_for_render_pass  =  0 ,  const  Vector < PipelineSpecializationConstant >  & p_specialization_constants  =  Vector < PipelineSpecializationConstant > ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Reports whether p_pipeline is a valid render pipeline RID.
									// NOTE(review): presumably checked against render_pipeline_owner — confirm.
									bool  render_pipeline_is_valid ( RID  p_pipeline ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Creates a compute pipeline for p_shader, optionally with specialization constants
									// (default empty), and returns its RID.
									// NOTE(review): the value returned on failure is not visible here — confirm in the .cpp.
									RID  compute_pipeline_create ( RID  p_shader ,  const  Vector < PipelineSpecializationConstant >  & p_specialization_constants  =  Vector < PipelineSpecializationConstant > ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Reports whether p_pipeline is a valid compute pipeline RID.
									// NOTE(review): presumably checked against compute_pipeline_owner — confirm.
									bool  compute_pipeline_is_valid ( RID  p_pipeline ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								private :  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									/****************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** SCREEN ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/****************/ 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Driver swap chain handle for each screen, keyed by DisplayServer window ID.
									HashMap < DisplayServer : : WindowID ,  RDD : : SwapChainID >  screen_swap_chains ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Driver framebuffer handle for each screen, keyed by DisplayServer window ID.
									HashMap < DisplayServer : : WindowID ,  RDD : : FramebufferID >  screen_framebuffers ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Returns the desired swap chain count; does not modify the object (const).
									// NOTE(review): where the value comes from is not visible here — confirm.
									uint32_t  _get_swap_chain_desired_count ( )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								public :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Creates the screen for window p_screen (main window by default); returns an Error code.
									// NOTE(review): presumably populates screen_swap_chains — confirm.
									Error  screen_create ( DisplayServer : : WindowID  p_screen  =  DisplayServer : : MAIN_WINDOW_ID ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Prepares window p_screen (main window by default) for drawing; returns an Error code.
									// NOTE(review): presumably acquires the swap chain framebuffer for the frame — confirm.
									Error  screen_prepare_for_drawing ( DisplayServer : : WindowID  p_screen  =  DisplayServer : : MAIN_WINDOW_ID ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Returns the width of window p_screen (main window by default).
									// NOTE(review): presumably in pixels — confirm, including the value for an unknown screen.
									int  screen_get_width ( DisplayServer : : WindowID  p_screen  =  DisplayServer : : MAIN_WINDOW_ID )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Returns the height of window p_screen (main window by default).
									// NOTE(review): presumably in pixels — confirm, including the value for an unknown screen.
									int  screen_get_height ( DisplayServer : : WindowID  p_screen  =  DisplayServer : : MAIN_WINDOW_ID )  const ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-31 16:52:26 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Returns the pre-rotation, in degrees, for window p_screen (main window by default).
									// NOTE(review): presumably relates to pre-transformed swap chains — confirm the set of possible values.
									int  screen_get_pre_rotation_degrees ( DisplayServer : : WindowID  p_screen  =  DisplayServer : : MAIN_WINDOW_ID )  const ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Returns the framebuffer format ID of window p_screen (main window by default).
									// NOTE(review): the value returned for an unknown screen is not visible here — confirm.
									FramebufferFormatID  screen_get_framebuffer_format ( DisplayServer : : WindowID  p_screen  =  DisplayServer : : MAIN_WINDOW_ID )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Frees the screen for window p_screen (main window by default); returns an Error code.
									// NOTE(review): presumably releases the entries in screen_swap_chains / screen_framebuffers — confirm.
									Error  screen_free ( DisplayServer : : WindowID  p_screen  =  DisplayServer : : MAIN_WINDOW_ID ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									/*************************/ 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									/**** DRAW LISTS (II) ****/ 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									/*************************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								private :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Draw list contains both the command buffer
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// used for drawing as well as a LOT of
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// information used for validation. This
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// validation is cheap so most of it can
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// also run in release builds.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									struct  DrawList  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										Rect2i  viewport ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										bool  viewport_set  =  false ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										struct  SetState  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  pipeline_expected_format  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  uniform_set_format  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											RDD : : UniformSetID  uniform_set_driver_id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											RID  uniform_set ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											bool  bound  =  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										struct  State  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											SetState  sets [ MAX_UNIFORM_SETS ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  set_count  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											RID  pipeline ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											RID  pipeline_shader ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											RDD : : ShaderID  pipeline_shader_driver_id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  pipeline_shader_layout_hash  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											uint32_t  pipeline_push_constant_size  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											RID  vertex_array ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											RID  index_array ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-05-03 14:23:38 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											uint32_t  draw_count  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										}  state ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# ifdef DEBUG_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										struct  Validation  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											bool  active  =  true ;  // Means command buffer was not closed, so you can keep adding things.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// Actual render pass values.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  dynamic_state  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VertexFormatID  vertex_format  =  INVALID_ID ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  vertex_array_size  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  vertex_max_instances_allowed  =  0xFFFFFFFF ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											bool  index_buffer_uses_restart_indices  =  false ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-01-09 15:44:50 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											uint32_t  index_array_count  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											uint32_t  index_array_max_index  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											Vector < uint32_t >  set_formats ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											Vector < bool >  set_bound ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											Vector < RID >  set_rids ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// Last pipeline set values.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											bool  pipeline_active  =  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  pipeline_dynamic_state  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VertexFormatID  pipeline_vertex_format  =  INVALID_ID ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											RID  pipeline_shader ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											bool  pipeline_uses_restart_indices  =  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  pipeline_primitive_divisor  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  pipeline_primitive_minimum  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  pipeline_push_constant_size  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											bool  pipeline_push_constant_supplied  =  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										}  validation ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# else 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										struct  Validation  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  vertex_array_size  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-01-09 15:44:50 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											uint32_t  index_array_count  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										}  validation ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Pointer to a DrawList, null by default. NOTE(review): presumably the draw list currently being recorded - confirm against the definitions that assign it.
									DrawList  * draw_list  =  nullptr ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Starts at 0. NOTE(review): presumably the number of subpasses of the current draw list - confirm.
									uint32_t  draw_list_subpass_count  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# ifdef DEBUG_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Only declared when DEBUG_ENABLED is defined; starts as INVALID_ID.
									FramebufferFormatID  draw_list_framebuffer_format  =  INVALID_ID ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Starts at 0. NOTE(review): presumably the index of the subpass being recorded - confirm.
									uint32_t  draw_list_current_subpass  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// List of RIDs, empty by default. NOTE(review): presumably the textures bound while the draw list is active - confirm.
									Vector < RID >  draw_list_bound_textures ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Returns a DrawList pointer for the given DrawListID.
									// NOTE(review): what is returned for an invalid ID is not visible from this declaration - check the definition.
									_FORCE_INLINE_  DrawList  * _get_draw_list_ptr ( DrawListID  p_id ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Takes a viewport rectangle and a subpass index and reports the outcome as an Error.
									// NOTE(review): presumably sets up the `draw_list` member - confirm in the definition.
									Error  _draw_list_allocate ( const  Rect2i  & p_viewport ,  uint32_t  p_subpass ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// r_last_viewport is an optional out-pointer (null by default).
									// NOTE(review): presumably receives the viewport of the draw list being freed - confirm in the definition.
									void  _draw_list_free ( Rect2i  * r_last_viewport  =  nullptr ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								public :  
						 
					
						
							
								
									
										
										
										
											2024-10-24 16:01:00 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Bit flags accepted by draw_list_begin() (as BitField<DrawFlags>).
									// Bit layout:
									//   bits 0-7   DRAW_CLEAR_COLOR_0..7   (mask DRAW_CLEAR_COLOR_MASK)
									//   bits 8-15  DRAW_IGNORE_COLOR_0..7  (mask DRAW_IGNORE_COLOR_MASK)
									//   bit 16/17  clear / ignore depth
									//   bit 18/19  clear / ignore stencil
									// NOTE(review): the 0..7 suffix presumably is the color attachment index - confirm.
									enum DrawFlags {
										DRAW_DEFAULT_ALL = 0, // No flag set.
										DRAW_CLEAR_COLOR_0 = (1 << 0),
										DRAW_CLEAR_COLOR_1 = (1 << 1),
										DRAW_CLEAR_COLOR_2 = (1 << 2),
										DRAW_CLEAR_COLOR_3 = (1 << 3),
										DRAW_CLEAR_COLOR_4 = (1 << 4),
										DRAW_CLEAR_COLOR_5 = (1 << 5),
										DRAW_CLEAR_COLOR_6 = (1 << 6),
										DRAW_CLEAR_COLOR_7 = (1 << 7),
										DRAW_CLEAR_COLOR_MASK = 0xFF,
										DRAW_CLEAR_COLOR_ALL = DRAW_CLEAR_COLOR_MASK,
										DRAW_IGNORE_COLOR_0 = (1 << 8),
										DRAW_IGNORE_COLOR_1 = (1 << 9),
										DRAW_IGNORE_COLOR_2 = (1 << 10),
										DRAW_IGNORE_COLOR_3 = (1 << 11),
										DRAW_IGNORE_COLOR_4 = (1 << 12),
										DRAW_IGNORE_COLOR_5 = (1 << 13),
										DRAW_IGNORE_COLOR_6 = (1 << 14),
										DRAW_IGNORE_COLOR_7 = (1 << 15),
										DRAW_IGNORE_COLOR_MASK = 0xFF00,
										DRAW_IGNORE_COLOR_ALL = DRAW_IGNORE_COLOR_MASK,
										DRAW_CLEAR_DEPTH = (1 << 16),
										DRAW_IGNORE_DEPTH = (1 << 17),
										DRAW_CLEAR_STENCIL = (1 << 18),
										DRAW_IGNORE_STENCIL = (1 << 19),
										// Convenience combinations of every clear / every ignore bit.
										DRAW_CLEAR_ALL = DRAW_CLEAR_COLOR_ALL | DRAW_CLEAR_DEPTH | DRAW_CLEAR_STENCIL,
										DRAW_IGNORE_ALL = DRAW_IGNORE_COLOR_ALL | DRAW_IGNORE_DEPTH | DRAW_IGNORE_STENCIL
									};
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Takes a window ID (defaults to 0) and a clear color (defaults to a default-constructed Color) and returns a DrawListID.
									// NOTE(review): the screen_* declarations in this class default to DisplayServer::MAIN_WINDOW_ID while this one uses the literal 0 - confirm the two are equal.
									DrawListID draw_list_begin_for_screen(DisplayServer::WindowID p_screen = 0, const Color &p_clear_color = Color());
							 
						 
					
						
							
								
									
										
										
										
											2024-10-24 16:01:00 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Takes a framebuffer RID plus optional settings and returns a DrawListID.
									// Defaults: p_draw_flags = DRAW_DEFAULT_ALL, p_clear_color_values = empty vector, p_clear_depth_value = 1.0f,
									// p_clear_stencil_value = 0, p_region = default-constructed Rect2, p_breadcrumb = 0.
									// NOTE(review): how an empty p_region and p_breadcrumb are interpreted is not visible from this declaration - check the definition.
									DrawListID  draw_list_begin ( RID  p_framebuffer ,  BitField < DrawFlags >  p_draw_flags  =  DRAW_DEFAULT_ALL ,  const  Vector < Color >  & p_clear_color_values  =  Vector < Color > ( ) ,  float  p_clear_depth_value  =  1.0f ,  uint32_t  p_clear_stencil_value  =  0 ,  const  Rect2  & p_region  =  Rect2 ( ) ,  uint32_t  p_breadcrumb  =  0 ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Takes a draw list ID and a Color. NOTE(review): presumably sets the blend constant color used by that list - confirm in the definition.
									void  draw_list_set_blend_constants ( DrawListID  p_list ,  const  Color  & p_color ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Takes a draw list ID and a render pipeline RID. NOTE(review): presumably makes that pipeline the one used by later draws in the list - confirm.
									void  draw_list_bind_render_pipeline ( DrawListID  p_list ,  RID  p_render_pipeline ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Takes a draw list ID, a uniform set RID and a slot index. NOTE(review): p_index is presumably bounded by MAX_UNIFORM_SETS - confirm in the definition.
									void  draw_list_bind_uniform_set ( DrawListID  p_list ,  RID  p_uniform_set ,  uint32_t  p_index ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Takes a draw list ID and a vertex array RID. NOTE(review): presumably binds the array for later draws in the list - confirm.
									void  draw_list_bind_vertex_array ( DrawListID  p_list ,  RID  p_vertex_array ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Takes a draw list ID and an index array RID. NOTE(review): presumably binds the array for later indexed draws in the list - confirm.
									void  draw_list_bind_index_array ( DrawListID  p_list ,  RID  p_index_array ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Takes a draw list ID and a float width. NOTE(review): units and valid range of p_width are not visible here - check the definition.
									void  draw_list_set_line_width ( DrawListID  p_list ,  float  p_width ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Takes a draw list ID, a read-only data pointer and a size. NOTE(review): p_data_size is presumably in bytes and must match the bound pipeline's push constant size - confirm.
									void  draw_list_set_push_constant ( DrawListID  p_list ,  const  void  * p_data ,  uint32_t  p_data_size ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Takes a draw list ID and an indexed/non-indexed switch; p_instances defaults to 1 and p_procedural_vertices to 0.
									// NOTE(review): presumably p_procedural_vertices > 0 draws without a bound vertex array - confirm in the definition.
									void  draw_list_draw ( DrawListID  p_list ,  bool  p_use_indices ,  uint32_t  p_instances  =  1 ,  uint32_t  p_procedural_vertices  =  0 ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-09-20 21:05:50 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Takes a draw list ID, an indexed/non-indexed switch and a buffer RID; p_offset defaults to 0, p_draw_count to 1 and p_stride to 0.
									// NOTE(review): p_buffer presumably holds the indirect draw arguments, with p_offset/p_stride in bytes - confirm in the definition.
									void  draw_list_draw_indirect ( DrawListID  p_list ,  bool  p_use_indices ,  RID  p_buffer ,  uint32_t  p_offset  =  0 ,  uint32_t  p_draw_count  =  1 ,  uint32_t  p_stride  =  0 ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as the load action isn't `LOAD` and the
store action is `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers require that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozens/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when working around driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (especially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I discussed this with Dario and he is aware of it.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot
practically never renders directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: to fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!** Thus it was rewritten using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  draw_list_set_viewport ( DrawListID  p_list ,  const  Rect2  & p_rect ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  draw_list_enable_scissor ( DrawListID  p_list ,  const  Rect2  & p_rect ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  draw_list_disable_scissor ( DrawListID  p_list ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									uint32_t  draw_list_get_current_pass ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DrawListID  draw_list_switch_to_next_pass ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  draw_list_end ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								private :  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									/***********************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** COMPUTE LISTS ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/***********************/ 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-15 23:45:24 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									struct  ComputeList  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Binding record for one uniform-set slot of a compute list.
										// State (declared right after this struct) keeps MAX_UNIFORM_SETS of these in its `sets` array.
										struct  SetState  { 
											// NOTE(review): presumably the set format the currently bound pipeline expects for this slot - confirm in rendering_device.cpp.
											uint32_t  pipeline_expected_format  =  0 ; 
											// NOTE(review): presumably the format of the uniform set assigned to this slot - confirm.
											uint32_t  uniform_set_format  =  0 ; 
											// Driver-level (RDD) uniform set handle; presumably the driver-side counterpart of `uniform_set` below - confirm.
											RDD : : UniformSetID  uniform_set_driver_id ; 
											// RID of the uniform set associated with this slot.
											RID  uniform_set ; 
											// Defaults to false. NOTE(review): presumably flipped once the set has actually been bound - verify against the .cpp.
											bool  bound  =  false ; 
										} ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// Recording state carried by a compute list; instantiated as the `state` member of ComputeList.
										// The same type is also used for the `compute_list_barrier_state` member declared after ComputeList.
										struct  State  { 
											// One SetState entry per uniform-set slot, up to MAX_UNIFORM_SETS.
											SetState  sets [ MAX_UNIFORM_SETS ] ; 
											// NOTE(review): presumably the number of entries of `sets` currently in use - confirm.
											uint32_t  set_count  =  0 ; 
											// RID of the compute pipeline; NOTE(review): presumably the last one bound via compute_list_bind_compute_pipeline - confirm.
											RID  pipeline ; 
											// RID of the shader belonging to `pipeline` (assumption based on naming - confirm).
											RID  pipeline_shader ; 
											// Driver-level (RDD) shader handle; presumably the driver-side counterpart of `pipeline_shader` - confirm.
											RDD : : ShaderID  pipeline_shader_driver_id ; 
											// NOTE(review): presumably a hash of the shader's set layout, used to detect layout changes - confirm.
											uint32_t  pipeline_shader_layout_hash  =  0 ; 
											// Three zero-initialized components. NOTE(review): presumably the shader's local workgroup size in x/y/z - confirm.
											uint32_t  local_group_size [ 3 ]  =  {  0 ,  0 ,  0  } ; 
											2023-11-24 08:23:22 -03:00 
											// Zero-initialized byte storage of MAX_PUSH_CONSTANT_SIZE bytes for push constant data.
											uint8_t  push_constant_data [ MAX_PUSH_CONSTANT_SIZE ]  =  { } ; 
											// NOTE(review): presumably the number of valid bytes in `push_constant_data` - confirm.
											uint32_t  push_constant_size  =  0 ; 
											2024-05-03 14:23:38 -03:00 
											// Starts at 0. NOTE(review): presumably counts the dispatches recorded so far - confirm.
											uint32_t  dispatch_count  =  0 ; 
											2023-12-19 12:48:02 +01:00 
										}  state ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# ifdef DEBUG_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Bookkeeping used to validate compute list usage; instantiated as the `validation` member.
										// Only compiled when DEBUG_ENABLED is defined (see the surrounding #ifdef / #endif).
										struct  Validation  { 
											bool  active  =  true ;  // Means command buffer was not closed, so you can keep adding things.
											// Per-set tracking vectors. NOTE(review): presumably indexed by uniform set index and kept the same length - confirm.
											Vector < uint32_t >  set_formats ; 
											Vector < bool >  set_bound ; 
											Vector < RID >  set_rids ; 
											// Last pipeline set values.
											bool  pipeline_active  =  false ; 
											RID  pipeline_shader ; 
											// NOTE(review): presumably the first set index invalidated by the last pipeline change - confirm.
											uint32_t  invalid_set_from  =  0 ; 
											// NOTE(review): presumably the push constant size required by the last bound pipeline - confirm.
											uint32_t  pipeline_push_constant_size  =  0 ; 
											// Defaults to false. NOTE(review): presumably set once push constant data has been provided for the pipeline - confirm.
											bool  pipeline_push_constant_supplied  =  false ; 
										}  validation ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									ComputeList  * compute_list  =  nullptr ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									ComputeList : : State  compute_list_barrier_state ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-09-25 16:44:44 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								public :  
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									ComputeListID  compute_list_begin ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  compute_list_bind_compute_pipeline ( ComputeListID  p_list ,  RID  p_compute_pipeline ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  compute_list_bind_uniform_set ( ComputeListID  p_list ,  RID  p_uniform_set ,  uint32_t  p_index ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  compute_list_set_push_constant ( ComputeListID  p_list ,  const  void  * p_data ,  uint32_t  p_data_size ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  compute_list_dispatch ( ComputeListID  p_list ,  uint32_t  p_x_groups ,  uint32_t  p_y_groups ,  uint32_t  p_z_groups ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  compute_list_dispatch_threads ( ComputeListID  p_list ,  uint32_t  p_x_threads ,  uint32_t  p_y_threads ,  uint32_t  p_z_threads ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  compute_list_dispatch_indirect ( ComputeListID  p_list ,  RID  p_buffer ,  uint32_t  p_offset ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  compute_list_add_barrier ( ComputeListID  p_list ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-09-25 16:44:44 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  compute_list_end ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								private :  
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									/*************************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** TRANSFER WORKER ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/*************************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Per-worker state for transfer operations. Instances are referenced by pointer
									// from `transfer_worker_pool` and handled by the `_*_transfer_worker*` helpers declared after this struct.
									struct  TransferWorker  { 
										// NOTE(review): presumably this worker's position inside `transfer_worker_pool` - confirm.
										uint32_t  index  =  0 ; 
										// Driver-level (RDD) buffer handle used as staging storage (per its name - confirm usage in the .cpp).
										RDD : : BufferID  staging_buffer ; 
										// NOTE(review): presumably the largest single transfer, in bytes, this worker accepts - confirm.
										uint32_t  max_transfer_size  =  0 ; 
										// NOTE(review): presumably the bytes of `staging_buffer` currently occupied / allocated - confirm units and meaning.
										uint32_t  staging_buffer_size_in_use  =  0 ; 
										uint32_t  staging_buffer_size_allocated  =  0 ; 
										// Driver-level command buffer, command pool and fence handles owned by this worker.
										// NOTE(review): the commit notes earlier in this file show _wait_for_transfer_worker() waiting on
										// `command_fence` and then resetting `command_pool` - confirm against the current .cpp.
										RDD : : CommandBufferID  command_buffer ; 
										RDD : : CommandPoolID  command_pool ; 
										RDD : : FenceID  command_fence ; 
										2024-10-17 12:17:04 -03:00 
										// Texture barriers collected by this worker. NOTE(review): presumably consumed by _flush_barriers_for_transfer_worker() - confirm.
										LocalVector < RDD : : TextureBarrier >  texture_barriers ; 
										2024-03-15 14:13:31 -03:00 
										// Both default to false. NOTE(review): presumably track whether the command buffer is being recorded / has been submitted - confirm.
										bool  recording  =  false ; 
										bool  submitted  =  false ; 
										// NOTE(review): presumably serializes use of this worker across threads - confirm what it guards.
										BinaryMutex  thread_mutex ; 
										// Operation counters, all starting at 0. NOTE(review): exact semantics (processed vs. submitted vs. counter) must be confirmed in the .cpp.
										uint64_t  operations_processed  =  0 ; 
										uint64_t  operations_submitted  =  0 ; 
										uint64_t  operations_counter  =  0 ; 
										// NOTE(review): presumably guards the operation counters above - confirm.
										BinaryMutex  operations_mutex ; 
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									LocalVector < TransferWorker  * >  transfer_worker_pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  transfer_worker_pool_max_size  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									LocalVector < uint64_t >  transfer_worker_operation_used_by_draw ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									LocalVector < uint32_t >  transfer_worker_pool_available_list ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-17 12:17:04 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									LocalVector < RDD : : TextureBarrier >  transfer_worker_pool_texture_barriers ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									BinaryMutex  transfer_worker_pool_mutex ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-11 10:31:12 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									BinaryMutex  transfer_worker_pool_texture_barriers_mutex ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									ConditionVariable  transfer_worker_pool_condition ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									TransferWorker  * _acquire_transfer_worker ( uint32_t  p_transfer_size ,  uint32_t  p_required_align ,  uint32_t  & r_staging_offset ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  _release_transfer_worker ( TransferWorker  * p_transfer_worker ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  _end_transfer_worker ( TransferWorker  * p_transfer_worker ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-15 18:30:55 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  _submit_transfer_worker ( TransferWorker  * p_transfer_worker ,  VectorView < RDD : : SemaphoreID >  p_signal_semaphores  =  VectorView < RDD : : SemaphoreID > ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  _wait_for_transfer_worker ( TransferWorker  * p_transfer_worker ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-17 12:17:04 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  _flush_barriers_for_transfer_worker ( TransferWorker  * p_transfer_worker ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  _check_transfer_worker_operation ( uint32_t  p_transfer_worker_index ,  uint64_t  p_transfer_worker_operation ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  _check_transfer_worker_buffer ( Buffer  * p_buffer ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  _check_transfer_worker_texture ( Texture  * p_texture ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  _check_transfer_worker_vertex_array ( VertexArray  * p_vertex_array ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  _check_transfer_worker_index_array ( IndexArray  * p_index_array ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-17 12:17:04 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  _submit_transfer_workers ( RDD : : CommandBufferID  p_draw_command_buffer  =  RDD : : CommandBufferID ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-11 10:31:12 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  _submit_transfer_barriers ( RDD : : CommandBufferID  p_draw_command_buffer ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  _wait_for_transfer_workers ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  _free_transfer_workers ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									/***********************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** COMMAND GRAPH ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/***********************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									bool  _texture_make_mutable ( Texture  * p_texture ,  RID  p_texture_id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									bool  _buffer_make_mutable ( Buffer  * p_buffer ,  RID  p_buffer_id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									bool  _vertex_array_make_mutable ( VertexArray  * p_vertex_array ,  RID  p_resource_id ,  RDG : : ResourceTracker  * p_resource_tracker ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									bool  _index_array_make_mutable ( IndexArray  * p_index_array ,  RDG : : ResourceTracker  * p_resource_tracker ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									bool  _uniform_set_make_mutable ( UniformSet  * p_uniform_set ,  RID  p_resource_id ,  RDG : : ResourceTracker  * p_resource_tracker ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									bool  _dependency_make_mutable ( RID  p_id ,  RID  p_resource_id ,  RDG : : ResourceTracker  * p_resource_tracker ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									bool  _dependencies_make_mutable_recursive ( RID  p_id ,  RDG : : ResourceTracker  * p_resource_tracker ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									bool  _dependencies_make_mutable ( RID  p_id ,  RDG : : ResourceTracker  * p_resource_tracker ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RenderingDeviceGraph  draw_graph ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									/**************************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** QUEUE MANAGEMENT ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**************************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RDD : : CommandQueueFamilyID  main_queue_family ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RDD : : CommandQueueFamilyID  transfer_queue_family ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RDD : : CommandQueueFamilyID  present_queue_family ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RDD : : CommandQueueID  main_queue ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RDD : : CommandQueueID  transfer_queue ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RDD : : CommandQueueID  present_queue ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									/**************************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** FRAME MANAGEMENT ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**************************/ 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// This is the frame structure. There are normally
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// 3 of these (used for triple buffering), or 2
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// (double buffering). They are cycled constantly.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// It contains two command buffers, one that is
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// used internally for setting up (creating stuff)
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// and another used mostly for drawing.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// They also contain a list of things that need
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// to be disposed of when deleted, which can't
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// happen immediately due to the asynchronous
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// nature of the GPU. They will get deleted
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// when the frame is cycled.
 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									struct  Frame  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// List in usage order, from last to free to first to free.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										List < Buffer >  buffers_to_dispose_of ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										List < Texture >  textures_to_dispose_of ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										List < Framebuffer >  framebuffers_to_dispose_of ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										List < RDD : : SamplerID >  samplers_to_dispose_of ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										List < Shader >  shaders_to_dispose_of ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										List < UniformSet >  uniform_sets_to_dispose_of ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										List < RenderPipeline >  render_pipelines_to_dispose_of ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										List < ComputePipeline >  compute_pipelines_to_dispose_of ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// The command pool used by the command buffer.
 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										RDD : : CommandPoolID  command_pool ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// The command buffer used by the main thread when recording the frame.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										RDD : : CommandBufferID  command_buffer ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// Signaled by the command buffer submission. Present must wait on this semaphore.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										RDD : : SemaphoreID  semaphore ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// Signaled by the command buffer submission. Must wait on this fence before beginning command recording for the frame.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										RDD : : FenceID  fence ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										bool  fence_signaled  =  false ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-03-15 14:13:31 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// Semaphores the frame must wait on before executing the command buffer.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										LocalVector < RDD : : SemaphoreID >  semaphores_to_wait_on ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Swap chains prepared for drawing during the frame that must be presented.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										LocalVector < RDD : : SwapChainID >  swap_chains_to_present ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-10-15 18:30:55 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// Semaphores the transfer workers can use to wait before rendering the frame.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// This must have the same size as the transfer worker pool.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										TightLocalVector < RDD : : SemaphoreID >  transfer_worker_semaphores ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers require that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozens/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when working around driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (especially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I discussed this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// Extra command buffer pool used for driver workarounds or to reduce GPU bubbles by
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// splitting the final render pass to the swapchain into its own cmd buffer.
 
							 
						 
					
						
							
								
									
										
										
										
											2024-05-02 15:59:29 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										RDG : : CommandBufferPool  command_buffer_pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// A timestamp entry: a text description paired with a 64-bit value
										// (zero until assigned).
										// NOTE(review): none of the Frame members visible nearby use this type; the
										// timestamp_* vectors hold names and values separately — confirm it is still needed.
										struct  Timestamp  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											String  description ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint64_t  value  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										RDD : : QueryPoolID  timestamp_pool ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-10 14:12:24 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										TightLocalVector < String >  timestamp_names ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										TightLocalVector < uint64_t >  timestamp_cpu_values ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  timestamp_count  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										TightLocalVector < String >  timestamp_result_names ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										TightLocalVector < uint64_t >  timestamp_cpu_result_values ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										TightLocalVector < uint64_t >  timestamp_result_values ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  timestamp_result_count  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint64_t  index  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									uint32_t  max_timestamp_query_elements  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									int  frame  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									TightLocalVector < Frame >  frames ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									uint64_t  frames_drawn  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  _free_pending_resources ( int  p_frame ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									uint64_t  texture_memory  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint64_t  buffer_memory  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers require that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozens/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when working around driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (especially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I discussed this with Dario and he is aware of it.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot
practically never renders directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!** Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								protected :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  execute_chained_cmds ( bool  p_present_swap_chain , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											RenderingDeviceDriver : : FenceID  p_draw_fence , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											RenderingDeviceDriver : : SemaphoreID  p_dst_draw_semaphore_to_signal ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								public :  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  _free_internal ( RID  p_id ) ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as the load action isn't `LOAD` and
the store action is `DONT_CARE`. This saves VRAM (it also makes it
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers require that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozens/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when working around driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact was
very low.
But not anymore (especially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low number of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I discussed this with Dario and he is aware of it.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot
practically never renders directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!** Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  _begin_frame ( bool  p_presented  =  false ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  _end_frame ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  _execute_frame ( bool  p_present ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  _stall_for_previous_frames ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  _flush_and_stall_for_all_frames ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-09-25 16:44:44 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									template  < typename  T > 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  _free_rids ( T  & p_owner ,  const  char  * p_type ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-10-03 17:39:08 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# ifdef DEV_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									HashMap < RID ,  String >  resource_names ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2020-06-25 10:33:28 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								public :  
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									Error  initialize ( RenderingContextDriver  * p_context ,  DisplayServer : : WindowID  p_main_window  =  DisplayServer : : INVALID_WINDOW_ID ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  finalize ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-05-05 19:15:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  _set_max_fps ( int  p_max_fps ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  free ( RID  p_id ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-15 23:45:24 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2019-09-20 17:58:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									/****************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** Timing ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/****************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  capture_timestamp ( const  String  & p_name ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  get_captured_timestamps_count ( )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint64_t  get_captured_timestamps_frame ( )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint64_t  get_captured_timestamp_gpu_time ( uint32_t  p_index )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint64_t  get_captured_timestamp_cpu_time ( uint32_t  p_index )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									String  get_captured_timestamp_name ( uint32_t  p_index )  const ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-09-20 17:58:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2019-07-10 17:44:55 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									/****************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** LIMITS ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/****************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									uint64_t  limit_get ( Limit  p_limit )  const ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-07-10 17:44:55 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  swap_buffers ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-10-05 10:27:43 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									uint32_t  get_frame_delay ( )  const ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-15 23:45:24 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  submit ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  sync ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2020-04-18 20:30:57 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-07-02 20:14:19 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Selector passed to get_memory_usage() to choose which memory
									// figure is reported. Enumerator values are spelled out explicitly;
									// they match the implicit 0, 1, 2 numbering of the declaration order.
									enum MemoryType {
										// NOTE(review): presumably memory attributed to textures — confirm against get_memory_usage().
										MEMORY_TEXTURES = 0,
										// NOTE(review): presumably memory attributed to buffers — confirm against get_memory_usage().
										MEMORY_BUFFERS = 1,
										// NOTE(review): presumably the overall total — confirm against get_memory_usage().
										MEMORY_TOTAL = 2
									};
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									uint64_t  get_memory_usage ( MemoryType  p_type )  const ; 
							 
						 
					
						
							
								
									
										
										
										
											2020-05-01 09:34:23 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RenderingDevice  * create_local_device ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2020-04-18 20:30:57 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  set_resource_name ( RID  p_id ,  const  String  & p_name ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-01-23 22:21:54 -08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									void  draw_command_begin_label ( String  p_label_name ,  const  Color  & p_color  =  Color ( 1 ,  1 ,  1 ,  1 ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  draw_command_end_label ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-01-23 22:21:54 -08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									String  get_device_vendor_name ( )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									String  get_device_name ( )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DeviceType  get_device_type ( )  const ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									String  get_device_api_name ( )  const ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									String  get_device_api_version ( )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									String  get_device_pipeline_cache_uuid ( )  const ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-02-02 16:51:36 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-05-03 11:48:46 +03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									bool  is_composite_alpha_supported ( )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									uint64_t  get_driver_resource ( DriverResource  p_resource ,  RID  p_rid  =  RID ( ) ,  uint64_t  p_index  =  0 ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-29 12:52:19 +10:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-08-24 17:39:46 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									String  get_driver_and_device_memory_report ( )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Returns the name (String) for the tracked object type at p_type_index.
									// Note the index is 32-bit while get_tracked_object_type_count() returns uint64_t.
									String  get_tracked_object_name ( uint32_t  p_type_index )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Const accessor returning a uint64_t count of tracked object types.
									// NOTE(review): presumably the exclusive upper bound for the p_type / p_type_index arguments of the neighbouring getters — confirm.
									uint64_t  get_tracked_object_type_count ( )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Const accessor returning a uint64_t total for driver-side memory.
									// NOTE(review): units (presumably bytes) are not stated in this header — confirm.
									uint64_t  get_driver_total_memory ( )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Const accessor returning the number of driver-side allocations as a uint64_t.
									uint64_t  get_driver_allocation_count ( )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Per-object-type variant of get_driver_total_memory(): takes a 32-bit type index, returns uint64_t.
									// NOTE(review): behaviour for an out-of-range p_type is not visible here.
									uint64_t  get_driver_memory_by_object_type ( uint32_t  p_type )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Per-object-type variant of get_driver_allocation_count(): takes a 32-bit type index, returns uint64_t.
									uint64_t  get_driver_allocs_by_object_type ( uint32_t  p_type )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Const accessor returning a uint64_t total for device-side memory.
									// NOTE(review): units (presumably bytes) are not stated in this header — confirm.
									uint64_t  get_device_total_memory ( )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Const accessor returning the number of device-side allocations as a uint64_t.
									uint64_t  get_device_allocation_count ( )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Per-object-type variant of get_device_total_memory(): takes a 32-bit type index, returns uint64_t.
									// NOTE(review): behaviour for an out-of-range p_type is not visible here.
									uint64_t  get_device_memory_by_object_type ( uint32_t  p_type )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Per-object-type variant of get_device_allocation_count(): takes a 32-bit type index, returns uint64_t.
									uint64_t  get_device_allocs_by_object_type ( uint32_t  p_type )  const ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2019-06-15 23:45:24 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Static accessor returning a RenderingDevice pointer.
									// NOTE(review): presumably the process-wide instance; whether it can be null and who sets it is not visible in this part of the header.
									static  RenderingDevice  * get_singleton ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-10-21 20:56:42 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Non-const, takes no arguments and returns nothing.
									// NOTE(review): what "current" means (per thread or per process) is not visible in this part of the header — confirm in the implementation.
									void  make_current ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Default constructor; only declared here, the body lives out of line.
									RenderingDevice ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Destructor; only declared here, the body lives out of line.
									~ RenderingDevice ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-12-08 11:56:08 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								private :  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/*****************/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/**** BINDERS ****/ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/*****************/ 
							 
						 
					
						
							
								
									
										
										
										
											2022-12-08 11:56:08 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-04-21 12:16:45 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Binder: takes the texture format and view as Ref-counted objects and the initial data as a typed array of PackedByteArray.
									// p_data defaults to an empty Array, so it may be omitted. Returns a RID.
									RID  _texture_create ( const  Ref < RDTextureFormat >  & p_format ,  const  Ref < RDTextureView >  & p_view ,  const  TypedArray < PackedByteArray >  & p_data  =  Array ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Binder: takes a Ref-counted texture view plus the RID of an existing texture (p_with_texture) and returns a RID.
									RID  _texture_create_shared ( const  Ref < RDTextureView >  & p_view ,  RID  p_with_texture ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-03 00:07:32 -07:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Binder: like _texture_create_shared, but additionally selects a layer and starting mipmap of p_with_texture.
									// p_mipmaps defaults to 1 and p_slice_type to TEXTURE_SLICE_2D. Returns a RID.
									RID  _texture_create_shared_from_slice ( const  Ref < RDTextureView >  & p_view ,  RID  p_with_texture ,  uint32_t  p_layer ,  uint32_t  p_mipmap ,  uint32_t  p_mipmaps  =  1 ,  TextureSliceType  p_slice_type  =  TEXTURE_SLICE_2D ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-07-10 22:31:27 +10:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Binder: returns the format of the texture identified by p_rd_texture as a Ref-counted RDTextureFormat.
									// NOTE(review): result for an invalid RID is not visible here.
									Ref < RDTextureFormat >  _texture_get_format ( RID  p_rd_texture ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-06-24 10:58:36 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Binder: takes attachments as a typed array of RDAttachmentFormat plus a view count; returns a FramebufferFormatID.
									// p_view_count has no default here, unlike in _framebuffer_create.
									FramebufferFormatID  _framebuffer_format_create ( const  TypedArray < RDAttachmentFormat >  & p_attachments ,  uint32_t  p_view_count ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Binder: multipass counterpart of _framebuffer_format_create; additionally takes the passes as a typed array of RDFramebufferPass.
									FramebufferFormatID  _framebuffer_format_create_multipass ( const  TypedArray < RDAttachmentFormat >  & p_attachments ,  const  TypedArray < RDFramebufferPass >  & p_passes ,  uint32_t  p_view_count ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Binder: takes the attachment textures as a typed array of RID and returns a RID.
									// p_format_check defaults to INVALID_ID and p_view_count to 1.
									RID  _framebuffer_create ( const  TypedArray < RID >  & p_textures ,  FramebufferFormatID  p_format_check  =  INVALID_ID ,  uint32_t  p_view_count  =  1 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Binder: multipass counterpart of _framebuffer_create; additionally takes the passes as a typed array of RDFramebufferPass.
									// p_format_check defaults to INVALID_ID and p_view_count to 1.
									RID  _framebuffer_create_multipass ( const  TypedArray < RID >  & p_textures ,  const  TypedArray < RDFramebufferPass >  & p_passes ,  FramebufferFormatID  p_format_check  =  INVALID_ID ,  uint32_t  p_view_count  =  1 ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Binder: takes the sampler state as a Ref-counted RDSamplerState and returns a RID.
									RID  _sampler_create ( const  Ref < RDSamplerState >  & p_state ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2020-04-21 12:16:45 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Binder: takes the vertex attributes as a typed array of RDVertexAttribute and returns a VertexFormatID.
									VertexFormatID  _vertex_format_create ( const  TypedArray < RDVertexAttribute >  & p_vertex_formats ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2022-11-11 15:45:36 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Binder: takes a vertex count, a vertex format id and the source buffers as a typed array of RID; returns a RID.
									// p_offsets is optional and defaults to an empty Vector<int64_t>.
									RID  _vertex_array_create ( uint32_t  p_vertex_count ,  VertexFormatID  p_vertex_format ,  const  TypedArray < RID >  & p_src_buffers ,  const  Vector < int64_t >  & p_offsets  =  Vector < int64_t > ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
											
												Implement Binary Shader Compilation
* Added an extra stage before compiling shader, which is generating a binary blob.
* On Vulkan, this allows caching the SPIRV reflection information, which is expensive to parse.
* On other (future) RenderingDevices, it allows caching converted binary data, such as DXIL or MSL.
This PR makes the shader cache include the reflection information, hence editor startup times are significantly improved.
I tested this well and it appears to work, and I added a lot of consistency checks, but because it includes writing and reading binary information, rare bugs may pop up, so be aware.
There was not much of a choice for storing the reflection information, given shaders can be a lot, take a lot of space and take time to parse.
											 
										 
										
											2021-07-25 11:22:55 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Binder: takes shader source as a Ref-counted RDShaderSource and returns a Ref-counted RDShaderSPIRV.
									// p_allow_cache defaults to true.
									Ref < RDShaderSPIRV >  _shader_compile_spirv_from_source ( const  Ref < RDShaderSource >  & p_source ,  bool  p_allow_cache  =  true ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-08-16 14:51:29 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Binder: takes SPIR-V as a Ref-counted RDShaderSPIRV and returns the result as raw bytes (Vector<uint8_t>).
									// p_shader_name is optional (it has a default value).
									Vector < uint8_t >  _shader_compile_binary_from_spirv ( const  Ref < RDShaderSPIRV >  & p_bytecode ,  const  String  & p_shader_name  =  " " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Binder: takes SPIR-V as a Ref-counted RDShaderSPIRV and returns a RID.
									// p_shader_name is optional (it has a default value).
									RID  _shader_create_from_spirv ( const  Ref < RDShaderSPIRV >  & p_spirv ,  const  String  & p_shader_name  =  " " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-08-31 19:24:04 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Binder: takes the uniforms as a typed array of RDUniform, the shader RID and the set index within that shader; returns a RID.
									RID  _uniform_set_create ( const  TypedArray < RDUniform >  & p_uniforms ,  RID  p_shader ,  uint32_t  p_shader_set ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Binder: takes a buffer RID, a 32-bit offset and size, and the data as a byte vector; returns an Error code.
									// NOTE(review): how p_size is validated against p_data's length is not visible in this header.
									Error  _buffer_update_bind ( RID  p_buffer ,  uint32_t  p_offset ,  uint32_t  p_size ,  const  Vector < uint8_t >  & p_data ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2022-12-11 15:37:35 +03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Binder: takes each pipeline state block (rasterization, multisample, depth/stencil, blend) as a Ref-counted object
									// and the specialization constants as a typed array. No parameter has a default. Returns a RID.
									RID  _render_pipeline_create ( RID  p_shader ,  FramebufferFormatID  p_framebuffer_format ,  VertexFormatID  p_vertex_format ,  RenderPrimitive  p_render_primitive ,  const  Ref < RDPipelineRasterizationState >  & p_rasterization_state ,  const  Ref < RDPipelineMultisampleState >  & p_multisample_state ,  const  Ref < RDPipelineDepthStencilState >  & p_depth_stencil_state ,  const  Ref < RDPipelineColorBlendState >  & p_blend_state ,  BitField < PipelineDynamicStateFlags >  p_dynamic_state_flags ,  uint32_t  p_for_render_pass ,  const  TypedArray < RDPipelineSpecializationConstant >  & p_specialization_constants ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2021-07-09 16:48:28 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Binder: takes the shader RID and the specialization constants as a typed array of RDPipelineSpecializationConstant; returns a RID.
									RID  _compute_pipeline_create ( RID  p_shader ,  const  TypedArray < RDPipelineSpecializationConstant >  & p_specialization_constants ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Binder: takes the push-constant payload as a byte vector plus an explicit p_data_size.
									// NOTE(review): the required relationship between p_data_size and p_data's length is not visible in this header.
									void  _draw_list_set_push_constant ( DrawListID  p_list ,  const  Vector < uint8_t >  & p_data ,  uint32_t  p_data_size ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Binder: compute-list counterpart of _draw_list_set_push_constant; same byte-vector payload plus explicit p_data_size.
									void  _compute_list_set_push_constant ( ComputeListID  p_list ,  const  Vector < uint8_t >  & p_data ,  uint32_t  p_data_size ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								} ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2021-12-10 17:01:51 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : DeviceType )  
						 
					
						
							
								
									
										
										
										
											2021-08-29 12:52:19 +10:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : DriverResource )  
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : ShaderStage )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : ShaderLanguage )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : CompareOperator )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : DataFormat )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : TextureType )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : TextureSamples )  
						 
					
						
							
								
									
										
										
										
											2022-11-26 13:01:24 +03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								VARIANT_BITFIELD_CAST ( RenderingDevice : : TextureUsageBits )  
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : TextureSwizzle )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : TextureSliceType )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : SamplerFilter )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : SamplerRepeatMode )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : SamplerBorderColor )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : VertexFrequency )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : IndexBufferFormat )  
						 
					
						
							
								
									
										
										
										
											2022-12-15 13:27:57 +03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								VARIANT_BITFIELD_CAST ( RenderingDevice : : StorageBufferUsage )  
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : UniformType )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : RenderPrimitive )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : PolygonCullMode )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : PolygonFrontFace )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : StencilOperation )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : LogicOperation )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : BlendFactor )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : BlendOperation )  
						 
					
						
							
								
									
										
										
										
											2022-12-11 15:37:35 +03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								VARIANT_BITFIELD_CAST ( RenderingDevice : : PipelineDynamicStateFlags )  
						 
					
						
							
								
									
										
										
										
											2021-07-09 16:48:28 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : PipelineSpecializationConstantType )  
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : Limit )  
						 
					
						
							
								
									
										
										
										
											2021-07-02 20:14:19 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : MemoryType )  
						 
					
						
							
								
									
										
										
										
											2022-02-11 22:33:54 +11:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : Features )  
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : BreadcrumbMarker )  
						 
					
						
							
								
									
										
										
										
											2024-10-24 16:01:00 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								// Bitfield cast registration for RenderingDevice::DrawFlags.
								// No trailing semicolon: the other VARIANT_*_CAST invocations in this header omit it,
								// so the macro is self-terminating and a ';' here would be a stray empty declaration.
								VARIANT_BITFIELD_CAST(RenderingDevice::DrawFlags)
						 
					
						
							
								
									
										
										
										
											2020-04-19 23:19:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# ifndef DISABLE_DEPRECATED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// Bitfield cast registration for RenderingDevice::BarrierMask (only compiled when DISABLE_DEPRECATED is not defined).
								// No trailing semicolon: the other VARIANT_*_CAST invocations in this header omit it,
								// so the macro is self-terminating and a ';' here would be a stray empty declaration.
								VARIANT_BITFIELD_CAST(RenderingDevice::BarrierMask)
						 
					
						
							
								
									
										
										
										
											2024-10-24 16:01:00 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : InitialAction )  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VARIANT_ENUM_CAST ( RenderingDevice : : FinalAction )  
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2019-06-15 23:45:24 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								// Short alias: RD names the same type as RenderingDevice.
								typedef  RenderingDevice  RD ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2019-06-07 13:07:57 -03:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								# endif  // RENDERING_DEVICE_H