2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								/**************************************************************************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*  rendering_device_driver_vulkan.cpp                                    */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**************************************************************************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*                         This file is part of:                          */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*                             GODOT ENGINE                               */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*                        https://godotengine.org                         */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**************************************************************************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*                                                                        */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* Permission is hereby granted, free of charge, to any person obtaining  */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* a copy of this software and associated documentation files (the        */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* "Software"), to deal in the Software without restriction, including    */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* without limitation the rights to use, copy, modify, merge, publish,    */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* distribute, sublicense, and/or sell copies of the Software, and to     */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* permit persons to whom the Software is furnished to do so, subject to  */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* the following conditions:                                              */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*                                                                        */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* The above copyright notice and this permission notice shall be         */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* included in all copies or substantial portions of the Software.        */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*                                                                        */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**************************************************************************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# include  "rendering_device_driver_vulkan.h" 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# include  "core/config/project_settings.h" 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# include  "core/io/marshalls.h" 
  
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# include  "vulkan_hooks.h" 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# include  "thirdparty/misc/smolv.h" 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-05-05 19:15:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# if defined(ANDROID_ENABLED) 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# include  "platform/android/java_godot_wrapper.h" 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# include  "platform/android/os_android.h" 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# include  "platform/android/thread_jandroid.h" 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# if defined(SWAPPY_FRAME_PACING_ENABLED) 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# include  "thirdparty/swappy-frame-pacing/swappyVk.h" 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-02-04 17:35:39 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# define ARRAY_SIZE(a) std::size(a) 
  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-01-22 16:11:26 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# define PRINT_NATIVE_COMMANDS 0 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								/*****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** GENERIC ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-10-21 15:35:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# if defined(DEBUG_ENABLED) || defined(DEV_ENABLED) 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static  const  uint32_t  BREADCRUMB_BUFFER_ENTRIES  =  512u ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								static  const  VkFormat  RD_TO_VK_FORMAT [ RDD : : DATA_FORMAT_MAX ]  =  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R4G4_UNORM_PACK8 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R4G4B4A4_UNORM_PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B4G4R4A4_UNORM_PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R5G6B5_UNORM_PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B5G6R5_UNORM_PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R5G5B5A1_UNORM_PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B5G5R5A1_UNORM_PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A1R5G5B5_UNORM_PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8_SNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8_USCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8_SSCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8_SINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8_SRGB , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8_SNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8_USCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8_SSCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8_SINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8_SRGB , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8B8_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8B8_SNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8B8_USCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8B8_SSCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8B8_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8B8_SINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8B8_SRGB , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B8G8R8_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B8G8R8_SNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B8G8R8_USCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B8G8R8_SSCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B8G8R8_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B8G8R8_SINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B8G8R8_SRGB , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8B8A8_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8B8A8_SNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8B8A8_USCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8B8A8_SSCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8B8A8_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8B8A8_SINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R8G8B8A8_SRGB , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B8G8R8A8_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B8G8R8A8_SNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B8G8R8A8_USCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B8G8R8A8_SSCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B8G8R8A8_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B8G8R8A8_SINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B8G8R8A8_SRGB , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A8B8G8R8_UNORM_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A8B8G8R8_SNORM_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A8B8G8R8_USCALED_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A8B8G8R8_SSCALED_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A8B8G8R8_UINT_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A8B8G8R8_SINT_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A8B8G8R8_SRGB_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A2R10G10B10_UNORM_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A2R10G10B10_SNORM_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A2R10G10B10_USCALED_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A2R10G10B10_SSCALED_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A2R10G10B10_UINT_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A2R10G10B10_SINT_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A2B10G10R10_UNORM_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A2B10G10R10_SNORM_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A2B10G10R10_USCALED_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A2B10G10R10_SSCALED_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A2B10G10R10_UINT_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_A2B10G10R10_SINT_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16_SNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16_USCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16_SSCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16_SINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16_SFLOAT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16_SNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16_USCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16_SSCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16_SINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16_SFLOAT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16B16_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16B16_SNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16B16_USCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16B16_SSCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16B16_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16B16_SINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16B16_SFLOAT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16B16A16_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16B16A16_SNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16B16A16_USCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16B16A16_SSCALED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16B16A16_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16B16A16_SINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R16G16B16A16_SFLOAT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R32_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R32_SINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R32_SFLOAT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R32G32_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R32G32_SINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R32G32_SFLOAT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R32G32B32_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R32G32B32_SINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R32G32B32_SFLOAT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R32G32B32A32_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R32G32B32A32_SINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R32G32B32A32_SFLOAT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R64_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R64_SINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R64_SFLOAT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R64G64_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R64G64_SINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R64G64_SFLOAT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R64G64B64_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R64G64B64_SINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R64G64B64_SFLOAT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R64G64B64A64_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R64G64B64A64_SINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R64G64B64A64_SFLOAT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B10G11R11_UFLOAT_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_E5B9G9R9_UFLOAT_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_D16_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_X8_D24_UNORM_PACK32 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_D32_SFLOAT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_S8_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_D16_UNORM_S8_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_D24_UNORM_S8_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_D32_SFLOAT_S8_UINT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_BC1_RGB_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_BC1_RGB_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_BC1_RGBA_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_BC1_RGBA_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_BC2_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_BC2_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_BC3_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_BC3_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_BC4_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_BC4_SNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_BC5_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_BC5_SNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_BC6H_UFLOAT_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_BC6H_SFLOAT_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_BC7_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_BC7_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_EAC_R11_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_EAC_R11_SNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_EAC_R11G11_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_EAC_R11G11_SNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_4x4_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_4x4_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_5x4_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_5x4_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_5x5_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_5x5_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_6x5_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_6x5_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_6x6_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_6x6_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_8x5_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_8x5_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_8x6_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_8x6_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_8x8_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_8x8_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_10x5_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_10x5_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_10x6_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_10x6_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_10x8_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_10x8_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_10x10_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_10x10_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_12x10_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_12x10_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_12x12_UNORM_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_12x12_SRGB_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G8B8G8R8_422_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B8G8R8G8_422_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G8_B8R8_2PLANE_420_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G8_B8R8_2PLANE_422_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R10X6_UNORM_PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R10X6G10X6_UNORM_2PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R12X4_UNORM_PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R12X4G12X4_UNORM_2PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G16B16G16R16_422_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_B16G16R16G16_422_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G16_B16R16_2PLANE_420_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G16_B16R16_2PLANE_422_UNORM , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM , 
							 
						 
					
						
							
								
									
										
										
										
											2025-02-12 13:23:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK , 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								} ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-04-24 14:30:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								static  VkImageLayout  RD_TO_VK_LAYOUT [ RDD : : TEXTURE_LAYOUT_MAX ]  =  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_IMAGE_LAYOUT_UNDEFINED ,  // TEXTURE_LAYOUT_UNDEFINED
 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-17 12:17:04 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VK_IMAGE_LAYOUT_GENERAL ,  // TEXTURE_LAYOUT_GENERAL
 
							 
						 
					
						
							
								
									
										
										
										
											2024-04-24 14:30:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VK_IMAGE_LAYOUT_GENERAL ,  // TEXTURE_LAYOUT_STORAGE_OPTIMAL
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL ,  // TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL ,  // TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL ,  // TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL ,  // TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL ,  // TEXTURE_LAYOUT_COPY_SRC_OPTIMAL
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL ,  // TEXTURE_LAYOUT_COPY_DST_OPTIMAL
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL ,  // TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL ,  // TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL
 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR ,  // TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT ,  // TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL
 
							 
						 
					
						
							
								
									
										
										
										
											2024-04-24 14:30:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								} ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static  VkPipelineStageFlags  _rd_to_vk_pipeline_stages ( BitField < RDD : : PipelineStageBits >  p_stages )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkPipelineStageFlags  vk_flags  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_stages . has_flag ( RDD : : PIPELINE_STAGE_COPY_BIT )  | |  p_stages . has_flag ( RDD : : PIPELINE_STAGE_RESOLVE_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Transfer has been split into copy and resolve bits. Clear them and merge them into one bit.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_flags  | =  VK_PIPELINE_STAGE_TRANSFER_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										p_stages . clear_flag ( RDD : : PIPELINE_STAGE_COPY_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										p_stages . clear_flag ( RDD : : PIPELINE_STAGE_RESOLVE_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_stages . has_flag ( RDD : : PIPELINE_STAGE_CLEAR_STORAGE_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Vulkan should never use this as API_TRAIT_CLEAR_RESOURCES_WITH_VIEWS is not specified.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Therefore, storage is never cleared with an explicit command.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										p_stages . clear_flag ( RDD : : PIPELINE_STAGE_CLEAR_STORAGE_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// The rest of the flags have compatible numeric values with Vulkan.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  VkPipelineStageFlags ( p_stages )  |  vk_flags ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static  VkAccessFlags  _rd_to_vk_access_flags ( BitField < RDD : : BarrierAccessBits >  p_access )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkAccessFlags  vk_flags  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_access . has_flag ( RDD : : BARRIER_ACCESS_COPY_READ_BIT )  | |  p_access . has_flag ( RDD : : BARRIER_ACCESS_RESOLVE_READ_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_flags  | =  VK_ACCESS_TRANSFER_READ_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										p_access . clear_flag ( RDD : : BARRIER_ACCESS_COPY_READ_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										p_access . clear_flag ( RDD : : BARRIER_ACCESS_RESOLVE_READ_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_access . has_flag ( RDD : : BARRIER_ACCESS_COPY_WRITE_BIT )  | |  p_access . has_flag ( RDD : : BARRIER_ACCESS_RESOLVE_WRITE_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_flags  | =  VK_ACCESS_TRANSFER_WRITE_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										p_access . clear_flag ( RDD : : BARRIER_ACCESS_COPY_WRITE_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										p_access . clear_flag ( RDD : : BARRIER_ACCESS_RESOLVE_WRITE_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_access . has_flag ( RDD : : BARRIER_ACCESS_STORAGE_CLEAR_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Vulkan should never use this as API_TRAIT_CLEAR_RESOURCES_WITH_VIEWS is not specified.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Therefore, storage is never cleared with an explicit command.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										p_access . clear_flag ( RDD : : BARRIER_ACCESS_STORAGE_CLEAR_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// The rest of the flags have compatible numeric values with Vulkan.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  VkAccessFlags ( p_access )  |  vk_flags ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								// RDD::CompareOperator == VkCompareOp.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : COMPARE_OP_NEVER ,  VK_COMPARE_OP_NEVER ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : COMPARE_OP_LESS ,  VK_COMPARE_OP_LESS ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : COMPARE_OP_EQUAL ,  VK_COMPARE_OP_EQUAL ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : COMPARE_OP_LESS_OR_EQUAL ,  VK_COMPARE_OP_LESS_OR_EQUAL ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : COMPARE_OP_GREATER ,  VK_COMPARE_OP_GREATER ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : COMPARE_OP_NOT_EQUAL ,  VK_COMPARE_OP_NOT_EQUAL ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : COMPARE_OP_GREATER_OR_EQUAL ,  VK_COMPARE_OP_GREATER_OR_EQUAL ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : COMPARE_OP_ALWAYS ,  VK_COMPARE_OP_ALWAYS ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ARRAYS_COMPATIBLE_FIELDWISE ( Rect2i ,  VkRect2D ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								uint32_t  RenderingDeviceDriverVulkan : : SubgroupCapabilities : : supported_stages_flags_rd ( )  const  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  flags  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  VK_SHADER_STAGE_VERTEX_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										flags  + =  SHADER_STAGE_VERTEX_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										flags  + =  SHADER_STAGE_TESSELATION_CONTROL_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										flags  + =  SHADER_STAGE_TESSELATION_EVALUATION_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  VK_SHADER_STAGE_GEOMETRY_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// FIXME: Add shader stage geometry bit.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  VK_SHADER_STAGE_FRAGMENT_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										flags  + =  SHADER_STAGE_FRAGMENT_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  VK_SHADER_STAGE_COMPUTE_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										flags  + =  SHADER_STAGE_COMPUTE_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  flags ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								String  RenderingDeviceDriverVulkan : : SubgroupCapabilities : : supported_stages_desc ( )  const  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									String  res ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  VK_SHADER_STAGE_VERTEX_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , STAGE_VERTEX " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , STAGE_TESSELLATION_CONTROL " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , STAGE_TESSELLATION_EVALUATION " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  VK_SHADER_STAGE_GEOMETRY_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , STAGE_GEOMETRY " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  VK_SHADER_STAGE_FRAGMENT_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , STAGE_FRAGMENT " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  VK_SHADER_STAGE_COMPUTE_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , STAGE_COMPUTE " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// These are not defined on Android GRMBL.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  0x00000100  /* VK_SHADER_STAGE_RAYGEN_BIT_KHR */ )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , STAGE_RAYGEN_KHR " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  0x00000200  /* VK_SHADER_STAGE_ANY_HIT_BIT_KHR */ )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , STAGE_ANY_HIT_KHR " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  0x00000400  /* VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR */ )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , STAGE_CLOSEST_HIT_KHR " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  0x00000800  /* VK_SHADER_STAGE_MISS_BIT_KHR */ )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , STAGE_MISS_KHR " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  0x00001000  /* VK_SHADER_STAGE_INTERSECTION_BIT_KHR */ )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , STAGE_INTERSECTION_KHR " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  0x00002000  /* VK_SHADER_STAGE_CALLABLE_BIT_KHR */ )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , STAGE_CALLABLE_KHR " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  0x00000040  /* VK_SHADER_STAGE_TASK_BIT_NV */ )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , STAGE_TASK_NV " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_stages  &  0x00000080  /* VK_SHADER_STAGE_MESH_BIT_NV */ )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , STAGE_MESH_NV " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  res . substr ( 2 ) ;  // Remove first ", ".
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								uint32_t  RenderingDeviceDriverVulkan : : SubgroupCapabilities : : supported_operations_flags_rd ( )  const  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  flags  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_operations  &  VK_SUBGROUP_FEATURE_BASIC_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										flags  + =  SUBGROUP_BASIC_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_operations  &  VK_SUBGROUP_FEATURE_VOTE_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										flags  + =  SUBGROUP_VOTE_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_operations  &  VK_SUBGROUP_FEATURE_ARITHMETIC_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										flags  + =  SUBGROUP_ARITHMETIC_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_operations  &  VK_SUBGROUP_FEATURE_BALLOT_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										flags  + =  SUBGROUP_BALLOT_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_operations  &  VK_SUBGROUP_FEATURE_SHUFFLE_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										flags  + =  SUBGROUP_SHUFFLE_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_operations  &  VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										flags  + =  SUBGROUP_SHUFFLE_RELATIVE_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_operations  &  VK_SUBGROUP_FEATURE_CLUSTERED_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										flags  + =  SUBGROUP_CLUSTERED_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_operations  &  VK_SUBGROUP_FEATURE_QUAD_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										flags  + =  SUBGROUP_QUAD_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  flags ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								String  RenderingDeviceDriverVulkan : : SubgroupCapabilities : : supported_operations_desc ( )  const  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									String  res ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_operations  &  VK_SUBGROUP_FEATURE_BASIC_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , FEATURE_BASIC " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_operations  &  VK_SUBGROUP_FEATURE_VOTE_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , FEATURE_VOTE " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_operations  &  VK_SUBGROUP_FEATURE_ARITHMETIC_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , FEATURE_ARITHMETIC " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_operations  &  VK_SUBGROUP_FEATURE_BALLOT_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , FEATURE_BALLOT " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_operations  &  VK_SUBGROUP_FEATURE_SHUFFLE_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , FEATURE_SHUFFLE " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_operations  &  VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , FEATURE_SHUFFLE_RELATIVE " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_operations  &  VK_SUBGROUP_FEATURE_CLUSTERED_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , FEATURE_CLUSTERED " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_operations  &  VK_SUBGROUP_FEATURE_QUAD_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , FEATURE_QUAD " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( supported_operations  &  VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										res  + =  " , FEATURE_PARTITIONED_NV " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  res . substr ( 2 ) ;  // Remove first ", ".
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** GENERIC ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : _register_requested_device_extension ( const  CharString  & p_extension_name ,  bool  p_required )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND ( requested_device_extensions . has ( p_extension_name ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									requested_device_extensions [ p_extension_name ]  =  p_required ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Error  RenderingDeviceDriverVulkan : : _initialize_device_extensions ( )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									enabled_device_extension_names . clear ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_register_requested_device_extension ( VK_KHR_SWAPCHAIN_EXTENSION_NAME ,  true ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_register_requested_device_extension ( VK_KHR_MULTIVIEW_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_register_requested_device_extension ( VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									_register_requested_device_extension ( VK_EXT_FRAGMENT_DENSITY_MAP_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									_register_requested_device_extension ( VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_register_requested_device_extension ( VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_register_requested_device_extension ( VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_register_requested_device_extension ( VK_KHR_16BIT_STORAGE_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_register_requested_device_extension ( VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_register_requested_device_extension ( VK_KHR_MAINTENANCE_2_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_register_requested_device_extension ( VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_register_requested_device_extension ( VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-04-28 13:38:37 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									_register_requested_device_extension ( VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-12-05 18:32:45 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									_register_requested_device_extension ( VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-07-24 09:22:44 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									_register_requested_device_extension ( VK_KHR_VULKAN_MEMORY_MODEL_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-02-12 13:23:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									_register_requested_device_extension ( VK_EXT_TEXTURE_COMPRESSION_ASTC_HDR_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// We don't actually use this extension, but some runtime components on some platforms
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// can and will fill the validation layers with useless info otherwise if not enabled.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_register_requested_device_extension ( VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( Engine : : get_singleton ( ) - > is_generate_spirv_debug_info_enabled ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										_register_requested_device_extension ( VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME ,  true ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# if defined(VK_TRACK_DEVICE_MEMORY) 
  
						 
					
						
							
								
									
										
										
										
											2024-08-24 17:39:46 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( Engine : : get_singleton ( ) - > is_extra_gpu_memory_tracking_enabled ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										_register_requested_device_extension ( VK_EXT_DEVICE_MEMORY_REPORT_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_register_requested_device_extension ( VK_EXT_DEVICE_FAULT_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									{ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Debug marker extensions.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Should be last element in the array.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# ifdef DEV_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										bool  want_debug_markers  =  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# else 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										bool  want_debug_markers  =  OS : : get_singleton ( ) - > is_stdout_verbose ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( want_debug_markers )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											_register_requested_device_extension ( VK_EXT_DEBUG_MARKER_EXTENSION_NAME ,  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									uint32_t  device_extension_count  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkResult  err  =  vkEnumerateDeviceExtensionProperties ( physical_device ,  nullptr ,  & device_extension_count ,  nullptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  ERR_CANT_CREATE ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( device_extension_count  = =  0 ,  ERR_CANT_CREATE ,  " vkEnumerateDeviceExtensionProperties failed to find any extensions \n \n Do you have a compatible Vulkan installable client driver (ICD) installed? " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									TightLocalVector < VkExtensionProperties >  device_extensions ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									device_extensions . resize ( device_extension_count ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									err  =  vkEnumerateDeviceExtensionProperties ( physical_device ,  nullptr ,  & device_extension_count ,  device_extensions . ptr ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  ERR_CANT_CREATE ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-05-05 19:15:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# if defined(SWAPPY_FRAME_PACING_ENABLED) 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( swappy_frame_pacer_enable )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										char  * * swappy_required_extensions ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  swappy_required_extensions_count  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Determine number of extensions required by Swappy frame pacer.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										SwappyVk_determineDeviceExtensions ( physical_device ,  device_extension_count ,  device_extensions . ptr ( ) ,  & swappy_required_extensions_count ,  nullptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( swappy_required_extensions_count  <  device_extension_count )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// Determine the actual extensions.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											swappy_required_extensions  =  ( char  * * ) malloc ( swappy_required_extensions_count  *  sizeof ( char  * ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											char  * pRequiredExtensionsData  =  ( char  * ) malloc ( swappy_required_extensions_count  *  ( VK_MAX_EXTENSION_NAME_SIZE  +  1 ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											for  ( uint32_t  i  =  0 ;  i  <  swappy_required_extensions_count ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												swappy_required_extensions [ i ]  =  & pRequiredExtensionsData [ i  *  ( VK_MAX_EXTENSION_NAME_SIZE  +  1 ) ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											SwappyVk_determineDeviceExtensions ( physical_device ,  device_extension_count , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													device_extensions . ptr ( ) ,  & swappy_required_extensions_count ,  swappy_required_extensions ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// Enable extensions requested by Swappy.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											for  ( uint32_t  i  =  0 ;  i  <  swappy_required_extensions_count ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												CharString  extension_name ( swappy_required_extensions [ i ] ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												if  ( requested_device_extensions . has ( extension_name ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													enabled_device_extension_names . insert ( extension_name ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											free ( pRequiredExtensionsData ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											free ( swappy_required_extensions ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# ifdef DEV_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  device_extension_count ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										print_verbose ( String ( " VULKAN: Found device extension  " )  +  String : : utf8 ( device_extensions [ i ] . extensionName ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Enable all extensions that are supported and requested.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  device_extension_count ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										CharString  extension_name ( device_extensions [ i ] . extensionName ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( requested_device_extensions . has ( extension_name ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											enabled_device_extension_names . insert ( extension_name ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Now check our requested extensions.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( KeyValue < CharString ,  bool >  & requested_extension  :  requested_device_extensions )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( ! enabled_device_extension_names . has ( requested_extension . key ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( requested_extension . value )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												ERR_FAIL_V_MSG ( ERR_BUG ,  String ( " Required extension  " )  +  String : : utf8 ( requested_extension . key )  +  String ( "  not found. " ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												print_verbose ( String ( " Optional extension  " )  +  String : : utf8 ( requested_extension . key )  +  String ( "  not found " ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  OK ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Error  RenderingDeviceDriverVulkan : : _check_device_features ( )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkGetPhysicalDeviceFeatures ( physical_device ,  & physical_device_features ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Check for required features.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ! physical_device_features . imageCubeArray  | |  ! physical_device_features . independentBlend )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										String  error_string  =  vformat ( " Your GPU (%s) does not support the following features which are required to use Vulkan-based renderers in Godot: \n \n " ,  context_device . name ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( ! physical_device_features . imageCubeArray )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											error_string  + =  " - No support for image cube arrays. \n " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( ! physical_device_features . independentBlend )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											error_string  + =  " - No support for independentBlend. \n " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										error_string  + =  " \n This is usually a hardware limitation, so updating graphics drivers won't help in most cases. " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# if defined(ANDROID_ENABLED) || defined(IOS_ENABLED) 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Android/iOS platform ports currently don't exit themselves when this method returns `ERR_CANT_CREATE`.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										OS : : get_singleton ( ) - > alert ( error_string  +  " \n Click OK to exit (black screen will be visible). " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# else 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										OS : : get_singleton ( ) - > alert ( error_string  +  " \n Click OK to exit. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return  ERR_CANT_CREATE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Opt-in to the features we actually need/use. These can be changed in the future.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// We do this for multiple reasons:
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//	1. Certain features (like sparse* stuff) cause unnecessary internal driver allocations.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//	2. Others like shaderStorageImageMultisample are a huge red flag
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//	   (MSAA + Storage is rarely needed).
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//	3. Most features when turned off aren't actually off (we just promise the driver not to use them)
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//	   and it is validation what will complain. This allows us to target a minimum baseline.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// TODO: Allow the user to override these settings (i.e. turn off more stuff) using profiles
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// so they can target a broad range of HW. For example Mali HW does not have
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// shaderClipDistance/shaderCullDistance; thus validation would complain if such feature is used;
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// allowing them to fix the problem without even owning Mali HW to test on.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									//
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// The excluded features are:
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// - robustBufferAccess (can hamper performance on some hardware)
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// - occlusionQueryPrecise
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// - pipelineStatisticsQuery
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// - shaderStorageImageMultisample (unsupported by Intel Arc, prevents from using MSAA storage accidentally)
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// - shaderResourceResidency
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// - sparseBinding (we don't use sparse features and enabling them cause extra internal allocations inside the Vulkan driver we don't need)
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// - sparseResidencyBuffer
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// - sparseResidencyImage2D
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// - sparseResidencyImage3D
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// - sparseResidency2Samples
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// - sparseResidency4Samples
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// - sparseResidency8Samples
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// - sparseResidency16Samples
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// - sparseResidencyAliased
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// - inheritedQueries
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# define VK_DEVICEFEATURE_ENABLE_IF(x)                             \ 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( physical_device_features . x )  {                              \
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										requested_device_features . x  =  physical_device_features . x ;  \
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else                                                         \
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										( ( void ) 0 ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									requested_device_features  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( fullDrawIndexUint32 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( imageCubeArray ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( independentBlend ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( geometryShader ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( tessellationShader ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( sampleRateShading ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( dualSrcBlend ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( logicOp ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( multiDrawIndirect ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( drawIndirectFirstInstance ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( depthClamp ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( depthBiasClamp ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( fillModeNonSolid ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( depthBounds ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( wideLines ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( largePoints ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( alphaToOne ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( multiViewport ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( samplerAnisotropy ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( textureCompressionETC2 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( textureCompressionASTC_LDR ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( textureCompressionBC ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( vertexPipelineStoresAndAtomics ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( fragmentStoresAndAtomics ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( shaderTessellationAndGeometryPointSize ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( shaderImageGatherExtended ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( shaderStorageImageExtendedFormats ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( shaderStorageImageReadWithoutFormat ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( shaderStorageImageWriteWithoutFormat ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( shaderUniformBufferArrayDynamicIndexing ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( shaderSampledImageArrayDynamicIndexing ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( shaderStorageBufferArrayDynamicIndexing ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( shaderStorageImageArrayDynamicIndexing ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( shaderClipDistance ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( shaderCullDistance ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( shaderFloat64 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( shaderInt64 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( shaderInt16 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( shaderResourceMinLod ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_DEVICEFEATURE_ENABLE_IF ( variableMultisampleRate ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  OK ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Error  RenderingDeviceDriverVulkan : : _check_device_capabilities ( )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Fill device family and version.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									device_capabilities . device_family  =  DEVICE_VULKAN ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									device_capabilities . version_major  =  VK_API_VERSION_MAJOR ( physical_device_properties . apiVersion ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									device_capabilities . version_minor  =  VK_API_VERSION_MINOR ( physical_device_properties . apiVersion ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// References:
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VK_KHR_multiview.html
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// https://www.khronos.org/blog/vulkan-subgroup-tutorial
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  RenderingContextDriverVulkan : : Functions  & functions  =  context_driver - > functions_get ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( functions . GetPhysicalDeviceFeatures2  ! =  nullptr )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// We must check that the corresponding extension is present before assuming a feature as enabled.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// See also: https://github.com/godotengine/godot/issues/65409
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										void  * next_features  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkPhysicalDeviceVulkan12Features  device_features_vk_1_2  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkPhysicalDeviceShaderFloat16Int8FeaturesKHR  shader_features  =  { } ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-12-05 18:32:45 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										VkPhysicalDeviceBufferDeviceAddressFeaturesKHR  buffer_device_address_features  =  { } ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-07-24 09:22:44 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										VkPhysicalDeviceVulkanMemoryModelFeaturesKHR  vulkan_memory_model_features  =  { } ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										VkPhysicalDeviceFragmentShadingRateFeaturesKHR  fsr_features  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkPhysicalDeviceFragmentDensityMapFeaturesEXT  fdm_features  =  { } ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										VkPhysicalDevice16BitStorageFeaturesKHR  storage_feature  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkPhysicalDeviceMultiviewFeatures  multiview_features  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkPhysicalDevicePipelineCreationCacheControlFeatures  pipeline_cache_control_features  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										const  bool  use_1_2_features  =  physical_device_properties . apiVersion  > =  VK_API_VERSION_1_2 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( use_1_2_features )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											device_features_vk_1_2 . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											device_features_vk_1_2 . pNext  =  next_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											next_features  =  & device_features_vk_1_2 ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-12-05 18:32:45 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( enabled_device_extension_names . has ( VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												shader_features . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												shader_features . pNext  =  next_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												next_features  =  & shader_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( enabled_device_extension_names . has ( VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												buffer_device_address_features . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												buffer_device_address_features . pNext  =  next_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												next_features  =  & buffer_device_address_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2025-07-24 09:22:44 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											if  ( enabled_device_extension_names . has ( VK_KHR_VULKAN_MEMORY_MODEL_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vulkan_memory_model_features . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vulkan_memory_model_features . pNext  =  next_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												next_features  =  & vulkan_memory_model_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( enabled_device_extension_names . has ( VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											fsr_features . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fsr_features . pNext  =  next_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											next_features  =  & fsr_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( enabled_device_extension_names . has ( VK_EXT_FRAGMENT_DENSITY_MAP_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fdm_features . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_FEATURES_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fdm_features . pNext  =  next_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											next_features  =  & fdm_features ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( enabled_device_extension_names . has ( VK_KHR_16BIT_STORAGE_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											storage_feature . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											storage_feature . pNext  =  next_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											next_features  =  & storage_feature ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( enabled_device_extension_names . has ( VK_KHR_MULTIVIEW_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											multiview_features . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											multiview_features . pNext  =  next_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											next_features  =  & multiview_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( enabled_device_extension_names . has ( VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											pipeline_cache_control_features . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											pipeline_cache_control_features . pNext  =  next_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											next_features  =  & pipeline_cache_control_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkPhysicalDeviceFeatures2  device_features_2  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										device_features_2 . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										device_features_2 . pNext  =  next_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										functions . GetPhysicalDeviceFeatures2 ( physical_device ,  & device_features_2 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( use_1_2_features )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# ifdef MACOS_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											ERR_FAIL_COND_V_MSG ( ! device_features_vk_1_2 . shaderSampledImageArrayNonUniformIndexing ,  ERR_CANT_CREATE ,  " Your GPU doesn't support shaderSampledImageArrayNonUniformIndexing which is required to use the Vulkan-based renderers in Godot. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( enabled_device_extension_names . has ( VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												shader_capabilities . shader_float16_is_supported  =  device_features_vk_1_2 . shaderFloat16 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												shader_capabilities . shader_int8_is_supported  =  device_features_vk_1_2 . shaderInt8 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2024-12-05 18:32:45 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											if  ( enabled_device_extension_names . has ( VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												buffer_device_address_support  =  device_features_vk_1_2 . bufferDeviceAddress ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2025-07-24 09:22:44 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											if  ( enabled_device_extension_names . has ( VK_KHR_VULKAN_MEMORY_MODEL_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vulkan_memory_model_support  =  device_features_vk_1_2 . vulkanMemoryModel ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vulkan_memory_model_device_scope_support  =  device_features_vk_1_2 . vulkanMemoryModelDeviceScope ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( enabled_device_extension_names . has ( VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												shader_capabilities . shader_float16_is_supported  =  shader_features . shaderFloat16 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												shader_capabilities . shader_int8_is_supported  =  shader_features . shaderInt8 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2024-12-05 18:32:45 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											if  ( enabled_device_extension_names . has ( VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												buffer_device_address_support  =  buffer_device_address_features . bufferDeviceAddress ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2025-07-24 09:22:44 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											if  ( enabled_device_extension_names . has ( VK_KHR_VULKAN_MEMORY_MODEL_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vulkan_memory_model_support  =  vulkan_memory_model_features . vulkanMemoryModel ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vulkan_memory_model_device_scope_support  =  vulkan_memory_model_features . vulkanMemoryModelDeviceScope ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( enabled_device_extension_names . has ( VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											fsr_capabilities . pipeline_supported  =  fsr_features . pipelineFragmentShadingRate ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fsr_capabilities . primitive_supported  =  fsr_features . primitiveFragmentShadingRate ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fsr_capabilities . attachment_supported  =  fsr_features . attachmentFragmentShadingRate ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										if  ( enabled_device_extension_names . has ( VK_EXT_FRAGMENT_DENSITY_MAP_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fdm_capabilities . attachment_supported  =  fdm_features . fragmentDensityMap ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fdm_capabilities . dynamic_attachment_supported  =  fdm_features . fragmentDensityMapDynamic ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fdm_capabilities . non_subsampled_images_supported  =  fdm_features . fragmentDensityMapNonSubsampledImages ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Multiple VRS techniques can't co-exist during the existence of one device, so we must
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// choose one at creation time and only report one of them as available.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										_choose_vrs_capabilities ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										if  ( enabled_device_extension_names . has ( VK_KHR_MULTIVIEW_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											multiview_capabilities . is_supported  =  multiview_features . multiview ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											multiview_capabilities . geometry_shader_is_supported  =  multiview_features . multiviewGeometryShader ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											multiview_capabilities . tessellation_shader_is_supported  =  multiview_features . multiviewTessellationShader ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( enabled_device_extension_names . has ( VK_KHR_16BIT_STORAGE_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											storage_buffer_capabilities . storage_buffer_16_bit_access_is_supported  =  storage_feature . storageBuffer16BitAccess ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											storage_buffer_capabilities . uniform_and_storage_buffer_16_bit_access_is_supported  =  storage_feature . uniformAndStorageBuffer16BitAccess ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											storage_buffer_capabilities . storage_push_constant_16_is_supported  =  storage_feature . storagePushConstant16 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											storage_buffer_capabilities . storage_input_output_16  =  storage_feature . storageInputOutput16 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( enabled_device_extension_names . has ( VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											pipeline_cache_control_support  =  pipeline_cache_control_features . pipelineCreationCacheControl ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( enabled_device_extension_names . has ( VK_EXT_DEVICE_FAULT_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											device_fault_support  =  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# if defined(VK_TRACK_DEVICE_MEMORY) 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( enabled_device_extension_names . has ( VK_EXT_DEVICE_MEMORY_REPORT_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											device_memory_report_support  =  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( functions . GetPhysicalDeviceProperties2  ! =  nullptr )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										void  * next_properties  =  nullptr ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										VkPhysicalDeviceFragmentShadingRatePropertiesKHR  fsr_properties  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkPhysicalDeviceFragmentDensityMapPropertiesEXT  fdm_properties  =  { } ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										VkPhysicalDeviceMultiviewProperties  multiview_properties  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkPhysicalDeviceSubgroupProperties  subgroup_properties  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkPhysicalDeviceSubgroupSizeControlProperties  subgroup_size_control_properties  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkPhysicalDeviceProperties2  physical_device_properties_2  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										const  bool  use_1_1_properties  =  physical_device_properties . apiVersion  > =  VK_API_VERSION_1_1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( use_1_1_properties )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											subgroup_properties . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											subgroup_properties . pNext  =  next_properties ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											next_properties  =  & subgroup_properties ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											subgroup_capabilities . size_control_is_supported  =  enabled_device_extension_names . has ( VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( subgroup_capabilities . size_control_is_supported )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												subgroup_size_control_properties . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												subgroup_size_control_properties . pNext  =  next_properties ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												next_properties  =  & subgroup_size_control_properties ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( multiview_capabilities . is_supported )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											multiview_properties . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											multiview_properties . pNext  =  next_properties ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											next_properties  =  & multiview_properties ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										if  ( fsr_capabilities . attachment_supported )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fsr_properties . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fsr_properties . pNext  =  next_properties ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											next_properties  =  & fsr_properties ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( fdm_capabilities . attachment_supported )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fdm_properties . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_PROPERTIES_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fdm_properties . pNext  =  next_properties ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											next_properties  =  & fdm_properties ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										physical_device_properties_2 . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										physical_device_properties_2 . pNext  =  next_properties ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										functions . GetPhysicalDeviceProperties2 ( physical_device ,  & physical_device_properties_2 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										subgroup_capabilities . size  =  subgroup_properties . subgroupSize ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										subgroup_capabilities . min_size  =  subgroup_properties . subgroupSize ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										subgroup_capabilities . max_size  =  subgroup_properties . subgroupSize ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										subgroup_capabilities . supported_stages  =  subgroup_properties . supportedStages ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										subgroup_capabilities . supported_operations  =  subgroup_properties . supportedOperations ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Note: quadOperationsInAllStages will be true if:
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// - supportedStages has VK_SHADER_STAGE_ALL_GRAPHICS + VK_SHADER_STAGE_COMPUTE_BIT.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// - supportedOperations has VK_SUBGROUP_FEATURE_QUAD_BIT.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										subgroup_capabilities . quad_operations_in_all_stages  =  subgroup_properties . quadOperationsInAllStages ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( subgroup_capabilities . size_control_is_supported  & &  ( subgroup_size_control_properties . requiredSubgroupSizeStages  &  VK_SHADER_STAGE_COMPUTE_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											subgroup_capabilities . min_size  =  subgroup_size_control_properties . minSubgroupSize ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											subgroup_capabilities . max_size  =  subgroup_size_control_properties . maxSubgroupSize ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										if  ( fsr_capabilities . pipeline_supported  | |  fsr_capabilities . primitive_supported  | |  fsr_capabilities . attachment_supported )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											print_verbose ( " - Vulkan Fragment Shading Rate supported: " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( fsr_capabilities . pipeline_supported )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												print_verbose ( "   Pipeline fragment shading rate " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											if  ( fsr_capabilities . primitive_supported )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												print_verbose ( "   Primitive fragment shading rate " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											if  ( fsr_capabilities . attachment_supported )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												// TODO: Expose these somehow to the end user.
 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												fsr_capabilities . min_texel_size . x  =  fsr_properties . minFragmentShadingRateAttachmentTexelSize . width ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												fsr_capabilities . min_texel_size . y  =  fsr_properties . minFragmentShadingRateAttachmentTexelSize . height ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												fsr_capabilities . max_texel_size . x  =  fsr_properties . maxFragmentShadingRateAttachmentTexelSize . width ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												fsr_capabilities . max_texel_size . y  =  fsr_properties . maxFragmentShadingRateAttachmentTexelSize . height ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												fsr_capabilities . max_fragment_size . x  =  fsr_properties . maxFragmentSize . width ;  // either 4 or 8
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												fsr_capabilities . max_fragment_size . y  =  fsr_properties . maxFragmentSize . height ;  // generally the same as width
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												print_verbose ( String ( "   Attachment fragment shading rate " )  + 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														String ( " , min texel size: ( " )  +  itos ( fsr_capabilities . min_texel_size . x )  +  String ( " ,  " )  +  itos ( fsr_capabilities . min_texel_size . y )  +  String ( " ) " )  + 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														String ( " , max texel size: ( " )  +  itos ( fsr_capabilities . max_texel_size . x )  +  String ( " ,  " )  +  itos ( fsr_capabilities . max_texel_size . y )  +  String ( " ) " )  + 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														String ( " , max fragment size: ( " )  +  itos ( fsr_capabilities . max_fragment_size . x )  +  String ( " ,  " )  +  itos ( fsr_capabilities . max_fragment_size . y )  +  String ( " ) " ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											print_verbose ( " - Vulkan Variable Rate Shading not supported " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( fdm_capabilities . attachment_supported  | |  fdm_capabilities . dynamic_attachment_supported  | |  fdm_capabilities . non_subsampled_images_supported )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											print_verbose ( " - Vulkan Fragment Density Map supported " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											fdm_capabilities . min_texel_size . x  =  fdm_properties . minFragmentDensityTexelSize . width ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fdm_capabilities . min_texel_size . y  =  fdm_properties . minFragmentDensityTexelSize . height ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fdm_capabilities . max_texel_size . x  =  fdm_properties . maxFragmentDensityTexelSize . width ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fdm_capabilities . max_texel_size . y  =  fdm_properties . maxFragmentDensityTexelSize . height ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fdm_capabilities . invocations_supported  =  fdm_properties . fragmentDensityInvocations ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											if  ( fdm_capabilities . dynamic_attachment_supported )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												print_verbose ( "   - dynamic fragment density map supported " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											if  ( fdm_capabilities . non_subsampled_images_supported )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												print_verbose ( "   - non-subsampled images supported " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										}  else  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											print_verbose ( " - Vulkan Fragment Density Map not supported " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( multiview_capabilities . is_supported )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											multiview_capabilities . max_view_count  =  multiview_properties . maxMultiviewViewCount ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											multiview_capabilities . max_instance_count  =  multiview_properties . maxMultiviewInstanceIndex ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											print_verbose ( " - Vulkan multiview supported: " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											print_verbose ( "   max view count:  "  +  itos ( multiview_capabilities . max_view_count ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											print_verbose ( "   max instances:  "  +  itos ( multiview_capabilities . max_instance_count ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											print_verbose ( " - Vulkan multiview not supported " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										print_verbose ( " - Vulkan subgroup: " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										print_verbose ( "   size:  "  +  itos ( subgroup_capabilities . size ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										print_verbose ( "   min size:  "  +  itos ( subgroup_capabilities . min_size ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										print_verbose ( "   max size:  "  +  itos ( subgroup_capabilities . max_size ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										print_verbose ( "   stages:  "  +  subgroup_capabilities . supported_stages_desc ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										print_verbose ( "   supported ops:  "  +  subgroup_capabilities . supported_operations_desc ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( subgroup_capabilities . quad_operations_in_all_stages )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											print_verbose ( "   quad operations in all stages " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  OK ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : _choose_vrs_capabilities ( )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									bool  prefer_fdm_on_qualcomm  =  physical_device_properties . vendorID  = =  RenderingContextDriver : : Vendor : : VENDOR_QUALCOMM ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( fdm_capabilities . attachment_supported  & &  ( ! fsr_capabilities . attachment_supported  | |  prefer_fdm_on_qualcomm ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// If available, we prefer using fragment density maps on Qualcomm as they adjust tile distribution when using
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// this technique. Performance as a result is higher than when using fragment shading rate.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fsr_capabilities  =  FragmentShadingRateCapabilities ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  if  ( fsr_capabilities . attachment_supported )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Disable any possibility of fragment density maps being used.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fdm_capabilities  =  FragmentDensityMapCapabilities ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Do not report or enable any VRS capabilities if attachment is not supported.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fsr_capabilities  =  FragmentShadingRateCapabilities ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fdm_capabilities  =  FragmentDensityMapCapabilities ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								Error  RenderingDeviceDriverVulkan : : _add_queue_create_info ( LocalVector < VkDeviceQueueCreateInfo >  & r_queue_create_info )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  queue_family_count  =  queue_family_properties . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									queue_families . resize ( queue_family_count ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkQueueFlags  queue_flags_mask  =  VK_QUEUE_GRAPHICS_BIT  |  VK_QUEUE_COMPUTE_BIT  |  VK_QUEUE_TRANSFER_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  uint32_t  max_queue_count_per_family  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									static  const  float  queue_priorities [ max_queue_count_per_family ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  queue_family_count ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( ( queue_family_properties [ i ] . queueFlags  &  queue_flags_mask )  = =  0 )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// We ignore creating queues in families that don't support any of the operations we require.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											continue ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkDeviceQueueCreateInfo  create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . sType  =  VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . queueFamilyIndex  =  i ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . queueCount  =  MIN ( queue_family_properties [ i ] . queueCount ,  max_queue_count_per_family ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . pQueuePriorities  =  queue_priorities ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										r_queue_create_info . push_back ( create_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Prepare the vectors where the queues will be filled out.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										queue_families [ i ] . resize ( create_info . queueCount ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  OK ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Error  RenderingDeviceDriverVulkan : : _initialize_device ( const  LocalVector < VkDeviceQueueCreateInfo >  & p_queue_create_info )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									TightLocalVector < const  char  * >  enabled_extension_names ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									enabled_extension_names . reserve ( enabled_device_extension_names . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( const  CharString  & extension_name  :  enabled_device_extension_names )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										enabled_extension_names . push_back ( extension_name . ptr ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  * create_info_next  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkPhysicalDeviceShaderFloat16Int8FeaturesKHR  shader_features  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									shader_features . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									shader_features . pNext  =  create_info_next ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									shader_features . shaderFloat16  =  shader_capabilities . shader_float16_is_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									shader_features . shaderInt8  =  shader_capabilities . shader_int8_is_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info_next  =  & shader_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-12-05 18:32:45 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkPhysicalDeviceBufferDeviceAddressFeaturesKHR  buffer_device_address_features  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( buffer_device_address_support )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										buffer_device_address_features . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										buffer_device_address_features . pNext  =  create_info_next ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										buffer_device_address_features . bufferDeviceAddress  =  buffer_device_address_support ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info_next  =  & buffer_device_address_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-07-24 09:22:44 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkPhysicalDeviceVulkanMemoryModelFeaturesKHR  vulkan_memory_model_features  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( vulkan_memory_model_support  & &  vulkan_memory_model_device_scope_support )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vulkan_memory_model_features . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vulkan_memory_model_features . pNext  =  create_info_next ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vulkan_memory_model_features . vulkanMemoryModel  =  vulkan_memory_model_support ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vulkan_memory_model_features . vulkanMemoryModelDeviceScope  =  vulkan_memory_model_device_scope_support ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info_next  =  & vulkan_memory_model_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkPhysicalDeviceFragmentShadingRateFeaturesKHR  fsr_features  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( fsr_capabilities . pipeline_supported  | |  fsr_capabilities . primitive_supported  | |  fsr_capabilities . attachment_supported )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fsr_features . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fsr_features . pNext  =  create_info_next ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fsr_features . pipelineFragmentShadingRate  =  fsr_capabilities . pipeline_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fsr_features . primitiveFragmentShadingRate  =  fsr_capabilities . primitive_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fsr_features . attachmentFragmentShadingRate  =  fsr_capabilities . attachment_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info_next  =  & fsr_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkPhysicalDeviceFragmentDensityMapFeaturesEXT  fdm_features  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( fdm_capabilities . attachment_supported  | |  fdm_capabilities . dynamic_attachment_supported  | |  fdm_capabilities . non_subsampled_images_supported )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fdm_features . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_FEATURES_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fdm_features . pNext  =  create_info_next ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fdm_features . fragmentDensityMap  =  fdm_capabilities . attachment_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fdm_features . fragmentDensityMapDynamic  =  fdm_capabilities . dynamic_attachment_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fdm_features . fragmentDensityMapNonSubsampledImages  =  fdm_capabilities . non_subsampled_images_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info_next  =  & fdm_features ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkPhysicalDevicePipelineCreationCacheControlFeatures  pipeline_cache_control_features  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( pipeline_cache_control_support )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										pipeline_cache_control_features . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										pipeline_cache_control_features . pNext  =  create_info_next ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										pipeline_cache_control_features . pipelineCreationCacheControl  =  pipeline_cache_control_support ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info_next  =  & pipeline_cache_control_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkPhysicalDeviceFaultFeaturesEXT  device_fault_features  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( device_fault_support )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										device_fault_features . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FAULT_FEATURES_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										device_fault_features . pNext  =  create_info_next ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info_next  =  & device_fault_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# if defined(VK_TRACK_DEVICE_MEMORY) 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkDeviceDeviceMemoryReportCreateInfoEXT  memory_report_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( device_memory_report_support )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										memory_report_info . sType  =  VK_STRUCTURE_TYPE_DEVICE_DEVICE_MEMORY_REPORT_CREATE_INFO_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										memory_report_info . pfnUserCallback  =  RenderingContextDriverVulkan : : memory_report_callback ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										memory_report_info . pNext  =  create_info_next ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										memory_report_info . flags  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										memory_report_info . pUserData  =  this ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info_next  =  & memory_report_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkPhysicalDeviceVulkan11Features  vulkan_1_1_features  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkPhysicalDevice16BitStorageFeaturesKHR  storage_features  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkPhysicalDeviceMultiviewFeatures  multiview_features  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  bool  enable_1_2_features  =  physical_device_properties . apiVersion  > =  VK_API_VERSION_1_2 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( enable_1_2_features )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// In Vulkan 1.2 and newer we use a newer struct to enable various features.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vulkan_1_1_features . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vulkan_1_1_features . pNext  =  create_info_next ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vulkan_1_1_features . storageBuffer16BitAccess  =  storage_buffer_capabilities . storage_buffer_16_bit_access_is_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vulkan_1_1_features . uniformAndStorageBuffer16BitAccess  =  storage_buffer_capabilities . uniform_and_storage_buffer_16_bit_access_is_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vulkan_1_1_features . storagePushConstant16  =  storage_buffer_capabilities . storage_push_constant_16_is_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vulkan_1_1_features . storageInputOutput16  =  storage_buffer_capabilities . storage_input_output_16 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vulkan_1_1_features . multiview  =  multiview_capabilities . is_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vulkan_1_1_features . multiviewGeometryShader  =  multiview_capabilities . geometry_shader_is_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vulkan_1_1_features . multiviewTessellationShader  =  multiview_capabilities . tessellation_shader_is_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vulkan_1_1_features . variablePointersStorageBuffer  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vulkan_1_1_features . variablePointers  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vulkan_1_1_features . protectedMemory  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vulkan_1_1_features . samplerYcbcrConversion  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vulkan_1_1_features . shaderDrawParameters  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info_next  =  & vulkan_1_1_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// On Vulkan 1.0 and 1.1 we use our older structs to initialize these features.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										storage_features . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										storage_features . pNext  =  create_info_next ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										storage_features . storageBuffer16BitAccess  =  storage_buffer_capabilities . storage_buffer_16_bit_access_is_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										storage_features . uniformAndStorageBuffer16BitAccess  =  storage_buffer_capabilities . uniform_and_storage_buffer_16_bit_access_is_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										storage_features . storagePushConstant16  =  storage_buffer_capabilities . storage_push_constant_16_is_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										storage_features . storageInputOutput16  =  storage_buffer_capabilities . storage_input_output_16 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info_next  =  & storage_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										const  bool  enable_1_1_features  =  physical_device_properties . apiVersion  > =  VK_API_VERSION_1_1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( enable_1_1_features )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											multiview_features . sType  =  VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											multiview_features . pNext  =  create_info_next ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											multiview_features . multiview  =  multiview_capabilities . is_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											multiview_features . multiviewGeometryShader  =  multiview_capabilities . geometry_shader_is_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											multiview_features . multiviewTessellationShader  =  multiview_capabilities . tessellation_shader_is_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											create_info_next  =  & multiview_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkDeviceCreateInfo  create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . sType  =  VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . pNext  =  create_info_next ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . queueCreateInfoCount  =  p_queue_create_info . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . pQueueCreateInfos  =  p_queue_create_info . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . enabledExtensionCount  =  enabled_extension_names . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . ppEnabledExtensionNames  =  enabled_extension_names . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . pEnabledFeatures  =  & requested_device_features ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( VulkanHooks : : get_singleton ( )  ! =  nullptr )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										bool  device_created  =  VulkanHooks : : get_singleton ( ) - > create_vulkan_device ( & create_info ,  & vk_device ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_COND_V ( ! device_created ,  ERR_CANT_CREATE ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										VkResult  err  =  vkCreateDevice ( physical_device ,  & create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_DEVICE ) ,  & vk_device ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  ERR_CANT_CREATE ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  queue_families . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  j  =  0 ;  j  <  queue_families [ i ] . size ( ) ;  j + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vkGetDeviceQueue ( vk_device ,  i ,  j ,  & queue_families [ i ] [ j ] . queue ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  RenderingContextDriverVulkan : : Functions  & functions  =  context_driver - > functions_get ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( functions . GetDeviceProcAddr  ! =  nullptr )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										device_functions . CreateSwapchainKHR  =  PFN_vkCreateSwapchainKHR ( functions . GetDeviceProcAddr ( vk_device ,  " vkCreateSwapchainKHR " ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										device_functions . DestroySwapchainKHR  =  PFN_vkDestroySwapchainKHR ( functions . GetDeviceProcAddr ( vk_device ,  " vkDestroySwapchainKHR " ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										device_functions . GetSwapchainImagesKHR  =  PFN_vkGetSwapchainImagesKHR ( functions . GetDeviceProcAddr ( vk_device ,  " vkGetSwapchainImagesKHR " ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										device_functions . AcquireNextImageKHR  =  PFN_vkAcquireNextImageKHR ( functions . GetDeviceProcAddr ( vk_device ,  " vkAcquireNextImageKHR " ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										device_functions . QueuePresentKHR  =  PFN_vkQueuePresentKHR ( functions . GetDeviceProcAddr ( vk_device ,  " vkQueuePresentKHR " ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( enabled_device_extension_names . has ( VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											device_functions . CreateRenderPass2KHR  =  PFN_vkCreateRenderPass2KHR ( functions . GetDeviceProcAddr ( vk_device ,  " vkCreateRenderPass2KHR " ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											device_functions . EndRenderPass2KHR  =  PFN_vkCmdEndRenderPass2KHR ( functions . GetDeviceProcAddr ( vk_device ,  " vkCmdEndRenderPass2KHR " ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Debug marker extensions.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( enabled_device_extension_names . has ( VK_EXT_DEBUG_MARKER_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											device_functions . CmdDebugMarkerBeginEXT  =  ( PFN_vkCmdDebugMarkerBeginEXT ) functions . GetDeviceProcAddr ( vk_device ,  " vkCmdDebugMarkerBeginEXT " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											device_functions . CmdDebugMarkerEndEXT  =  ( PFN_vkCmdDebugMarkerEndEXT ) functions . GetDeviceProcAddr ( vk_device ,  " vkCmdDebugMarkerEndEXT " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											device_functions . CmdDebugMarkerInsertEXT  =  ( PFN_vkCmdDebugMarkerInsertEXT ) functions . GetDeviceProcAddr ( vk_device ,  " vkCmdDebugMarkerInsertEXT " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											device_functions . DebugMarkerSetObjectNameEXT  =  ( PFN_vkDebugMarkerSetObjectNameEXT ) functions . GetDeviceProcAddr ( vk_device ,  " vkDebugMarkerSetObjectNameEXT " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Debug device fault extension.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( device_fault_support )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											device_functions . GetDeviceFaultInfoEXT  =  ( PFN_vkGetDeviceFaultInfoEXT ) functions . GetDeviceProcAddr ( vk_device ,  " vkGetDeviceFaultInfoEXT " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  OK ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Error  RenderingDeviceDriverVulkan : : _initialize_allocator ( )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VmaAllocatorCreateInfo  allocator_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									allocator_info . physicalDevice  =  physical_device ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									allocator_info . device  =  vk_device ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									allocator_info . instance  =  context_driver - > instance_get ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-05-16 10:05:16 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  bool  use_1_3_features  =  physical_device_properties . apiVersion  > =  VK_API_VERSION_1_3 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( use_1_3_features )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										allocator_info . flags  | =  VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2024-12-05 18:32:45 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( buffer_device_address_support )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										allocator_info . flags  | =  VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  err  =  vmaCreateAllocator ( & allocator_info ,  & allocator ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( err ,  ERR_CANT_CREATE ,  " vmaCreateAllocator failed with error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  OK ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Error  RenderingDeviceDriverVulkan : : _initialize_pipeline_cache ( )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipelines_cache . buffer . resize ( sizeof ( PipelineCacheHeader ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									PipelineCacheHeader  * header  =  ( PipelineCacheHeader  * ) ( pipelines_cache . buffer . ptrw ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									* header  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									header - > magic  =  868  +  VK_PIPELINE_CACHE_HEADER_VERSION_ONE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									header - > device_id  =  physical_device_properties . deviceID ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									header - > vendor_id  =  physical_device_properties . vendorID ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									header - > driver_version  =  physical_device_properties . driverVersion ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									memcpy ( header - > uuid ,  physical_device_properties . pipelineCacheUUID ,  VK_UUID_SIZE ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									header - > driver_abi  =  sizeof ( void  * ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipeline_cache_id  =  String : : hex_encode_buffer ( physical_device_properties . pipelineCacheUUID ,  VK_UUID_SIZE ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipeline_cache_id  + =  " -driver- "  +  itos ( physical_device_properties . driverVersion ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  OK ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static  void  _convert_subpass_attachments ( const  VkAttachmentReference2  * p_attachment_references_2 ,  uint32_t  p_attachment_references_count ,  TightLocalVector < VkAttachmentReference >  & r_attachment_references )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_attachment_references . resize ( p_attachment_references_count ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_attachment_references_count ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Ignore sType, pNext and aspectMask (which is currently unused).
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										r_attachment_references [ i ] . attachment  =  p_attachment_references_2 [ i ] . attachment ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										r_attachment_references [ i ] . layout  =  p_attachment_references_2 [ i ] . layout ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VkResult  RenderingDeviceDriverVulkan : : _create_render_pass ( VkDevice  p_device ,  const  VkRenderPassCreateInfo2  * p_create_info ,  const  VkAllocationCallbacks  * p_allocator ,  VkRenderPass  * p_render_pass )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( device_functions . CreateRenderPass2KHR  ! =  nullptr )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return  device_functions . CreateRenderPass2KHR ( p_device ,  p_create_info ,  p_allocator ,  p_render_pass ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Compatibility fallback with regular create render pass but by converting the inputs from the newer version to the older one.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										TightLocalVector < VkAttachmentDescription >  attachments ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										attachments . resize ( p_create_info - > attachmentCount ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  i  =  0 ;  i  <  p_create_info - > attachmentCount ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// Ignores sType and pNext from the attachment.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  VkAttachmentDescription2  & src  =  p_create_info - > pAttachments [ i ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VkAttachmentDescription  & dst  =  attachments [ i ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst . flags  =  src . flags ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst . format  =  src . format ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst . samples  =  src . samples ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst . loadOp  =  src . loadOp ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst . storeOp  =  src . storeOp ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst . stencilLoadOp  =  src . stencilLoadOp ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst . stencilStoreOp  =  src . stencilStoreOp ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst . initialLayout  =  src . initialLayout ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst . finalLayout  =  src . finalLayout ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										const  uint32_t  attachment_vectors_per_subpass  =  4 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										TightLocalVector < TightLocalVector < VkAttachmentReference > >  subpasses_attachments ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										TightLocalVector < VkSubpassDescription >  subpasses ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										subpasses_attachments . resize ( p_create_info - > subpassCount  *  attachment_vectors_per_subpass ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										subpasses . resize ( p_create_info - > subpassCount ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  i  =  0 ;  i  <  p_create_info - > subpassCount ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  uint32_t  vector_base_index  =  i  *  attachment_vectors_per_subpass ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  uint32_t  input_attachments_index  =  vector_base_index  +  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  uint32_t  color_attachments_index  =  vector_base_index  +  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  uint32_t  resolve_attachments_index  =  vector_base_index  +  2 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  uint32_t  depth_attachment_index  =  vector_base_index  +  3 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											_convert_subpass_attachments ( p_create_info - > pSubpasses [ i ] . pInputAttachments ,  p_create_info - > pSubpasses [ i ] . inputAttachmentCount ,  subpasses_attachments [ input_attachments_index ] ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											_convert_subpass_attachments ( p_create_info - > pSubpasses [ i ] . pColorAttachments ,  p_create_info - > pSubpasses [ i ] . colorAttachmentCount ,  subpasses_attachments [ color_attachments_index ] ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-04-25 14:03:02 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											_convert_subpass_attachments ( p_create_info - > pSubpasses [ i ] . pResolveAttachments ,  ( p_create_info - > pSubpasses [ i ] . pResolveAttachments  ! =  nullptr )  ?  p_create_info - > pSubpasses [ i ] . colorAttachmentCount  :  0 ,  subpasses_attachments [ resolve_attachments_index ] ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											_convert_subpass_attachments ( p_create_info - > pSubpasses [ i ] . pDepthStencilAttachment ,  ( p_create_info - > pSubpasses [ i ] . pDepthStencilAttachment  ! =  nullptr )  ?  1  :  0 ,  subpasses_attachments [ depth_attachment_index ] ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// Ignores sType and pNext from the subpass.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  VkSubpassDescription2  & src_subpass  =  p_create_info - > pSubpasses [ i ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VkSubpassDescription  & dst_subpass  =  subpasses [ i ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst_subpass . flags  =  src_subpass . flags ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst_subpass . pipelineBindPoint  =  src_subpass . pipelineBindPoint ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst_subpass . inputAttachmentCount  =  src_subpass . inputAttachmentCount ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst_subpass . pInputAttachments  =  subpasses_attachments [ input_attachments_index ] . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst_subpass . colorAttachmentCount  =  src_subpass . colorAttachmentCount ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst_subpass . pColorAttachments  =  subpasses_attachments [ color_attachments_index ] . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst_subpass . pResolveAttachments  =  subpasses_attachments [ resolve_attachments_index ] . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst_subpass . pDepthStencilAttachment  =  subpasses_attachments [ depth_attachment_index ] . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst_subpass . preserveAttachmentCount  =  src_subpass . preserveAttachmentCount ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst_subpass . pPreserveAttachments  =  src_subpass . pPreserveAttachments ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										TightLocalVector < VkSubpassDependency >  dependencies ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										dependencies . resize ( p_create_info - > dependencyCount ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  i  =  0 ;  i  <  p_create_info - > dependencyCount ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// Ignores sType and pNext from the dependency, and viewMask which is currently unused.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  VkSubpassDependency2  & src_dependency  =  p_create_info - > pDependencies [ i ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VkSubpassDependency  & dst_dependency  =  dependencies [ i ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst_dependency . srcSubpass  =  src_dependency . srcSubpass ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst_dependency . dstSubpass  =  src_dependency . dstSubpass ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst_dependency . srcStageMask  =  src_dependency . srcStageMask ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst_dependency . dstStageMask  =  src_dependency . dstStageMask ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst_dependency . srcAccessMask  =  src_dependency . srcAccessMask ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst_dependency . dstAccessMask  =  src_dependency . dstAccessMask ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											dst_dependency . dependencyFlags  =  src_dependency . dependencyFlags ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkRenderPassCreateInfo  create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . sType  =  VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . pNext  =  p_create_info - > pNext ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . flags  =  p_create_info - > flags ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . attachmentCount  =  attachments . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . pAttachments  =  attachments . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . subpassCount  =  subpasses . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . pSubpasses  =  subpasses . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . dependencyCount  =  dependencies . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . pDependencies  =  dependencies . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return  vkCreateRenderPass ( vk_device ,  & create_info ,  p_allocator ,  p_render_pass ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								bool  RenderingDeviceDriverVulkan : : _release_image_semaphore ( CommandQueue  * p_command_queue ,  uint32_t  p_semaphore_index ,  bool  p_release_on_swap_chain )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									SwapChain  * swap_chain  =  p_command_queue - > image_semaphores_swap_chains [ p_semaphore_index ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( swap_chain  ! =  nullptr )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Clear the swap chain from the command queue's vector.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										p_command_queue - > image_semaphores_swap_chains [ p_semaphore_index ]  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( p_release_on_swap_chain )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// Remove the acquired semaphore from the swap chain's vectors.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											for  ( uint32_t  i  =  0 ;  i  <  swap_chain - > command_queues_acquired . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												if  ( swap_chain - > command_queues_acquired [ i ]  = =  p_command_queue  & &  swap_chain - > command_queues_acquired_semaphores [ i ]  = =  p_semaphore_index )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													swap_chain - > command_queues_acquired . remove_at ( i ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													swap_chain - > command_queues_acquired_semaphores . remove_at ( i ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								bool  RenderingDeviceDriverVulkan : : _recreate_image_semaphore ( CommandQueue  * p_command_queue ,  uint32_t  p_semaphore_index ,  bool  p_release_on_swap_chain )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_release_image_semaphore ( p_command_queue ,  p_semaphore_index ,  p_release_on_swap_chain ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkSemaphore  semaphore ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkSemaphoreCreateInfo  create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . sType  =  VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  err  =  vkCreateSemaphore ( vk_device ,  & create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_SEMAPHORE ) ,  & semaphore ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Indicate the semaphore is free again and destroy the previous one before storing the new one.
 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkDestroySemaphore ( vk_device ,  p_command_queue - > image_semaphores [ p_semaphore_index ] ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_SEMAPHORE ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									p_command_queue - > image_semaphores [ p_semaphore_index ]  =  semaphore ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									p_command_queue - > free_image_semaphores . push_back ( p_semaphore_index ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								// Debug marker extensions.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VkDebugReportObjectTypeEXT  RenderingDeviceDriverVulkan : : _convert_to_debug_report_objectType ( VkObjectType  p_object_type )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									switch  ( p_object_type )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_UNKNOWN : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_INSTANCE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_PHYSICAL_DEVICE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_PHYSICAL_DEVICE_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_DEVICE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_QUEUE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_QUEUE_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_SEMAPHORE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_SEMAPHORE_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_COMMAND_BUFFER : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_BUFFER_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_FENCE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_FENCE_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_DEVICE_MEMORY : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_BUFFER : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_IMAGE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_EVENT : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_EVENT_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_QUERY_POOL : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_QUERY_POOL_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_BUFFER_VIEW : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_VIEW_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_IMAGE_VIEW : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_VIEW_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_SHADER_MODULE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_PIPELINE_CACHE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_PIPELINE_LAYOUT : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_LAYOUT_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_RENDER_PASS : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_RENDER_PASS_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_PIPELINE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_SAMPLER : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_DESCRIPTOR_POOL : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_POOL_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_DESCRIPTOR_SET : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_FRAMEBUFFER : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_FRAMEBUFFER_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_COMMAND_POOL : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_POOL_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_SURFACE_KHR : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_SWAPCHAIN_KHR : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_DISPLAY_KHR : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_KHR_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_DISPLAY_MODE_KHR : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_MODE_KHR_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_CU_MODULE_NVX : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_CU_MODULE_NVX_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_CU_FUNCTION_NVX : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_CU_FUNCTION_NVX_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_VALIDATION_CACHE_EXT : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  VK_DEBUG_REPORT_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										default : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : _set_object_name ( VkObjectType  p_object_type ,  uint64_t  p_object_handle ,  String  p_object_name )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  RenderingContextDriverVulkan : : Functions  & functions  =  context_driver - > functions_get ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( functions . SetDebugUtilsObjectNameEXT  ! =  nullptr )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										CharString  obj_data  =  p_object_name . utf8 ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkDebugUtilsObjectNameInfoEXT  name_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										name_info . sType  =  VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										name_info . pNext  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										name_info . objectType  =  p_object_type ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										name_info . objectHandle  =  p_object_handle ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										name_info . pObjectName  =  obj_data . get_data ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										functions . SetDebugUtilsObjectNameEXT ( vk_device ,  & name_info ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									}  else  if  ( functions . DebugMarkerSetObjectNameEXT  ! =  nullptr )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Debug marker extensions.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										CharString  obj_data  =  p_object_name . utf8 ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkDebugMarkerObjectNameInfoEXT  name_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										name_info . sType  =  VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										name_info . pNext  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										name_info . objectType  =  _convert_to_debug_report_objectType ( p_object_type ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										name_info . object  =  p_object_handle ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										name_info . pObjectName  =  obj_data . get_data ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										functions . DebugMarkerSetObjectNameEXT ( vk_device ,  & name_info ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Error  RenderingDeviceDriverVulkan : : initialize ( uint32_t  p_device_index ,  uint32_t  p_frame_count )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									context_device  =  context_driver - > device_get ( p_device_index ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									physical_device  =  context_driver - > physical_device_get ( p_device_index ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkGetPhysicalDeviceProperties ( physical_device ,  & physical_device_properties ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Workaround a driver bug on Adreno 730 GPUs that keeps leaking memory on each call to vkResetDescriptorPool.
 
							 
						 
					
						
							
								
									
										
										
										
											2024-12-12 12:47:08 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Which eventually run out of memory. In such case we should not be using linear allocated pools
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Bug introduced in driver 512.597.0 and fixed in 512.671.0.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Confirmed by Qualcomm.
 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( linear_descriptor_pools_enabled )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										const  uint32_t  reset_descriptor_pool_broken_driver_begin  =  VK_MAKE_VERSION ( 512u ,  597u ,  0u ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										const  uint32_t  reset_descriptor_pool_fixed_driver_begin  =  VK_MAKE_VERSION ( 512u ,  671u ,  0u ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										linear_descriptor_pools_enabled  =  physical_device_properties . driverVersion  <  reset_descriptor_pool_broken_driver_begin  | |  physical_device_properties . driverVersion  >  reset_descriptor_pool_fixed_driver_begin ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									frame_count  =  p_frame_count ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Copy the queue family properties the context already retrieved.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  queue_family_count  =  context_driver - > queue_family_get_count ( p_device_index ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									queue_family_properties . resize ( queue_family_count ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  queue_family_count ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										queue_family_properties [ i ]  =  context_driver - > queue_family_get ( p_device_index ,  i ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Error  err  =  _initialize_device_extensions ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  OK ,  err ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									err  =  _check_device_features ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  OK ,  err ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									err  =  _check_device_capabilities ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  OK ,  err ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									LocalVector < VkDeviceQueueCreateInfo >  queue_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									err  =  _add_queue_create_info ( queue_create_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  OK ,  err ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									err  =  _initialize_device ( queue_create_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  OK ,  err ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									err  =  _initialize_allocator ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  OK ,  err ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									err  =  _initialize_pipeline_cache ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  OK ,  err ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									max_descriptor_sets_per_pool  =  GLOBAL_GET ( " rendering/rendering_device/vulkan/max_descriptors_per_pool " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-21 15:35:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# if defined(DEBUG_ENABLED) || defined(DEV_ENABLED) 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									breadcrumb_buffer  =  buffer_create ( 2u  *  sizeof ( uint32_t )  *  BREADCRUMB_BUFFER_ENTRIES ,  BufferUsageBits : : BUFFER_USAGE_TRANSFER_TO_BIT ,  MemoryAllocationType : : MEMORY_ALLOCATION_TYPE_CPU ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-05-05 19:15:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# if defined(SWAPPY_FRAME_PACING_ENABLED) 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swappy_frame_pacer_enable  =  GLOBAL_GET ( " display/window/frame_pacing/android/enable_frame_pacing " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swappy_mode  =  GLOBAL_GET ( " display/window/frame_pacing/android/swappy_mode " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( VulkanHooks : : get_singleton ( )  ! =  nullptr )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Hooks control device creation & possibly presentation
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// (e.g. OpenXR) thus it's too risky to use Swappy.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										swappy_frame_pacer_enable  =  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										OS : : get_singleton ( ) - > print ( " VulkanHooks detected (e.g. OpenXR): Force-disabling Swappy Frame Pacing. \n " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-08-27 11:58:28 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									shader_container_format . set_debug_info_enabled ( Engine : : get_singleton ( ) - > is_generate_spirv_debug_info_enabled ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									return  OK ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								/****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** MEMORY ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static  const  uint32_t  SMALL_ALLOCATION_MAX_SIZE  =  4096 ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VmaPool  RenderingDeviceDriverVulkan : : _find_or_create_small_allocs_pool ( uint32_t  p_mem_type_index )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( small_allocs_pools . has ( p_mem_type_index ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return  small_allocs_pools [ p_mem_type_index ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									print_verbose ( " Creating VMA small objects pool for memory type index  "  +  itos ( p_mem_type_index ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VmaPoolCreateInfo  pci  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pci . memoryTypeIndex  =  p_mem_type_index ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pci . flags  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pci . blockSize  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pci . minBlockCount  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pci . maxBlockCount  =  SIZE_MAX ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pci . priority  =  0.5f ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pci . minAllocationAlignment  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pci . pMemoryAllocateNext  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VmaPool  pool  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkResult  res  =  vmaCreatePool ( allocator ,  & pci ,  & pool ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									small_allocs_pools [ p_mem_type_index ]  =  pool ;  // Don't try to create it again if failed the first time.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( res ,  pool ,  " vmaCreatePool failed with error  "  +  itos ( res )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** BUFFERS ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// RDD::BufferUsageBits == VkBufferUsageFlagBits.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BUFFER_USAGE_TRANSFER_FROM_BIT ,  VK_BUFFER_USAGE_TRANSFER_SRC_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BUFFER_USAGE_TRANSFER_TO_BIT ,  VK_BUFFER_USAGE_TRANSFER_DST_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BUFFER_USAGE_TEXEL_BIT ,  VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BUFFER_USAGE_UNIFORM_BIT ,  VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BUFFER_USAGE_STORAGE_BIT ,  VK_BUFFER_USAGE_STORAGE_BUFFER_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BUFFER_USAGE_INDEX_BIT ,  VK_BUFFER_USAGE_INDEX_BUFFER_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BUFFER_USAGE_VERTEX_BIT ,  VK_BUFFER_USAGE_VERTEX_BUFFER_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BUFFER_USAGE_INDIRECT_BIT ,  VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT ) ) ;  
						 
					
						
							
								
									
										
										
										
											2024-12-05 18:32:45 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BUFFER_USAGE_DEVICE_ADDRESS_BIT ,  VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT ) ) ;  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								RDD : : BufferID  RenderingDeviceDriverVulkan : : buffer_create ( uint64_t  p_size ,  BitField < BufferUsageBits >  p_usage ,  MemoryAllocationType  p_allocation_type )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkBufferCreateInfo  create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . sType  =  VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . size  =  p_size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . usage  =  p_usage ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . sharingMode  =  VK_SHARING_MODE_EXCLUSIVE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-02-13 14:58:48 -08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VmaMemoryUsage  vma_usage  =  VMA_MEMORY_USAGE_UNKNOWN ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  vma_flags_to_remove  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VmaAllocationCreateInfo  alloc_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									switch  ( p_allocation_type )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  MEMORY_ALLOCATION_TYPE_CPU :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											bool  is_src  =  p_usage . has_flag ( BUFFER_USAGE_TRANSFER_FROM_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											bool  is_dst  =  p_usage . has_flag ( BUFFER_USAGE_TRANSFER_TO_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( is_src  & &  ! is_dst )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												// Looks like a staging buffer: CPU maps, writes sequentially, then GPU copies to VRAM.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												alloc_create_info . flags  =  VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-24 00:08:29 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												alloc_create_info . preferredFlags  | =  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-02-13 14:58:48 -08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												vma_flags_to_remove  | =  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( is_dst  & &  ! is_src )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												// Looks like a readback buffer: GPU copies from VRAM, then CPU maps and reads.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												alloc_create_info . flags  =  VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-24 00:08:29 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												alloc_create_info . preferredFlags  | =  VK_MEMORY_PROPERTY_HOST_CACHED_BIT ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-02-13 14:58:48 -08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												vma_flags_to_remove  | =  VK_MEMORY_PROPERTY_HOST_CACHED_BIT ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2025-02-13 14:58:48 -08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											vma_usage  =  VMA_MEMORY_USAGE_AUTO_PREFER_HOST ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											alloc_create_info . requiredFlags  =  ( VK_MEMORY_PROPERTY_HOST_COHERENT_BIT  |  VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  MEMORY_ALLOCATION_TYPE_GPU :  { 
							 
						 
					
						
							
								
									
										
										
										
											2025-02-13 14:58:48 -08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											vma_usage  =  VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-03-06 19:56:52 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											if  ( ! Engine : : get_singleton ( ) - > is_extra_gpu_memory_tracking_enabled ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												// We must set it right now or else vmaFindMemoryTypeIndexForBufferInfo will use wrong parameters.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												alloc_create_info . usage  =  vma_usage ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											alloc_create_info . preferredFlags  =  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											if  ( p_size  < =  SMALL_ALLOCATION_MAX_SIZE )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												uint32_t  mem_type_index  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vmaFindMemoryTypeIndexForBufferInfo ( allocator ,  & create_info ,  & alloc_create_info ,  & mem_type_index ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												alloc_create_info . pool  =  _find_or_create_small_allocs_pool ( mem_type_index ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkBuffer  vk_buffer  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VmaAllocation  allocation  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VmaAllocationInfo  alloc_info  =  { } ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-02-13 14:58:48 -08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( ! Engine : : get_singleton ( ) - > is_extra_gpu_memory_tracking_enabled ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										alloc_create_info . preferredFlags  & =  ~ vma_flags_to_remove ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										alloc_create_info . usage  =  vma_usage ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkResult  err  =  vmaCreateBuffer ( allocator ,  & create_info ,  & alloc_create_info ,  & vk_buffer ,  & allocation ,  & alloc_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_COND_V_MSG ( err ,  BufferID ( ) ,  " Can't create buffer of size:  "  +  itos ( p_size )  +  " , error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkResult  err  =  vkCreateBuffer ( vk_device ,  & create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_BUFFER ) ,  & vk_buffer ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_COND_V_MSG ( err ,  BufferID ( ) ,  " Can't create buffer of size:  "  +  itos ( p_size )  +  " , error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										err  =  vmaAllocateMemoryForBuffer ( allocator ,  vk_buffer ,  & alloc_create_info ,  & allocation ,  & alloc_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_COND_V_MSG ( err ,  BufferID ( ) ,  " Can't allocate memory for buffer of size:  "  +  itos ( p_size )  +  " , error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										err  =  vmaBindBufferMemory2 ( allocator ,  allocation ,  0 ,  vk_buffer ,  nullptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_COND_V_MSG ( err ,  BufferID ( ) ,  " Can't bind memory to buffer of size:  "  +  itos ( p_size )  +  " , error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Bookkeep.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									BufferInfo  * buf_info  =  VersatileResource : : allocate < BufferInfo > ( resources_allocator ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									buf_info - > vk_buffer  =  vk_buffer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									buf_info - > allocation . handle  =  allocation ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									buf_info - > allocation . size  =  alloc_info . size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									buf_info - > size  =  p_size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  BufferID ( buf_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								bool  RenderingDeviceDriverVulkan : : buffer_set_texel_format ( BufferID  p_buffer ,  DataFormat  p_format )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									BufferInfo  * buf_info  =  ( BufferInfo  * ) p_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( ! buf_info - > vk_view ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkBufferViewCreateInfo  view_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									view_create_info . sType  =  VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									view_create_info . buffer  =  buf_info - > vk_buffer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									view_create_info . format  =  RD_TO_VK_FORMAT [ p_format ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									view_create_info . range  =  buf_info - > allocation . size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  res  =  vkCreateBufferView ( vk_device ,  & view_create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_BUFFER_VIEW ) ,  & buf_info - > vk_view ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( res ,  false ,  " Unable to create buffer view, error  "  +  itos ( res )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : buffer_free ( BufferID  p_buffer )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									BufferInfo  * buf_info  =  ( BufferInfo  * ) p_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( buf_info - > vk_view )  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										vkDestroyBufferView ( vk_device ,  buf_info - > vk_view ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_BUFFER_VIEW ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-02-13 14:58:48 -08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( ! Engine : : get_singleton ( ) - > is_extra_gpu_memory_tracking_enabled ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vmaDestroyBuffer ( allocator ,  buf_info - > vk_buffer ,  buf_info - > allocation . handle ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vkDestroyBuffer ( vk_device ,  buf_info - > vk_buffer ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_BUFFER ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vmaFreeMemory ( allocator ,  buf_info - > allocation . handle ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VersatileResource : : free ( resources_allocator ,  buf_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								uint64_t  RenderingDeviceDriverVulkan : : buffer_get_allocation_size ( BufferID  p_buffer )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  BufferInfo  * buf_info  =  ( const  BufferInfo  * ) p_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  buf_info - > allocation . size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								uint8_t  * RenderingDeviceDriverVulkan : : buffer_map ( BufferID  p_buffer )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  BufferInfo  * buf_info  =  ( const  BufferInfo  * ) p_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  * data_ptr  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkResult  err  =  vmaMapMemory ( allocator ,  buf_info - > allocation . handle ,  & data_ptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( err ,  nullptr ,  " vmaMapMemory failed with error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  ( uint8_t  * ) data_ptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : buffer_unmap ( BufferID  p_buffer )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  BufferInfo  * buf_info  =  ( const  BufferInfo  * ) p_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vmaUnmapMemory ( allocator ,  buf_info - > allocation . handle ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-12-05 18:32:45 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								uint64_t  RenderingDeviceDriverVulkan : : buffer_get_device_address ( BufferID  p_buffer )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  BufferInfo  * buf_info  =  ( const  BufferInfo  * ) p_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkBufferDeviceAddressInfo  address_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									address_info . sType  =  VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									address_info . pNext  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									address_info . buffer  =  buf_info - > vk_buffer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  vkGetBufferDeviceAddress ( vk_device ,  & address_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								/*****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** TEXTURE ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static  const  VkImageType  RD_TEX_TYPE_TO_VK_IMG_TYPE [ RDD : : TEXTURE_TYPE_MAX ]  =  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_IMAGE_TYPE_1D , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_IMAGE_TYPE_2D , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_IMAGE_TYPE_3D , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_IMAGE_TYPE_2D , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_IMAGE_TYPE_1D , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_IMAGE_TYPE_2D , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_IMAGE_TYPE_2D , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								} ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static  const  VkSampleCountFlagBits  RD_TO_VK_SAMPLE_COUNT [ RDD : : TEXTURE_SAMPLES_MAX ]  =  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_SAMPLE_COUNT_1_BIT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_SAMPLE_COUNT_2_BIT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_SAMPLE_COUNT_4_BIT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_SAMPLE_COUNT_8_BIT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_SAMPLE_COUNT_16_BIT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_SAMPLE_COUNT_32_BIT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_SAMPLE_COUNT_64_BIT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								} ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// RDD::TextureType == VkImageViewType.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : TEXTURE_TYPE_1D ,  VK_IMAGE_VIEW_TYPE_1D ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : TEXTURE_TYPE_2D ,  VK_IMAGE_VIEW_TYPE_2D ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : TEXTURE_TYPE_3D ,  VK_IMAGE_VIEW_TYPE_3D ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : TEXTURE_TYPE_CUBE ,  VK_IMAGE_VIEW_TYPE_CUBE ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : TEXTURE_TYPE_1D_ARRAY ,  VK_IMAGE_VIEW_TYPE_1D_ARRAY ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : TEXTURE_TYPE_2D_ARRAY ,  VK_IMAGE_VIEW_TYPE_2D_ARRAY ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : TEXTURE_TYPE_CUBE_ARRAY ,  VK_IMAGE_VIEW_TYPE_CUBE_ARRAY ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// RDD::TextureSwizzle == VkComponentSwizzle.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : TEXTURE_SWIZZLE_IDENTITY ,  VK_COMPONENT_SWIZZLE_IDENTITY ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : TEXTURE_SWIZZLE_ZERO ,  VK_COMPONENT_SWIZZLE_ZERO ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : TEXTURE_SWIZZLE_ONE ,  VK_COMPONENT_SWIZZLE_ONE ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : TEXTURE_SWIZZLE_R ,  VK_COMPONENT_SWIZZLE_R ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : TEXTURE_SWIZZLE_G ,  VK_COMPONENT_SWIZZLE_G ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : TEXTURE_SWIZZLE_B ,  VK_COMPONENT_SWIZZLE_B ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : TEXTURE_SWIZZLE_A ,  VK_COMPONENT_SWIZZLE_A ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// RDD::TextureAspectBits == VkImageAspectFlagBits.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : TEXTURE_ASPECT_COLOR_BIT ,  VK_IMAGE_ASPECT_COLOR_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : TEXTURE_ASPECT_DEPTH_BIT ,  VK_IMAGE_ASPECT_DEPTH_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : TEXTURE_ASPECT_STENCIL_BIT ,  VK_IMAGE_ASPECT_STENCIL_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								VkSampleCountFlagBits  RenderingDeviceDriverVulkan : : _ensure_supported_sample_count ( TextureSamples  p_requested_sample_count )  {  
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkSampleCountFlags  sample_count_flags  =  ( physical_device_properties . limits . framebufferColorSampleCounts  &  physical_device_properties . limits . framebufferDepthSampleCounts ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ( sample_count_flags  &  RD_TO_VK_SAMPLE_COUNT [ p_requested_sample_count ] ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// The requested sample count is supported.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return  RD_TO_VK_SAMPLE_COUNT [ p_requested_sample_count ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Find the closest lower supported sample count.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkSampleCountFlagBits  sample_count  =  RD_TO_VK_SAMPLE_COUNT [ p_requested_sample_count ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										while  ( sample_count  >  VK_SAMPLE_COUNT_1_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( sample_count_flags  &  sample_count )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												return  sample_count ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											sample_count  =  ( VkSampleCountFlagBits ) ( sample_count  > >  1 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  VK_SAMPLE_COUNT_1_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								RDD : : TextureID  RenderingDeviceDriverVulkan : : texture_create ( const  TextureFormat  & p_format ,  const  TextureView  & p_view )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkImageCreateInfo  create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . sType  =  VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_format . shareable_formats . size ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . flags  | =  VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										if  ( enabled_device_extension_names . has ( VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											VkFormat  * vk_allowed_formats  =  ALLOCA_ARRAY ( VkFormat ,  p_format . shareable_formats . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											for  ( int  i  =  0 ;  i  <  p_format . shareable_formats . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_allowed_formats [ i ]  =  RD_TO_VK_FORMAT [ p_format . shareable_formats [ i ] ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VkImageFormatListCreateInfoKHR  * format_list_create_info  =  ALLOCA_SINGLE ( VkImageFormatListCreateInfoKHR ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											* format_list_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											format_list_create_info - > sType  =  VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											format_list_create_info - > viewFormatCount  =  p_format . shareable_formats . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											format_list_create_info - > pViewFormats  =  vk_allowed_formats ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											create_info . pNext  =  format_list_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_format . texture_type  = =  TEXTURE_TYPE_CUBE  | |  p_format . texture_type  = =  TEXTURE_TYPE_CUBE_ARRAY )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . flags  | =  VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									/*if (p_format.texture_type == TEXTURE_TYPE_2D || p_format.texture_type == TEXTURE_TYPE_2D_ARRAY) {
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . flags  | =  VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} */ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . imageType  =  RD_TEX_TYPE_TO_VK_IMG_TYPE [ p_format . texture_type ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . format  =  RD_TO_VK_FORMAT [ p_format . format ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . extent . width  =  p_format . width ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . extent . height  =  p_format . height ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . extent . depth  =  p_format . depth ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . mipLevels  =  p_format . mipmaps ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . arrayLayers  =  p_format . array_layers ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . samples  =  _ensure_supported_sample_count ( p_format . samples ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . tiling  =  ( p_format . usage_bits  &  TEXTURE_USAGE_CPU_READ_BIT )  ?  VK_IMAGE_TILING_LINEAR  :  VK_IMAGE_TILING_OPTIMAL ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Usage.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ( p_format . usage_bits  &  TEXTURE_USAGE_SAMPLING_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . usage  | =  VK_IMAGE_USAGE_SAMPLED_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ( p_format . usage_bits  &  TEXTURE_USAGE_STORAGE_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . usage  | =  VK_IMAGE_USAGE_STORAGE_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ( p_format . usage_bits  &  TEXTURE_USAGE_COLOR_ATTACHMENT_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . usage  | =  VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ( p_format . usage_bits  &  TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . usage  | =  VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ( p_format . usage_bits  &  TEXTURE_USAGE_INPUT_ATTACHMENT_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . usage  | =  VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( ( p_format . usage_bits  &  TEXTURE_USAGE_VRS_ATTACHMENT_BIT )  & &  ( p_format . usage_bits  &  TEXTURE_USAGE_VRS_FRAGMENT_SHADING_RATE_BIT ) )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										create_info . usage  | =  VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( ( p_format . usage_bits  &  TEXTURE_USAGE_VRS_ATTACHMENT_BIT )  & &  ( p_format . usage_bits  &  TEXTURE_USAGE_VRS_FRAGMENT_DENSITY_MAP_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . usage  | =  VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									if  ( ( p_format . usage_bits  &  TEXTURE_USAGE_CAN_UPDATE_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . usage  | =  VK_IMAGE_USAGE_TRANSFER_DST_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ( p_format . usage_bits  &  TEXTURE_USAGE_CAN_COPY_FROM_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . usage  | =  VK_IMAGE_USAGE_TRANSFER_SRC_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ( p_format . usage_bits  &  TEXTURE_USAGE_CAN_COPY_TO_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . usage  | =  VK_IMAGE_USAGE_TRANSFER_DST_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . sharingMode  =  VK_SHARING_MODE_EXCLUSIVE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . initialLayout  =  VK_IMAGE_LAYOUT_UNDEFINED ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Allocate memory.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  width  =  0 ,  height  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  image_size  =  get_image_format_required_size ( p_format . format ,  p_format . width ,  p_format . height ,  p_format . depth ,  p_format . mipmaps ,  & width ,  & height ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VmaAllocationCreateInfo  alloc_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									alloc_create_info . flags  =  ( p_format . usage_bits  &  TEXTURE_USAGE_CPU_READ_BIT )  ?  VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT  :  0 ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_format . usage_bits  &  TEXTURE_USAGE_TRANSIENT_BIT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  memory_type_index  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VmaAllocationCreateInfo  lazy_memory_requirements  =  alloc_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										lazy_memory_requirements . usage  =  VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkResult  result  =  vmaFindMemoryTypeIndex ( allocator ,  UINT32_MAX ,  & lazy_memory_requirements ,  & memory_type_index ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( VK_SUCCESS  = =  result )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											alloc_create_info  =  lazy_memory_requirements ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											create_info . usage  | =  VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// VUID-VkImageCreateInfo-usage-00963 :
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// If usage includes VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT,
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// then bits other than VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
 
							 
						 
					
						
							
								
									
										
										
										
											2024-12-12 12:47:08 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											// and VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT must not be set.
 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											create_info . usage  & =  ( VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT  |  VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT  |  VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT  |  VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											alloc_create_info . preferredFlags  =  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										alloc_create_info . preferredFlags  =  VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									if  ( image_size  < =  SMALL_ALLOCATION_MAX_SIZE )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  mem_type_index  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vmaFindMemoryTypeIndexForImageInfo ( allocator ,  & create_info ,  & alloc_create_info ,  & mem_type_index ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										alloc_create_info . pool  =  _find_or_create_small_allocs_pool ( mem_type_index ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Create.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkImage  vk_image  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VmaAllocation  allocation  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VmaAllocationInfo  alloc_info  =  { } ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-02-13 14:58:48 -08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( ! Engine : : get_singleton ( ) - > is_extra_gpu_memory_tracking_enabled ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										alloc_create_info . usage  =  VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkResult  err  =  vmaCreateImage ( allocator ,  & create_info ,  & alloc_create_info ,  & vk_image ,  & allocation ,  & alloc_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_COND_V_MSG ( err ,  TextureID ( ) ,  " vmaCreateImage failed with error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkResult  err  =  vkCreateImage ( vk_device ,  & create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_IMAGE ) ,  & vk_image ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_COND_V_MSG ( err ,  TextureID ( ) ,  " vkCreateImage failed with error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										err  =  vmaAllocateMemoryForImage ( allocator ,  vk_image ,  & alloc_create_info ,  & allocation ,  & alloc_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_COND_V_MSG ( err ,  TextureID ( ) ,  " Can't allocate memory for image, error:  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										err  =  vmaBindImageMemory2 ( allocator ,  allocation ,  0 ,  vk_image ,  nullptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_COND_V_MSG ( err ,  TextureID ( ) ,  " Can't bind memory to image, error:  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Create view.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkImageViewCreateInfo  image_view_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . sType  =  VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . image  =  vk_image ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . viewType  =  ( VkImageViewType ) p_format . texture_type ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . format  =  RD_TO_VK_FORMAT [ p_view . format ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . components . r  =  ( VkComponentSwizzle ) p_view . swizzle_r ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . components . g  =  ( VkComponentSwizzle ) p_view . swizzle_g ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . components . b  =  ( VkComponentSwizzle ) p_view . swizzle_b ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . components . a  =  ( VkComponentSwizzle ) p_view . swizzle_a ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . subresourceRange . levelCount  =  create_info . mipLevels ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . subresourceRange . layerCount  =  create_info . arrayLayers ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ( p_format . usage_bits  &  TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										image_view_create_info . subresourceRange . aspectMask  =  VK_IMAGE_ASPECT_DEPTH_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										image_view_create_info . subresourceRange . aspectMask  =  VK_IMAGE_ASPECT_COLOR_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-04-28 13:38:37 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkImageViewASTCDecodeModeEXT  decode_mode ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( enabled_device_extension_names . has ( VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( image_view_create_info . format  > =  VK_FORMAT_ASTC_4x4_UNORM_BLOCK  & &  image_view_create_info . format  < =  VK_FORMAT_ASTC_12x12_SRGB_BLOCK )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											decode_mode . sType  =  VK_STRUCTURE_TYPE_IMAGE_VIEW_ASTC_DECODE_MODE_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											decode_mode . pNext  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											decode_mode . decodeMode  =  VK_FORMAT_R8G8B8A8_UNORM ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											image_view_create_info . pNext  =  & decode_mode ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VkImageView  vk_image_view  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-02-13 14:58:48 -08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  err  =  vkCreateImageView ( vk_device ,  & image_view_create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_IMAGE_VIEW ) ,  & vk_image_view ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									if  ( err )  { 
							 
						 
					
						
							
								
									
										
										
										
											2025-02-13 14:58:48 -08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										if  ( ! Engine : : get_singleton ( ) - > is_extra_gpu_memory_tracking_enabled ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vmaDestroyImage ( allocator ,  vk_image ,  allocation ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vkDestroyImage ( vk_device ,  vk_image ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_IMAGE ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vmaFreeMemory ( allocator ,  allocation ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_COND_V_MSG ( err ,  TextureID ( ) ,  " vkCreateImageView failed with error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Bookkeep.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									TextureInfo  * tex_info  =  VersatileResource : : allocate < TextureInfo > ( resources_allocator ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									tex_info - > vk_image  =  vk_image ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									tex_info - > vk_view  =  vk_image_view ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									tex_info - > rd_format  =  p_format . format ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									tex_info - > vk_create_info  =  create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									tex_info - > vk_view_create_info  =  image_view_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									tex_info - > allocation . handle  =  allocation ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# ifdef DEBUG_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									tex_info - > transient  =  ( p_format . usage_bits  &  TEXTURE_USAGE_TRANSIENT_BIT )  ! =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									vmaGetAllocationInfo ( allocator ,  tex_info - > allocation . handle ,  & tex_info - > allocation . info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-01-22 16:11:26 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# if PRINT_NATIVE_COMMANDS 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									print_line ( vformat ( " vkCreateImageView: 0x%uX for 0x%uX " ,  uint64_t ( vk_image_view ) ,  uint64_t ( vk_image ) ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									return  TextureID ( tex_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-04-19 17:57:28 +03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								RDD : : TextureID  RenderingDeviceDriverVulkan : : texture_create_from_extension ( uint64_t  p_native_texture ,  TextureType  p_type ,  DataFormat  p_format ,  uint32_t  p_array_layers ,  bool  p_depth_stencil ,  uint32_t  p_mipmaps )  {  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VkImage  vk_image  =  ( VkImage ) p_native_texture ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// We only need to create a view into the already existing natively-provided texture.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkImageViewCreateInfo  image_view_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . sType  =  VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . image  =  vk_image ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . viewType  =  ( VkImageViewType ) p_type ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . format  =  RD_TO_VK_FORMAT [ p_format ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . components . r  =  VK_COMPONENT_SWIZZLE_R ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . components . g  =  VK_COMPONENT_SWIZZLE_G ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . components . b  =  VK_COMPONENT_SWIZZLE_B ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . components . a  =  VK_COMPONENT_SWIZZLE_A ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-04-19 17:57:28 +03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									image_view_create_info . subresourceRange . baseMipLevel  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . subresourceRange . levelCount  =  p_mipmaps ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									image_view_create_info . subresourceRange . layerCount  =  p_array_layers ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . subresourceRange . aspectMask  =  p_depth_stencil  ?  VK_IMAGE_ASPECT_DEPTH_BIT  :  VK_IMAGE_ASPECT_COLOR_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkImageView  vk_image_view  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  err  =  vkCreateImageView ( vk_device ,  & image_view_create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_IMAGE_VIEW ) ,  & vk_image_view ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									if  ( err )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_COND_V_MSG ( err ,  TextureID ( ) ,  " vkCreateImageView failed with error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Bookkeep.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									TextureInfo  * tex_info  =  VersatileResource : : allocate < TextureInfo > ( resources_allocator ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									tex_info - > vk_view  =  vk_image_view ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									tex_info - > rd_format  =  p_format ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									tex_info - > vk_view_create_info  =  image_view_create_info ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-09-11 02:25:12 +03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# ifdef DEBUG_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									tex_info - > created_from_extension  =  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									return  TextureID ( tex_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								RDD : : TextureID  RenderingDeviceDriverVulkan : : texture_create_shared ( TextureID  p_original_texture ,  const  TextureView  & p_view )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  TextureInfo  * owner_tex_info  =  ( const  TextureInfo  * ) p_original_texture . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# ifdef DEBUG_ENABLED 
  
						 
					
						
							
								
									
										
										
										
											2024-09-11 02:25:12 +03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( ! owner_tex_info - > allocation . handle  & &  ! owner_tex_info - > created_from_extension ,  TextureID ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkImageViewCreateInfo  image_view_create_info  =  owner_tex_info - > vk_view_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . format  =  RD_TO_VK_FORMAT [ p_view . format ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . components . r  =  ( VkComponentSwizzle ) p_view . swizzle_r ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . components . g  =  ( VkComponentSwizzle ) p_view . swizzle_g ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . components . b  =  ( VkComponentSwizzle ) p_view . swizzle_b ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . components . a  =  ( VkComponentSwizzle ) p_view . swizzle_a ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( enabled_device_extension_names . has ( VK_KHR_MAINTENANCE_2_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										// May need to make VK_KHR_maintenance2 mandatory and thus has Vulkan 1.1 be our minimum supported version
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// if we require setting this information. Vulkan 1.0 may simply not care.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( image_view_create_info . format  ! =  owner_tex_info - > vk_view_create_info . format )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VkImageViewUsageCreateInfo  * usage_info  =  ALLOCA_SINGLE ( VkImageViewUsageCreateInfo ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											* usage_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											usage_info - > sType  =  VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											usage_info - > usage  =  owner_tex_info - > vk_create_info . usage ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// Certain features may not be available for the format of the view.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											{ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VkFormatProperties  properties  =  { } ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												vkGetPhysicalDeviceFormatProperties ( physical_device ,  RD_TO_VK_FORMAT [ p_view . format ] ,  & properties ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
												const  VkFormatFeatureFlags  & supported_flags  =  owner_tex_info - > vk_create_info . tiling  = =  VK_IMAGE_TILING_LINEAR  ?  properties . linearTilingFeatures  :  properties . optimalTilingFeatures ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												if  ( ( usage_info - > usage  &  VK_IMAGE_USAGE_STORAGE_BIT )  & &  ! ( supported_flags  &  VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT ) )  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-12-13 19:03:37 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
													usage_info - > usage  & =  ~ uint32_t ( VK_IMAGE_USAGE_STORAGE_BIT ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												if  ( ( usage_info - > usage  &  VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT )  & &  ! ( supported_flags  &  VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT ) )  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-12-13 19:03:37 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
													usage_info - > usage  & =  ~ uint32_t ( VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											image_view_create_info . pNext  =  usage_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkImageView  new_vk_image_view  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  err  =  vkCreateImageView ( vk_device ,  & image_view_create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_IMAGE_VIEW ) ,  & new_vk_image_view ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( err ,  TextureID ( ) ,  " vkCreateImageView failed with error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Bookkeep.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									TextureInfo  * tex_info  =  VersatileResource : : allocate < TextureInfo > ( resources_allocator ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									* tex_info  =  * owner_tex_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									tex_info - > vk_view  =  new_vk_image_view ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									tex_info - > vk_view_create_info  =  image_view_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									tex_info - > allocation  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-01-22 16:11:26 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# if PRINT_NATIVE_COMMANDS 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									print_line ( vformat ( " vkCreateImageView: 0x%uX for 0x%uX " ,  uint64_t ( new_vk_image_view ) ,  uint64_t ( owner_tex_info - > vk_view_create_info . image ) ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									return  TextureID ( tex_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								RDD : : TextureID  RenderingDeviceDriverVulkan : : texture_create_shared_from_slice ( TextureID  p_original_texture ,  const  TextureView  & p_view ,  TextureSliceType  p_slice_type ,  uint32_t  p_layer ,  uint32_t  p_layers ,  uint32_t  p_mipmap ,  uint32_t  p_mipmaps )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  TextureInfo  * owner_tex_info  =  ( const  TextureInfo  * ) p_original_texture . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# ifdef DEBUG_ENABLED 
  
						 
					
						
							
								
									
										
										
										
											2024-09-11 02:25:12 +03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( ! owner_tex_info - > allocation . handle  & &  ! owner_tex_info - > created_from_extension ,  TextureID ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkImageViewCreateInfo  image_view_create_info  =  owner_tex_info - > vk_view_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									switch  ( p_slice_type )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  TEXTURE_SLICE_2D :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											image_view_create_info . viewType  =  VK_IMAGE_VIEW_TYPE_2D ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  TEXTURE_SLICE_3D :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											image_view_create_info . viewType  =  VK_IMAGE_VIEW_TYPE_3D ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  TEXTURE_SLICE_CUBEMAP :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											image_view_create_info . viewType  =  VK_IMAGE_VIEW_TYPE_CUBE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  TEXTURE_SLICE_2D_ARRAY :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											image_view_create_info . viewType  =  VK_IMAGE_VIEW_TYPE_2D_ARRAY ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										}  break ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										default :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  TextureID ( nullptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . format  =  RD_TO_VK_FORMAT [ p_view . format ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . components . r  =  ( VkComponentSwizzle ) p_view . swizzle_r ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . components . g  =  ( VkComponentSwizzle ) p_view . swizzle_g ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . components . b  =  ( VkComponentSwizzle ) p_view . swizzle_b ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . components . a  =  ( VkComponentSwizzle ) p_view . swizzle_a ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . subresourceRange . baseMipLevel  =  p_mipmap ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . subresourceRange . levelCount  =  p_mipmaps ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . subresourceRange . baseArrayLayer  =  p_layer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									image_view_create_info . subresourceRange . layerCount  =  p_layers ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkImageView  new_vk_image_view  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  err  =  vkCreateImageView ( vk_device ,  & image_view_create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_IMAGE_VIEW ) ,  & new_vk_image_view ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( err ,  TextureID ( ) ,  " vkCreateImageView failed with error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Bookkeep.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									TextureInfo  * tex_info  =  VersatileResource : : allocate < TextureInfo > ( resources_allocator ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									* tex_info  =  * owner_tex_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									tex_info - > vk_view  =  new_vk_image_view ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									tex_info - > vk_view_create_info  =  image_view_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									tex_info - > allocation  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-01-22 16:11:26 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# if PRINT_NATIVE_COMMANDS 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									print_line ( vformat ( " vkCreateImageView: 0x%uX for 0x%uX (%d %d %d %d) " ,  uint64_t ( new_vk_image_view ) ,  uint64_t ( owner_tex_info - > vk_view_create_info . image ) ,  p_mipmap ,  p_mipmaps ,  p_layer ,  p_layers ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									return  TextureID ( tex_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : texture_free ( TextureID  p_texture )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									TextureInfo  * tex_info  =  ( TextureInfo  * ) p_texture . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkDestroyImageView ( vk_device ,  tex_info - > vk_view ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_IMAGE_VIEW ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									if  ( tex_info - > allocation . handle )  { 
							 
						 
					
						
							
								
									
										
										
										
											2025-02-13 14:58:48 -08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										if  ( ! Engine : : get_singleton ( ) - > is_extra_gpu_memory_tracking_enabled ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vmaDestroyImage ( allocator ,  tex_info - > vk_view_create_info . image ,  tex_info - > allocation . handle ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vkDestroyImage ( vk_device ,  tex_info - > vk_image ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_BUFFER ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vmaFreeMemory ( allocator ,  tex_info - > allocation . handle ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VersatileResource : : free ( resources_allocator ,  tex_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								uint64_t  RenderingDeviceDriverVulkan : : texture_get_allocation_size ( TextureID  p_texture )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  TextureInfo  * tex_info  =  ( const  TextureInfo  * ) p_texture . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  tex_info - > allocation . info . size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : texture_get_copyable_layout ( TextureID  p_texture ,  const  TextureSubresource  & p_subresource ,  TextureCopyableLayout  * r_layout )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  TextureInfo  * tex_info  =  ( const  TextureInfo  * ) p_texture . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									* r_layout  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( tex_info - > vk_create_info . tiling  = =  VK_IMAGE_TILING_LINEAR )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkImageSubresource  vk_subres  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subres . aspectMask  =  ( VkImageAspectFlags ) ( 1  < <  p_subresource . aspect ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subres . arrayLayer  =  p_subresource . layer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subres . mipLevel  =  p_subresource . mipmap ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkSubresourceLayout  vk_layout  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vkGetImageSubresourceLayout ( vk_device ,  tex_info - > vk_view_create_info . image ,  & vk_subres ,  & vk_layout ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										r_layout - > offset  =  vk_layout . offset ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										r_layout - > size  =  vk_layout . size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										r_layout - > row_pitch  =  vk_layout . rowPitch ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										r_layout - > depth_pitch  =  vk_layout . depthPitch ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										r_layout - > layer_pitch  =  vk_layout . arrayPitch ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Tight.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  w  =  tex_info - > vk_create_info . extent . width ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  h  =  tex_info - > vk_create_info . extent . height ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  d  =  tex_info - > vk_create_info . extent . depth ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( p_subresource . mipmap  >  0 )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											r_layout - > offset  =  get_image_format_required_size ( tex_info - > rd_format ,  w ,  h ,  d ,  p_subresource . mipmap ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  i  =  0 ;  i  <  p_subresource . mipmap ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											w  =  MAX ( 1u ,  w  > >  1 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											h  =  MAX ( 1u ,  h  > >  1 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											d  =  MAX ( 1u ,  d  > >  1 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2024-01-06 00:05:15 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										uint32_t  bw  =  0 ,  bh  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										get_compressed_image_format_block_dimensions ( tex_info - > rd_format ,  bw ,  bh ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  sbw  =  0 ,  sbh  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										r_layout - > size  =  get_image_format_required_size ( tex_info - > rd_format ,  w ,  h ,  d ,  1 ,  & sbw ,  & sbh ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										r_layout - > row_pitch  =  r_layout - > size  /  ( ( sbh  /  bh )  *  d ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										r_layout - > depth_pitch  =  r_layout - > size  /  d ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										r_layout - > layer_pitch  =  r_layout - > size  /  tex_info - > vk_create_info . arrayLayers ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								uint8_t  * RenderingDeviceDriverVulkan : : texture_map ( TextureID  p_texture ,  const  TextureSubresource  & p_subresource )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  TextureInfo  * tex_info  =  ( const  TextureInfo  * ) p_texture . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkImageSubresource  vk_subres  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vk_subres . aspectMask  =  ( VkImageAspectFlags ) ( 1  < <  p_subresource . aspect ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vk_subres . arrayLayer  =  p_subresource . layer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vk_subres . mipLevel  =  p_subresource . mipmap ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkSubresourceLayout  vk_layout  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkGetImageSubresourceLayout ( vk_device ,  tex_info - > vk_view_create_info . image ,  & vk_subres ,  & vk_layout ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  * data_ptr  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkResult  err  =  vkMapMemory ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_device , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											tex_info - > allocation . info . deviceMemory , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											tex_info - > allocation . info . offset  +  vk_layout . offset , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_layout . size , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											0 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											& data_ptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vmaMapMemory ( allocator ,  tex_info - > allocation . handle ,  & data_ptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( err ,  nullptr ,  " vkMapMemory failed with error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  ( uint8_t  * ) data_ptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : texture_unmap ( TextureID  p_texture )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  TextureInfo  * tex_info  =  ( const  TextureInfo  * ) p_texture . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vmaUnmapMemory ( allocator ,  tex_info - > allocation . handle ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								BitField < RDD : : TextureUsageBits >  RenderingDeviceDriverVulkan : : texture_get_usages_supported_by_format ( DataFormat  p_format ,  bool  p_cpu_readable )  {  
						 
					
						
							
								
									
										
										
										
											2025-02-12 13:23:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( p_format  > =  DATA_FORMAT_ASTC_4x4_SFLOAT_BLOCK  & &  p_format  < =  DATA_FORMAT_ASTC_12x12_SFLOAT_BLOCK  & &  ! enabled_device_extension_names . has ( VK_EXT_TEXTURE_COMPRESSION_ASTC_HDR_EXTENSION_NAME ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Formats that were introduced later with extensions must not reach vkGetPhysicalDeviceFormatProperties if the extension isn't available. This means it's not supported.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VkFormatProperties  properties  =  { } ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkGetPhysicalDeviceFormatProperties ( physical_device ,  RD_TO_VK_FORMAT [ p_format ] ,  & properties ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  VkFormatFeatureFlags  & flags  =  p_cpu_readable  ?  properties . linearTilingFeatures  :  properties . optimalTilingFeatures ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Everything supported by default makes an all-or-nothing check easier for the caller.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									BitField < RDD : : TextureUsageBits >  supported  =  INT64_MAX ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ! ( flags  &  VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										supported . clear_flag ( TEXTURE_USAGE_SAMPLING_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ! ( flags  &  VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										supported . clear_flag ( TEXTURE_USAGE_COLOR_ATTACHMENT_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ! ( flags  &  VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										supported . clear_flag ( TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ! ( flags  &  VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										supported . clear_flag ( TEXTURE_USAGE_STORAGE_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ! ( flags  &  VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										supported . clear_flag ( TEXTURE_USAGE_STORAGE_ATOMIC_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( p_format  ! =  DATA_FORMAT_R8_UINT  & &  p_format  ! =  DATA_FORMAT_R8G8_UNORM )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										supported . clear_flag ( TEXTURE_USAGE_VRS_ATTACHMENT_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-04-24 14:30:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								bool  RenderingDeviceDriverVulkan : : texture_can_make_shared_with_format ( TextureID  p_texture ,  DataFormat  p_format ,  bool  & r_raw_reinterpretation )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_raw_reinterpretation  =  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								/*****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** SAMPLER ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// RDD::SamplerRepeatMode == VkSamplerAddressMode.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : SAMPLER_REPEAT_MODE_REPEAT ,  VK_SAMPLER_ADDRESS_MODE_REPEAT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : SAMPLER_REPEAT_MODE_MIRRORED_REPEAT ,  VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE ,  VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : SAMPLER_REPEAT_MODE_CLAMP_TO_BORDER ,  VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : SAMPLER_REPEAT_MODE_MIRROR_CLAMP_TO_EDGE ,  VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// RDD::SamplerBorderColor == VkBorderColor.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : SAMPLER_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK ,  VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : SAMPLER_BORDER_COLOR_INT_TRANSPARENT_BLACK ,  VK_BORDER_COLOR_INT_TRANSPARENT_BLACK ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : SAMPLER_BORDER_COLOR_FLOAT_OPAQUE_BLACK ,  VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : SAMPLER_BORDER_COLOR_INT_OPAQUE_BLACK ,  VK_BORDER_COLOR_INT_OPAQUE_BLACK ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : SAMPLER_BORDER_COLOR_FLOAT_OPAQUE_WHITE ,  VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : SAMPLER_BORDER_COLOR_INT_OPAQUE_WHITE ,  VK_BORDER_COLOR_INT_OPAQUE_WHITE ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								RDD : : SamplerID  RenderingDeviceDriverVulkan : : sampler_create ( const  SamplerState  & p_state )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkSamplerCreateInfo  sampler_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sampler_create_info . sType  =  VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sampler_create_info . pNext  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sampler_create_info . flags  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sampler_create_info . magFilter  =  p_state . mag_filter  = =  SAMPLER_FILTER_LINEAR  ?  VK_FILTER_LINEAR  :  VK_FILTER_NEAREST ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sampler_create_info . minFilter  =  p_state . min_filter  = =  SAMPLER_FILTER_LINEAR  ?  VK_FILTER_LINEAR  :  VK_FILTER_NEAREST ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sampler_create_info . mipmapMode  =  p_state . mip_filter  = =  SAMPLER_FILTER_LINEAR  ?  VK_SAMPLER_MIPMAP_MODE_LINEAR  :  VK_SAMPLER_MIPMAP_MODE_NEAREST ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sampler_create_info . addressModeU  =  ( VkSamplerAddressMode ) p_state . repeat_u ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sampler_create_info . addressModeV  =  ( VkSamplerAddressMode ) p_state . repeat_v ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sampler_create_info . addressModeW  =  ( VkSamplerAddressMode ) p_state . repeat_w ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sampler_create_info . mipLodBias  =  p_state . lod_bias ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									sampler_create_info . anisotropyEnable  =  p_state . use_anisotropy  & &  ( physical_device_features . samplerAnisotropy  = =  VK_TRUE ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									sampler_create_info . maxAnisotropy  =  p_state . anisotropy_max ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sampler_create_info . compareEnable  =  p_state . enable_compare ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sampler_create_info . compareOp  =  ( VkCompareOp ) p_state . compare_op ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sampler_create_info . minLod  =  p_state . min_lod ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sampler_create_info . maxLod  =  p_state . max_lod ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sampler_create_info . borderColor  =  ( VkBorderColor ) p_state . border_color ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sampler_create_info . unnormalizedCoordinates  =  p_state . unnormalized_uvw ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkSampler  vk_sampler  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  res  =  vkCreateSampler ( vk_device ,  & sampler_create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_SAMPLER ) ,  & vk_sampler ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( res ,  SamplerID ( ) ,  " vkCreateSampler failed with error  "  +  itos ( res )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  SamplerID ( vk_sampler ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : sampler_free ( SamplerID  p_sampler )  {  
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkDestroySampler ( vk_device ,  ( VkSampler ) p_sampler . id ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_SAMPLER ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								bool  RenderingDeviceDriverVulkan : : sampler_is_format_supported_for_filter ( DataFormat  p_format ,  SamplerFilter  p_filter )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									switch  ( p_filter )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										case  SAMPLER_FILTER_NEAREST :  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											return  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										case  SAMPLER_FILTER_LINEAR :  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											VkFormatProperties  properties  =  { } ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											vkGetPhysicalDeviceFormatProperties ( physical_device ,  RD_TO_VK_FORMAT [ p_format ] ,  & properties ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											return  ( properties . optimalTilingFeatures  &  VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**********************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** VERTEX ARRAY ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**********************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								RDD : : VertexFormatID  RenderingDeviceDriverVulkan : : vertex_format_create ( VectorView < VertexAttribute >  p_vertex_attribs )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Pre-bookkeep.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VertexFormatInfo  * vf_info  =  VersatileResource : : allocate < VertexFormatInfo > ( resources_allocator ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vf_info - > vk_bindings . resize ( p_vertex_attribs . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vf_info - > vk_attributes . resize ( p_vertex_attribs . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_vertex_attribs . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vf_info - > vk_bindings [ i ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vf_info - > vk_bindings [ i ] . binding  =  i ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vf_info - > vk_bindings [ i ] . stride  =  p_vertex_attribs [ i ] . stride ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vf_info - > vk_bindings [ i ] . inputRate  =  p_vertex_attribs [ i ] . frequency  = =  VERTEX_FREQUENCY_INSTANCE  ?  VK_VERTEX_INPUT_RATE_INSTANCE  :  VK_VERTEX_INPUT_RATE_VERTEX ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vf_info - > vk_attributes [ i ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vf_info - > vk_attributes [ i ] . binding  =  i ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vf_info - > vk_attributes [ i ] . location  =  p_vertex_attribs [ i ] . location ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vf_info - > vk_attributes [ i ] . format  =  RD_TO_VK_FORMAT [ p_vertex_attribs [ i ] . format ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vf_info - > vk_attributes [ i ] . offset  =  p_vertex_attribs [ i ] . offset ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vf_info - > vk_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vf_info - > vk_create_info . sType  =  VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vf_info - > vk_create_info . vertexBindingDescriptionCount  =  vf_info - > vk_bindings . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vf_info - > vk_create_info . pVertexBindingDescriptions  =  vf_info - > vk_bindings . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vf_info - > vk_create_info . vertexAttributeDescriptionCount  =  vf_info - > vk_attributes . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vf_info - > vk_create_info . pVertexAttributeDescriptions  =  vf_info - > vk_attributes . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  VertexFormatID ( vf_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : vertex_format_free ( VertexFormatID  p_vertex_format )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VertexFormatInfo  * vf_info  =  ( VertexFormatInfo  * ) p_vertex_format . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VersatileResource : : free ( resources_allocator ,  vf_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/******************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** BARRIERS ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/******************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// RDD::PipelineStageBits == VkPipelineStageFlagBits.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : PIPELINE_STAGE_TOP_OF_PIPE_BIT ,  VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : PIPELINE_STAGE_DRAW_INDIRECT_BIT ,  VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : PIPELINE_STAGE_VERTEX_INPUT_BIT ,  VK_PIPELINE_STAGE_VERTEX_INPUT_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : PIPELINE_STAGE_VERTEX_SHADER_BIT ,  VK_PIPELINE_STAGE_VERTEX_SHADER_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT ,  VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT ,  VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : PIPELINE_STAGE_GEOMETRY_SHADER_BIT ,  VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : PIPELINE_STAGE_FRAGMENT_SHADER_BIT ,  VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT ,  VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT ,  VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT ,  VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : PIPELINE_STAGE_COMPUTE_SHADER_BIT ,  VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT ,  VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : PIPELINE_STAGE_ALL_GRAPHICS_BIT ,  VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : PIPELINE_STAGE_ALL_COMMANDS_BIT ,  VK_PIPELINE_STAGE_ALL_COMMANDS_BIT ) ) ;  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT ,  VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT ,  VK_PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT_EXT ) ) ;  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// RDD::BarrierAccessBits == VkAccessFlagBits.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT ,  VK_ACCESS_INDIRECT_COMMAND_READ_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BARRIER_ACCESS_INDEX_READ_BIT ,  VK_ACCESS_INDEX_READ_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BARRIER_ACCESS_VERTEX_ATTRIBUTE_READ_BIT ,  VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BARRIER_ACCESS_UNIFORM_READ_BIT ,  VK_ACCESS_UNIFORM_READ_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BARRIER_ACCESS_INPUT_ATTACHMENT_READ_BIT ,  VK_ACCESS_INPUT_ATTACHMENT_READ_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BARRIER_ACCESS_SHADER_READ_BIT ,  VK_ACCESS_SHADER_READ_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BARRIER_ACCESS_SHADER_WRITE_BIT ,  VK_ACCESS_SHADER_WRITE_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT ,  VK_ACCESS_COLOR_ATTACHMENT_READ_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT ,  VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT ,  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT ,  VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BARRIER_ACCESS_HOST_READ_BIT ,  VK_ACCESS_HOST_READ_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BARRIER_ACCESS_HOST_WRITE_BIT ,  VK_ACCESS_HOST_WRITE_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BARRIER_ACCESS_MEMORY_READ_BIT ,  VK_ACCESS_MEMORY_READ_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BARRIER_ACCESS_MEMORY_WRITE_BIT ,  VK_ACCESS_MEMORY_WRITE_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT ,  VK_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR ) ) ;  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BARRIER_ACCESS_FRAGMENT_DENSITY_MAP_ATTACHMENT_READ_BIT ,  VK_ACCESS_FRAGMENT_DENSITY_MAP_READ_BIT_EXT ) ) ;  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_pipeline_barrier (  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										CommandBufferID  p_cmd_buffer , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										BitField < PipelineStageBits >  p_src_stages , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										BitField < PipelineStageBits >  p_dst_stages , 
							 
						 
					
						
							
								
									
										
										
										
											2025-09-07 16:14:33 -05:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										VectorView < MemoryAccessBarrier >  p_memory_barriers , 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										VectorView < BufferBarrier >  p_buffer_barriers , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VectorView < TextureBarrier >  p_texture_barriers )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkMemoryBarrier  * vk_memory_barriers  =  ALLOCA_ARRAY ( VkMemoryBarrier ,  p_memory_barriers . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_memory_barriers . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_memory_barriers [ i ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_memory_barriers [ i ] . sType  =  VK_STRUCTURE_TYPE_MEMORY_BARRIER ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-04-24 14:30:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										vk_memory_barriers [ i ] . srcAccessMask  =  _rd_to_vk_access_flags ( p_memory_barriers [ i ] . src_access ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_memory_barriers [ i ] . dstAccessMask  =  _rd_to_vk_access_flags ( p_memory_barriers [ i ] . dst_access ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkBufferMemoryBarrier  * vk_buffer_barriers  =  ALLOCA_ARRAY ( VkBufferMemoryBarrier ,  p_buffer_barriers . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_buffer_barriers . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_buffer_barriers [ i ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_buffer_barriers [ i ] . sType  =  VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_buffer_barriers [ i ] . srcQueueFamilyIndex  =  VK_QUEUE_FAMILY_IGNORED ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_buffer_barriers [ i ] . dstQueueFamilyIndex  =  VK_QUEUE_FAMILY_IGNORED ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-04-24 14:30:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										vk_buffer_barriers [ i ] . srcAccessMask  =  _rd_to_vk_access_flags ( p_buffer_barriers [ i ] . src_access ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_buffer_barriers [ i ] . dstAccessMask  =  _rd_to_vk_access_flags ( p_buffer_barriers [ i ] . dst_access ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										vk_buffer_barriers [ i ] . buffer  =  ( ( const  BufferInfo  * ) p_buffer_barriers [ i ] . buffer . id ) - > vk_buffer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_buffer_barriers [ i ] . offset  =  p_buffer_barriers [ i ] . offset ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_buffer_barriers [ i ] . size  =  p_buffer_barriers [ i ] . size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkImageMemoryBarrier  * vk_image_barriers  =  ALLOCA_ARRAY ( VkImageMemoryBarrier ,  p_texture_barriers . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_texture_barriers . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										const  TextureInfo  * tex_info  =  ( const  TextureInfo  * ) p_texture_barriers [ i ] . texture . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_image_barriers [ i ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_image_barriers [ i ] . sType  =  VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-04-24 14:30:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										vk_image_barriers [ i ] . srcAccessMask  =  _rd_to_vk_access_flags ( p_texture_barriers [ i ] . src_access ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_image_barriers [ i ] . dstAccessMask  =  _rd_to_vk_access_flags ( p_texture_barriers [ i ] . dst_access ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_image_barriers [ i ] . oldLayout  =  RD_TO_VK_LAYOUT [ p_texture_barriers [ i ] . prev_layout ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_image_barriers [ i ] . newLayout  =  RD_TO_VK_LAYOUT [ p_texture_barriers [ i ] . next_layout ] ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										vk_image_barriers [ i ] . srcQueueFamilyIndex  =  VK_QUEUE_FAMILY_IGNORED ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_image_barriers [ i ] . dstQueueFamilyIndex  =  VK_QUEUE_FAMILY_IGNORED ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_image_barriers [ i ] . image  =  tex_info - > vk_view_create_info . image ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_image_barriers [ i ] . subresourceRange . aspectMask  =  ( VkImageAspectFlags ) p_texture_barriers [ i ] . subresources . aspect ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_image_barriers [ i ] . subresourceRange . baseMipLevel  =  p_texture_barriers [ i ] . subresources . base_mipmap ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_image_barriers [ i ] . subresourceRange . levelCount  =  p_texture_barriers [ i ] . subresources . mipmap_count ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_image_barriers [ i ] . subresourceRange . baseArrayLayer  =  p_texture_barriers [ i ] . subresources . base_layer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_image_barriers [ i ] . subresourceRange . layerCount  =  p_texture_barriers [ i ] . subresources . layer_count ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-01-22 16:11:26 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# if PRINT_NATIVE_COMMANDS 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									print_line ( vformat ( " vkCmdPipelineBarrier MEMORY %d BUFFER %d TEXTURE %d " ,  p_memory_barriers . size ( ) ,  p_buffer_barriers . size ( ) ,  p_texture_barriers . size ( ) ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_memory_barriers . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										print_line ( vformat ( "   VkMemoryBarrier #%d src 0x%uX dst 0x%uX " ,  i ,  vk_memory_barriers [ i ] . srcAccessMask ,  vk_memory_barriers [ i ] . dstAccessMask ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_buffer_barriers . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										print_line ( vformat ( "   VkBufferMemoryBarrier #%d src 0x%uX dst 0x%uX buffer 0x%ux " ,  i ,  vk_buffer_barriers [ i ] . srcAccessMask ,  vk_buffer_barriers [ i ] . dstAccessMask ,  uint64_t ( vk_buffer_barriers [ i ] . buffer ) ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_texture_barriers . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										print_line ( vformat ( "   VkImageMemoryBarrier #%d src 0x%uX dst 0x%uX image 0x%ux old %d new %d (%d %d %d %d) " ,  i ,  vk_image_barriers [ i ] . srcAccessMask ,  vk_image_barriers [ i ] . dstAccessMask , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												uint64_t ( vk_image_barriers [ i ] . image ) ,  vk_image_barriers [ i ] . oldLayout ,  vk_image_barriers [ i ] . newLayout ,  vk_image_barriers [ i ] . subresourceRange . baseMipLevel ,  vk_image_barriers [ i ] . subresourceRange . levelCount , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_image_barriers [ i ] . subresourceRange . baseArrayLayer ,  vk_image_barriers [ i ] . subresourceRange . layerCount ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									vkCmdPipelineBarrier ( 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											command_buffer - > vk_command_buffer , 
							 
						 
					
						
							
								
									
										
										
										
											2024-04-24 14:30:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											_rd_to_vk_pipeline_stages ( p_src_stages ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											_rd_to_vk_pipeline_stages ( p_dst_stages ) , 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											0 , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											p_memory_barriers . size ( ) ,  vk_memory_barriers , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											p_buffer_barriers . size ( ) ,  vk_buffer_barriers , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											p_texture_barriers . size ( ) ,  vk_image_barriers ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								/****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** FENCES ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								RDD : : FenceID  RenderingDeviceDriverVulkan : : fence_create ( )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkFence  vk_fence  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkFenceCreateInfo  create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . sType  =  VK_STRUCTURE_TYPE_FENCE_CREATE_INFO ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  err  =  vkCreateFence ( vk_device ,  & create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_FENCE ) ,  & vk_fence ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  FenceID ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Fence  * fence  =  memnew ( Fence ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									fence - > vk_fence  =  vk_fence ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									fence - > queue_signaled_from  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  FenceID ( fence ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Error  RenderingDeviceDriverVulkan : : fence_wait ( FenceID  p_fence )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Fence  * fence  =  ( Fence  * ) ( p_fence . id ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  fence_status  =  vkGetFenceStatus ( vk_device ,  fence - > vk_fence ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( fence_status  = =  VK_NOT_READY )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkResult  err  =  vkWaitForFences ( vk_device ,  1 ,  & fence - > vk_fence ,  VK_TRUE ,  UINT64_MAX ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  FAILED ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  err  =  vkResetFences ( vk_device ,  1 ,  & fence - > vk_fence ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  FAILED ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( fence - > queue_signaled_from  ! =  nullptr )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Release all semaphores that the command queue associated to the fence waited on the last time it was submitted.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										LocalVector < Pair < Fence  * ,  uint32_t > >  & pairs  =  fence - > queue_signaled_from - > image_semaphores_for_fences ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  i  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										while  ( i  <  pairs . size ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( pairs [ i ] . first  = =  fence )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												_release_image_semaphore ( fence - > queue_signaled_from ,  pairs [ i ] . second ,  true ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												fence - > queue_signaled_from - > free_image_semaphores . push_back ( pairs [ i ] . second ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												pairs . remove_at ( i ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												i + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fence - > queue_signaled_from  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  OK ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : fence_free ( FenceID  p_fence )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Fence  * fence  =  ( Fence  * ) ( p_fence . id ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkDestroyFence ( vk_device ,  fence - > vk_fence ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_FENCE ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									memdelete ( fence ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/********************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** SEMAPHORES ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/********************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								RDD : : SemaphoreID  RenderingDeviceDriverVulkan : : semaphore_create ( )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkSemaphore  semaphore  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkSemaphoreCreateInfo  create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . sType  =  VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  err  =  vkCreateSemaphore ( vk_device ,  & create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_SEMAPHORE ) ,  & semaphore ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  SemaphoreID ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  SemaphoreID ( semaphore ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : semaphore_free ( SemaphoreID  p_semaphore )  {  
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkDestroySemaphore ( vk_device ,  VkSemaphore ( p_semaphore . id ) ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_SEMAPHORE ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/******************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** COMMANDS ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/******************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// ----- QUEUE FAMILY -----
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								RDD : : CommandQueueFamilyID  RenderingDeviceDriverVulkan : : command_queue_family_get ( BitField < CommandQueueFamilyBits >  p_cmd_queue_family_bits ,  RenderingContextDriver : : SurfaceID  p_surface )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Pick the queue with the least amount of bits that can fulfill the requirements.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkQueueFlags  picked_queue_flags  =  VK_QUEUE_FLAG_BITS_MAX_ENUM ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  picked_family_index  =  UINT_MAX ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  queue_family_properties . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( queue_families [ i ] . is_empty ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// Ignore empty queue families.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											continue ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( p_surface  ! =  0  & &  ! context_driver - > queue_family_supports_present ( physical_device ,  i ,  p_surface ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// Present is not an actual bit but something that must be queried manually.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											continue ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Preferring a queue with less bits will get us closer to getting a queue that performs better for our requirements.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// For example, dedicated compute and transfer queues are usually indicated as such.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										const  VkQueueFlags  option_queue_flags  =  queue_family_properties [ i ] . queueFlags ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-08-20 09:50:44 -05:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										const  bool  includes_all_bits  =  p_cmd_queue_family_bits . get_shared ( option_queue_flags )  = =  p_cmd_queue_family_bits ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										const  bool  prefer_less_bits  =  option_queue_flags  <  picked_queue_flags ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( includes_all_bits  & &  prefer_less_bits )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											picked_family_index  =  i ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											picked_queue_flags  =  option_queue_flags ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-19 01:49:46 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( picked_family_index  > =  queue_family_properties . size ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return  CommandQueueFamilyID ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Since 0 is a valid index and we use 0 as the error case, we make the index start from 1 instead.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  CommandQueueFamilyID ( picked_family_index  +  1 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// ----- QUEUE -----
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								RDD : : CommandQueueID  RenderingDeviceDriverVulkan : : command_queue_create ( CommandQueueFamilyID  p_cmd_queue_family ,  bool  p_identify_as_main_queue )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( p_cmd_queue_family . id  ! =  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Make a virtual queue on top of a real queue. Use the queue from the family with the least amount of virtual queues created.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  family_index  =  p_cmd_queue_family . id  -  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									TightLocalVector < Queue >  & queue_family  =  queue_families [ family_index ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  picked_queue_index  =  UINT_MAX ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  picked_virtual_count  =  UINT_MAX ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  queue_family . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( queue_family [ i ] . virtual_count  <  picked_virtual_count )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											picked_queue_index  =  i ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											picked_virtual_count  =  queue_family [ i ] . virtual_count ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( picked_queue_index  > =  queue_family . size ( ) ,  CommandQueueID ( ) ,  " A queue in the picked family could not be found. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-05-05 19:15:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# if defined(SWAPPY_FRAME_PACING_ENABLED) 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( swappy_frame_pacer_enable )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkQueue  selected_queue ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vkGetDeviceQueue ( vk_device ,  family_index ,  picked_queue_index ,  & selected_queue ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										SwappyVk_setQueueFamilyIndex ( vk_device ,  selected_queue ,  family_index ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Create the virtual queue.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									CommandQueue  * command_queue  =  memnew ( CommandQueue ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									command_queue - > queue_family  =  family_index ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									command_queue - > queue_index  =  picked_queue_index ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									queue_family [ picked_queue_index ] . virtual_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// If is was identified as the main queue and a hook is active, indicate it as such to the hook.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_identify_as_main_queue  & &  ( VulkanHooks : : get_singleton ( )  ! =  nullptr ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VulkanHooks : : get_singleton ( ) - > set_direct_queue_family_and_index ( family_index ,  picked_queue_index ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  CommandQueueID ( command_queue ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								Error  RenderingDeviceDriverVulkan : : command_queue_execute_and_present ( CommandQueueID  p_cmd_queue ,  VectorView < SemaphoreID >  p_wait_semaphores ,  VectorView < CommandBufferID >  p_cmd_buffers ,  VectorView < SemaphoreID >  p_cmd_semaphores ,  FenceID  p_cmd_fence ,  VectorView < SwapChainID >  p_swap_chains )  {  
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									DEV_ASSERT ( p_cmd_queue . id  ! =  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  err ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									CommandQueue  * command_queue  =  ( CommandQueue  * ) ( p_cmd_queue . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Queue  & device_queue  =  queue_families [ command_queue - > queue_family ] [ command_queue - > queue_index ] ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									Fence  * fence  =  ( Fence  * ) ( p_cmd_fence . id ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkFence  vk_fence  =  ( fence  ! =  nullptr )  ?  fence - > vk_fence  :  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									thread_local  LocalVector < VkSemaphore >  wait_semaphores ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									thread_local  LocalVector < VkPipelineStageFlags >  wait_semaphores_stages ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									wait_semaphores . clear ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									wait_semaphores_stages . clear ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ! command_queue - > pending_semaphores_for_execute . is_empty ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  i  =  0 ;  i  <  command_queue - > pending_semaphores_for_execute . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VkSemaphore  wait_semaphore  =  command_queue - > image_semaphores [ command_queue - > pending_semaphores_for_execute [ i ] ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											wait_semaphores . push_back ( wait_semaphore ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											wait_semaphores_stages . push_back ( VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										command_queue - > pending_semaphores_for_execute . clear ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_wait_semaphores . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// FIXME: Allow specifying the stage mask in more detail.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										wait_semaphores . push_back ( VkSemaphore ( p_wait_semaphores [ i ] . id ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										wait_semaphores_stages . push_back ( VK_PIPELINE_STAGE_ALL_COMMANDS_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( p_cmd_buffers . size ( )  >  0 )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										thread_local  LocalVector < VkCommandBuffer >  command_buffers ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-05-14 15:20:33 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										thread_local  LocalVector < VkSemaphore >  present_semaphores ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										thread_local  LocalVector < VkSemaphore >  signal_semaphores ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										command_buffers . clear ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-05-14 15:20:33 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										present_semaphores . clear ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										signal_semaphores . clear ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										for  ( uint32_t  i  =  0 ;  i  <  p_cmd_buffers . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) ( p_cmd_buffers [ i ] . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											command_buffers . push_back ( command_buffer - > vk_command_buffer ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										for  ( uint32_t  i  =  0 ;  i  <  p_cmd_semaphores . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											signal_semaphores . push_back ( VkSemaphore ( p_cmd_semaphores [ i ] . id ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-05-14 15:20:33 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										for  ( uint32_t  i  =  0 ;  i  <  p_swap_chains . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  SwapChain  * swap_chain  =  ( const  SwapChain  * ) ( p_swap_chains [ i ] . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VkSemaphore  semaphore  =  swap_chain - > present_semaphores [ swap_chain - > image_index ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											present_semaphores . push_back ( semaphore ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											signal_semaphores . push_back ( semaphore ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										VkSubmitInfo  submit_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										submit_info . sType  =  VK_STRUCTURE_TYPE_SUBMIT_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										submit_info . waitSemaphoreCount  =  wait_semaphores . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										submit_info . pWaitSemaphores  =  wait_semaphores . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										submit_info . pWaitDstStageMask  =  wait_semaphores_stages . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										submit_info . commandBufferCount  =  command_buffers . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										submit_info . pCommandBuffers  =  command_buffers . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										submit_info . signalSemaphoreCount  =  signal_semaphores . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										submit_info . pSignalSemaphores  =  signal_semaphores . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										device_queue . submit_mutex . lock ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										err  =  vkQueueSubmit ( device_queue . queue ,  1 ,  & submit_info ,  vk_fence ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										device_queue . submit_mutex . unlock ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( err  = =  VK_ERROR_DEVICE_LOST )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											print_lost_device_info ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											CRASH_NOW_MSG ( " Vulkan device was lost. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  FAILED ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( fence  ! =  nullptr  & &  ! command_queue - > pending_semaphores_for_fence . is_empty ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fence - > queue_signaled_from  =  command_queue ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// Indicate to the fence that it should release the semaphores that were waited on this submission the next time the fence is waited on.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											for  ( uint32_t  i  =  0 ;  i  <  command_queue - > pending_semaphores_for_fence . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												command_queue - > image_semaphores_for_fences . push_back ( {  fence ,  command_queue - > pending_semaphores_for_fence [ i ]  } ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											command_queue - > pending_semaphores_for_fence . clear ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-05-14 15:20:33 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										if  ( ! present_semaphores . is_empty ( ) )  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											// If command buffers were executed, swap chains must wait on the present semaphore used by the command queue.
 
							 
						 
					
						
							
								
									
										
										
										
											2025-05-14 15:20:33 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											wait_semaphores  =  present_semaphores ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( p_swap_chains . size ( )  >  0 )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										thread_local  LocalVector < VkSwapchainKHR >  swapchains ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										thread_local  LocalVector < uint32_t >  image_indices ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										thread_local  LocalVector < VkResult >  results ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										swapchains . clear ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										image_indices . clear ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										for  ( uint32_t  i  =  0 ;  i  <  p_swap_chains . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											SwapChain  * swap_chain  =  ( SwapChain  * ) ( p_swap_chains [ i ] . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											swapchains . push_back ( swap_chain - > vk_swapchain ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											DEV_ASSERT ( swap_chain - > image_index  <  swap_chain - > images . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											image_indices . push_back ( swap_chain - > image_index ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										results . resize ( swapchains . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkPresentInfoKHR  present_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										present_info . sType  =  VK_STRUCTURE_TYPE_PRESENT_INFO_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										present_info . waitSemaphoreCount  =  wait_semaphores . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										present_info . pWaitSemaphores  =  wait_semaphores . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										present_info . swapchainCount  =  swapchains . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										present_info . pSwapchains  =  swapchains . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										present_info . pImageIndices  =  image_indices . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										present_info . pResults  =  results . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										device_queue . submit_mutex . lock ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-05-05 19:15:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# if defined(SWAPPY_FRAME_PACING_ENABLED) 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( swappy_frame_pacer_enable )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											err  =  SwappyVk_queuePresent ( device_queue . queue ,  & present_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											err  =  device_functions . QueuePresentKHR ( device_queue . queue ,  & present_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# else 
  
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										err  =  device_functions . QueuePresentKHR ( device_queue . queue ,  & present_info ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-05-05 19:15:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										device_queue . submit_mutex . unlock ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Set the index to an invalid value. If any of the swap chains returned out of date, indicate it should be resized the next time it's acquired.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										bool  any_result_is_out_of_date  =  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  i  =  0 ;  i  <  p_swap_chains . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											SwapChain  * swap_chain  =  ( SwapChain  * ) ( p_swap_chains [ i ] . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											swap_chain - > image_index  =  UINT_MAX ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( results [ i ]  = =  VK_ERROR_OUT_OF_DATE_KHR )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												context_driver - > surface_set_needs_resize ( swap_chain - > surface ,  true ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												any_result_is_out_of_date  =  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										if  ( any_result_is_out_of_date  | |  err  = =  VK_ERROR_OUT_OF_DATE_KHR )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// It is possible for presentation to fail with out of date while acquire might've succeeded previously. This case
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// will be considered a silent failure as it can be triggered easily by resizing a window in the OS natively.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  FAILED ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2024-02-15 10:45:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// Handling VK_SUBOPTIMAL_KHR the same as VK_SUCCESS is completely intentional.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										//
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Godot does not currently support native rotation in Android when creating the swap chain. It intentionally uses
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR instead of the current transform bits available in the surface capabilities.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Choosing the transform that leads to optimal presentation leads to distortion that makes the application unusable,
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// as the rotation of all the content is not handled at the moment.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										//
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// VK_SUBOPTIMAL_KHR is accepted as a successful case even if it's not the most efficient solution to work around this
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// problem. This behavior should not be changed unless the swap chain recreation uses the current transform bits, as
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// it'll lead to very low performance in Android by entering an endless loop where it'll always resize the swap chain
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// every frame.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-06 02:17:04 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										ERR_FAIL_COND_V_MSG ( 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												err  ! =  VK_SUCCESS  & &  err  ! =  VK_SUBOPTIMAL_KHR , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												FAILED , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												" QueuePresentKHR failed with error:  "  +  get_vulkan_result ( err ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-02-16 15:43:59 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  OK ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_queue_free ( CommandQueueID  p_cmd_queue )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( p_cmd_queue ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									CommandQueue  * command_queue  =  ( CommandQueue  * ) ( p_cmd_queue . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Erase all the semaphores used for image acquisition.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( VkSemaphore  semaphore  :  command_queue - > image_semaphores )  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										vkDestroySemaphore ( vk_device ,  semaphore ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_SEMAPHORE ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Retrieve the queue family corresponding to the virtual queue.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( command_queue - > queue_family  <  queue_families . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									TightLocalVector < Queue >  & queue_family  =  queue_families [ command_queue - > queue_family ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Decrease the virtual queue count.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( command_queue - > queue_index  <  queue_family . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( queue_family [ command_queue - > queue_index ] . virtual_count  >  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									queue_family [ command_queue - > queue_index ] . virtual_count - - ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Destroy the virtual queue structure.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									memdelete ( command_queue ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// ----- POOL -----
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								RDD : : CommandPoolID  RenderingDeviceDriverVulkan : : command_pool_create ( CommandQueueFamilyID  p_cmd_queue_family ,  CommandBufferType  p_cmd_buffer_type )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( p_cmd_queue_family . id  ! =  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  family_index  =  p_cmd_queue_family . id  -  1 ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VkCommandPoolCreateInfo  cmd_pool_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									cmd_pool_info . sType  =  VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									cmd_pool_info . queueFamilyIndex  =  family_index ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ! command_pool_reset_enabled )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										cmd_pool_info . flags  =  VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkCommandPool  vk_command_pool  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  res  =  vkCreateCommandPool ( vk_device ,  & cmd_pool_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_COMMAND_POOL ) ,  & vk_command_pool ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( res ,  CommandPoolID ( ) ,  " vkCreateCommandPool failed with error  "  +  itos ( res )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									CommandPool  * command_pool  =  memnew ( CommandPool ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									command_pool - > vk_command_pool  =  vk_command_pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									command_pool - > buffer_type  =  p_cmd_buffer_type ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  CommandPoolID ( command_pool ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								bool  RenderingDeviceDriverVulkan : : command_pool_reset ( CommandPoolID  p_cmd_pool )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( p_cmd_pool ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									CommandPool  * command_pool  =  ( CommandPool  * ) ( p_cmd_pool . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkResult  err  =  vkResetCommandPool ( vk_device ,  command_pool - > vk_command_pool ,  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( err ,  false ,  " vkResetCommandPool failed with error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_pool_free ( CommandPoolID  p_cmd_pool )  {  
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									DEV_ASSERT ( p_cmd_pool ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									CommandPool  * command_pool  =  ( CommandPool  * ) ( p_cmd_pool . id ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									for  ( CommandBufferInfo  * command_buffer  :  command_pool - > command_buffers_created )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VersatileResource : : free ( resources_allocator ,  command_buffer ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkDestroyCommandPool ( vk_device ,  command_pool - > vk_command_pool ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_COMMAND_POOL ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									memdelete ( command_pool ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// ----- BUFFER -----
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								RDD : : CommandBufferID  RenderingDeviceDriverVulkan : : command_buffer_create ( CommandPoolID  p_cmd_pool )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( p_cmd_pool ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									CommandPool  * command_pool  =  ( CommandPool  * ) ( p_cmd_pool . id ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VkCommandBufferAllocateInfo  cmd_buf_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									cmd_buf_info . sType  =  VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									cmd_buf_info . commandPool  =  command_pool - > vk_command_pool ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									cmd_buf_info . commandBufferCount  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( command_pool - > buffer_type  = =  COMMAND_BUFFER_TYPE_SECONDARY )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										cmd_buf_info . level  =  VK_COMMAND_BUFFER_LEVEL_SECONDARY ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										cmd_buf_info . level  =  VK_COMMAND_BUFFER_LEVEL_PRIMARY ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkCommandBuffer  vk_command_buffer  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkResult  err  =  vkAllocateCommandBuffers ( vk_device ,  & cmd_buf_info ,  & vk_command_buffer ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( err ,  CommandBufferID ( ) ,  " vkAllocateCommandBuffers failed with error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									CommandBufferInfo  * command_buffer  =  VersatileResource : : allocate < CommandBufferInfo > ( resources_allocator ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									command_buffer - > vk_command_buffer  =  vk_command_buffer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									command_pool - > command_buffers_created . push_back ( command_buffer ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  CommandBufferID ( command_buffer ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								bool  RenderingDeviceDriverVulkan : : command_buffer_begin ( CommandBufferID  p_cmd_buffer )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									CommandBufferInfo  * command_buffer  =  ( CommandBufferInfo  * ) ( p_cmd_buffer . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VkCommandBufferBeginInfo  cmd_buf_begin_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									cmd_buf_begin_info . sType  =  VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									cmd_buf_begin_info . flags  =  VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  err  =  vkBeginCommandBuffer ( command_buffer - > vk_command_buffer ,  & cmd_buf_begin_info ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( err ,  false ,  " vkBeginCommandBuffer failed with error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								bool  RenderingDeviceDriverVulkan : : command_buffer_begin_secondary ( CommandBufferID  p_cmd_buffer ,  RenderPassID  p_render_pass ,  uint32_t  p_subpass ,  FramebufferID  p_framebuffer )  {  
						 
					
						
							
								
									
										
										
										
											2024-10-18 14:23:37 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									Framebuffer  * framebuffer  =  ( Framebuffer  * ) ( p_framebuffer . id ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RenderPassInfo  * render_pass  =  ( RenderPassInfo  * ) ( p_render_pass . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									CommandBufferInfo  * command_buffer  =  ( CommandBufferInfo  * ) ( p_cmd_buffer . id ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-18 14:23:37 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VkCommandBufferInheritanceInfo  inheritance_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									inheritance_info . sType  =  VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									inheritance_info . renderPass  =  render_pass - > vk_render_pass ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									inheritance_info . subpass  =  p_subpass ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-18 14:23:37 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									inheritance_info . framebuffer  =  framebuffer - > vk_framebuffer ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkCommandBufferBeginInfo  cmd_buf_begin_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									cmd_buf_begin_info . sType  =  VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									cmd_buf_begin_info . flags  =  VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT  |  VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									cmd_buf_begin_info . pInheritanceInfo  =  & inheritance_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  err  =  vkBeginCommandBuffer ( command_buffer - > vk_command_buffer ,  & cmd_buf_begin_info ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( err ,  false ,  " vkBeginCommandBuffer failed with error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_buffer_end ( CommandBufferID  p_cmd_buffer )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									CommandBufferInfo  * command_buffer  =  ( CommandBufferInfo  * ) ( p_cmd_buffer . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkEndCommandBuffer ( command_buffer - > vk_command_buffer ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_buffer_execute_secondary ( CommandBufferID  p_cmd_buffer ,  VectorView < CommandBufferID >  p_secondary_cmd_buffers )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									thread_local  LocalVector < VkCommandBuffer >  secondary_command_buffers ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									CommandBufferInfo  * command_buffer  =  ( CommandBufferInfo  * ) ( p_cmd_buffer . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									secondary_command_buffers . resize ( p_secondary_cmd_buffers . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_secondary_cmd_buffers . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										CommandBufferInfo  * secondary_command_buffer  =  ( CommandBufferInfo  * ) ( p_secondary_cmd_buffers [ i ] . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										secondary_command_buffers [ i ]  =  secondary_command_buffer - > vk_command_buffer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkCmdExecuteCommands ( command_buffer - > vk_command_buffer ,  p_secondary_cmd_buffers . size ( ) ,  secondary_command_buffers . ptr ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/********************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** SWAP CHAIN ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/********************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : _swap_chain_release ( SwapChain  * swap_chain )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Destroy views and framebuffers associated to the swapchain's images.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( FramebufferID  framebuffer  :  swap_chain - > framebuffers )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										framebuffer_free ( framebuffer ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									for  ( VkImageView  view  :  swap_chain - > image_views )  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										vkDestroyImageView ( vk_device ,  view ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_IMAGE_VIEW ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_chain - > image_index  =  UINT_MAX ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_chain - > images . clear ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_chain - > image_views . clear ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_chain - > framebuffers . clear ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( swap_chain - > vk_swapchain  ! =  VK_NULL_HANDLE )  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-05-05 19:15:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# if defined(SWAPPY_FRAME_PACING_ENABLED) 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( swappy_frame_pacer_enable )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// Swappy has a bug where the ANativeWindow will be leaked if we call
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// SwappyVk_destroySwapchain, so we must release it by hand.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											SwappyVk_setWindow ( vk_device ,  swap_chain - > vk_swapchain ,  nullptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											SwappyVk_destroySwapchain ( vk_device ,  swap_chain - > vk_swapchain ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										device_functions . DestroySwapchainKHR ( vk_device ,  swap_chain - > vk_swapchain ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_SWAPCHAIN_KHR ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										swap_chain - > vk_swapchain  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  swap_chain - > command_queues_acquired . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										_recreate_image_semaphore ( swap_chain - > command_queues_acquired [ i ] ,  swap_chain - > command_queues_acquired_semaphores [ i ] ,  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_chain - > command_queues_acquired . clear ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_chain - > command_queues_acquired_semaphores . clear ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-05-14 15:20:33 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( VkSemaphore  semaphore  :  swap_chain - > present_semaphores )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vkDestroySemaphore ( vk_device ,  semaphore ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_SEMAPHORE ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_chain - > present_semaphores . clear ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								RenderingDeviceDriver : : SwapChainID  RenderingDeviceDriverVulkan : : swap_chain_create ( RenderingContextDriver : : SurfaceID  p_surface )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( p_surface  ! =  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RenderingContextDriverVulkan : : Surface  * surface  =  ( RenderingContextDriverVulkan : : Surface  * ) ( p_surface ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  RenderingContextDriverVulkan : : Functions  & functions  =  context_driver - > functions_get ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Retrieve the formats supported by the surface.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  format_count  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkResult  err  =  functions . GetPhysicalDeviceSurfaceFormatsKHR ( physical_device ,  surface - > vk_surface ,  & format_count ,  nullptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  SwapChainID ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									TightLocalVector < VkSurfaceFormatKHR >  formats ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									formats . resize ( format_count ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									err  =  functions . GetPhysicalDeviceSurfaceFormatsKHR ( physical_device ,  surface - > vk_surface ,  & format_count ,  formats . ptr ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  SwapChainID ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkFormat  format  =  VK_FORMAT_UNDEFINED ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkColorSpaceKHR  color_space  =  VK_COLOR_SPACE_SRGB_NONLINEAR_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( format_count  = =  1  & &  formats [ 0 ] . format  = =  VK_FORMAT_UNDEFINED )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// If the format list includes just one entry of VK_FORMAT_UNDEFINED, the surface has no preferred format.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										format  =  VK_FORMAT_B8G8R8A8_UNORM ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										color_space  =  formats [ 0 ] . colorSpace ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  if  ( format_count  >  0 )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Use one of the supported formats, prefer B8G8R8A8_UNORM.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										const  VkFormat  preferred_format  =  VK_FORMAT_B8G8R8A8_UNORM ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										const  VkFormat  second_format  =  VK_FORMAT_R8G8B8A8_UNORM ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  i  =  0 ;  i  <  format_count ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( formats [ i ] . format  = =  preferred_format  | |  formats [ i ] . format  = =  second_format )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												format  =  formats [ i ] . format ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												if  ( formats [ i ] . format  = =  preferred_format )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													// This is the preferred format, stop searching.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// No formats are supported.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( format  = =  VK_FORMAT_UNDEFINED ,  SwapChainID ( ) ,  " Surface did not return any valid formats. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Create the render pass for the chosen format.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkAttachmentDescription2KHR  attachment  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									attachment . sType  =  VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									attachment . format  =  format ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									attachment . samples  =  VK_SAMPLE_COUNT_1_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									attachment . loadOp  =  VK_ATTACHMENT_LOAD_OP_CLEAR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									attachment . storeOp  =  VK_ATTACHMENT_STORE_OP_STORE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									attachment . stencilLoadOp  =  VK_ATTACHMENT_LOAD_OP_DONT_CARE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									attachment . stencilStoreOp  =  VK_ATTACHMENT_STORE_OP_DONT_CARE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									attachment . initialLayout  =  VK_IMAGE_LAYOUT_UNDEFINED ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									attachment . finalLayout  =  VK_IMAGE_LAYOUT_PRESENT_SRC_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkAttachmentReference2KHR  color_reference  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									color_reference . sType  =  VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									color_reference . layout  =  VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkSubpassDescription2KHR  subpass  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									subpass . sType  =  VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									subpass . pipelineBindPoint  =  VK_PIPELINE_BIND_POINT_GRAPHICS ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									subpass . colorAttachmentCount  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									subpass . pColorAttachments  =  & color_reference ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkRenderPassCreateInfo2KHR  pass_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pass_info . sType  =  VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pass_info . attachmentCount  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pass_info . pAttachments  =  & attachment ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pass_info . subpassCount  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pass_info . pSubpasses  =  & subpass ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkRenderPass  vk_render_pass  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									err  =  _create_render_pass ( vk_device ,  & pass_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_RENDER_PASS ) ,  & vk_render_pass ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  SwapChainID ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RenderPassInfo  * render_pass_info  =  VersatileResource : : allocate < RenderPassInfo > ( resources_allocator ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									render_pass_info - > vk_render_pass  =  vk_render_pass ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									SwapChain  * swap_chain  =  memnew ( SwapChain ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_chain - > surface  =  p_surface ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_chain - > format  =  format ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_chain - > color_space  =  color_space ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									swap_chain - > render_pass  =  RenderPassID ( render_pass_info ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									return  SwapChainID ( swap_chain ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Error  RenderingDeviceDriverVulkan : : swap_chain_resize ( CommandQueueID  p_cmd_queue ,  SwapChainID  p_swap_chain ,  uint32_t  p_desired_framebuffer_count )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( p_cmd_queue . id  ! =  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( p_swap_chain . id  ! =  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									CommandQueue  * command_queue  =  ( CommandQueue  * ) ( p_cmd_queue . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									SwapChain  * swap_chain  =  ( SwapChain  * ) ( p_swap_chain . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Release all current contents of the swap chain.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_swap_chain_release ( swap_chain ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Validate if the command queue being used supports creating the swap chain for this surface.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  RenderingContextDriverVulkan : : Functions  & functions  =  context_driver - > functions_get ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ! context_driver - > queue_family_supports_present ( physical_device ,  command_queue - > queue_family ,  swap_chain - > surface ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_V_MSG ( ERR_CANT_CREATE ,  " Surface is not supported by device. Did the GPU go offline? Was the window created on another monitor? Check " 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
																		" previous errors & try launching with --gpu-validation. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Retrieve the surface's capabilities.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RenderingContextDriverVulkan : : Surface  * surface  =  ( RenderingContextDriverVulkan : : Surface  * ) ( swap_chain - > surface ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkSurfaceCapabilitiesKHR  surface_capabilities  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkResult  err  =  functions . GetPhysicalDeviceSurfaceCapabilitiesKHR ( physical_device ,  surface - > vk_surface ,  & surface_capabilities ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  ERR_CANT_CREATE ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-05-05 19:15:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// No swapchain yet, this is the first time we're creating it.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ! swap_chain - > vk_swapchain )  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-02 17:06:33 -06:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										if  ( surface_capabilities . currentExtent . width  = =  0xFFFFFFFF )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// The current extent is currently undefined, so the current surface width and height will be clamped to the surface's capabilities.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// We make sure to overwrite surface_capabilities.currentExtent.width so that the same check further below
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// does not set extent.width = CLAMP( surface->width, ... ) on the first run of this function, because
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// that'd be potentially unswapped.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											surface_capabilities . currentExtent . width  =  CLAMP ( surface - > width ,  surface_capabilities . minImageExtent . width ,  surface_capabilities . maxImageExtent . width ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											surface_capabilities . currentExtent . height  =  CLAMP ( surface - > height ,  surface_capabilities . minImageExtent . height ,  surface_capabilities . maxImageExtent . height ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// We must SWAP() only once otherwise we'll keep ping-ponging between
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// the right and wrong resolutions after multiple calls to swap_chain_resize().
 
							 
						 
					
						
							
								
									
										
										
										
											2024-05-05 19:15:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										if  ( surface_capabilities . currentTransform  &  VK_SURFACE_TRANSFORM_ROTATE_90_BIT_KHR  | | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												surface_capabilities . currentTransform  &  VK_SURFACE_TRANSFORM_ROTATE_270_BIT_KHR )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// Swap to get identity width and height.
 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-02 17:06:33 -06:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											SWAP ( surface_capabilities . currentExtent . width ,  surface_capabilities . currentExtent . height ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-05-05 19:15:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkExtent2D  extent ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( surface_capabilities . currentExtent . width  = =  0xFFFFFFFF )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// The current extent is currently undefined, so the current surface width and height will be clamped to the surface's capabilities.
 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-02 17:06:33 -06:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// We can only be here on the second call to swap_chain_resize(), by which time surface->width & surface->height should already be swapped if needed.
 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										extent . width  =  CLAMP ( surface - > width ,  surface_capabilities . minImageExtent . width ,  surface_capabilities . maxImageExtent . width ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										extent . height  =  CLAMP ( surface - > height ,  surface_capabilities . minImageExtent . height ,  surface_capabilities . maxImageExtent . height ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Grab the dimensions from the current extent.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										extent  =  surface_capabilities . currentExtent ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										surface - > width  =  extent . width ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										surface - > height  =  extent . height ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( surface - > width  = =  0  | |  surface - > height  = =  0 )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// The surface doesn't have valid dimensions, so we can't create a swap chain.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return  ERR_SKIP ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Find what present modes are supported.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									TightLocalVector < VkPresentModeKHR >  present_modes ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  present_modes_count  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									err  =  functions . GetPhysicalDeviceSurfacePresentModesKHR ( physical_device ,  surface - > vk_surface ,  & present_modes_count ,  nullptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  ERR_CANT_CREATE ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									present_modes . resize ( present_modes_count ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									err  =  functions . GetPhysicalDeviceSurfacePresentModesKHR ( physical_device ,  surface - > vk_surface ,  & present_modes_count ,  present_modes . ptr ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  ERR_CANT_CREATE ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Choose the present mode based on the display server setting.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkPresentModeKHR  present_mode  =  VkPresentModeKHR : : VK_PRESENT_MODE_FIFO_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									String  present_mode_name  =  " Enabled " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									switch  ( surface - > vsync_mode )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  DisplayServer : : VSYNC_MAILBOX : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											present_mode  =  VK_PRESENT_MODE_MAILBOX_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											present_mode_name  =  " Mailbox " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  DisplayServer : : VSYNC_ADAPTIVE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											present_mode  =  VK_PRESENT_MODE_FIFO_RELAXED_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											present_mode_name  =  " Adaptive " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  DisplayServer : : VSYNC_ENABLED : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											present_mode  =  VK_PRESENT_MODE_FIFO_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											present_mode_name  =  " Enabled " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  DisplayServer : : VSYNC_DISABLED : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											present_mode  =  VK_PRESENT_MODE_IMMEDIATE_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											present_mode_name  =  " Disabled " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-05-06 16:43:04 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									bool  present_mode_available  =  present_modes . has ( present_mode ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-17 23:18:28 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( ! present_mode_available )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// Present mode is not available, fall back to FIFO which is guaranteed to be supported.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										WARN_PRINT ( vformat ( " The requested V-Sync mode %s is not available. Falling back to V-Sync mode Enabled. " ,  present_mode_name ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										surface - > vsync_mode  =  DisplayServer : : VSYNC_ENABLED ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										present_mode  =  VkPresentModeKHR : : VK_PRESENT_MODE_FIFO_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Clamp the desired image count to the surface's capabilities.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  desired_swapchain_images  =  MAX ( p_desired_framebuffer_count ,  surface_capabilities . minImageCount ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( surface_capabilities . maxImageCount  >  0 )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Only clamp to the max image count if it's defined. A max image count of 0 means there's no upper limit to the amount of images.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										desired_swapchain_images  =  MIN ( desired_swapchain_images ,  surface_capabilities . maxImageCount ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-02-15 10:45:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Refer to the comment in command_queue_present() for more details.
 
							 
						 
					
						
							
								
									
										
										
										
											2024-05-05 19:15:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkSurfaceTransformFlagBitsKHR  surface_transform_bits  =  surface_capabilities . currentTransform ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkCompositeAlphaFlagBitsKHR  composite_alpha  =  VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( OS : : get_singleton ( ) - > is_layered_allowed ( )  | |  ! ( surface_capabilities . supportedCompositeAlpha  &  composite_alpha ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Find a supported composite alpha mode - one of these is guaranteed to be set.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkCompositeAlphaFlagBitsKHR  composite_alpha_flags [ 4 ]  =  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  i  =  0 ;  i  <  ARRAY_SIZE ( composite_alpha_flags ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( surface_capabilities . supportedCompositeAlpha  &  composite_alpha_flags [ i ] )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												composite_alpha  =  composite_alpha_flags [ i ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2024-05-03 11:48:46 +03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										has_comp_alpha [ ( uint64_t ) p_cmd_queue . id ]  =  ( composite_alpha  ! =  VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkSwapchainCreateInfoKHR  swap_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_create_info . sType  =  VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_create_info . surface  =  surface - > vk_surface ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_create_info . minImageCount  =  desired_swapchain_images ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_create_info . imageFormat  =  swap_chain - > format ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_create_info . imageColorSpace  =  swap_chain - > color_space ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-02 17:06:33 -06:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									swap_create_info . imageExtent  =  extent ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									swap_create_info . imageArrayLayers  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_create_info . imageUsage  =  VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_create_info . imageSharingMode  =  VK_SHARING_MODE_EXCLUSIVE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_create_info . preTransform  =  surface_transform_bits ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-31 16:52:26 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									switch  ( swap_create_info . preTransform )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											swap_chain - > pre_transform_rotation_degrees  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_SURFACE_TRANSFORM_ROTATE_90_BIT_KHR : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											swap_chain - > pre_transform_rotation_degrees  =  90 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_SURFACE_TRANSFORM_ROTATE_180_BIT_KHR : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											swap_chain - > pre_transform_rotation_degrees  =  180 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_SURFACE_TRANSFORM_ROTATE_270_BIT_KHR : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											swap_chain - > pre_transform_rotation_degrees  =  270 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										default : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											WARN_PRINT ( " Unexpected swap_create_info.preTransform =  "  +  itos ( swap_create_info . preTransform )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											swap_chain - > pre_transform_rotation_degrees  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									swap_create_info . compositeAlpha  =  composite_alpha ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_create_info . presentMode  =  present_mode ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_create_info . clipped  =  true ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									err  =  device_functions . CreateSwapchainKHR ( vk_device ,  & swap_create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_SWAPCHAIN_KHR ) ,  & swap_chain - > vk_swapchain ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  ERR_CANT_CREATE ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-05-05 19:15:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# if defined(SWAPPY_FRAME_PACING_ENABLED) 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( swappy_frame_pacer_enable )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										SwappyVk_initAndGetRefreshCycleDuration ( get_jni_env ( ) ,  static_cast < OS_Android  * > ( OS : : get_singleton ( ) ) - > get_godot_java ( ) - > get_activity ( ) ,  physical_device , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_device ,  swap_chain - > vk_swapchain ,  & swap_chain - > refresh_duration ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										SwappyVk_setWindow ( vk_device ,  swap_chain - > vk_swapchain ,  static_cast < OS_Android  * > ( OS : : get_singleton ( ) ) - > get_native_window ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-12 16:15:34 -06:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										SwappyVk_setSwapIntervalNS ( vk_device ,  swap_chain - > vk_swapchain ,  swap_chain - > refresh_duration ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-05-05 19:15:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										enum  SwappyModes  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											PIPELINE_FORCED_ON , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											AUTO_FPS_PIPELINE_FORCED_ON , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											AUTO_FPS_AUTO_PIPELINE , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										switch  ( swappy_mode )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											case  PIPELINE_FORCED_ON : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												SwappyVk_setAutoSwapInterval ( true ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												SwappyVk_setAutoPipelineMode ( true ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											case  AUTO_FPS_PIPELINE_FORCED_ON : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												SwappyVk_setAutoSwapInterval ( true ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												SwappyVk_setAutoPipelineMode ( false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											case  AUTO_FPS_AUTO_PIPELINE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												SwappyVk_setAutoSwapInterval ( false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												SwappyVk_setAutoPipelineMode ( false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									uint32_t  image_count  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									err  =  device_functions . GetSwapchainImagesKHR ( vk_device ,  swap_chain - > vk_swapchain ,  & image_count ,  nullptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  ERR_CANT_CREATE ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_chain - > images . resize ( image_count ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									err  =  device_functions . GetSwapchainImagesKHR ( vk_device ,  swap_chain - > vk_swapchain ,  & image_count ,  swap_chain - > images . ptr ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  ERR_CANT_CREATE ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkImageViewCreateInfo  view_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									view_create_info . sType  =  VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									view_create_info . viewType  =  VK_IMAGE_VIEW_TYPE_2D ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									view_create_info . format  =  swap_chain - > format ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									view_create_info . components . r  =  VK_COMPONENT_SWIZZLE_R ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									view_create_info . components . g  =  VK_COMPONENT_SWIZZLE_G ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									view_create_info . components . b  =  VK_COMPONENT_SWIZZLE_B ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									view_create_info . components . a  =  VK_COMPONENT_SWIZZLE_A ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									view_create_info . subresourceRange . aspectMask  =  VK_IMAGE_ASPECT_COLOR_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									view_create_info . subresourceRange . levelCount  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									view_create_info . subresourceRange . layerCount  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_chain - > image_views . reserve ( image_count ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkImageView  image_view ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  image_count ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										view_create_info . image  =  swap_chain - > images [ i ] ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										err  =  vkCreateImageView ( vk_device ,  & view_create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_IMAGE_VIEW ) ,  & image_view ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  ERR_CANT_CREATE ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										swap_chain - > image_views . push_back ( image_view ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_chain - > framebuffers . reserve ( image_count ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  RenderPassInfo  * render_pass  =  ( const  RenderPassInfo  * ) ( swap_chain - > render_pass . id ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkFramebufferCreateInfo  fb_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									fb_create_info . sType  =  VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									fb_create_info . renderPass  =  render_pass - > vk_render_pass ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									fb_create_info . attachmentCount  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									fb_create_info . width  =  surface - > width ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									fb_create_info . height  =  surface - > height ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									fb_create_info . layers  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-10-18 14:23:37 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkFramebuffer  vk_framebuffer ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  image_count ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fb_create_info . pAttachments  =  & swap_chain - > image_views [ i ] ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-18 14:23:37 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										err  =  vkCreateFramebuffer ( vk_device ,  & fb_create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_FRAMEBUFFER ) ,  & vk_framebuffer ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  ERR_CANT_CREATE ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-10-18 14:23:37 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										Framebuffer  * framebuffer  =  memnew ( Framebuffer ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										framebuffer - > vk_framebuffer  =  vk_framebuffer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										framebuffer - > swap_chain_image  =  swap_chain - > images [ i ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										framebuffer - > swap_chain_image_subresource_range  =  view_create_info . subresourceRange ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										swap_chain - > framebuffers . push_back ( RDD : : FramebufferID ( framebuffer ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-05-14 15:20:33 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkSemaphore  vk_semaphore  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  image_count ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkSemaphoreCreateInfo  create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . sType  =  VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										err  =  vkCreateSemaphore ( vk_device ,  & create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_SEMAPHORE ) ,  & vk_semaphore ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  FAILED ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										swap_chain - > present_semaphores . push_back ( vk_semaphore ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Once everything's been created correctly, indicate the surface no longer needs to be resized.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									context_driver - > surface_set_needs_resize ( swap_chain - > surface ,  false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  OK ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								RDD : : FramebufferID  RenderingDeviceDriverVulkan : : swap_chain_acquire_framebuffer ( CommandQueueID  p_cmd_queue ,  SwapChainID  p_swap_chain ,  bool  & r_resize_required )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( p_cmd_queue ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( p_swap_chain ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									CommandQueue  * command_queue  =  ( CommandQueue  * ) ( p_cmd_queue . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									SwapChain  * swap_chain  =  ( SwapChain  * ) ( p_swap_chain . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ( swap_chain - > vk_swapchain  = =  VK_NULL_HANDLE )  | |  context_driver - > surface_get_needs_resize ( swap_chain - > surface ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// The surface does not have a valid swap chain or it indicates it requires a resize.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										r_resize_required  =  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return  FramebufferID ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkResult  err ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkSemaphore  semaphore  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  semaphore_index  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( command_queue - > free_image_semaphores . is_empty ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Add a new semaphore if none are free.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkSemaphoreCreateInfo  create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . sType  =  VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										err  =  vkCreateSemaphore ( vk_device ,  & create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_SEMAPHORE ) ,  & semaphore ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS ,  FramebufferID ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										semaphore_index  =  command_queue - > image_semaphores . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										command_queue - > image_semaphores . push_back ( semaphore ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										command_queue - > image_semaphores_swap_chains . push_back ( swap_chain ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Pick a free semaphore.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  free_index  =  command_queue - > free_image_semaphores . size ( )  -  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										semaphore_index  =  command_queue - > free_image_semaphores [ free_index ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										command_queue - > image_semaphores_swap_chains [ semaphore_index ]  =  swap_chain ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										command_queue - > free_image_semaphores . remove_at ( free_index ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										semaphore  =  command_queue - > image_semaphores [ semaphore_index ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Store in the swap chain the acquired semaphore.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_chain - > command_queues_acquired . push_back ( command_queue ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									swap_chain - > command_queues_acquired_semaphores . push_back ( semaphore_index ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									err  =  device_functions . AcquireNextImageKHR ( vk_device ,  swap_chain - > vk_swapchain ,  UINT64_MAX ,  semaphore ,  VK_NULL_HANDLE ,  & swap_chain - > image_index ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-02-15 10:45:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( err  = =  VK_ERROR_OUT_OF_DATE_KHR )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Out of date leaves the semaphore in a signaled state that will never finish, so it's necessary to recreate it.
 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										bool  semaphore_recreated  =  _recreate_image_semaphore ( command_queue ,  semaphore_index ,  true ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_COND_V ( ! semaphore_recreated ,  FramebufferID ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Swap chain is out of date and must be recreated.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										r_resize_required  =  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return  FramebufferID ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-02-15 10:45:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									}  else  if  ( err  ! =  VK_SUCCESS  & &  err  ! =  VK_SUBOPTIMAL_KHR )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// Swap chain failed to present but the reason is unknown.
 
							 
						 
					
						
							
								
									
										
										
										
											2024-02-15 10:45:21 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// Refer to the comment in command_queue_present() as to why VK_SUBOPTIMAL_KHR is handled the same as VK_SUCCESS.
 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										return  FramebufferID ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Indicate the command queue should wait on these semaphores on the next submission and that it should
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// indicate they're free again on the next fence.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									command_queue - > pending_semaphores_for_execute . push_back ( semaphore_index ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									command_queue - > pending_semaphores_for_fence . push_back ( semaphore_index ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Return the corresponding framebuffer to the new current image.
 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-18 14:23:37 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									FramebufferID  framebuffer_id  =  swap_chain - > framebuffers [ swap_chain - > image_index ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Framebuffer  * framebuffer  =  ( Framebuffer  * ) ( framebuffer_id . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									framebuffer - > swap_chain_acquired  =  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  framebuffer_id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								RDD : : RenderPassID  RenderingDeviceDriverVulkan : : swap_chain_get_render_pass ( SwapChainID  p_swap_chain )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( p_swap_chain . id  ! =  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									SwapChain  * swap_chain  =  ( SwapChain  * ) ( p_swap_chain . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  swap_chain - > render_pass ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-10-31 16:52:26 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								int  RenderingDeviceDriverVulkan : : swap_chain_get_pre_rotation_degrees ( SwapChainID  p_swap_chain )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( p_swap_chain . id  ! =  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									SwapChain  * swap_chain  =  ( SwapChain  * ) ( p_swap_chain . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  swap_chain - > pre_transform_rotation_degrees ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								RDD : : DataFormat  RenderingDeviceDriverVulkan : : swap_chain_get_format ( SwapChainID  p_swap_chain )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( p_swap_chain . id  ! =  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									SwapChain  * swap_chain  =  ( SwapChain  * ) ( p_swap_chain . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									switch  ( swap_chain - > format )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_FORMAT_B8G8R8A8_UNORM : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  DATA_FORMAT_B8G8R8A8_UNORM ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  VK_FORMAT_R8G8B8A8_UNORM : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  DATA_FORMAT_R8G8B8A8_UNORM ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										default : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											DEV_ASSERT ( false  & &  " Unknown swap chain format. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  DATA_FORMAT_MAX ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-05-05 19:15:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : swap_chain_set_max_fps ( SwapChainID  p_swap_chain ,  int  p_max_fps )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( p_swap_chain . id  ! =  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# ifdef SWAPPY_FRAME_PACING_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ! swappy_frame_pacer_enable )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									SwapChain  * swap_chain  =  ( SwapChain  * ) ( p_swap_chain . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( swap_chain - > vk_swapchain  ! =  VK_NULL_HANDLE )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										const  uint64_t  max_time  =  p_max_fps  >  0  ?  uint64_t ( ( 1000.0  *  1000.0  *  1000.0 )  /  p_max_fps )  :  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										SwappyVk_setSwapIntervalNS ( vk_device ,  swap_chain - > vk_swapchain ,  MAX ( swap_chain - > refresh_duration ,  max_time ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : swap_chain_free ( SwapChainID  p_swap_chain )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( p_swap_chain . id  ! =  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									SwapChain  * swap_chain  =  ( SwapChain  * ) ( p_swap_chain . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_swap_chain_release ( swap_chain ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( swap_chain - > render_pass )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										render_pass_free ( swap_chain - > render_pass ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									memdelete ( swap_chain ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*********************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** FRAMEBUFFER ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*********************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								RDD : : FramebufferID  RenderingDeviceDriverVulkan : : framebuffer_create ( RenderPassID  p_render_pass ,  VectorView < TextureID >  p_attachments ,  uint32_t  p_width ,  uint32_t  p_height )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RenderPassInfo  * render_pass  =  ( RenderPassInfo  * ) ( p_render_pass . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  fragment_density_map_offsets_layers  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VkImageView  * vk_img_views  =  ALLOCA_ARRAY ( VkImageView ,  p_attachments . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_attachments . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										const  TextureInfo  * texture  =  ( const  TextureInfo  * ) p_attachments [ i ] . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_img_views [ i ]  =  texture - > vk_view ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( render_pass - > uses_fragment_density_map_offsets  & &  ( texture - > vk_create_info . usage  &  VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// If the render pass uses the FDM and the usage fits, we store the amount of layers to use it later on the render pass's end.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fragment_density_map_offsets_layers  =  texture - > vk_create_info . arrayLayers ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkFramebufferCreateInfo  framebuffer_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									framebuffer_create_info . sType  =  VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									framebuffer_create_info . renderPass  =  render_pass - > vk_render_pass ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									framebuffer_create_info . attachmentCount  =  p_attachments . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									framebuffer_create_info . pAttachments  =  vk_img_views ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									framebuffer_create_info . width  =  p_width ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									framebuffer_create_info . height  =  p_height ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									framebuffer_create_info . layers  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkFramebuffer  vk_framebuffer  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  err  =  vkCreateFramebuffer ( vk_device ,  & framebuffer_create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_FRAMEBUFFER ) ,  & vk_framebuffer ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( err ,  FramebufferID ( ) ,  " vkCreateFramebuffer failed with error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-01-22 16:11:26 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# if PRINT_NATIVE_COMMANDS 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									print_line ( vformat ( " vkCreateFramebuffer 0x%uX with %d attachments " ,  uint64_t ( vk_framebuffer ) ,  p_attachments . size ( ) ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_attachments . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										const  TextureInfo  * attachment_info  =  ( const  TextureInfo  * ) p_attachments [ i ] . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										print_line ( vformat ( "   Attachment #%d: IMAGE 0x%uX VIEW 0x%uX " ,  i ,  uint64_t ( attachment_info - > vk_view_create_info . image ) ,  uint64_t ( attachment_info - > vk_view ) ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-10-18 14:23:37 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									Framebuffer  * framebuffer  =  memnew ( Framebuffer ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									framebuffer - > vk_framebuffer  =  vk_framebuffer ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									framebuffer - > fragment_density_map_offsets_layers  =  fragment_density_map_offsets_layers ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-18 14:23:37 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									return  FramebufferID ( framebuffer ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : framebuffer_free ( FramebufferID  p_framebuffer )  {  
						 
					
						
							
								
									
										
										
										
											2024-10-18 14:23:37 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									Framebuffer  * framebuffer  =  ( Framebuffer  * ) ( p_framebuffer . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkDestroyFramebuffer ( vk_device ,  framebuffer - > vk_framebuffer ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_FRAMEBUFFER ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									memdelete ( framebuffer ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** SHADER ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static  VkShaderStageFlagBits  RD_STAGE_TO_VK_SHADER_STAGE_BITS [ RDD : : SHADER_STAGE_MAX ]  =  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_SHADER_STAGE_VERTEX_BIT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_SHADER_STAGE_FRAGMENT_BIT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_SHADER_STAGE_COMPUTE_BIT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								} ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								RDD : : ShaderID  RenderingDeviceDriverVulkan : : shader_create_from_container ( const  Ref < RenderingShaderContainer >  & p_shader_container ,  const  Vector < ImmutableSampler >  & p_immutable_samplers )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ShaderReflection  shader_refl  =  p_shader_container - > get_shader_reflection ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ShaderInfo  shader_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  SHADER_STAGE_MAX ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( shader_refl . push_constant_stages . has_flag ( ( ShaderStage ) ( 1  < <  i ) ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											shader_info . vk_push_constant_stages  | =  RD_STAGE_TO_VK_SHADER_STAGE_BITS [ i ] ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Set bindings.
 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									Vector < Vector < VkDescriptorSetLayoutBinding > >  vk_set_bindings ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vk_set_bindings . resize ( shader_refl . uniform_sets . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  shader_refl . uniform_sets . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  j  =  0 ;  j  <  shader_refl . uniform_sets [ i ] . size ( ) ;  j + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  ShaderUniform  & uniform  =  shader_refl . uniform_sets [ i ] [ j ] ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											VkDescriptorSetLayoutBinding  layout_binding  =  { } ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											layout_binding . binding  =  uniform . binding ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											layout_binding . descriptorCount  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											for  ( uint32_t  k  =  0 ;  k  <  SHADER_STAGE_MAX ;  k + + )  { 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												if  ( ( uniform . stages . has_flag ( ShaderStage ( 1U  < <  k ) ) ) )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
													layout_binding . stageFlags  | =  RD_STAGE_TO_VK_SHADER_STAGE_BITS [ k ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											switch  ( uniform . type )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
												case  UNIFORM_TYPE_SAMPLER :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													layout_binding . descriptorType  =  VK_DESCRIPTOR_TYPE_SAMPLER ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
													layout_binding . descriptorCount  =  uniform . length ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
													// Immutable samplers: here they get set in the layoutbinding, given that they will not be changed later.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													int  immutable_bind_index  =  - 1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													if  ( immutable_samplers_enabled  & &  p_immutable_samplers . size ( )  >  0 )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														for  ( int  k  =  0 ;  k  <  p_immutable_samplers . size ( ) ;  k + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
															if  ( p_immutable_samplers [ k ] . binding  = =  layout_binding . binding )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
																immutable_bind_index  =  k ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
																break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
															} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														if  ( immutable_bind_index  > =  0 )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
															layout_binding . pImmutableSamplers  =  ( VkSampler  * ) & p_immutable_samplers [ immutable_bind_index ] . ids [ 0 ] . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
												}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  UNIFORM_TYPE_SAMPLER_WITH_TEXTURE :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													layout_binding . descriptorType  =  VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
													layout_binding . descriptorCount  =  uniform . length ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
												}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  UNIFORM_TYPE_TEXTURE :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													layout_binding . descriptorType  =  VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
													layout_binding . descriptorCount  =  uniform . length ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
												}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  UNIFORM_TYPE_IMAGE :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													layout_binding . descriptorType  =  VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
													layout_binding . descriptorCount  =  uniform . length ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
												}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  UNIFORM_TYPE_TEXTURE_BUFFER :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													layout_binding . descriptorType  =  VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
													layout_binding . descriptorCount  =  uniform . length ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
												}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  UNIFORM_TYPE_IMAGE_BUFFER :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													layout_binding . descriptorType  =  VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  UNIFORM_TYPE_UNIFORM_BUFFER :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													layout_binding . descriptorType  =  VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  UNIFORM_TYPE_STORAGE_BUFFER :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													layout_binding . descriptorType  =  VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  UNIFORM_TYPE_INPUT_ATTACHMENT :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													layout_binding . descriptorType  =  VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												default :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													DEV_ASSERT ( false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_set_bindings . write [ i ] . push_back ( layout_binding ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Modules.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkResult  res ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									String  error_text ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Vector < uint8_t >  decompressed_code ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									Vector < uint8_t >  decoded_spirv ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkShaderModule  vk_module ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( int  i  =  0 ;  i  <  shader_refl . stages_vector . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										const  RenderingShaderContainer : : Shader  & shader  =  p_shader_container - > shaders [ i ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										bool  requires_decompression  =  ( shader . code_decompressed_size  >  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( requires_decompression )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											decompressed_code . resize ( shader . code_decompressed_size ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											bool  decompressed  =  p_shader_container - > decompress_code ( shader . code_compressed_bytes . ptr ( ) ,  shader . code_compressed_bytes . size ( ) ,  shader . code_compression_flags ,  decompressed_code . ptrw ( ) ,  decompressed_code . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( ! decompressed )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												error_text  =  vformat ( " Failed to decompress code on shader stage %s. " ,  String ( SHADER_STAGE_NAMES [ shader_refl . stages_vector [ i ] ] ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										const  uint8_t  * smolv_input  =  requires_decompression  ?  decompressed_code . ptr ( )  :  shader . code_compressed_bytes . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  smolv_input_size  =  requires_decompression  ?  decompressed_code . size ( )  :  shader . code_compressed_bytes . size ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-08-27 11:58:28 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										if  ( shader . code_compression_flags  &  RenderingShaderContainerVulkan : : COMPRESSION_FLAG_SMOLV )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											decoded_spirv . resize ( smolv : : GetDecodedBufferSize ( smolv_input ,  smolv_input_size ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( decoded_spirv . is_empty ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												error_text  =  vformat ( " Malformed smolv input on shader stage %s. " ,  String ( SHADER_STAGE_NAMES [ shader_refl . stages_vector [ i ] ] ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-08-27 11:58:28 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											if  ( ! smolv : : Decode ( smolv_input ,  smolv_input_size ,  decoded_spirv . ptrw ( ) ,  decoded_spirv . size ( ) ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												error_text  =  vformat ( " Malformed smolv input on shader stage %s. " ,  String ( SHADER_STAGE_NAMES [ shader_refl . stages_vector [ i ] ] ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											decoded_spirv . resize ( smolv_input_size ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											memcpy ( decoded_spirv . ptrw ( ) ,  smolv_input ,  decoded_spirv . size ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkShaderModuleCreateInfo  shader_module_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										shader_module_create_info . sType  =  VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										shader_module_create_info . codeSize  =  decoded_spirv . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										shader_module_create_info . pCode  =  ( const  uint32_t  * ) ( decoded_spirv . ptr ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										res  =  vkCreateShaderModule ( vk_device ,  & shader_module_create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_SHADER_MODULE ) ,  & vk_module ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( res  ! =  VK_SUCCESS )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											error_text  =  vformat ( " Error (%d) creating module for shader stage %s. " ,  res ,  String ( SHADER_STAGE_NAMES [ shader_refl . stages_vector [ i ] ] ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkPipelineShaderStageCreateInfo  create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . sType  =  VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										create_info . stage  =  RD_STAGE_TO_VK_SHADER_STAGE_BITS [ shader_refl . stages_vector [ i ] ] ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										create_info . module  =  vk_module ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . pName  =  " main " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										shader_info . vk_stages_create_info . push_back ( create_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Descriptor sets.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( error_text . is_empty ( ) )  { 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										for  ( uint32_t  i  =  0 ;  i  <  shader_refl . uniform_sets . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											// Empty ones are fine if they were not used according to spec (binding count will be 0).
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VkDescriptorSetLayoutCreateInfo  layout_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											layout_create_info . sType  =  VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											layout_create_info . bindingCount  =  vk_set_bindings [ i ] . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											layout_create_info . pBindings  =  vk_set_bindings [ i ] . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VkDescriptorSetLayout  layout  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											res  =  vkCreateDescriptorSetLayout ( vk_device ,  & layout_create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT ) ,  & layout ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											if  ( res )  { 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												error_text  =  vformat ( " Error (%d) creating descriptor set layout for set %d. " ,  res ,  i ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
												break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											shader_info . vk_descriptor_set_layouts . push_back ( layout ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( error_text . is_empty ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Pipeline layout.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkPipelineLayoutCreateInfo  pipeline_layout_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										pipeline_layout_create_info . sType  =  VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										pipeline_layout_create_info . setLayoutCount  =  shader_info . vk_descriptor_set_layouts . size ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										pipeline_layout_create_info . pSetLayouts  =  shader_info . vk_descriptor_set_layouts . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										if  ( shader_refl . push_constant_size  >  0 )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											VkPushConstantRange  * push_constant_range  =  ALLOCA_SINGLE ( VkPushConstantRange ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											* push_constant_range  =  { } ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											push_constant_range - > stageFlags  =  shader_info . vk_push_constant_stages ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											push_constant_range - > size  =  shader_refl . push_constant_size ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											pipeline_layout_create_info . pushConstantRangeCount  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											pipeline_layout_create_info . pPushConstantRanges  =  push_constant_range ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										res  =  vkCreatePipelineLayout ( vk_device ,  & pipeline_layout_create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_PIPELINE_LAYOUT ) ,  & shader_info . vk_pipeline_layout ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( res  ! =  VK_SUCCESS )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											error_text  =  vformat ( " Error (%d) creating pipeline layout. " ,  res ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ! error_text . is_empty ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Clean up if failed.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  i  =  0 ;  i  <  shader_info . vk_stages_create_info . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											vkDestroyShaderModule ( vk_device ,  shader_info . vk_stages_create_info [ i ] . module ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_SHADER_MODULE ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										for  ( uint32_t  i  =  0 ;  i  <  shader_info . vk_descriptor_set_layouts . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											vkDestroyDescriptorSetLayout ( vk_device ,  shader_info . vk_descriptor_set_layouts [ i ] ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_V_MSG ( ShaderID ( ) ,  error_text ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Bookkeep.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ShaderInfo  * shader_info_ptr  =  VersatileResource : : allocate < ShaderInfo > ( resources_allocator ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									* shader_info_ptr  =  shader_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  ShaderID ( shader_info_ptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : shader_free ( ShaderID  p_shader )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ShaderInfo  * shader_info  =  ( ShaderInfo  * ) p_shader . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  shader_info - > vk_descriptor_set_layouts . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										vkDestroyDescriptorSetLayout ( vk_device ,  shader_info - > vk_descriptor_set_layouts [ i ] ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkDestroyPipelineLayout ( vk_device ,  shader_info - > vk_pipeline_layout ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_PIPELINE_LAYOUT ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									shader_destroy_modules ( p_shader ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VersatileResource : : free ( resources_allocator ,  shader_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : shader_destroy_modules ( ShaderID  p_shader )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ShaderInfo  * si  =  ( ShaderInfo  * ) p_shader . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  si - > vk_stages_create_info . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( si - > vk_stages_create_info [ i ] . module )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vkDestroyShaderModule ( vk_device ,  si - > vk_stages_create_info [ i ] . module , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_SHADER_MODULE ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											si - > vk_stages_create_info [ i ] . module  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									si - > vk_stages_create_info . clear ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								/*********************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** UNIFORM SET ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*********************/  
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								VkDescriptorPool  RenderingDeviceDriverVulkan : : _descriptor_set_pool_find_or_create ( const  DescriptorSetPoolKey  & p_key ,  DescriptorSetPools : : Iterator  * r_pool_sets_it ,  int  p_linear_pool_index )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									bool  linear_pool  =  p_linear_pool_index  > =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DescriptorSetPools : : Iterator  pool_sets_it  =  linear_pool  ?  linear_descriptor_set_pools [ p_linear_pool_index ] . find ( p_key )  :  descriptor_set_pools . find ( p_key ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( pool_sets_it )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( KeyValue < VkDescriptorPool ,  uint32_t >  & E  :  pool_sets_it - > value )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( E . value  <  max_descriptor_sets_per_pool )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												* r_pool_sets_it  =  pool_sets_it ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												return  E . key ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Create a new one.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Here comes more vulkan API strangeness.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkDescriptorPoolSize  * vk_sizes  =  ALLOCA_ARRAY ( VkDescriptorPoolSize ,  UNIFORM_TYPE_MAX ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  vk_sizes_count  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									{ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkDescriptorPoolSize  * curr_vk_size  =  vk_sizes ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( p_key . uniform_type [ UNIFORM_TYPE_SAMPLER ] )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											* curr_vk_size  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size - > type  =  VK_DESCRIPTOR_TYPE_SAMPLER ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size - > descriptorCount  =  p_key . uniform_type [ UNIFORM_TYPE_SAMPLER ]  *  max_descriptor_sets_per_pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_sizes_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( p_key . uniform_type [ UNIFORM_TYPE_SAMPLER_WITH_TEXTURE ] )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											* curr_vk_size  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size - > type  =  VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size - > descriptorCount  =  p_key . uniform_type [ UNIFORM_TYPE_SAMPLER_WITH_TEXTURE ]  *  max_descriptor_sets_per_pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_sizes_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( p_key . uniform_type [ UNIFORM_TYPE_TEXTURE ] )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											* curr_vk_size  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size - > type  =  VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size - > descriptorCount  =  p_key . uniform_type [ UNIFORM_TYPE_TEXTURE ]  *  max_descriptor_sets_per_pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_sizes_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( p_key . uniform_type [ UNIFORM_TYPE_IMAGE ] )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											* curr_vk_size  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size - > type  =  VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size - > descriptorCount  =  p_key . uniform_type [ UNIFORM_TYPE_IMAGE ]  *  max_descriptor_sets_per_pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_sizes_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( p_key . uniform_type [ UNIFORM_TYPE_TEXTURE_BUFFER ]  | |  p_key . uniform_type [ UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER ] )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											* curr_vk_size  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size - > type  =  VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size - > descriptorCount  =  ( p_key . uniform_type [ UNIFORM_TYPE_TEXTURE_BUFFER ]  +  p_key . uniform_type [ UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER ] )  *  max_descriptor_sets_per_pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_sizes_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( p_key . uniform_type [ UNIFORM_TYPE_IMAGE_BUFFER ] )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											* curr_vk_size  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size - > type  =  VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size - > descriptorCount  =  p_key . uniform_type [ UNIFORM_TYPE_IMAGE_BUFFER ]  *  max_descriptor_sets_per_pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_sizes_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( p_key . uniform_type [ UNIFORM_TYPE_UNIFORM_BUFFER ] )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											* curr_vk_size  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size - > type  =  VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size - > descriptorCount  =  p_key . uniform_type [ UNIFORM_TYPE_UNIFORM_BUFFER ]  *  max_descriptor_sets_per_pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_sizes_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( p_key . uniform_type [ UNIFORM_TYPE_STORAGE_BUFFER ] )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											* curr_vk_size  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size - > type  =  VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size - > descriptorCount  =  p_key . uniform_type [ UNIFORM_TYPE_STORAGE_BUFFER ]  *  max_descriptor_sets_per_pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_sizes_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( p_key . uniform_type [ UNIFORM_TYPE_INPUT_ATTACHMENT ] )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											* curr_vk_size  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size - > type  =  VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size - > descriptorCount  =  p_key . uniform_type [ UNIFORM_TYPE_INPUT_ATTACHMENT ]  *  max_descriptor_sets_per_pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											curr_vk_size + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_sizes_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										DEV_ASSERT ( vk_sizes_count  < =  UNIFORM_TYPE_MAX ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkDescriptorPoolCreateInfo  descriptor_set_pool_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									descriptor_set_pool_create_info . sType  =  VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( linear_descriptor_pools_enabled  & &  linear_pool )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										descriptor_set_pool_create_info . flags  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										descriptor_set_pool_create_info . flags  =  VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT ;  // Can't think how somebody may NOT need this flag.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									descriptor_set_pool_create_info . maxSets  =  max_descriptor_sets_per_pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									descriptor_set_pool_create_info . poolSizeCount  =  vk_sizes_count ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									descriptor_set_pool_create_info . pPoolSizes  =  vk_sizes ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkDescriptorPool  vk_pool  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  res  =  vkCreateDescriptorPool ( vk_device ,  & descriptor_set_pool_create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_DESCRIPTOR_POOL ) ,  & vk_pool ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									if  ( res )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_COND_V_MSG ( res ,  VK_NULL_HANDLE ,  " vkCreateDescriptorPool failed with error  "  +  itos ( res )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Bookkeep.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ! pool_sets_it )  { 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										if  ( linear_pool )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											pool_sets_it  =  linear_descriptor_set_pools [ p_linear_pool_index ] . insert ( p_key ,  HashMap < VkDescriptorPool ,  uint32_t > ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											pool_sets_it  =  descriptor_set_pools . insert ( p_key ,  HashMap < VkDescriptorPool ,  uint32_t > ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									HashMap < VkDescriptorPool ,  uint32_t >  & pool_rcs  =  pool_sets_it - > value ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pool_rcs . insert ( vk_pool ,  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									* r_pool_sets_it  =  pool_sets_it ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  vk_pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : _descriptor_set_pool_unreference ( DescriptorSetPools : : Iterator  p_pool_sets_it ,  VkDescriptorPool  p_vk_descriptor_pool ,  int  p_linear_pool_index )  {  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									HashMap < VkDescriptorPool ,  uint32_t > : : Iterator  pool_rcs_it  =  p_pool_sets_it - > value . find ( p_vk_descriptor_pool ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pool_rcs_it - > value - - ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( pool_rcs_it - > value  = =  0 )  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										vkDestroyDescriptorPool ( vk_device ,  p_vk_descriptor_pool ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_DESCRIPTOR_POOL ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										p_pool_sets_it - > value . erase ( p_vk_descriptor_pool ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( p_pool_sets_it - > value . is_empty ( ) )  { 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											if  ( linear_descriptor_pools_enabled  & &  p_linear_pool_index  > =  0 )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												linear_descriptor_set_pools [ p_linear_pool_index ] . remove ( p_pool_sets_it ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												descriptor_set_pools . remove ( p_pool_sets_it ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								RDD : : UniformSetID  RenderingDeviceDriverVulkan : : uniform_set_create ( VectorView < BoundUniform >  p_uniforms ,  ShaderID  p_shader ,  uint32_t  p_set_index ,  int  p_linear_pool_index )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( ! linear_descriptor_pools_enabled )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										p_linear_pool_index  =  - 1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									DescriptorSetPoolKey  pool_key ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Immutable samplers will be skipped so we need to track the number of vk_writes used.
 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VkWriteDescriptorSet  * vk_writes  =  ALLOCA_ARRAY ( VkWriteDescriptorSet ,  p_uniforms . size ( ) ) ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									uint32_t  writes_amount  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_uniforms . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										const  BoundUniform  & uniform  =  p_uniforms [ i ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										vk_writes [ writes_amount ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_writes [ writes_amount ] . sType  =  VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  num_descriptors  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										switch  ( uniform . type )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											case  UNIFORM_TYPE_SAMPLER :  { 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												if  ( uniform . immutable_sampler  & &  immutable_samplers_enabled )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													continue ;  // Skipping immutable samplers.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
												num_descriptors  =  uniform . ids . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VkDescriptorImageInfo  * vk_img_infos  =  ALLOCA_ARRAY ( VkDescriptorImageInfo ,  num_descriptors ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												for  ( uint32_t  j  =  0 ;  j  <  num_descriptors ;  j + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ] . sampler  =  ( VkSampler ) uniform . ids [ j ] . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ] . imageView  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ] . imageLayout  =  VK_IMAGE_LAYOUT_UNDEFINED ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . descriptorType  =  VK_DESCRIPTOR_TYPE_SAMPLER ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . pImageInfo  =  vk_img_infos ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											case  UNIFORM_TYPE_SAMPLER_WITH_TEXTURE :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												num_descriptors  =  uniform . ids . size ( )  /  2 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VkDescriptorImageInfo  * vk_img_infos  =  ALLOCA_ARRAY ( VkDescriptorImageInfo ,  num_descriptors ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												for  ( uint32_t  j  =  0 ;  j  <  num_descriptors ;  j + + )  { 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# ifdef DEBUG_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													if  ( ( ( const  TextureInfo  * ) uniform . ids [ j  *  2  +  1 ] . id ) - > transient )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														ERR_PRINT ( " TEXTURE_USAGE_TRANSIENT_BIT texture must not be used for sampling in a shader. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ] . sampler  =  ( VkSampler ) uniform . ids [ j  *  2  +  0 ] . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ] . imageView  =  ( ( const  TextureInfo  * ) uniform . ids [ j  *  2  +  1 ] . id ) - > vk_view ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ] . imageLayout  =  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . descriptorType  =  VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . pImageInfo  =  vk_img_infos ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											case  UNIFORM_TYPE_TEXTURE :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												num_descriptors  =  uniform . ids . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VkDescriptorImageInfo  * vk_img_infos  =  ALLOCA_ARRAY ( VkDescriptorImageInfo ,  num_descriptors ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												for  ( uint32_t  j  =  0 ;  j  <  num_descriptors ;  j + + )  { 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# ifdef DEBUG_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													if  ( ( ( const  TextureInfo  * ) uniform . ids [ j ] . id ) - > transient )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														ERR_PRINT ( " TEXTURE_USAGE_TRANSIENT_BIT texture must not be used for sampling in a shader. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ] . imageView  =  ( ( const  TextureInfo  * ) uniform . ids [ j ] . id ) - > vk_view ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ] . imageLayout  =  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . descriptorType  =  VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . pImageInfo  =  vk_img_infos ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											case  UNIFORM_TYPE_IMAGE :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												num_descriptors  =  uniform . ids . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VkDescriptorImageInfo  * vk_img_infos  =  ALLOCA_ARRAY ( VkDescriptorImageInfo ,  num_descriptors ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												for  ( uint32_t  j  =  0 ;  j  <  num_descriptors ;  j + + )  { 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# ifdef DEBUG_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													if  ( ( ( const  TextureInfo  * ) uniform . ids [ j ] . id ) - > transient )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														ERR_PRINT ( " TEXTURE_USAGE_TRANSIENT_BIT texture must not be used for sampling in a shader. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ] . imageView  =  ( ( const  TextureInfo  * ) uniform . ids [ j ] . id ) - > vk_view ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ] . imageLayout  =  VK_IMAGE_LAYOUT_GENERAL ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . descriptorType  =  VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . pImageInfo  =  vk_img_infos ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											case  UNIFORM_TYPE_TEXTURE_BUFFER :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												num_descriptors  =  uniform . ids . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VkDescriptorBufferInfo  * vk_buf_infos  =  ALLOCA_ARRAY ( VkDescriptorBufferInfo ,  num_descriptors ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VkBufferView  * vk_buf_views  =  ALLOCA_ARRAY ( VkBufferView ,  num_descriptors ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												for  ( uint32_t  j  =  0 ;  j  <  num_descriptors ;  j + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													const  BufferInfo  * buf_info  =  ( const  BufferInfo  * ) uniform . ids [ j ] . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_buf_infos [ j ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_buf_infos [ j ] . buffer  =  buf_info - > vk_buffer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_buf_infos [ j ] . range  =  buf_info - > size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_buf_views [ j ]  =  buf_info - > vk_view ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . descriptorType  =  VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . pBufferInfo  =  vk_buf_infos ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . pTexelBufferView  =  vk_buf_views ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											case  UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												num_descriptors  =  uniform . ids . size ( )  /  2 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VkDescriptorImageInfo  * vk_img_infos  =  ALLOCA_ARRAY ( VkDescriptorImageInfo ,  num_descriptors ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VkDescriptorBufferInfo  * vk_buf_infos  =  ALLOCA_ARRAY ( VkDescriptorBufferInfo ,  num_descriptors ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VkBufferView  * vk_buf_views  =  ALLOCA_ARRAY ( VkBufferView ,  num_descriptors ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												for  ( uint32_t  j  =  0 ;  j  <  num_descriptors ;  j + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ] . sampler  =  ( VkSampler ) uniform . ids [ j  *  2  +  0 ] . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													const  BufferInfo  * buf_info  =  ( const  BufferInfo  * ) uniform . ids [ j  *  2  +  1 ] . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_buf_infos [ j ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_buf_infos [ j ] . buffer  =  buf_info - > vk_buffer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_buf_infos [ j ] . range  =  buf_info - > size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_buf_views [ j ]  =  buf_info - > vk_view ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . descriptorType  =  VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . pImageInfo  =  vk_img_infos ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . pBufferInfo  =  vk_buf_infos ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . pTexelBufferView  =  vk_buf_views ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											case  UNIFORM_TYPE_IMAGE_BUFFER :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												CRASH_NOW_MSG ( " Unimplemented! " ) ;  // TODO.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											case  UNIFORM_TYPE_UNIFORM_BUFFER :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												const  BufferInfo  * buf_info  =  ( const  BufferInfo  * ) uniform . ids [ 0 ] . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VkDescriptorBufferInfo  * vk_buf_info  =  ALLOCA_SINGLE ( VkDescriptorBufferInfo ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												* vk_buf_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_buf_info - > buffer  =  buf_info - > vk_buffer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_buf_info - > range  =  buf_info - > size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . descriptorType  =  VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . pBufferInfo  =  vk_buf_info ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											case  UNIFORM_TYPE_STORAGE_BUFFER :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												const  BufferInfo  * buf_info  =  ( const  BufferInfo  * ) uniform . ids [ 0 ] . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VkDescriptorBufferInfo  * vk_buf_info  =  ALLOCA_SINGLE ( VkDescriptorBufferInfo ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												* vk_buf_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_buf_info - > buffer  =  buf_info - > vk_buffer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_buf_info - > range  =  buf_info - > size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . descriptorType  =  VK_DESCRIPTOR_TYPE_STORAGE_BUFFER ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . pBufferInfo  =  vk_buf_info ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											case  UNIFORM_TYPE_INPUT_ATTACHMENT :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												num_descriptors  =  uniform . ids . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VkDescriptorImageInfo  * vk_img_infos  =  ALLOCA_ARRAY ( VkDescriptorImageInfo ,  num_descriptors ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												for  ( uint32_t  j  =  0 ;  j  <  uniform . ids . size ( ) ;  j + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ] . imageView  =  ( ( const  TextureInfo  * ) uniform . ids [ j ] . id ) - > vk_view ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_img_infos [ j ] . imageLayout  =  VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . descriptorType  =  VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_writes [ writes_amount ] . pImageInfo  =  vk_img_infos ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											default :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												DEV_ASSERT ( false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										vk_writes [ writes_amount ] . dstBinding  =  uniform . binding ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_writes [ writes_amount ] . descriptorCount  =  num_descriptors ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_COND_V_MSG ( pool_key . uniform_type [ uniform . type ]  = =  MAX_UNIFORM_POOL_ELEMENT ,  UniformSetID ( ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												" Uniform set reached the limit of bindings for the same type ( "  +  itos ( MAX_UNIFORM_POOL_ELEMENT )  +  " ). " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										pool_key . uniform_type [ uniform . type ]  + =  num_descriptors ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										writes_amount + + ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Need a descriptor pool.
 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-21 11:19:04 -05:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									DescriptorSetPools : : Iterator  pool_sets_it ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkDescriptorPool  vk_pool  =  _descriptor_set_pool_find_or_create ( pool_key ,  & pool_sets_it ,  p_linear_pool_index ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( vk_pool ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pool_sets_it - > value [ vk_pool ] + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkDescriptorSetAllocateInfo  descriptor_set_allocate_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									descriptor_set_allocate_info . sType  =  VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									descriptor_set_allocate_info . descriptorPool  =  vk_pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									descriptor_set_allocate_info . descriptorSetCount  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  ShaderInfo  * shader_info  =  ( const  ShaderInfo  * ) p_shader . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									descriptor_set_allocate_info . pSetLayouts  =  & shader_info - > vk_descriptor_set_layouts [ p_set_index ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkDescriptorSet  vk_descriptor_set  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VkResult  res  =  vkAllocateDescriptorSets ( vk_device ,  & descriptor_set_allocate_info ,  & vk_descriptor_set ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( res )  { 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										_descriptor_set_pool_unreference ( pool_sets_it ,  vk_pool ,  p_linear_pool_index ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										ERR_FAIL_V_MSG ( UniformSetID ( ) ,  " Cannot allocate descriptor sets, error  "  +  itos ( res )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  writes_amount ;  i + + )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										vk_writes [ i ] . dstSet  =  vk_descriptor_set ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkUpdateDescriptorSets ( vk_device ,  writes_amount ,  vk_writes ,  0 ,  nullptr ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Bookkeep.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									UniformSetInfo  * usi  =  VersatileResource : : allocate < UniformSetInfo > ( resources_allocator ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									usi - > vk_descriptor_set  =  vk_descriptor_set ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( p_linear_pool_index  > =  0 )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										usi - > vk_linear_descriptor_pool  =  vk_pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										usi - > vk_descriptor_pool  =  vk_pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									usi - > pool_sets_it  =  pool_sets_it ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  UniformSetID ( usi ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : uniform_set_free ( UniformSetID  p_uniform_set )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									UniformSetInfo  * usi  =  ( UniformSetInfo  * ) p_uniform_set . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( usi - > vk_linear_descriptor_pool )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Nothing to do. All sets are freed at once using vkResetDescriptorPool.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										//
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// We can NOT decrease the reference count (i.e. call _descriptor_set_pool_unreference())
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// because the pool is linear (i.e. the freed set can't be recycled) and further calls to
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// _descriptor_set_pool_find_or_create() need usi->pool_sets_it->value to stay so that we can
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// tell if the pool has ran out of space and we need to create a new pool.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vkFreeDescriptorSets ( vk_device ,  usi - > vk_descriptor_pool ,  1 ,  & usi - > vk_descriptor_set ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										_descriptor_set_pool_unreference ( usi - > pool_sets_it ,  usi - > vk_descriptor_pool ,  - 1 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VersatileResource : : free ( resources_allocator ,  usi ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								bool  RenderingDeviceDriverVulkan : : uniform_sets_have_linear_pools ( )  const  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : linear_uniform_set_pools_reset ( int  p_linear_pool_index )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( linear_descriptor_pools_enabled )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										DescriptorSetPools  & pools_to_reset  =  linear_descriptor_set_pools [ p_linear_pool_index ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										DescriptorSetPools : : Iterator  curr_pool  =  pools_to_reset . begin ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										while  ( curr_pool  ! =  pools_to_reset . end ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											HashMap < VkDescriptorPool ,  uint32_t > : : Iterator  curr_pair  =  curr_pool - > value . begin ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											while  ( curr_pair  ! =  curr_pool - > value . end ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vkResetDescriptorPool ( vk_device ,  curr_pair - > key ,  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												curr_pair - > value  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												+ + curr_pair ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											+ + curr_pool ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								// ----- COMMANDS -----
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_uniform_set_prepare_for_use ( CommandBufferID  p_cmd_buffer ,  UniformSetID  p_uniform_set ,  ShaderID  p_shader ,  uint32_t  p_set_index )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/******************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** TRANSFER ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/******************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ARRAYS_COMPATIBLE_FIELDWISE ( RDD : : BufferCopyRegion ,  VkBufferCopy ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static  void  _texture_subresource_range_to_vk ( const  RDD : : TextureSubresourceRange  & p_subresources ,  VkImageSubresourceRange  * r_vk_subreources )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									* r_vk_subreources  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_subreources - > aspectMask  =  ( VkImageAspectFlags ) p_subresources . aspect ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_subreources - > baseMipLevel  =  p_subresources . base_mipmap ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_subreources - > levelCount  =  p_subresources . mipmap_count ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_subreources - > baseArrayLayer  =  p_subresources . base_layer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_subreources - > layerCount  =  p_subresources . layer_count ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static  void  _texture_subresource_layers_to_vk ( const  RDD : : TextureSubresourceLayers  & p_subresources ,  VkImageSubresourceLayers  * r_vk_subreources )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									* r_vk_subreources  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_subreources - > aspectMask  =  ( VkImageAspectFlags ) p_subresources . aspect ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_subreources - > mipLevel  =  p_subresources . mipmap ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_subreources - > baseArrayLayer  =  p_subresources . base_layer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_subreources - > layerCount  =  p_subresources . layer_count ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static  void  _buffer_texture_copy_region_to_vk ( const  RDD : : BufferTextureCopyRegion  & p_copy_region ,  VkBufferImageCopy  * r_vk_copy_region )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									* r_vk_copy_region  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_copy_region - > bufferOffset  =  p_copy_region . buffer_offset ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_texture_subresource_layers_to_vk ( p_copy_region . texture_subresources ,  & r_vk_copy_region - > imageSubresource ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_copy_region - > imageOffset . x  =  p_copy_region . texture_offset . x ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_copy_region - > imageOffset . y  =  p_copy_region . texture_offset . y ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_copy_region - > imageOffset . z  =  p_copy_region . texture_offset . z ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_copy_region - > imageExtent . width  =  p_copy_region . texture_region_size . x ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_copy_region - > imageExtent . height  =  p_copy_region . texture_region_size . y ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_copy_region - > imageExtent . depth  =  p_copy_region . texture_region_size . z ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static  void  _texture_copy_region_to_vk ( const  RDD : : TextureCopyRegion  & p_copy_region ,  VkImageCopy  * r_vk_copy_region )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									* r_vk_copy_region  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_texture_subresource_layers_to_vk ( p_copy_region . src_subresources ,  & r_vk_copy_region - > srcSubresource ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_copy_region - > srcOffset . x  =  p_copy_region . src_offset . x ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_copy_region - > srcOffset . y  =  p_copy_region . src_offset . y ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_copy_region - > srcOffset . z  =  p_copy_region . src_offset . z ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_texture_subresource_layers_to_vk ( p_copy_region . dst_subresources ,  & r_vk_copy_region - > dstSubresource ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_copy_region - > dstOffset . x  =  p_copy_region . dst_offset . x ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_copy_region - > dstOffset . y  =  p_copy_region . dst_offset . y ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_copy_region - > dstOffset . z  =  p_copy_region . dst_offset . z ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_copy_region - > extent . width  =  p_copy_region . size . x ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_copy_region - > extent . height  =  p_copy_region . size . y ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_copy_region - > extent . depth  =  p_copy_region . size . z ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_clear_buffer ( CommandBufferID  p_cmd_buffer ,  BufferID  p_buffer ,  uint64_t  p_offset ,  uint64_t  p_size )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									const  BufferInfo  * buf_info  =  ( const  BufferInfo  * ) p_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdFillBuffer ( command_buffer - > vk_command_buffer ,  buf_info - > vk_buffer ,  p_offset ,  p_size ,  0 ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_copy_buffer ( CommandBufferID  p_cmd_buffer ,  BufferID  p_src_buffer ,  BufferID  p_dst_buffer ,  VectorView < BufferCopyRegion >  p_regions )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									const  BufferInfo  * src_buf_info  =  ( const  BufferInfo  * ) p_src_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  BufferInfo  * dst_buf_info  =  ( const  BufferInfo  * ) p_dst_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdCopyBuffer ( command_buffer - > vk_command_buffer ,  src_buf_info - > vk_buffer ,  dst_buf_info - > vk_buffer ,  p_regions . size ( ) ,  ( const  VkBufferCopy  * ) p_regions . ptr ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_copy_texture ( CommandBufferID  p_cmd_buffer ,  TextureID  p_src_texture ,  TextureLayout  p_src_texture_layout ,  TextureID  p_dst_texture ,  TextureLayout  p_dst_texture_layout ,  VectorView < TextureCopyRegion >  p_regions )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkImageCopy  * vk_copy_regions  =  ALLOCA_ARRAY ( VkImageCopy ,  p_regions . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_regions . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										_texture_copy_region_to_vk ( p_regions [ i ] ,  & vk_copy_regions [ i ] ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									const  TextureInfo  * src_tex_info  =  ( const  TextureInfo  * ) p_src_texture . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  TextureInfo  * dst_tex_info  =  ( const  TextureInfo  * ) p_dst_texture . id ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# ifdef DEBUG_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( src_tex_info - > transient )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_PRINT ( " TEXTURE_USAGE_TRANSIENT_BIT p_src_texture must not be used in command_copy_texture. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( dst_tex_info - > transient )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_PRINT ( " TEXTURE_USAGE_TRANSIENT_BIT p_dst_texture must not be used in command_copy_texture. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdCopyImage ( command_buffer - > vk_command_buffer ,  src_tex_info - > vk_view_create_info . image ,  RD_TO_VK_LAYOUT [ p_src_texture_layout ] ,  dst_tex_info - > vk_view_create_info . image ,  RD_TO_VK_LAYOUT [ p_dst_texture_layout ] ,  p_regions . size ( ) ,  vk_copy_regions ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_resolve_texture ( CommandBufferID  p_cmd_buffer ,  TextureID  p_src_texture ,  TextureLayout  p_src_texture_layout ,  uint32_t  p_src_layer ,  uint32_t  p_src_mipmap ,  TextureID  p_dst_texture ,  TextureLayout  p_dst_texture_layout ,  uint32_t  p_dst_layer ,  uint32_t  p_dst_mipmap )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									const  TextureInfo  * src_tex_info  =  ( const  TextureInfo  * ) p_src_texture . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  TextureInfo  * dst_tex_info  =  ( const  TextureInfo  * ) p_dst_texture . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkImageResolve  vk_resolve  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vk_resolve . srcSubresource . aspectMask  =  VK_IMAGE_ASPECT_COLOR_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vk_resolve . srcSubresource . mipLevel  =  p_src_mipmap ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vk_resolve . srcSubresource . baseArrayLayer  =  p_src_layer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vk_resolve . srcSubresource . layerCount  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vk_resolve . dstSubresource . aspectMask  =  VK_IMAGE_ASPECT_COLOR_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vk_resolve . dstSubresource . mipLevel  =  p_dst_mipmap ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vk_resolve . dstSubresource . baseArrayLayer  =  p_dst_layer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vk_resolve . dstSubresource . layerCount  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vk_resolve . extent . width  =  MAX ( 1u ,  src_tex_info - > vk_create_info . extent . width  > >  p_src_mipmap ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vk_resolve . extent . height  =  MAX ( 1u ,  src_tex_info - > vk_create_info . extent . height  > >  p_src_mipmap ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vk_resolve . extent . depth  =  MAX ( 1u ,  src_tex_info - > vk_create_info . extent . depth  > >  p_src_mipmap ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# ifdef DEBUG_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( src_tex_info - > transient )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_PRINT ( " TEXTURE_USAGE_TRANSIENT_BIT p_src_texture must not be used in command_resolve_texture. Use a resolve store action pass instead. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( dst_tex_info - > transient )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_PRINT ( " TEXTURE_USAGE_TRANSIENT_BIT p_dst_texture must not be used in command_resolve_texture. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdResolveImage ( command_buffer - > vk_command_buffer ,  src_tex_info - > vk_view_create_info . image ,  RD_TO_VK_LAYOUT [ p_src_texture_layout ] ,  dst_tex_info - > vk_view_create_info . image ,  RD_TO_VK_LAYOUT [ p_dst_texture_layout ] ,  1 ,  & vk_resolve ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_clear_color_texture ( CommandBufferID  p_cmd_buffer ,  TextureID  p_texture ,  TextureLayout  p_texture_layout ,  const  Color  & p_color ,  const  TextureSubresourceRange  & p_subresources )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkClearColorValue  vk_color  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									memcpy ( & vk_color . float32 ,  p_color . components ,  sizeof ( VkClearColorValue : : float32 ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkImageSubresourceRange  vk_subresources  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_texture_subresource_range_to_vk ( p_subresources ,  & vk_subresources ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									const  TextureInfo  * tex_info  =  ( const  TextureInfo  * ) p_texture . id ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# ifdef DEBUG_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( tex_info - > transient )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_PRINT ( " TEXTURE_USAGE_TRANSIENT_BIT p_texture must not be used in command_clear_color_texture. Use a clear store action pass instead. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdClearColorImage ( command_buffer - > vk_command_buffer ,  tex_info - > vk_view_create_info . image ,  RD_TO_VK_LAYOUT [ p_texture_layout ] ,  & vk_color ,  1 ,  & vk_subresources ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_copy_buffer_to_texture ( CommandBufferID  p_cmd_buffer ,  BufferID  p_src_buffer ,  TextureID  p_dst_texture ,  TextureLayout  p_dst_texture_layout ,  VectorView < BufferTextureCopyRegion >  p_regions )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkBufferImageCopy  * vk_copy_regions  =  ALLOCA_ARRAY ( VkBufferImageCopy ,  p_regions . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_regions . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										_buffer_texture_copy_region_to_vk ( p_regions [ i ] ,  & vk_copy_regions [ i ] ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									const  BufferInfo  * buf_info  =  ( const  BufferInfo  * ) p_src_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  TextureInfo  * tex_info  =  ( const  TextureInfo  * ) p_dst_texture . id ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# ifdef DEBUG_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( tex_info - > transient )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_PRINT ( " TEXTURE_USAGE_TRANSIENT_BIT p_dst_texture must not be used in command_copy_buffer_to_texture. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdCopyBufferToImage ( command_buffer - > vk_command_buffer ,  buf_info - > vk_buffer ,  tex_info - > vk_view_create_info . image ,  RD_TO_VK_LAYOUT [ p_dst_texture_layout ] ,  p_regions . size ( ) ,  vk_copy_regions ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_copy_texture_to_buffer ( CommandBufferID  p_cmd_buffer ,  TextureID  p_src_texture ,  TextureLayout  p_src_texture_layout ,  BufferID  p_dst_buffer ,  VectorView < BufferTextureCopyRegion >  p_regions )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkBufferImageCopy  * vk_copy_regions  =  ALLOCA_ARRAY ( VkBufferImageCopy ,  p_regions . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_regions . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										_buffer_texture_copy_region_to_vk ( p_regions [ i ] ,  & vk_copy_regions [ i ] ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									const  TextureInfo  * tex_info  =  ( const  TextureInfo  * ) p_src_texture . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  BufferInfo  * buf_info  =  ( const  BufferInfo  * ) p_dst_buffer . id ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# ifdef DEBUG_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( tex_info - > transient )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ERR_PRINT ( " TEXTURE_USAGE_TRANSIENT_BIT p_src_texture must not be used in command_copy_texture_to_buffer. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdCopyImageToBuffer ( command_buffer - > vk_command_buffer ,  tex_info - > vk_view_create_info . image ,  RD_TO_VK_LAYOUT [ p_src_texture_layout ] ,  buf_info - > vk_buffer ,  p_regions . size ( ) ,  vk_copy_regions ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/******************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** PIPELINE ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/******************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : pipeline_free ( PipelineID  p_pipeline )  {  
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkDestroyPipeline ( vk_device ,  ( VkPipeline ) p_pipeline . id ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_PIPELINE ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// ----- BINDING -----
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_bind_push_constants ( CommandBufferID  p_cmd_buffer ,  ShaderID  p_shader ,  uint32_t  p_dst_first_index ,  VectorView < uint32_t >  p_data )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									const  ShaderInfo  * shader_info  =  ( const  ShaderInfo  * ) p_shader . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdPushConstants ( command_buffer - > vk_command_buffer ,  shader_info - > vk_pipeline_layout ,  shader_info - > vk_push_constant_stages ,  p_dst_first_index  *  sizeof ( uint32_t ) ,  p_data . size ( )  *  sizeof ( uint32_t ) ,  p_data . ptr ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// ----- CACHE -----
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								int  RenderingDeviceDriverVulkan : : caching_instance_count  =  0 ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								bool  RenderingDeviceDriverVulkan : : pipeline_cache_create ( const  Vector < uint8_t >  & p_data )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( caching_instance_count )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										WARN_PRINT ( " There's already a RenderingDeviceDriverVulkan instance doing PSO caching. Only one can at the same time. This one won't. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									caching_instance_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipelines_cache . current_size  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipelines_cache . buffer . resize ( sizeof ( PipelineCacheHeader ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Parse.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									{ 
							 
						 
					
						
							
								
									
										
										
										
											2024-03-05 14:11:32 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										if  ( p_data . is_empty ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// No pre-existing cache, just create it.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										}  else  if  ( p_data . size ( )  < =  ( int ) sizeof ( PipelineCacheHeader ) )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-08-04 02:17:27 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											print_verbose ( " Invalid/corrupt Vulkan pipelines cache. Existing shader pipeline cache will be ignored, which may result in stuttering during gameplay. " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  PipelineCacheHeader  * loaded_header  =  reinterpret_cast < const  PipelineCacheHeader  * > ( p_data . ptr ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( loaded_header - > magic  ! =  868  +  VK_PIPELINE_CACHE_HEADER_VERSION_ONE )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-08-04 02:17:27 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												print_verbose ( " Invalid Vulkan pipelines cache magic number. Existing shader pipeline cache will be ignored, which may result in stuttering during gameplay. " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												const  uint8_t  * loaded_buffer_start  =  p_data . ptr ( )  +  sizeof ( PipelineCacheHeader ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												uint32_t  loaded_buffer_size  =  p_data . size ( )  -  sizeof ( PipelineCacheHeader ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												const  PipelineCacheHeader  * current_header  =  ( PipelineCacheHeader  * ) pipelines_cache . buffer . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												if  ( loaded_header - > data_hash  ! =  hash_murmur3_buffer ( loaded_buffer_start ,  loaded_buffer_size )  | | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														loaded_header - > data_size  ! =  loaded_buffer_size  | | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														loaded_header - > vendor_id  ! =  current_header - > vendor_id  | | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														loaded_header - > device_id  ! =  current_header - > device_id  | | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														loaded_header - > driver_version  ! =  current_header - > driver_version  | | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														memcmp ( loaded_header - > uuid ,  current_header - > uuid ,  VK_UUID_SIZE )  ! =  0  | | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
														loaded_header - > driver_abi  ! =  current_header - > driver_abi )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-08-04 02:17:27 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
													print_verbose ( " Invalid Vulkan pipelines cache header. This may be due to an engine change, GPU change or graphics driver version change. Existing shader pipeline cache will be ignored, which may result in stuttering during gameplay. " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
												}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													pipelines_cache . current_size  =  loaded_buffer_size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													pipelines_cache . buffer  =  p_data ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Create.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									{ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkPipelineCacheCreateInfo  cache_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										cache_info . sType  =  VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										cache_info . initialDataSize  =  pipelines_cache . buffer . size ( )  -  sizeof ( PipelineCacheHeader ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										cache_info . pInitialData  =  pipelines_cache . buffer . ptr ( )  +  sizeof ( PipelineCacheHeader ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										VkResult  err  =  vkCreatePipelineCache ( vk_device ,  & cache_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_PIPELINE_CACHE ) ,  & pipelines_cache . vk_cache ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										if  ( err  ! =  VK_SUCCESS )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											WARN_PRINT ( " vkCreatePipelinecache failed with error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : pipeline_cache_free ( )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( pipelines_cache . vk_cache ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkDestroyPipelineCache ( vk_device ,  pipelines_cache . vk_cache ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_PIPELINE_CACHE ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									pipelines_cache . vk_cache  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( caching_instance_count  >  0 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									caching_instance_count - - ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								size_t  RenderingDeviceDriverVulkan : : pipeline_cache_query_size ( )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( pipelines_cache . vk_cache ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// FIXME:
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// We're letting the cache grow unboundedly. We may want to set at limit and see if implementations use LRU or the like.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// If we do, we won't be able to assume any longer that the cache is dirty if, and only if, it has grown.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkResult  err  =  vkGetPipelineCacheData ( vk_device ,  pipelines_cache . vk_cache ,  & pipelines_cache . current_size ,  nullptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( err ,  0 ,  " vkGetPipelineCacheData failed with error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  pipelines_cache . current_size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								Vector < uint8_t >  RenderingDeviceDriverVulkan : : pipeline_cache_serialize ( )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( pipelines_cache . vk_cache ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipelines_cache . buffer . resize ( pipelines_cache . current_size  +  sizeof ( PipelineCacheHeader ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkResult  err  =  vkGetPipelineCacheData ( vk_device ,  pipelines_cache . vk_cache ,  & pipelines_cache . current_size ,  pipelines_cache . buffer . ptrw ( )  +  sizeof ( PipelineCacheHeader ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( err  ! =  VK_SUCCESS  & &  err  ! =  VK_INCOMPLETE ,  Vector < uint8_t > ( ) ) ;  // Incomplete is OK because the cache may have grown since the size was queried (unless when exiting).
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// The real buffer size may now be bigger than the updated current_size.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// We take into account the new size but keep the buffer resized in a worst-case fashion.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									PipelineCacheHeader  * header  =  ( PipelineCacheHeader  * ) pipelines_cache . buffer . ptrw ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									header - > data_size  =  pipelines_cache . current_size ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									header - > data_hash  =  hash_murmur3_buffer ( pipelines_cache . buffer . ptr ( )  +  sizeof ( PipelineCacheHeader ) ,  pipelines_cache . current_size ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  pipelines_cache . buffer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*******************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** RENDERING ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*******************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// ----- SUBPASS -----
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// RDD::AttachmentLoadOp == VkAttachmentLoadOp.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : ATTACHMENT_LOAD_OP_LOAD ,  VK_ATTACHMENT_LOAD_OP_LOAD ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : ATTACHMENT_LOAD_OP_CLEAR ,  VK_ATTACHMENT_LOAD_OP_CLEAR ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : ATTACHMENT_LOAD_OP_DONT_CARE ,  VK_ATTACHMENT_LOAD_OP_DONT_CARE ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// RDD::AttachmentStoreOp == VkAttachmentStoreOp.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : ATTACHMENT_STORE_OP_STORE ,  VK_ATTACHMENT_STORE_OP_STORE ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : ATTACHMENT_STORE_OP_DONT_CARE ,  VK_ATTACHMENT_STORE_OP_DONT_CARE ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// Assuming Vulkan and RDD's are backed by uint32_t in:
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// - VkSubpassDescription2::pPreserveAttachments and RDD::Subpass::preserve_attachments.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// - VkRenderPassCreateInfo2KHR::pCorrelatedViewMasks and p_view_correlation_mask.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static  void  _attachment_reference_to_vk ( const  RDD : : AttachmentReference  & p_attachment_reference ,  VkAttachmentReference2KHR  * r_vk_attachment_reference )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									* r_vk_attachment_reference  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_attachment_reference - > sType  =  VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									r_vk_attachment_reference - > attachment  =  p_attachment_reference . attachment ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-04-24 14:30:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									r_vk_attachment_reference - > layout  =  RD_TO_VK_LAYOUT [ p_attachment_reference . layout ] ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									r_vk_attachment_reference - > aspectMask  =  ( VkImageAspectFlags ) p_attachment_reference . aspect ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								RDD : : RenderPassID  RenderingDeviceDriverVulkan : : render_pass_create ( VectorView < Attachment >  p_attachments ,  VectorView < Subpass >  p_subpasses ,  VectorView < SubpassDependency >  p_subpass_dependencies ,  uint32_t  p_view_count ,  AttachmentReference  p_fragment_density_map_attachment )  {  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									// These are only used if we use multiview but we need to define them in scope.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  uint32_t  view_mask  =  ( 1  < <  p_view_count )  -  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  uint32_t  correlation_mask  =  ( 1  < <  p_view_count )  -  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkAttachmentDescription2KHR  * vk_attachments  =  ALLOCA_ARRAY ( VkAttachmentDescription2KHR ,  p_attachments . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_attachments . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_attachments [ i ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_attachments [ i ] . sType  =  VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_attachments [ i ] . format  =  RD_TO_VK_FORMAT [ p_attachments [ i ] . format ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_attachments [ i ] . samples  =  _ensure_supported_sample_count ( p_attachments [ i ] . samples ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_attachments [ i ] . loadOp  =  ( VkAttachmentLoadOp ) p_attachments [ i ] . load_op ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_attachments [ i ] . storeOp  =  ( VkAttachmentStoreOp ) p_attachments [ i ] . store_op ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_attachments [ i ] . stencilLoadOp  =  ( VkAttachmentLoadOp ) p_attachments [ i ] . stencil_load_op ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_attachments [ i ] . stencilStoreOp  =  ( VkAttachmentStoreOp ) p_attachments [ i ] . stencil_store_op ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-04-24 14:30:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										vk_attachments [ i ] . initialLayout  =  RD_TO_VK_LAYOUT [ p_attachments [ i ] . initial_layout ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_attachments [ i ] . finalLayout  =  RD_TO_VK_LAYOUT [ p_attachments [ i ] . final_layout ] ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkSubpassDescription2KHR  * vk_subpasses  =  ALLOCA_ARRAY ( VkSubpassDescription2KHR ,  p_subpasses . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_subpasses . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkAttachmentReference2KHR  * vk_subpass_input_attachments  =  ALLOCA_ARRAY ( VkAttachmentReference2KHR ,  p_subpasses [ i ] . input_references . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  j  =  0 ;  j  <  p_subpasses [ i ] . input_references . size ( ) ;  j + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											_attachment_reference_to_vk ( p_subpasses [ i ] . input_references [ j ] ,  & vk_subpass_input_attachments [ j ] ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkAttachmentReference2KHR  * vk_subpass_color_attachments  =  ALLOCA_ARRAY ( VkAttachmentReference2KHR ,  p_subpasses [ i ] . color_references . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  j  =  0 ;  j  <  p_subpasses [ i ] . color_references . size ( ) ;  j + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											_attachment_reference_to_vk ( p_subpasses [ i ] . color_references [ j ] ,  & vk_subpass_color_attachments [ j ] ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkAttachmentReference2KHR  * vk_subpass_resolve_attachments  =  ALLOCA_ARRAY ( VkAttachmentReference2KHR ,  p_subpasses [ i ] . resolve_references . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  j  =  0 ;  j  <  p_subpasses [ i ] . resolve_references . size ( ) ;  j + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											_attachment_reference_to_vk ( p_subpasses [ i ] . resolve_references [ j ] ,  & vk_subpass_resolve_attachments [ j ] ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkAttachmentReference2KHR  * vk_subpass_depth_stencil_attachment  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( p_subpasses [ i ] . depth_stencil_reference . attachment  ! =  AttachmentReference : : UNUSED )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_subpass_depth_stencil_attachment  =  ALLOCA_SINGLE ( VkAttachmentReference2KHR ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											_attachment_reference_to_vk ( p_subpasses [ i ] . depth_stencil_reference ,  vk_subpass_depth_stencil_attachment ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpasses [ i ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpasses [ i ] . sType  =  VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpasses [ i ] . pipelineBindPoint  =  VK_PIPELINE_BIND_POINT_GRAPHICS ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpasses [ i ] . viewMask  =  p_view_count  = =  1  ?  0  :  view_mask ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpasses [ i ] . inputAttachmentCount  =  p_subpasses [ i ] . input_references . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpasses [ i ] . pInputAttachments  =  vk_subpass_input_attachments ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpasses [ i ] . colorAttachmentCount  =  p_subpasses [ i ] . color_references . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpasses [ i ] . pColorAttachments  =  vk_subpass_color_attachments ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpasses [ i ] . pResolveAttachments  =  vk_subpass_resolve_attachments ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpasses [ i ] . pDepthStencilAttachment  =  vk_subpass_depth_stencil_attachment ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpasses [ i ] . preserveAttachmentCount  =  p_subpasses [ i ] . preserve_attachments . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpasses [ i ] . pPreserveAttachments  =  p_subpasses [ i ] . preserve_attachments . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										// Fragment shading rate.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( fsr_capabilities . attachment_supported  & &  p_subpasses [ i ] . fragment_shading_rate_reference . attachment  ! =  AttachmentReference : : UNUSED )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VkAttachmentReference2KHR  * vk_subpass_fsr_attachment  =  ALLOCA_SINGLE ( VkAttachmentReference2KHR ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											* vk_subpass_fsr_attachment  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_subpass_fsr_attachment - > sType  =  VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_subpass_fsr_attachment - > attachment  =  p_subpasses [ i ] . fragment_shading_rate_reference . attachment ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_subpass_fsr_attachment - > layout  =  VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											VkFragmentShadingRateAttachmentInfoKHR  * vk_fsr_info  =  ALLOCA_SINGLE ( VkFragmentShadingRateAttachmentInfoKHR ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											* vk_fsr_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_fsr_info - > sType  =  VK_STRUCTURE_TYPE_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_fsr_info - > pFragmentShadingRateAttachment  =  vk_subpass_fsr_attachment ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_fsr_info - > shadingRateAttachmentTexelSize . width  =  p_subpasses [ i ] . fragment_shading_rate_texel_size . x ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_fsr_info - > shadingRateAttachmentTexelSize . height  =  p_subpasses [ i ] . fragment_shading_rate_texel_size . y ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											vk_subpasses [ i ] . pNext  =  vk_fsr_info ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkSubpassDependency2KHR  * vk_subpass_dependencies  =  ALLOCA_ARRAY ( VkSubpassDependency2KHR ,  p_subpass_dependencies . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_subpass_dependencies . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpass_dependencies [ i ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpass_dependencies [ i ] . sType  =  VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpass_dependencies [ i ] . srcSubpass  =  p_subpass_dependencies [ i ] . src_subpass ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpass_dependencies [ i ] . dstSubpass  =  p_subpass_dependencies [ i ] . dst_subpass ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-04-24 14:30:48 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										vk_subpass_dependencies [ i ] . srcStageMask  =  _rd_to_vk_pipeline_stages ( p_subpass_dependencies [ i ] . src_stages ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpass_dependencies [ i ] . dstStageMask  =  _rd_to_vk_pipeline_stages ( p_subpass_dependencies [ i ] . dst_stages ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpass_dependencies [ i ] . srcAccessMask  =  _rd_to_vk_access_flags ( p_subpass_dependencies [ i ] . src_access ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_subpass_dependencies [ i ] . dstAccessMask  =  _rd_to_vk_access_flags ( p_subpass_dependencies [ i ] . dst_access ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkRenderPassCreateInfo2KHR  create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . sType  =  VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . attachmentCount  =  p_attachments . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . pAttachments  =  vk_attachments ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . subpassCount  =  p_subpasses . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . pSubpasses  =  vk_subpasses ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . dependencyCount  =  p_subpass_dependencies . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . pDependencies  =  vk_subpass_dependencies ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . correlatedViewMaskCount  =  p_view_count  = =  1  ?  0  :  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									create_info . pCorrelatedViewMasks  =  p_view_count  = =  1  ?  nullptr  :  & correlation_mask ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Multiview.
 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( p_view_count  >  1  & &  device_functions . CreateRenderPass2KHR  = =  nullptr )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// This is only required when not using vkCreateRenderPass2.
 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										// We add it if vkCreateRenderPass2KHR is not supported,
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// resulting this in being passed to our vkCreateRenderPass fallback.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  * vk_view_masks  =  ALLOCA_ARRAY ( uint32_t ,  p_subpasses . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  i  =  0 ;  i  <  p_subpasses . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_view_masks [ i ]  =  view_mask ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkRenderPassMultiviewCreateInfo  * multiview_create_info  =  ALLOCA_SINGLE ( VkRenderPassMultiviewCreateInfo ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										* multiview_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										multiview_create_info - > sType  =  VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										multiview_create_info - > subpassCount  =  p_subpasses . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										multiview_create_info - > pViewMasks  =  vk_view_masks ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										multiview_create_info - > correlationMaskCount  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										multiview_create_info - > pCorrelationMasks  =  & correlation_mask ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . pNext  =  multiview_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Fragment density map.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									bool  uses_fragment_density_map  =  fdm_capabilities . attachment_supported  & &  p_fragment_density_map_attachment . attachment  ! =  AttachmentReference : : UNUSED ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( uses_fragment_density_map )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkRenderPassFragmentDensityMapCreateInfoEXT  * vk_fdm_info  =  ALLOCA_SINGLE ( VkRenderPassFragmentDensityMapCreateInfoEXT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_fdm_info - > sType  =  VK_STRUCTURE_TYPE_RENDER_PASS_FRAGMENT_DENSITY_MAP_CREATE_INFO_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_fdm_info - > fragmentDensityMapAttachment . attachment  =  p_fragment_density_map_attachment . attachment ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_fdm_info - > fragmentDensityMapAttachment . layout  =  RD_TO_VK_LAYOUT [ p_fragment_density_map_attachment . layout ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_fdm_info - > pNext  =  create_info . pNext ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										create_info . pNext  =  vk_fdm_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VkRenderPass  vk_render_pass  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  res  =  _create_render_pass ( vk_device ,  & create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_RENDER_PASS ) ,  & vk_render_pass ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( res ,  RenderPassID ( ) ,  " vkCreateRenderPass2KHR failed with error  "  +  itos ( res )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RenderPassInfo  * render_pass  =  VersatileResource : : allocate < RenderPassInfo > ( resources_allocator ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									render_pass - > vk_render_pass  =  vk_render_pass ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  RenderPassID ( render_pass ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : render_pass_free ( RenderPassID  p_render_pass )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									RenderPassInfo  * render_pass  =  ( RenderPassInfo  * ) ( p_render_pass . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkDestroyRenderPass ( vk_device ,  render_pass - > vk_render_pass ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_RENDER_PASS ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VersatileResource : : free < RenderPassInfo > ( resources_allocator ,  render_pass ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// ----- COMMANDS -----
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ARRAYS_COMPATIBLE_FIELDWISE ( RDD : : RenderPassClearValue ,  VkClearValue ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_begin_render_pass ( CommandBufferID  p_cmd_buffer ,  RenderPassID  p_render_pass ,  FramebufferID  p_framebuffer ,  CommandBufferType  p_cmd_buffer_type ,  const  Rect2i  & p_rect ,  VectorView < RenderPassClearValue >  p_clear_values )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									CommandBufferInfo  * command_buffer  =  ( CommandBufferInfo  * ) ( p_cmd_buffer . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									RenderPassInfo  * render_pass  =  ( RenderPassInfo  * ) ( p_render_pass . id ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-18 14:23:37 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									Framebuffer  * framebuffer  =  ( Framebuffer  * ) ( p_framebuffer . id ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-10-18 14:23:37 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( framebuffer - > swap_chain_acquired )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Insert a barrier to wait for the acquisition of the framebuffer before the render pass begins.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkImageMemoryBarrier  image_barrier  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										image_barrier . sType  =  VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										image_barrier . dstAccessMask  =  VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										image_barrier . newLayout  =  VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										image_barrier . srcQueueFamilyIndex  =  VK_QUEUE_FAMILY_IGNORED ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										image_barrier . dstQueueFamilyIndex  =  VK_QUEUE_FAMILY_IGNORED ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										image_barrier . image  =  framebuffer - > swap_chain_image ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										image_barrier . subresourceRange  =  framebuffer - > swap_chain_image_subresource_range ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										vkCmdPipelineBarrier ( command_buffer - > vk_command_buffer ,  VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT ,  VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT ,  0 ,  0 ,  nullptr ,  0 ,  nullptr ,  1 ,  & image_barrier ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-18 14:23:37 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										framebuffer - > swap_chain_acquired  =  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VkRenderPassBeginInfo  render_pass_begin  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									render_pass_begin . sType  =  VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									render_pass_begin . renderPass  =  render_pass - > vk_render_pass ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-18 14:23:37 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									render_pass_begin . framebuffer  =  framebuffer - > vk_framebuffer ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									render_pass_begin . renderArea . offset . x  =  p_rect . position . x ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									render_pass_begin . renderArea . offset . y  =  p_rect . position . y ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									render_pass_begin . renderArea . extent . width  =  p_rect . size . x ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									render_pass_begin . renderArea . extent . height  =  p_rect . size . y ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									render_pass_begin . clearValueCount  =  p_clear_values . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									render_pass_begin . pClearValues  =  ( const  VkClearValue  * ) p_clear_values . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkSubpassContents  vk_subpass_contents  =  p_cmd_buffer_type  = =  COMMAND_BUFFER_TYPE_PRIMARY  ?  VK_SUBPASS_CONTENTS_INLINE  :  VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdBeginRenderPass ( command_buffer - > vk_command_buffer ,  & render_pass_begin ,  vk_subpass_contents ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									command_buffer - > active_framebuffer  =  framebuffer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									command_buffer - > active_render_pass  =  render_pass ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-01-22 16:11:26 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# if PRINT_NATIVE_COMMANDS 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									print_line ( vformat ( " vkCmdBeginRenderPass Pass 0x%uX Framebuffer 0x%uX " ,  p_render_pass . id ,  p_framebuffer . id ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_end_render_pass ( CommandBufferID  p_cmd_buffer )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									CommandBufferInfo  * command_buffer  =  ( CommandBufferInfo  * ) ( p_cmd_buffer . id ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( command_buffer - > active_framebuffer  ! =  nullptr  & &  " A framebuffer must be active. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( command_buffer - > active_render_pass  ! =  nullptr  & &  " A render pass must be active. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkCmdEndRenderPass ( command_buffer - > vk_command_buffer ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									command_buffer - > active_render_pass  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									command_buffer - > active_framebuffer  =  nullptr ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-01-22 16:11:26 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# if PRINT_NATIVE_COMMANDS 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									print_line ( " vkCmdEndRenderPass " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_next_render_subpass ( CommandBufferID  p_cmd_buffer ,  CommandBufferType  p_cmd_buffer_type )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VkSubpassContents  vk_subpass_contents  =  p_cmd_buffer_type  = =  COMMAND_BUFFER_TYPE_PRIMARY  ?  VK_SUBPASS_CONTENTS_INLINE  :  VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdNextSubpass ( command_buffer - > vk_command_buffer ,  vk_subpass_contents ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_render_set_viewport ( CommandBufferID  p_cmd_buffer ,  VectorView < Rect2i >  p_viewports )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VkViewport  * vk_viewports  =  ALLOCA_ARRAY ( VkViewport ,  p_viewports . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_viewports . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_viewports [ i ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_viewports [ i ] . x  =  p_viewports [ i ] . position . x ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_viewports [ i ] . y  =  p_viewports [ i ] . position . y ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_viewports [ i ] . width  =  p_viewports [ i ] . size . x ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_viewports [ i ] . height  =  p_viewports [ i ] . size . y ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_viewports [ i ] . minDepth  =  0.0f ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_viewports [ i ] . maxDepth  =  1.0f ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdSetViewport ( command_buffer - > vk_command_buffer ,  0 ,  p_viewports . size ( ) ,  vk_viewports ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_render_set_scissor ( CommandBufferID  p_cmd_buffer ,  VectorView < Rect2i >  p_scissors )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkCmdSetScissor ( command_buffer - > vk_command_buffer ,  0 ,  p_scissors . size ( ) ,  ( VkRect2D  * ) p_scissors . ptr ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_render_clear_attachments ( CommandBufferID  p_cmd_buffer ,  VectorView < AttachmentClear >  p_attachment_clears ,  VectorView < Rect2i >  p_rects )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VkClearAttachment  * vk_clears  =  ALLOCA_ARRAY ( VkClearAttachment ,  p_attachment_clears . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_attachment_clears . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_clears [ i ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										memcpy ( & vk_clears [ i ] . clearValue ,  & p_attachment_clears [ i ] . value ,  sizeof ( VkClearValue ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_clears [ i ] . colorAttachment  =  p_attachment_clears [ i ] . color_attachment ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_clears [ i ] . aspectMask  =  p_attachment_clears [ i ] . aspect ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkClearRect  * vk_rects  =  ALLOCA_ARRAY ( VkClearRect ,  p_rects . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_rects . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_rects [ i ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_rects [ i ] . rect . offset . x  =  p_rects [ i ] . position . x ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_rects [ i ] . rect . offset . y  =  p_rects [ i ] . position . y ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_rects [ i ] . rect . extent . width  =  p_rects [ i ] . size . x ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_rects [ i ] . rect . extent . height  =  p_rects [ i ] . size . y ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_rects [ i ] . baseArrayLayer  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_rects [ i ] . layerCount  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdClearAttachments ( command_buffer - > vk_command_buffer ,  p_attachment_clears . size ( ) ,  vk_clears ,  p_rects . size ( ) ,  vk_rects ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_bind_render_pipeline ( CommandBufferID  p_cmd_buffer ,  PipelineID  p_pipeline )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkCmdBindPipeline ( command_buffer - > vk_command_buffer ,  VK_PIPELINE_BIND_POINT_GRAPHICS ,  ( VkPipeline ) p_pipeline . id ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_bind_render_uniform_set ( CommandBufferID  p_cmd_buffer ,  UniformSetID  p_uniform_set ,  ShaderID  p_shader ,  uint32_t  p_set_index )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									const  ShaderInfo  * shader_info  =  ( const  ShaderInfo  * ) p_shader . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  UniformSetInfo  * usi  =  ( const  UniformSetInfo  * ) p_uniform_set . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdBindDescriptorSets ( command_buffer - > vk_command_buffer ,  VK_PIPELINE_BIND_POINT_GRAPHICS ,  shader_info - > vk_pipeline_layout ,  p_set_index ,  1 ,  & usi - > vk_descriptor_set ,  0 ,  nullptr ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_bind_render_uniform_sets ( CommandBufferID  p_cmd_buffer ,  VectorView < UniformSetID >  p_uniform_sets ,  ShaderID  p_shader ,  uint32_t  p_first_set_index ,  uint32_t  p_set_count )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_set_count  = =  0 )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									thread_local  LocalVector < VkDescriptorSet >  sets ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sets . clear ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sets . resize ( p_set_count ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_set_count ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										sets [ i ]  =  ( ( const  UniformSetInfo  * ) p_uniform_sets [ i ] . id ) - > vk_descriptor_set ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  ShaderInfo  * shader_info  =  ( const  ShaderInfo  * ) p_shader . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdBindDescriptorSets ( command_buffer - > vk_command_buffer ,  VK_PIPELINE_BIND_POINT_GRAPHICS ,  shader_info - > vk_pipeline_layout ,  p_first_set_index ,  p_set_count ,  & sets [ 0 ] ,  0 ,  nullptr ) ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_render_draw ( CommandBufferID  p_cmd_buffer ,  uint32_t  p_vertex_count ,  uint32_t  p_instance_count ,  uint32_t  p_base_vertex ,  uint32_t  p_first_instance )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkCmdDraw ( command_buffer - > vk_command_buffer ,  p_vertex_count ,  p_instance_count ,  p_base_vertex ,  p_first_instance ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_render_draw_indexed ( CommandBufferID  p_cmd_buffer ,  uint32_t  p_index_count ,  uint32_t  p_instance_count ,  uint32_t  p_first_index ,  int32_t  p_vertex_offset ,  uint32_t  p_first_instance )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkCmdDrawIndexed ( command_buffer - > vk_command_buffer ,  p_index_count ,  p_instance_count ,  p_first_index ,  p_vertex_offset ,  p_first_instance ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_render_draw_indexed_indirect ( CommandBufferID  p_cmd_buffer ,  BufferID  p_indirect_buffer ,  uint64_t  p_offset ,  uint32_t  p_draw_count ,  uint32_t  p_stride )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									const  BufferInfo  * buf_info  =  ( const  BufferInfo  * ) p_indirect_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdDrawIndexedIndirect ( command_buffer - > vk_command_buffer ,  buf_info - > vk_buffer ,  p_offset ,  p_draw_count ,  p_stride ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_render_draw_indexed_indirect_count ( CommandBufferID  p_cmd_buffer ,  BufferID  p_indirect_buffer ,  uint64_t  p_offset ,  BufferID  p_count_buffer ,  uint64_t  p_count_buffer_offset ,  uint32_t  p_max_draw_count ,  uint32_t  p_stride )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									const  BufferInfo  * indirect_buf_info  =  ( const  BufferInfo  * ) p_indirect_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  BufferInfo  * count_buf_info  =  ( const  BufferInfo  * ) p_count_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdDrawIndexedIndirectCount ( command_buffer - > vk_command_buffer ,  indirect_buf_info - > vk_buffer ,  p_offset ,  count_buf_info - > vk_buffer ,  p_count_buffer_offset ,  p_max_draw_count ,  p_stride ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_render_draw_indirect ( CommandBufferID  p_cmd_buffer ,  BufferID  p_indirect_buffer ,  uint64_t  p_offset ,  uint32_t  p_draw_count ,  uint32_t  p_stride )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									const  BufferInfo  * buf_info  =  ( const  BufferInfo  * ) p_indirect_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdDrawIndirect ( command_buffer - > vk_command_buffer ,  buf_info - > vk_buffer ,  p_offset ,  p_draw_count ,  p_stride ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_render_draw_indirect_count ( CommandBufferID  p_cmd_buffer ,  BufferID  p_indirect_buffer ,  uint64_t  p_offset ,  BufferID  p_count_buffer ,  uint64_t  p_count_buffer_offset ,  uint32_t  p_max_draw_count ,  uint32_t  p_stride )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									const  BufferInfo  * indirect_buf_info  =  ( const  BufferInfo  * ) p_indirect_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  BufferInfo  * count_buf_info  =  ( const  BufferInfo  * ) p_count_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdDrawIndirectCount ( command_buffer - > vk_command_buffer ,  indirect_buf_info - > vk_buffer ,  p_offset ,  count_buf_info - > vk_buffer ,  p_count_buffer_offset ,  p_max_draw_count ,  p_stride ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_render_bind_vertex_buffers ( CommandBufferID  p_cmd_buffer ,  uint32_t  p_binding_count ,  const  BufferID  * p_buffers ,  const  uint64_t  * p_offsets )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VkBuffer  * vk_buffers  =  ALLOCA_ARRAY ( VkBuffer ,  p_binding_count ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_binding_count ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_buffers [ i ]  =  ( ( const  BufferInfo  * ) p_buffers [ i ] . id ) - > vk_buffer ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdBindVertexBuffers ( command_buffer - > vk_command_buffer ,  0 ,  p_binding_count ,  vk_buffers ,  p_offsets ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_render_bind_index_buffer ( CommandBufferID  p_cmd_buffer ,  BufferID  p_buffer ,  IndexBufferFormat  p_format ,  uint64_t  p_offset )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									const  BufferInfo  * buf_info  =  ( const  BufferInfo  * ) p_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdBindIndexBuffer ( command_buffer - > vk_command_buffer ,  buf_info - > vk_buffer ,  p_offset ,  p_format  = =  INDEX_BUFFER_FORMAT_UINT16  ?  VK_INDEX_TYPE_UINT16  :  VK_INDEX_TYPE_UINT32 ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_render_set_blend_constants ( CommandBufferID  p_cmd_buffer ,  const  Color  & p_constants )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkCmdSetBlendConstants ( command_buffer - > vk_command_buffer ,  p_constants . components ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_render_set_line_width ( CommandBufferID  p_cmd_buffer ,  float  p_width )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkCmdSetLineWidth ( command_buffer - > vk_command_buffer ,  p_width ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// ----- PIPELINE -----
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static  const  VkPrimitiveTopology  RD_TO_VK_PRIMITIVE [ RDD : : RENDER_PRIMITIVE_MAX ]  =  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_PRIMITIVE_TOPOLOGY_POINT_LIST , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_PRIMITIVE_TOPOLOGY_LINE_LIST , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_PRIMITIVE_TOPOLOGY_LINE_STRIP , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VK_PRIMITIVE_TOPOLOGY_PATCH_LIST , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								} ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// RDD::PolygonCullMode == VkCullModeFlagBits.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : POLYGON_CULL_DISABLED ,  VK_CULL_MODE_NONE ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : POLYGON_CULL_FRONT ,  VK_CULL_MODE_FRONT_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : POLYGON_CULL_BACK ,  VK_CULL_MODE_BACK_BIT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// RDD::StencilOperation == VkStencilOp.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : STENCIL_OP_KEEP ,  VK_STENCIL_OP_KEEP ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : STENCIL_OP_ZERO ,  VK_STENCIL_OP_ZERO ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : STENCIL_OP_REPLACE ,  VK_STENCIL_OP_REPLACE ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : STENCIL_OP_INCREMENT_AND_CLAMP ,  VK_STENCIL_OP_INCREMENT_AND_CLAMP ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : STENCIL_OP_DECREMENT_AND_CLAMP ,  VK_STENCIL_OP_DECREMENT_AND_CLAMP ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : STENCIL_OP_INVERT ,  VK_STENCIL_OP_INVERT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : STENCIL_OP_INCREMENT_AND_WRAP ,  VK_STENCIL_OP_INCREMENT_AND_WRAP ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : STENCIL_OP_DECREMENT_AND_WRAP ,  VK_STENCIL_OP_DECREMENT_AND_WRAP ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// RDD::LogicOperation == VkLogicOp.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : LOGIC_OP_CLEAR ,  VK_LOGIC_OP_CLEAR ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : LOGIC_OP_AND ,  VK_LOGIC_OP_AND ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : LOGIC_OP_AND_REVERSE ,  VK_LOGIC_OP_AND_REVERSE ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : LOGIC_OP_COPY ,  VK_LOGIC_OP_COPY ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : LOGIC_OP_AND_INVERTED ,  VK_LOGIC_OP_AND_INVERTED ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : LOGIC_OP_NO_OP ,  VK_LOGIC_OP_NO_OP ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : LOGIC_OP_XOR ,  VK_LOGIC_OP_XOR ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : LOGIC_OP_OR ,  VK_LOGIC_OP_OR ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : LOGIC_OP_NOR ,  VK_LOGIC_OP_NOR ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : LOGIC_OP_EQUIVALENT ,  VK_LOGIC_OP_EQUIVALENT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : LOGIC_OP_INVERT ,  VK_LOGIC_OP_INVERT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : LOGIC_OP_OR_REVERSE ,  VK_LOGIC_OP_OR_REVERSE ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : LOGIC_OP_COPY_INVERTED ,  VK_LOGIC_OP_COPY_INVERTED ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : LOGIC_OP_OR_INVERTED ,  VK_LOGIC_OP_OR_INVERTED ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : LOGIC_OP_NAND ,  VK_LOGIC_OP_NAND ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : LOGIC_OP_SET ,  VK_LOGIC_OP_SET ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// RDD::BlendFactor == VkBlendFactor.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_ZERO ,  VK_BLEND_FACTOR_ZERO ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_ONE ,  VK_BLEND_FACTOR_ONE ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_SRC_COLOR ,  VK_BLEND_FACTOR_SRC_COLOR ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_ONE_MINUS_SRC_COLOR ,  VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_DST_COLOR ,  VK_BLEND_FACTOR_DST_COLOR ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_ONE_MINUS_DST_COLOR ,  VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_SRC_ALPHA ,  VK_BLEND_FACTOR_SRC_ALPHA ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_ONE_MINUS_SRC_ALPHA ,  VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_DST_ALPHA ,  VK_BLEND_FACTOR_DST_ALPHA ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_ONE_MINUS_DST_ALPHA ,  VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_CONSTANT_COLOR ,  VK_BLEND_FACTOR_CONSTANT_COLOR ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR ,  VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_CONSTANT_ALPHA ,  VK_BLEND_FACTOR_CONSTANT_ALPHA ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA ,  VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_SRC_ALPHA_SATURATE ,  VK_BLEND_FACTOR_SRC_ALPHA_SATURATE ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_SRC1_COLOR ,  VK_BLEND_FACTOR_SRC1_COLOR ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_ONE_MINUS_SRC1_COLOR ,  VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_SRC1_ALPHA ,  VK_BLEND_FACTOR_SRC1_ALPHA ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA ,  VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// RDD::BlendOperation == VkBlendOp.
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_OP_ADD ,  VK_BLEND_OP_ADD ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_OP_SUBTRACT ,  VK_BLEND_OP_SUBTRACT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_OP_REVERSE_SUBTRACT ,  VK_BLEND_OP_REVERSE_SUBTRACT ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_OP_MINIMUM ,  VK_BLEND_OP_MIN ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								static_assert ( ENUM_MEMBERS_EQUAL ( RDD : : BLEND_OP_MAXIMUM ,  VK_BLEND_OP_MAX ) ) ;  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								RDD : : PipelineID  RenderingDeviceDriverVulkan : : render_pipeline_create (  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										ShaderID  p_shader , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VertexFormatID  p_vertex_format , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										RenderPrimitive  p_render_primitive , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										PipelineRasterizationState  p_rasterization_state , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										PipelineMultisampleState  p_multisample_state , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										PipelineDepthStencilState  p_depth_stencil_state , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										PipelineColorBlendState  p_blend_state , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VectorView < int32_t >  p_color_attachments , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										BitField < PipelineDynamicStateFlags >  p_dynamic_state , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										RenderPassID  p_render_pass , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  p_render_subpass , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VectorView < PipelineSpecializationConstant >  p_specialization_constants )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Vertex.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  VkPipelineVertexInputStateCreateInfo  * vertex_input_state_create_info  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_vertex_format . id )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										const  VertexFormatInfo  * vf_info  =  ( const  VertexFormatInfo  * ) p_vertex_format . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vertex_input_state_create_info  =  & vf_info - > vk_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkPipelineVertexInputStateCreateInfo  * null_vertex_input_state  =  ALLOCA_SINGLE ( VkPipelineVertexInputStateCreateInfo ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										* null_vertex_input_state  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										null_vertex_input_state - > sType  =  VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vertex_input_state_create_info  =  null_vertex_input_state ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Input assembly.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkPipelineInputAssemblyStateCreateInfo  input_assembly_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									input_assembly_create_info . sType  =  VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									input_assembly_create_info . topology  =  RD_TO_VK_PRIMITIVE [ p_render_primitive ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									input_assembly_create_info . primitiveRestartEnable  =  ( p_render_primitive  = =  RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_RESTART_INDEX ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Tessellation.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkPipelineTessellationStateCreateInfo  tessellation_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									tessellation_create_info . sType  =  VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									ERR_FAIL_COND_V ( physical_device_properties . limits . maxTessellationPatchSize  >  0  & &  ( p_rasterization_state . patch_control_points  <  1  | |  p_rasterization_state . patch_control_points  >  physical_device_properties . limits . maxTessellationPatchSize ) ,  PipelineID ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									tessellation_create_info . patchControlPoints  =  p_rasterization_state . patch_control_points ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Viewport.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkPipelineViewportStateCreateInfo  viewport_state_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									viewport_state_create_info . sType  =  VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									viewport_state_create_info . viewportCount  =  1 ;  // If VR extensions are supported at some point, this will have to be customizable in the framebuffer format.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									viewport_state_create_info . scissorCount  =  1 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Rasterization.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkPipelineRasterizationStateCreateInfo  rasterization_state_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									rasterization_state_create_info . sType  =  VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									rasterization_state_create_info . depthClampEnable  =  p_rasterization_state . enable_depth_clamp ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									rasterization_state_create_info . rasterizerDiscardEnable  =  p_rasterization_state . discard_primitives ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									rasterization_state_create_info . polygonMode  =  p_rasterization_state . wireframe  ?  VK_POLYGON_MODE_LINE  :  VK_POLYGON_MODE_FILL ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									rasterization_state_create_info . cullMode  =  ( PolygonCullMode ) p_rasterization_state . cull_mode ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									rasterization_state_create_info . frontFace  =  ( p_rasterization_state . front_face  = =  POLYGON_FRONT_FACE_CLOCKWISE  ?  VK_FRONT_FACE_CLOCKWISE  :  VK_FRONT_FACE_COUNTER_CLOCKWISE ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									rasterization_state_create_info . depthBiasEnable  =  p_rasterization_state . depth_bias_enabled ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									rasterization_state_create_info . depthBiasConstantFactor  =  p_rasterization_state . depth_bias_constant_factor ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									rasterization_state_create_info . depthBiasClamp  =  p_rasterization_state . depth_bias_clamp ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									rasterization_state_create_info . depthBiasSlopeFactor  =  p_rasterization_state . depth_bias_slope_factor ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									rasterization_state_create_info . lineWidth  =  p_rasterization_state . line_width ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Multisample.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkPipelineMultisampleStateCreateInfo  multisample_state_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									multisample_state_create_info . sType  =  VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									multisample_state_create_info . rasterizationSamples  =  _ensure_supported_sample_count ( p_multisample_state . sample_count ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									multisample_state_create_info . sampleShadingEnable  =  p_multisample_state . enable_sample_shading ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									multisample_state_create_info . minSampleShading  =  p_multisample_state . min_sample_shading ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_multisample_state . sample_mask . size ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										static_assert ( ARRAYS_COMPATIBLE ( uint32_t ,  VkSampleMask ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										multisample_state_create_info . pSampleMask  =  p_multisample_state . sample_mask . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										multisample_state_create_info . pSampleMask  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									multisample_state_create_info . alphaToCoverageEnable  =  p_multisample_state . enable_alpha_to_coverage ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									multisample_state_create_info . alphaToOneEnable  =  p_multisample_state . enable_alpha_to_one ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Depth stencil.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkPipelineDepthStencilStateCreateInfo  depth_stencil_state_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . sType  =  VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . depthTestEnable  =  p_depth_stencil_state . enable_depth_test ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . depthWriteEnable  =  p_depth_stencil_state . enable_depth_write ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . depthCompareOp  =  ( VkCompareOp ) p_depth_stencil_state . depth_compare_operator ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . depthBoundsTestEnable  =  p_depth_stencil_state . enable_depth_range ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . stencilTestEnable  =  p_depth_stencil_state . enable_stencil ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . front . failOp  =  ( VkStencilOp ) p_depth_stencil_state . front_op . fail ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . front . passOp  =  ( VkStencilOp ) p_depth_stencil_state . front_op . pass ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . front . depthFailOp  =  ( VkStencilOp ) p_depth_stencil_state . front_op . depth_fail ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . front . compareOp  =  ( VkCompareOp ) p_depth_stencil_state . front_op . compare ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . front . compareMask  =  p_depth_stencil_state . front_op . compare_mask ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . front . writeMask  =  p_depth_stencil_state . front_op . write_mask ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . front . reference  =  p_depth_stencil_state . front_op . reference ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . back . failOp  =  ( VkStencilOp ) p_depth_stencil_state . back_op . fail ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . back . passOp  =  ( VkStencilOp ) p_depth_stencil_state . back_op . pass ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . back . depthFailOp  =  ( VkStencilOp ) p_depth_stencil_state . back_op . depth_fail ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . back . compareOp  =  ( VkCompareOp ) p_depth_stencil_state . back_op . compare ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . back . compareMask  =  p_depth_stencil_state . back_op . compare_mask ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . back . writeMask  =  p_depth_stencil_state . back_op . write_mask ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . back . reference  =  p_depth_stencil_state . back_op . reference ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . minDepthBounds  =  p_depth_stencil_state . depth_range_min ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									depth_stencil_state_create_info . maxDepthBounds  =  p_depth_stencil_state . depth_range_max ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Blend state.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkPipelineColorBlendStateCreateInfo  color_blend_state_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									color_blend_state_create_info . sType  =  VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									color_blend_state_create_info . logicOpEnable  =  p_blend_state . enable_logic_op ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									color_blend_state_create_info . logicOp  =  ( VkLogicOp ) p_blend_state . logic_op ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkPipelineColorBlendAttachmentState  * vk_attachment_states  =  ALLOCA_ARRAY ( VkPipelineColorBlendAttachmentState ,  p_color_attachments . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									{ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  i  =  0 ;  i  <  p_color_attachments . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_attachment_states [ i ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( p_color_attachments [ i ]  ! =  ATTACHMENT_UNUSED )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_attachment_states [ i ] . blendEnable  =  p_blend_state . attachments [ i ] . enable_blend ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_attachment_states [ i ] . srcColorBlendFactor  =  ( VkBlendFactor ) p_blend_state . attachments [ i ] . src_color_blend_factor ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_attachment_states [ i ] . dstColorBlendFactor  =  ( VkBlendFactor ) p_blend_state . attachments [ i ] . dst_color_blend_factor ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_attachment_states [ i ] . colorBlendOp  =  ( VkBlendOp ) p_blend_state . attachments [ i ] . color_blend_op ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_attachment_states [ i ] . srcAlphaBlendFactor  =  ( VkBlendFactor ) p_blend_state . attachments [ i ] . src_alpha_blend_factor ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_attachment_states [ i ] . dstAlphaBlendFactor  =  ( VkBlendFactor ) p_blend_state . attachments [ i ] . dst_alpha_blend_factor ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vk_attachment_states [ i ] . alphaBlendOp  =  ( VkBlendOp ) p_blend_state . attachments [ i ] . alpha_blend_op ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												if  ( p_blend_state . attachments [ i ] . write_r )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_attachment_states [ i ] . colorWriteMask  | =  VK_COLOR_COMPONENT_R_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												if  ( p_blend_state . attachments [ i ] . write_g )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_attachment_states [ i ] . colorWriteMask  | =  VK_COLOR_COMPONENT_G_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												if  ( p_blend_state . attachments [ i ] . write_b )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_attachment_states [ i ] . colorWriteMask  | =  VK_COLOR_COMPONENT_B_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												if  ( p_blend_state . attachments [ i ] . write_a )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													vk_attachment_states [ i ] . colorWriteMask  | =  VK_COLOR_COMPONENT_A_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									color_blend_state_create_info . attachmentCount  =  p_color_attachments . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									color_blend_state_create_info . pAttachments  =  vk_attachment_states ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									color_blend_state_create_info . blendConstants [ 0 ]  =  p_blend_state . blend_constant . r ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									color_blend_state_create_info . blendConstants [ 1 ]  =  p_blend_state . blend_constant . g ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									color_blend_state_create_info . blendConstants [ 2 ]  =  p_blend_state . blend_constant . b ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									color_blend_state_create_info . blendConstants [ 3 ]  =  p_blend_state . blend_constant . a ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Dynamic state.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkPipelineDynamicStateCreateInfo  dynamic_state_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									dynamic_state_create_info . sType  =  VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									static  const  uint32_t  MAX_DYN_STATE_COUNT  =  9 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkDynamicState  * vk_dynamic_states  =  ALLOCA_ARRAY ( VkDynamicState ,  MAX_DYN_STATE_COUNT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  vk_dynamic_states_count  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vk_dynamic_states [ vk_dynamic_states_count ]  =  VK_DYNAMIC_STATE_VIEWPORT ;  // Viewport and scissor are always dynamic.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vk_dynamic_states_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vk_dynamic_states [ vk_dynamic_states_count ]  =  VK_DYNAMIC_STATE_SCISSOR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vk_dynamic_states_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_dynamic_state . has_flag ( DYNAMIC_STATE_LINE_WIDTH ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_dynamic_states [ vk_dynamic_states_count ]  =  VK_DYNAMIC_STATE_LINE_WIDTH ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_dynamic_states_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_dynamic_state . has_flag ( DYNAMIC_STATE_DEPTH_BIAS ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_dynamic_states [ vk_dynamic_states_count ]  =  VK_DYNAMIC_STATE_DEPTH_BIAS ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_dynamic_states_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_dynamic_state . has_flag ( DYNAMIC_STATE_BLEND_CONSTANTS ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_dynamic_states [ vk_dynamic_states_count ]  =  VK_DYNAMIC_STATE_BLEND_CONSTANTS ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_dynamic_states_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_dynamic_state . has_flag ( DYNAMIC_STATE_DEPTH_BOUNDS ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_dynamic_states [ vk_dynamic_states_count ]  =  VK_DYNAMIC_STATE_DEPTH_BOUNDS ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_dynamic_states_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_dynamic_state . has_flag ( DYNAMIC_STATE_STENCIL_COMPARE_MASK ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_dynamic_states [ vk_dynamic_states_count ]  =  VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_dynamic_states_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_dynamic_state . has_flag ( DYNAMIC_STATE_STENCIL_WRITE_MASK ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_dynamic_states [ vk_dynamic_states_count ]  =  VK_DYNAMIC_STATE_STENCIL_WRITE_MASK ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_dynamic_states_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_dynamic_state . has_flag ( DYNAMIC_STATE_STENCIL_REFERENCE ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_dynamic_states [ vk_dynamic_states_count ]  =  VK_DYNAMIC_STATE_STENCIL_REFERENCE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_dynamic_states_count + + ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( vk_dynamic_states_count  < =  MAX_DYN_STATE_COUNT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									dynamic_state_create_info . dynamicStateCount  =  vk_dynamic_states_count ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									dynamic_state_create_info . pDynamicStates  =  vk_dynamic_states ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									void  * graphics_pipeline_nextptr  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( fsr_capabilities . attachment_supported )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Fragment shading rate.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// If FSR is used, this defines how the different FSR types are combined.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// combinerOps[0] decides how we use the output of pipeline and primitive (drawcall) FSR.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// combinerOps[1] decides how we use the output of combinerOps[0] and our attachment FSR.
 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										VkPipelineFragmentShadingRateStateCreateInfoKHR  * fsr_create_info  =  ALLOCA_SINGLE ( VkPipelineFragmentShadingRateStateCreateInfoKHR ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										* fsr_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fsr_create_info - > sType  =  VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fsr_create_info - > fragmentSize  =  {  4 ,  4  } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fsr_create_info - > combinerOps [ 0 ]  =  VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR ;  // We don't use pipeline/primitive FSR so this really doesn't matter.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										fsr_create_info - > combinerOps [ 1 ]  =  VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR ;  // Always use the outcome of attachment FSR if enabled.
 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										graphics_pipeline_nextptr  =  fsr_create_info ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Finally, pipeline create info.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  ShaderInfo  * shader_info  =  ( const  ShaderInfo  * ) p_shader . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkGraphicsPipelineCreateInfo  pipeline_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipeline_create_info . sType  =  VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipeline_create_info . pNext  =  graphics_pipeline_nextptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipeline_create_info . stageCount  =  shader_info - > vk_stages_create_info . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( pipeline_create_info . stageCount  = =  0 ,  PipelineID ( ) , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											" Cannot create pipeline without shader module, please make sure shader modules are destroyed only after all associated pipelines are created. " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									VkPipelineShaderStageCreateInfo  * vk_pipeline_stages  =  ALLOCA_ARRAY ( VkPipelineShaderStageCreateInfo ,  shader_info - > vk_stages_create_info . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  shader_info - > vk_stages_create_info . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vk_pipeline_stages [ i ]  =  shader_info - > vk_stages_create_info [ i ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( p_specialization_constants . size ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VkSpecializationMapEntry  * specialization_map_entries  =  ALLOCA_ARRAY ( VkSpecializationMapEntry ,  p_specialization_constants . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											for  ( uint32_t  j  =  0 ;  j  <  p_specialization_constants . size ( ) ;  j + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												specialization_map_entries [ j ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												specialization_map_entries [ j ] . constantID  =  p_specialization_constants [ j ] . constant_id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												specialization_map_entries [ j ] . offset  =  ( const  char  * ) & p_specialization_constants [ j ] . int_value  -  ( const  char  * ) p_specialization_constants . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												specialization_map_entries [ j ] . size  =  sizeof ( uint32_t ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VkSpecializationInfo  * specialization_info  =  ALLOCA_SINGLE ( VkSpecializationInfo ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											* specialization_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											specialization_info - > dataSize  =  p_specialization_constants . size ( )  *  sizeof ( PipelineSpecializationConstant ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											specialization_info - > pData  =  p_specialization_constants . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											specialization_info - > mapEntryCount  =  p_specialization_constants . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											specialization_info - > pMapEntries  =  specialization_map_entries ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											vk_pipeline_stages [ i ] . pSpecializationInfo  =  specialization_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  RenderPassInfo  * render_pass  =  ( const  RenderPassInfo  * ) ( p_render_pass . id ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									pipeline_create_info . pStages  =  vk_pipeline_stages ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipeline_create_info . pVertexInputState  =  vertex_input_state_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipeline_create_info . pInputAssemblyState  =  & input_assembly_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipeline_create_info . pTessellationState  =  & tessellation_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipeline_create_info . pViewportState  =  & viewport_state_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipeline_create_info . pRasterizationState  =  & rasterization_state_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipeline_create_info . pMultisampleState  =  & multisample_state_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipeline_create_info . pDepthStencilState  =  & depth_stencil_state_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipeline_create_info . pColorBlendState  =  & color_blend_state_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipeline_create_info . pDynamicState  =  & dynamic_state_create_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipeline_create_info . layout  =  shader_info - > vk_pipeline_layout ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									pipeline_create_info . renderPass  =  render_pass - > vk_render_pass ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									pipeline_create_info . subpass  =  p_render_subpass ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// ---
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkPipeline  vk_pipeline  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  err  =  vkCreateGraphicsPipelines ( vk_device ,  pipelines_cache . vk_cache ,  1 ,  & pipeline_create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_PIPELINE ) ,  & vk_pipeline ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-21 22:32:58 +02:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( err ,  PipelineID ( ) ,  " vkCreateGraphicsPipelines failed with error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  PipelineID ( vk_pipeline ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** COMPUTE ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// ----- COMMANDS -----
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_bind_compute_pipeline ( CommandBufferID  p_cmd_buffer ,  PipelineID  p_pipeline )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkCmdBindPipeline ( command_buffer - > vk_command_buffer ,  VK_PIPELINE_BIND_POINT_COMPUTE ,  ( VkPipeline ) p_pipeline . id ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_bind_compute_uniform_set ( CommandBufferID  p_cmd_buffer ,  UniformSetID  p_uniform_set ,  ShaderID  p_shader ,  uint32_t  p_set_index )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									const  ShaderInfo  * shader_info  =  ( const  ShaderInfo  * ) p_shader . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  UniformSetInfo  * usi  =  ( const  UniformSetInfo  * ) p_uniform_set . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdBindDescriptorSets ( command_buffer - > vk_command_buffer ,  VK_PIPELINE_BIND_POINT_COMPUTE ,  shader_info - > vk_pipeline_layout ,  p_set_index ,  1 ,  & usi - > vk_descriptor_set ,  0 ,  nullptr ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_bind_compute_uniform_sets ( CommandBufferID  p_cmd_buffer ,  VectorView < UniformSetID >  p_uniform_sets ,  ShaderID  p_shader ,  uint32_t  p_first_set_index ,  uint32_t  p_set_count )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_set_count  = =  0 )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									thread_local  LocalVector < VkDescriptorSet >  sets ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sets . clear ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									sets . resize ( p_set_count ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									for  ( uint32_t  i  =  0 ;  i  <  p_set_count ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										sets [ i ]  =  ( ( const  UniformSetInfo  * ) p_uniform_sets [ i ] . id ) - > vk_descriptor_set ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  ShaderInfo  * shader_info  =  ( const  ShaderInfo  * ) p_shader . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdBindDescriptorSets ( command_buffer - > vk_command_buffer ,  VK_PIPELINE_BIND_POINT_COMPUTE ,  shader_info - > vk_pipeline_layout ,  p_first_set_index ,  p_set_count ,  & sets [ 0 ] ,  0 ,  nullptr ) ; 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_compute_dispatch ( CommandBufferID  p_cmd_buffer ,  uint32_t  p_x_groups ,  uint32_t  p_y_groups ,  uint32_t  p_z_groups )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkCmdDispatch ( command_buffer - > vk_command_buffer ,  p_x_groups ,  p_y_groups ,  p_z_groups ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_compute_dispatch_indirect ( CommandBufferID  p_cmd_buffer ,  BufferID  p_indirect_buffer ,  uint64_t  p_offset )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									const  BufferInfo  * buf_info  =  ( const  BufferInfo  * ) p_indirect_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdDispatchIndirect ( command_buffer - > vk_command_buffer ,  buf_info - > vk_buffer ,  p_offset ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// ----- PIPELINE -----
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								RDD : : PipelineID  RenderingDeviceDriverVulkan : : compute_pipeline_create ( ShaderID  p_shader ,  VectorView < PipelineSpecializationConstant >  p_specialization_constants )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									const  ShaderInfo  * shader_info  =  ( const  ShaderInfo  * ) p_shader . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkComputePipelineCreateInfo  pipeline_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipeline_create_info . sType  =  VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipeline_create_info . stage  =  shader_info - > vk_stages_create_info [ 0 ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									pipeline_create_info . layout  =  shader_info - > vk_pipeline_layout ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( p_specialization_constants . size ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkSpecializationMapEntry  * specialization_map_entries  =  ALLOCA_ARRAY ( VkSpecializationMapEntry ,  p_specialization_constants . size ( ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  i  =  0 ;  i  <  p_specialization_constants . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											specialization_map_entries [ i ]  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											specialization_map_entries [ i ] . constantID  =  p_specialization_constants [ i ] . constant_id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											specialization_map_entries [ i ] . offset  =  ( const  char  * ) & p_specialization_constants [ i ] . int_value  -  ( const  char  * ) p_specialization_constants . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											specialization_map_entries [ i ] . size  =  sizeof ( uint32_t ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkSpecializationInfo  * specialization_info  =  ALLOCA_SINGLE ( VkSpecializationInfo ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										* specialization_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										specialization_info - > dataSize  =  p_specialization_constants . size ( )  *  sizeof ( PipelineSpecializationConstant ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										specialization_info - > pData  =  p_specialization_constants . ptr ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										specialization_info - > mapEntryCount  =  p_specialization_constants . size ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										specialization_info - > pMapEntries  =  specialization_map_entries ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										pipeline_create_info . stage . pSpecializationInfo  =  specialization_info ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkPipeline  vk_pipeline  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkResult  err  =  vkCreateComputePipelines ( vk_device ,  pipelines_cache . vk_cache ,  1 ,  & pipeline_create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_PIPELINE ) ,  & vk_pipeline ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									ERR_FAIL_COND_V_MSG ( err ,  PipelineID ( ) ,  " vkCreateComputePipelines failed with error  "  +  itos ( err )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  PipelineID ( vk_pipeline ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** QUERIES ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/*****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								// ----- TIMESTAMP -----
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								RDD : : QueryPoolID  RenderingDeviceDriverVulkan : : timestamp_query_pool_create ( uint32_t  p_query_count )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkQueryPoolCreateInfo  query_pool_create_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									query_pool_create_info . sType  =  VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									query_pool_create_info . queryType  =  VK_QUERY_TYPE_TIMESTAMP ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									query_pool_create_info . queryCount  =  p_query_count ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkQueryPool  vk_query_pool  =  VK_NULL_HANDLE ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCreateQueryPool ( vk_device ,  & query_pool_create_info ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_QUERY_POOL ) ,  & vk_query_pool ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									return  RDD : : QueryPoolID ( vk_query_pool ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : timestamp_query_pool_free ( QueryPoolID  p_pool_id )  {  
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkDestroyQueryPool ( vk_device ,  ( VkQueryPool ) p_pool_id . id ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_QUERY_POOL ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : timestamp_query_pool_get_results ( QueryPoolID  p_pool_id ,  uint32_t  p_query_count ,  uint64_t  * r_results )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkGetQueryPoolResults ( vk_device ,  ( VkQueryPool ) p_pool_id . id ,  0 ,  p_query_count ,  sizeof ( uint64_t )  *  p_query_count ,  r_results ,  sizeof ( uint64_t ) ,  VK_QUERY_RESULT_64_BIT ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								uint64_t  RenderingDeviceDriverVulkan : : timestamp_query_result_to_time ( uint64_t  p_result )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// This sucks because timestampPeriod multiplier is a float, while the timestamp is 64 bits nanosecs.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// So, in cases like nvidia which give you enormous numbers and 1 as multiplier, multiplying is next to impossible.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Need to do 128 bits fixed point multiplication to get the right value.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									auto  mult64to128  =  [ ] ( uint64_t  u ,  uint64_t  v ,  uint64_t  & h ,  uint64_t  & l )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint64_t  u1  =  ( u  &  0xffffffff ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint64_t  v1  =  ( v  &  0xffffffff ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint64_t  t  =  ( u1  *  v1 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint64_t  w3  =  ( t  &  0xffffffff ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint64_t  k  =  ( t  > >  32 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										u  > > =  32 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										t  =  ( u  *  v1 )  +  k ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										k  =  ( t  &  0xffffffff ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint64_t  w1  =  ( t  > >  32 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										v  > > =  32 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										t  =  ( u1  *  v )  +  k ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										k  =  ( t  > >  32 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										h  =  ( u  *  v )  +  w1  +  k ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										l  =  ( t  < <  32 )  +  w3 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint64_t  shift_bits  =  16 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint64_t  h  =  0 ,  l  =  0 ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									mult64to128 ( p_result ,  uint64_t ( double ( physical_device_properties . limits . timestampPeriod )  *  double ( 1  < <  shift_bits ) ) ,  h ,  l ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									l  > > =  shift_bits ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									l  | =  h  < <  ( 64  -  shift_bits ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  l ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_timestamp_query_pool_reset ( CommandBufferID  p_cmd_buffer ,  QueryPoolID  p_pool_id ,  uint32_t  p_query_count )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkCmdResetQueryPool ( command_buffer - > vk_command_buffer ,  ( VkQueryPool ) p_pool_id . id ,  0 ,  p_query_count ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_timestamp_write ( CommandBufferID  p_cmd_buffer ,  QueryPoolID  p_pool_id ,  uint32_t  p_index )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkCmdWriteTimestamp ( command_buffer - > vk_command_buffer ,  VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT ,  ( VkQueryPool ) p_pool_id . id ,  p_index ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								/****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** LABELS ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_begin_label ( CommandBufferID  p_cmd_buffer ,  const  char  * p_label_name ,  const  Color  & p_color )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  RenderingContextDriverVulkan : : Functions  & functions  =  context_driver - > functions_get ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( ! functions . CmdBeginDebugUtilsLabelEXT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( functions . CmdDebugMarkerBeginEXT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// Debug marker extensions.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											VkDebugMarkerMarkerInfoEXT  marker ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											marker . sType  =  VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											marker . pNext  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											marker . pMarkerName  =  p_label_name ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											marker . color [ 0 ]  =  p_color [ 0 ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											marker . color [ 1 ]  =  p_color [ 1 ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											marker . color [ 2 ]  =  p_color [ 2 ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											marker . color [ 3 ]  =  p_color [ 3 ] ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											functions . CmdDebugMarkerBeginEXT ( command_buffer - > vk_command_buffer ,  & marker ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									VkDebugUtilsLabelEXT  label ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									label . sType  =  VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									label . pNext  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									label . pLabelName  =  p_label_name ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									label . color [ 0 ]  =  p_color [ 0 ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									label . color [ 1 ]  =  p_color [ 1 ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									label . color [ 2 ]  =  p_color [ 2 ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									label . color [ 3 ]  =  p_color [ 3 ] ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									functions . CmdBeginDebugUtilsLabelEXT ( command_buffer - > vk_command_buffer ,  & label ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-11-24 08:23:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_end_label ( CommandBufferID  p_cmd_buffer )  {  
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  RenderingContextDriverVulkan : : Functions  & functions  =  context_driver - > functions_get ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( ! functions . CmdEndDebugUtilsLabelEXT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( functions . CmdDebugMarkerEndEXT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// Debug marker extensions.
 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											functions . CmdDebugMarkerEndEXT ( command_buffer - > vk_command_buffer ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									functions . CmdEndDebugUtilsLabelEXT ( command_buffer - > vk_command_buffer ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								/****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** DEBUG *****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/****************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : command_insert_breadcrumb ( CommandBufferID  p_cmd_buffer ,  uint32_t  p_data )  {  
						 
					
						
							
								
									
										
										
										
											2024-10-21 15:35:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# if defined(DEBUG_ENABLED) || defined(DEV_ENABLED) 
  
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( p_data  = =  BreadcrumbMarker : : NONE )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-21 15:35:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  CommandBufferInfo  * command_buffer  =  ( const  CommandBufferInfo  * ) p_cmd_buffer . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-21 15:35:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( Engine : : get_singleton ( ) - > is_accurate_breadcrumbs_enabled ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// Force a full barrier so commands are not executed in parallel.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// This will mean that the last breadcrumb to see was actually the
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										// last (group of) command to be executed (hence, the one causing the crash).
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										VkMemoryBarrier  memoryBarrier ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										memoryBarrier . sType  =  VK_STRUCTURE_TYPE_MEMORY_BARRIER ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										memoryBarrier . pNext  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										memoryBarrier . srcAccessMask  =  VK_ACCESS_INDIRECT_COMMAND_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_INDEX_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_UNIFORM_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_INPUT_ATTACHMENT_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_SHADER_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_SHADER_WRITE_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_COLOR_ATTACHMENT_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_TRANSFER_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_TRANSFER_WRITE_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_HOST_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_HOST_WRITE_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										memoryBarrier . dstAccessMask  =  VK_ACCESS_INDIRECT_COMMAND_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_INDEX_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_UNIFORM_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_INPUT_ATTACHMENT_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_SHADER_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_SHADER_WRITE_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_COLOR_ATTACHMENT_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_TRANSFER_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_TRANSFER_WRITE_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_HOST_READ_BIT  | 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_ACCESS_HOST_WRITE_BIT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vkCmdPipelineBarrier ( 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												command_buffer - > vk_command_buffer , 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-21 15:35:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												VK_PIPELINE_STAGE_ALL_COMMANDS_BIT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												VK_PIPELINE_STAGE_ALL_COMMANDS_BIT , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												0 ,  1u ,  & memoryBarrier ,  0u ,  nullptr ,  0u ,  nullptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// We write to a circular buffer. If you're getting barrier sync errors here,
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// increase the value of BREADCRUMB_BUFFER_ENTRIES.
 
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									vkCmdFillBuffer ( command_buffer - > vk_command_buffer ,  ( ( BufferInfo  * ) breadcrumb_buffer . id ) - > vk_buffer ,  breadcrumb_offset ,  sizeof ( uint32_t ) ,  breadcrumb_id + + ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkCmdFillBuffer ( command_buffer - > vk_command_buffer ,  ( ( BufferInfo  * ) breadcrumb_buffer . id ) - > vk_buffer ,  breadcrumb_offset  +  sizeof ( uint32_t ) ,  sizeof ( uint32_t ) ,  p_data ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-21 15:35:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									breadcrumb_offset  + =  sizeof ( uint32_t )  *  2u ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( breadcrumb_offset  > =  BREADCRUMB_BUFFER_ENTRIES  *  sizeof ( uint32_t )  *  2u )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										breadcrumb_offset  =  0u ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : on_device_lost ( )  const  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( device_functions . GetDeviceFaultInfoEXT  = =  nullptr )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										_err_print_error ( FUNCTION_STR ,  __FILE__ ,  __LINE__ ,  " VK_EXT_device_fault not available. " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkDeviceFaultCountsEXT  fault_counts  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									fault_counts . sType  =  VK_STRUCTURE_TYPE_DEVICE_FAULT_COUNTS_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkResult  vkres  =  device_functions . GetDeviceFaultInfoEXT ( vk_device ,  & fault_counts ,  nullptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( vkres  ! =  VK_SUCCESS )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										_err_print_error ( FUNCTION_STR ,  __FILE__ ,  __LINE__ ,  " vkGetDeviceFaultInfoEXT returned  "  +  itos ( vkres )  +  "  when getting fault count, skipping VK_EXT_device_fault report... " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									String  err_msg ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkDeviceFaultInfoEXT  fault_info  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									fault_info . sType  =  VK_STRUCTURE_TYPE_DEVICE_FAULT_INFO_EXT ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									fault_info . pVendorInfos  =  fault_counts . vendorInfoCount 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											?  ( VkDeviceFaultVendorInfoEXT  * ) memalloc ( fault_counts . vendorInfoCount  *  sizeof ( VkDeviceFaultVendorInfoEXT ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											:  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									fault_info . pAddressInfos  = 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											fault_counts . addressInfoCount 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											?  ( VkDeviceFaultAddressInfoEXT  * ) memalloc ( fault_counts . addressInfoCount  *  sizeof ( VkDeviceFaultAddressInfoEXT ) ) 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											:  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									fault_counts . vendorBinarySize  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vkres  =  device_functions . GetDeviceFaultInfoEXT ( vk_device ,  & fault_counts ,  & fault_info ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( vkres  ! =  VK_SUCCESS )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										_err_print_error ( FUNCTION_STR ,  __FILE__ ,  __LINE__ ,  " vkGetDeviceFaultInfoEXT returned  "  +  itos ( vkres )  +  "  when getting fault info, skipping VK_EXT_device_fault report... " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										err_msg  + =  " ** Report from VK_EXT_device_fault ** " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										err_msg  + =  " \n Description:  "  +  String ( fault_info . description ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										err_msg  + =  " \n Vendor infos: " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  vd  =  0 ;  vd  <  fault_counts . vendorInfoCount ;  + + vd )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  VkDeviceFaultVendorInfoEXT  * vendor_info  =  & fault_info . pVendorInfos [ vd ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											err_msg  + =  " \n Info  "  +  itos ( vd ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											err_msg  + =  " \n    Description:  "  +  String ( vendor_info - > description ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											err_msg  + =  " \n    Fault code :  "  +  itos ( vendor_info - > vendorFaultCode ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											err_msg  + =  " \n    Fault data :  "  +  itos ( vendor_info - > vendorFaultData ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										static  constexpr  const  char  * addressTypeNames [ ]  =  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											" NONE " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											" READ_INVALID " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											" WRITE_INVALID " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											" EXECUTE_INVALID " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											" INSTRUCTION_POINTER_UNKNOWN " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											" INSTRUCTION_POINTER_INVALID " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											" INSTRUCTION_POINTER_FAULT " , 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										err_msg  + =  " \n Addresses info: " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( uint32_t  ad  =  0 ;  ad  <  fault_counts . addressInfoCount ;  + + ad )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  VkDeviceFaultAddressInfoEXT  * addr_info  =  & fault_info . pAddressInfos [ ad ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// From https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkDeviceFaultAddressInfoEXT.html
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  VkDeviceAddress  lower  =  ( addr_info - > reportedAddress  &  ~ ( addr_info - > addressPrecision  -  1 ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  VkDeviceAddress  upper  =  ( addr_info - > reportedAddress  |  ( addr_info - > addressPrecision  -  1 ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											err_msg  + =  " \n Info  "  +  itos ( ad ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											err_msg  + =  " \n    Type            :  "  +  String ( addressTypeNames [ addr_info - > addressType ] ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											err_msg  + =  " \n    Reported address:  "  +  itos ( addr_info - > reportedAddress ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											err_msg  + =  " \n    Lower address   :  "  +  itos ( lower ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											err_msg  + =  " \n    Upper address   :  "  +  itos ( upper ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											err_msg  + =  " \n    Precision       :  "  +  itos ( addr_info - > addressPrecision ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_err_print_error ( FUNCTION_STR ,  __FILE__ ,  __LINE__ ,  err_msg ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( fault_info . pVendorInfos )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										memfree ( fault_info . pVendorInfos ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( fault_info . pAddressInfos )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										memfree ( fault_info . pAddressInfos ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2024-08-24 17:39:46 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									_err_print_error ( FUNCTION_STR ,  __FILE__ ,  __LINE__ ,  context_driver - > get_driver_and_device_memory_report ( ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : print_lost_device_info ( )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# if defined(DEBUG_ENABLED) || defined(DEV_ENABLED) 
  
						 
					
						
							
								
									
										
										
										
											2024-10-21 15:35:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									{ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										String  error_msg  =  " Printing last known breadcrumbs in reverse order (last executed first). " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										if  ( ! Engine : : get_singleton ( ) - > is_accurate_breadcrumbs_enabled ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											error_msg  + =  " \n Some of them might be inaccurate. Try running with --accurate-breadcrumbs for precise information. " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										_err_print_error ( FUNCTION_STR ,  __FILE__ ,  __LINE__ ,  error_msg ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-10-21 15:35:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									uint8_t  * breadcrumb_ptr  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VkResult  map_result  =  VK_SUCCESS ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vmaFlushAllocation ( allocator ,  ( ( BufferInfo  * ) breadcrumb_buffer . id ) - > allocation . handle ,  0 ,  BREADCRUMB_BUFFER_ENTRIES  *  sizeof ( uint32_t )  *  2u ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vmaInvalidateAllocation ( allocator ,  ( ( BufferInfo  * ) breadcrumb_buffer . id ) - > allocation . handle ,  0 ,  BREADCRUMB_BUFFER_ENTRIES  *  sizeof ( uint32_t )  *  2u ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									{ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										void  * ptr  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										map_result  =  vmaMapMemory ( allocator ,  ( ( BufferInfo  * ) breadcrumb_buffer . id ) - > allocation . handle ,  & ptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										breadcrumb_ptr  =  reinterpret_cast < uint8_t  * > ( ptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( breadcrumb_ptr  & &  map_result  = =  VK_SUCCESS )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										uint32_t  last_breadcrumb_offset  =  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										{ 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											_err_print_error_asap ( " Searching last breadcrumb. We've sent up to ID:  "  +  itos ( breadcrumb_id  -  1u ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// Scan the whole buffer to find the offset with the highest ID.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// That means that was the last one to be written.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											//
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// We use "breadcrumb_id - id" to account for wraparound.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// e.g. breadcrumb_id = 2 and id = 4294967294; then 2 - 4294967294 = 4.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// The one with the smallest difference is the closest to breadcrumb_id, which means it's
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// the last written command.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  biggest_id  =  0u ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											uint32_t  smallest_id_diff  =  std : : numeric_limits < uint32_t > : : max ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  uint32_t  * breadcrumb_ptr32  =  reinterpret_cast < const  uint32_t  * > ( breadcrumb_ptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											for  ( size_t  i  =  0u ;  i  <  BREADCRUMB_BUFFER_ENTRIES ;  + + i )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												const  uint32_t  id  =  breadcrumb_ptr32 [ i  *  2u ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												const  uint32_t  id_diff  =  breadcrumb_id  -  id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												if  ( id_diff  <  smallest_id_diff )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													biggest_id  =  i ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													smallest_id_diff  =  id_diff ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											_err_print_error_asap ( " Last breadcrumb ID found:  "  +  itos ( breadcrumb_ptr32 [ biggest_id  *  2u ] ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											last_breadcrumb_offset  =  biggest_id  *  sizeof ( uint32_t )  *  2u ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										const  size_t  entries_to_print  =  8u ;  // Note: The value is arbitrary.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( size_t  i  =  0u ;  i  <  entries_to_print ;  + + i )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  uint32_t  last_breadcrumb  =  * reinterpret_cast < uint32_t  * > ( breadcrumb_ptr  +  last_breadcrumb_offset  +  sizeof ( uint32_t ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  uint32_t  phase  =  last_breadcrumb  &  uint32_t ( ~ ( ( 1  < <  16 )  -  1 ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  uint32_t  user_data  =  last_breadcrumb  &  ( ( 1  < <  16 )  -  1 ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											String  error_msg  =  " Last known breadcrumb:  " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											switch  ( phase )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  BreadcrumbMarker : : ALPHA_PASS : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													error_msg  + =  " ALPHA_PASS " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  BreadcrumbMarker : : BLIT_PASS : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													error_msg  + =  " BLIT_PASS " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  BreadcrumbMarker : : DEBUG_PASS : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													error_msg  + =  " DEBUG_PASS " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  BreadcrumbMarker : : LIGHTMAPPER_PASS : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													error_msg  + =  " LIGHTMAPPER_PASS " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  BreadcrumbMarker : : OPAQUE_PASS : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													error_msg  + =  " OPAQUE_PASS " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  BreadcrumbMarker : : POST_PROCESSING_PASS : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													error_msg  + =  " POST_PROCESSING_PASS " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  BreadcrumbMarker : : REFLECTION_PROBES : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													error_msg  + =  " REFLECTION_PROBES " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  BreadcrumbMarker : : SHADOW_PASS_CUBE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													error_msg  + =  " SHADOW_PASS_CUBE " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  BreadcrumbMarker : : SHADOW_PASS_DIRECTIONAL : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													error_msg  + =  " SHADOW_PASS_DIRECTIONAL " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  BreadcrumbMarker : : SKY_PASS : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													error_msg  + =  " SKY_PASS " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  BreadcrumbMarker : : TRANSPARENT_PASS : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													error_msg  + =  " TRANSPARENT_PASS " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												case  BreadcrumbMarker : : UI_PASS : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													error_msg  + =  " UI_PASS " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												default : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													error_msg  + =  " UNKNOWN_BREADCRUMB( "  +  itos ( ( uint32_t ) phase )  +  ' ) ' ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
													break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( user_data  ! =  0 )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												error_msg  + =  "  | User data:  "  +  itos ( user_data ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											_err_print_error_asap ( error_msg ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-10-21 15:35:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											if  ( last_breadcrumb_offset  = =  0u )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												// Decrement last_breadcrumb_idx, wrapping underflow.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												last_breadcrumb_offset  =  BREADCRUMB_BUFFER_ENTRIES  *  sizeof ( uint32_t )  *  2u ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											last_breadcrumb_offset  - =  sizeof ( uint32_t )  *  2u ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vmaUnmapMemory ( allocator ,  ( ( BufferInfo  * ) breadcrumb_buffer . id ) - > allocation . handle ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										breadcrumb_ptr  =  nullptr ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										_err_print_error ( FUNCTION_STR ,  __FILE__ ,  __LINE__ ,  " Couldn't map breadcrumb buffer. VkResult =  "  +  itos ( map_result ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									on_device_lost ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-06 02:17:04 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								inline  String  RenderingDeviceDriverVulkan : : get_vulkan_result ( VkResult  err )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# if defined(DEBUG_ENABLED) || defined(DEV_ENABLED) 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( err  = =  VK_ERROR_OUT_OF_HOST_MEMORY )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return  " VK_ERROR_OUT_OF_HOST_MEMORY " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  if  ( err  = =  VK_ERROR_OUT_OF_DEVICE_MEMORY )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return  " VK_ERROR_OUT_OF_DEVICE_MEMORY " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  if  ( err  = =  VK_ERROR_DEVICE_LOST )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return  " VK_ERROR_DEVICE_LOST " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  if  ( err  = =  VK_ERROR_SURFACE_LOST_KHR )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return  " VK_ERROR_SURFACE_LOST_KHR " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									}  else  if  ( err  = =  VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return  " VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  itos ( err ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								/********************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** SUBMISSION ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/********************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : begin_segment ( uint32_t  p_frame_index ,  uint32_t  p_frames_drawn )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									// Per-frame segments are not required in Vulkan.
 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : end_segment ( )  {  
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Per-frame segments are not required in Vulkan.
 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**** MISC ****/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								/**************/  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								void  RenderingDeviceDriverVulkan : : set_object_name ( ObjectType  p_type ,  ID  p_driver_id ,  const  String  & p_name )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									switch  ( p_type )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  OBJECT_TYPE_TEXTURE :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  TextureInfo  * tex_info  =  ( const  TextureInfo  * ) p_driver_id . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											if  ( tex_info - > allocation . handle )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												_set_object_name ( VK_OBJECT_TYPE_IMAGE ,  ( uint64_t ) tex_info - > vk_view_create_info . image ,  p_name ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											_set_object_name ( VK_OBJECT_TYPE_IMAGE_VIEW ,  ( uint64_t ) tex_info - > vk_view ,  p_name  +  "  View " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  OBJECT_TYPE_SAMPLER :  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											_set_object_name ( VK_OBJECT_TYPE_SAMPLER ,  p_driver_id . id ,  p_name ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  OBJECT_TYPE_BUFFER :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  BufferInfo  * buf_info  =  ( const  BufferInfo  * ) p_driver_id . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											_set_object_name ( VK_OBJECT_TYPE_BUFFER ,  ( uint64_t ) buf_info - > vk_buffer ,  p_name ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											if  ( buf_info - > vk_view )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												_set_object_name ( VK_OBJECT_TYPE_BUFFER_VIEW ,  ( uint64_t ) buf_info - > vk_view ,  p_name  +  "  View " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  OBJECT_TYPE_SHADER :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  ShaderInfo  * shader_info  =  ( const  ShaderInfo  * ) p_driver_id . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											for  ( uint32_t  i  =  0 ;  i  <  shader_info - > vk_descriptor_set_layouts . size ( ) ;  i + + )  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
												_set_object_name ( VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT ,  ( uint64_t ) shader_info - > vk_descriptor_set_layouts [ i ] ,  p_name ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											_set_object_name ( VK_OBJECT_TYPE_PIPELINE_LAYOUT ,  ( uint64_t ) shader_info - > vk_pipeline_layout ,  p_name  +  "  Pipeline Layout " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  OBJECT_TYPE_UNIFORM_SET :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  UniformSetInfo  * usi  =  ( const  UniformSetInfo  * ) p_driver_id . id ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											_set_object_name ( VK_OBJECT_TYPE_DESCRIPTOR_SET ,  ( uint64_t ) usi - > vk_descriptor_set ,  p_name ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  OBJECT_TYPE_PIPELINE :  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											_set_object_name ( VK_OBJECT_TYPE_PIPELINE ,  ( uint64_t ) p_driver_id . id ,  p_name ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										}  break ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										default :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											DEV_ASSERT ( false ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								uint64_t  RenderingDeviceDriverVulkan : : get_resource_native_handle ( DriverResource  p_type ,  ID  p_driver_id )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									switch  ( p_type )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  DRIVER_RESOURCE_LOGICAL_DEVICE :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  ( uint64_t ) vk_device ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  DRIVER_RESOURCE_PHYSICAL_DEVICE :  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											return  ( uint64_t ) physical_device ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  DRIVER_RESOURCE_TOPMOST_OBJECT :  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											return  ( uint64_t ) context_driver - > instance_get ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  DRIVER_RESOURCE_COMMAND_QUEUE :  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											const  CommandQueue  * queue_info  =  ( const  CommandQueue  * ) p_driver_id . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  ( uint64_t ) queue_families [ queue_info - > queue_family ] [ queue_info - > queue_index ] . queue ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  DRIVER_RESOURCE_QUEUE_FAMILY :  { 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											return  uint32_t ( p_driver_id . id )  -  1 ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  DRIVER_RESOURCE_TEXTURE :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  TextureInfo  * tex_info  =  ( const  TextureInfo  * ) p_driver_id . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  ( uint64_t ) tex_info - > vk_view_create_info . image ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  DRIVER_RESOURCE_TEXTURE_VIEW :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  TextureInfo  * tex_info  =  ( const  TextureInfo  * ) p_driver_id . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  ( uint64_t ) tex_info - > vk_view ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  DRIVER_RESOURCE_TEXTURE_DATA_FORMAT :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											const  TextureInfo  * tex_info  =  ( const  TextureInfo  * ) p_driver_id . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  ( uint64_t ) tex_info - > vk_view_create_info . format ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  DRIVER_RESOURCE_SAMPLER : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  DRIVER_RESOURCE_UNIFORM_SET : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  DRIVER_RESOURCE_BUFFER : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  DRIVER_RESOURCE_COMPUTE_PIPELINE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  DRIVER_RESOURCE_RENDER_PIPELINE :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  p_driver_id . id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										default :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  0 ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								uint64_t  RenderingDeviceDriverVulkan : : get_total_memory_used ( )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									VmaTotalStatistics  stats  =  { } ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vmaCalculateStatistics ( allocator ,  & stats ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  stats . total . statistics . allocationBytes ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								uint64_t  RenderingDeviceDriverVulkan : : get_lazily_memory_used ( )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  vmaCalculateLazilyAllocatedBytes ( allocator ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								uint64_t  RenderingDeviceDriverVulkan : : limit_get ( Limit  p_limit )  {  
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									const  VkPhysicalDeviceLimits  & limits  =  physical_device_properties . limits ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-01-19 14:29:45 -08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									uint64_t  safe_unbounded  =  ( ( uint64_t ) 1  < <  30 ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									switch  ( p_limit )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_BOUND_UNIFORM_SETS : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxBoundDescriptorSets ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_FRAMEBUFFER_COLOR_ATTACHMENTS : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxColorAttachments ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_TEXTURES_PER_UNIFORM_SET : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxDescriptorSetSampledImages ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_SAMPLERS_PER_UNIFORM_SET : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxDescriptorSetSamplers ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_STORAGE_BUFFERS_PER_UNIFORM_SET : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxDescriptorSetStorageBuffers ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_STORAGE_IMAGES_PER_UNIFORM_SET : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxDescriptorSetStorageImages ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_UNIFORM_BUFFERS_PER_UNIFORM_SET : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxDescriptorSetUniformBuffers ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_DRAW_INDEXED_INDEX : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxDrawIndexedIndexValue ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_FRAMEBUFFER_HEIGHT : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxFramebufferHeight ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_FRAMEBUFFER_WIDTH : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxFramebufferWidth ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_TEXTURE_ARRAY_LAYERS : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxImageArrayLayers ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_TEXTURE_SIZE_1D : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxImageDimension1D ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_TEXTURE_SIZE_2D : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxImageDimension2D ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_TEXTURE_SIZE_3D : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxImageDimension3D ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_TEXTURE_SIZE_CUBE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxImageDimensionCube ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_TEXTURES_PER_SHADER_STAGE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxPerStageDescriptorSampledImages ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_SAMPLERS_PER_SHADER_STAGE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxPerStageDescriptorSamplers ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_STORAGE_BUFFERS_PER_SHADER_STAGE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxPerStageDescriptorStorageBuffers ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_STORAGE_IMAGES_PER_SHADER_STAGE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxPerStageDescriptorStorageImages ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_UNIFORM_BUFFERS_PER_SHADER_STAGE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxPerStageDescriptorUniformBuffers ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_PUSH_CONSTANT_SIZE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxPushConstantsSize ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_UNIFORM_BUFFER_SIZE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxUniformBufferRange ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_VERTEX_INPUT_ATTRIBUTE_OFFSET : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxVertexInputAttributeOffset ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_VERTEX_INPUT_ATTRIBUTES : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxVertexInputAttributes ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_VERTEX_INPUT_BINDINGS : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxVertexInputBindings ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_VERTEX_INPUT_BINDING_STRIDE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxVertexInputBindingStride ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MIN_UNIFORM_BUFFER_OFFSET_ALIGNMENT : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . minUniformBufferOffsetAlignment ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_X : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxComputeWorkGroupCount [ 0 ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Y : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxComputeWorkGroupCount [ 1 ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Z : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxComputeWorkGroupCount [ 2 ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_COMPUTE_WORKGROUP_INVOCATIONS : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxComputeWorkGroupInvocations ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_X : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxComputeWorkGroupSize [ 0 ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Y : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxComputeWorkGroupSize [ 1 ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxComputeWorkGroupSize [ 2 ] ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-01-19 14:29:45 -08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										case  LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxComputeSharedMemorySize ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_VIEWPORT_DIMENSIONS_X : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxViewportDimensions [ 0 ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  LIMIT_MAX_VIEWPORT_DIMENSIONS_Y : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  limits . maxViewportDimensions [ 1 ] ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										case  LIMIT_SUBGROUP_SIZE : 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											return  subgroup_capabilities . size ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										case  LIMIT_SUBGROUP_MIN_SIZE : 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											return  subgroup_capabilities . min_size ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										case  LIMIT_SUBGROUP_MAX_SIZE : 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											return  subgroup_capabilities . max_size ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										case  LIMIT_SUBGROUP_IN_SHADERS : 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											return  subgroup_capabilities . supported_stages_flags_rd ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										case  LIMIT_SUBGROUP_OPERATIONS : 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											return  subgroup_capabilities . supported_operations_flags_rd ( ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-02-12 16:12:46 -08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										case  LIMIT_MAX_SHADER_VARYINGS : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// The Vulkan spec states that built in varyings like gl_FragCoord should count against this, but in
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// practice, that doesn't seem to be the case. The validation layers don't even complain.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  MIN ( limits . maxVertexOutputComponents  /  4 ,  limits . maxFragmentInputComponents  /  4 ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-01-19 14:29:45 -08:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										default :  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# ifdef DEV_ENABLED 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											WARN_PRINT ( " Returning maximum value for unknown limit  "  +  itos ( p_limit )  +  " . " ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  safe_unbounded ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								uint64_t  RenderingDeviceDriverVulkan : : api_trait_get ( ApiTrait  p_trait )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									switch  ( p_trait )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT : 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											return  ( uint64_t ) MAX ( ( uint64_t ) 16 ,  physical_device_properties . limits . optimalBufferCopyOffsetAlignment ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
										case  API_TRAIT_SHADER_CHANGE_INVALIDATION : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  ( uint64_t ) SHADER_CHANGE_INVALIDATION_INCOMPATIBLE_SETS_PLUS_CASCADE ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										default : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  RenderingDeviceDriver : : api_trait_get ( p_trait ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								bool  RenderingDeviceDriverVulkan : : has_feature ( Features  p_feature )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									switch  ( p_feature )  { 
							 
						 
					
						
							
								
									
										
										
										
											2025-05-28 11:58:07 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										case  SUPPORTS_HALF_FLOAT : 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
											return  shader_capabilities . shader_float16_is_supported  & &  physical_device_features . shaderInt16  & &  storage_buffer_capabilities . storage_buffer_16_bit_access_is_supported ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										case  SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS : 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											return  true ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-12-05 18:32:45 +01:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										case  SUPPORTS_BUFFER_DEVICE_ADDRESS : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  buffer_device_address_support ; 
							 
						 
					
						
							
								
									
										
										
										
											2025-06-27 09:59:21 +10:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										case  SUPPORTS_IMAGE_ATOMIC_32_BIT : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# if (defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)) 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											// MoltenVK has previously had issues with 32-bit atomics on images.
 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# else 
  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  true ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2025-07-24 09:22:44 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										case  SUPPORTS_VULKAN_MEMORY_MODEL : 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											return  vulkan_memory_model_support  & &  vulkan_memory_model_device_scope_support ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										default : 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
											return  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const  RDD : : MultiviewCapabilities  & RenderingDeviceDriverVulkan : : get_multiview_capabilities ( )  {  
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									return  multiview_capabilities ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-11-22 11:55:06 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								const  RDD : : FragmentShadingRateCapabilities  & RenderingDeviceDriverVulkan : : get_fragment_shading_rate_capabilities ( )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  fsr_capabilities ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								const  RDD : : FragmentDensityMapCapabilities  & RenderingDeviceDriverVulkan : : get_fragment_density_map_capabilities ( )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  fdm_capabilities ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								String  RenderingDeviceDriverVulkan : : get_api_name ( )  const  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  " Vulkan " ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								String  RenderingDeviceDriverVulkan : : get_api_version ( )  const  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									uint32_t  api_version  =  physical_device_properties . apiVersion ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  vformat ( " %d.%d.%d " ,  VK_API_VERSION_MAJOR ( api_version ) ,  VK_API_VERSION_MINOR ( api_version ) ,  VK_API_VERSION_PATCH ( api_version ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								String  RenderingDeviceDriverVulkan : : get_pipeline_cache_uuid ( )  const  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  pipeline_cache_id ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								const  RDD : : Capabilities  & RenderingDeviceDriverVulkan : : get_capabilities ( )  const  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  device_capabilities ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2025-01-13 16:13:39 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								const  RenderingShaderContainerFormat  & RenderingDeviceDriverVulkan : : get_shader_container_format ( )  const  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  shader_container_format ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-05-03 11:48:46 +03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								bool  RenderingDeviceDriverVulkan : : is_composite_alpha_supported ( CommandQueueID  p_queue )  const  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									if  ( has_comp_alpha . has ( ( uint64_t ) p_queue . id ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										return  has_comp_alpha [ ( uint64_t ) p_queue . id ] ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									return  false ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								/******************/  
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								RenderingDeviceDriverVulkan : : RenderingDeviceDriverVulkan ( RenderingContextDriverVulkan  * p_context_driver )  {  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									DEV_ASSERT ( p_context_driver  ! =  nullptr ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									context_driver  =  p_context_driver ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									max_descriptor_sets_per_pool  =  GLOBAL_GET ( " rendering/rendering_device/vulkan/max_descriptors_per_pool " ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}  
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								RenderingDeviceDriverVulkan : : ~ RenderingDeviceDriverVulkan ( )  {  
						 
					
						
							
								
									
										
										
										
											2024-10-21 15:35:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# if defined(DEBUG_ENABLED) || defined(DEV_ENABLED) 
  
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									buffer_free ( breadcrumb_buffer ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2024-10-21 15:35:22 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								# endif 
  
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
									while  ( small_allocs_pools . size ( ) )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										HashMap < uint32_t ,  VmaPool > : : Iterator  E  =  small_allocs_pools . begin ( ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										vmaDestroyPool ( allocator ,  E - > value ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										small_allocs_pools . remove ( E ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									vmaDestroyAllocator ( allocator ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2024-12-12 12:47:08 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									// Destroy linearly allocated descriptor pools.
 
							 
						 
					
						
							
								
									
										
											 
										
											
												Improvements from TheForge (see description)
The work was performed by collaboration of TheForge and Google. I am
merely splitting it up into smaller PRs and cleaning it up.
This is the most "risky" PR so far because the previous ones have been
miscellaneous stuff aimed at either [improve
debugging](https://github.com/godotengine/godot/pull/90993) (e.g. device
lost), [improve Android
experience](https://github.com/godotengine/godot/pull/96439) (add Swappy
for better Frame Pacing + Pre-Transformed Swapchains for slightly better
performance), or harmless [ASTC
improvements](https://github.com/godotengine/godot/pull/96045) (better
performance by simply toggling a feature when available).
However this PR contains larger modifications aimed at improving
performance or reducing memory fragmentation. With greater
modifications, come greater risks of bugs or breakage.
Changes introduced by this PR:
TBDR GPUs (e.g. most of Android + iOS + M1 Apple) support rendering to
Render Targets that are not backed by actual GPU memory (everything
stays in cache). This works as long as load action isn't `LOAD`, and
store action must be `DONT_CARE`. This saves VRAM (it also makes
painfully obvious when a mistake introduces a performance regression).
Of particular usefulness is when doing MSAA and keeping the raw MSAA
content is not necessary.
Some GPUs get faster when the sampler settings are hard-coded into the
GLSL shaders (instead of being dynamically bound at runtime). This
required changes to the GLSL shaders, PSO creation routines, Descriptor
creation routines, and Descriptor binding routines.
 - `bool immutable_samplers_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Immutable samplers requires that the samplers stay... immutable, hence
this boolean is useful if the promise gets broken. We might want to turn
this into a `GLOBAL_DEF` setting.
Instead of creating dozen/hundreds/thousands of `VkDescriptorSet` every
frame that need to be freed individually when they are no longer needed,
they all get freed at once by resetting the whole pool. Once the whole
pool is no longer in use by the GPU, it gets reset and its memory
recycled. Descriptor sets that are created to be kept around for longer
or forever (i.e. not created and freed within the same frame) **must
not** use linear pools. There may be more than one pool per frame. How
many pools per frame Godot ends up with depends on its capacity, and
that is controlled by
`rendering/rendering_device/vulkan/max_descriptors_per_pool`.
- **Possible improvement for later:** It should be possible for Godot
to adapt to how many descriptors per pool are needed on a per-key basis
(i.e. grow their capacity like `std::vector` does) after rendering a few
frames; which would be better than the current solution of having a
single global value for all pools (`max_descriptors_per_pool`) that the
user needs to tweak.
 - `bool linear_descriptor_pools_enabled = true`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
Setting it to false is required when workarounding driver bugs (e.g.
Adreno 730).
A ridiculous optimization. Ridiculous because the original code
should've done this in the first place. Previously Godot was doing the
following:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2. This
resets the cmd buffer because Godot requests the
`VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT` flag.
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, and repeat from step 3.
The problem here is that step 3 resets each command buffer individually.
Initially Godot used to have 1 cmd buffer per pool, thus the impact is
very low.
But not anymore (specially with Adreno workarounds to force splitting
compute dispatches into a new cmd buffer, more on this later). However
Godot keeps around a very low amount of command buffers per frame.
The recommended method is to reset the whole pool, to reset all cmd
buffers at once. Hence the new steps would be:
  1. Create a command buffer **pool**. One per frame.
  2. Create multiple command buffers from the pool in point 1.
3. Call `vkBeginCommandBuffer` on the cmd buffer in point 2, which is
already reset/empty (see step 6).
  4. Add commands to the cmd buffers from point 2.
  5. Submit those commands.
6. On frame N + 2, recycle the buffer pool and cmd buffers from pt 1 &
2, call `vkResetCommandPool` and repeat from step 3.
**Possible issues:** @dariosamo added `transfer_worker` which creates a
command buffer pool:
```cpp
transfer_worker->command_pool =
driver->command_pool_create(transfer_queue_family,
RDD::COMMAND_BUFFER_TYPE_PRIMARY);
```
As expected, validation was complaining that command buffers were being
reused without being reset (that's good, we now know Validation Layers
will warn us of wrong use).
I fixed it by adding:
```cpp
void RenderingDevice::_wait_for_transfer_worker(TransferWorker
*p_transfer_worker) {
	driver->fence_wait(p_transfer_worker->command_fence);
	driver->command_pool_reset(p_transfer_worker->command_pool); //
! New line !
```
**Secondary cmd buffers are subject to the same issue but I didn't alter
them. I talked this with Dario and he is aware of this.**
Secondary cmd buffers are currently disabled due to other issues (it's
disabled on master).
 - `bool RenderingDeviceCommons::command_pool_reset_enabled`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Adds `command_bind_render_uniform_sets` and
`add_draw_list_bind_uniform_sets` (+ compute variants).
It performs the same as `add_draw_list_bind_uniform_set` (notice
singular vs plural), but on multiple consecutive uniform sets, thus
reducing graph and draw call overhead.
 - `bool descriptor_set_batching = true;`
Setting it to false enforces the old behavior. Useful for debugging bugs
and regressions.
There's no other reason for this boolean. Possibly once it becomes well
tested, the boolean could be removed entirely.
Godot currently does the following:
 1. Fill the entire cmd buffer with commands.
 2. `submit()`
    - Wait with a semaphore for the swapchain.
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 3. `present()`
The optimization opportunity here is that 95% of Godot's rendering is
done offscreen.
Then a fullscreen pass copies everything to the swapchain. Godot doesn't
practically render directly to the swapchain.
The problem with this is that the GPU has to wait for the swapchain to
be released **to start anything**, when we could start *much earlier*.
Only the final blit pass must wait for the swapchain.
TheForge changed it to the following (more complicated, I'm simplifying
the idea):
 1. Fill the entire cmd buffer with commands.
 2. In `screen_prepare_for_drawing` do `submit()`
    - There are no semaphore waits for the swapchain.
    - Trigger a semaphore to indicate when we're done.
3. Fill a new cmd buffer that only does the final blit to the
swapchain.
 4. `submit()`
    - Wait with a semaphore for the submit() from step 2.
- Wait with a semaphore for the swapchain (so the swapchain can
submit).
- Trigger a semaphore to indicate when we're done (so the swapchain
can submit).
 5. `present()`
Dario discovered this problem independently while working on a different
platform.
**However TheForge's solution had to be rewritten from scratch:** The
complexity to achieve the solution was high and quite difficult to
maintain with the way Godot works now (after Übershaders PR).
But on the other hand, re-implementing the solution became much simpler
because Dario already had to do something similar: To fix an Adreno 730
driver bug, he had to implement splitting command buffers. **This is
exactly what we need!**. Thus it was re-written using this existing
functionality for a new purpose.
To achieve this, I added a new argument, `bool p_split_cmd_buffer`, to
`RenderingDeviceGraph::add_draw_list_begin`, which is only set to true
by `RenderingDevice::draw_list_begin_for_screen`.
The graph will split the draw list into its own command buffer.
 - `bool split_swapchain_into_its_own_cmd_buffer = true;`
Setting it to false enforces the old behavior. This might be necessary
for consoles which follow an alternate solution to the same problem.
If not, then we should consider removing it.
PR #90993 added `shader_destroy_modules()` but it was not actually in
use.
This PR adds several places where `shader_destroy_modules()` is called
after initialization to free up memory of SPIR-V structures that are no
longer needed.
											 
										 
										
											2024-11-14 13:03:14 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									for  ( KeyValue < int ,  DescriptorSetPools >  & pool_map  :  linear_descriptor_set_pools )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										for  ( KeyValue < DescriptorSetPoolKey ,  HashMap < VkDescriptorPool ,  uint32_t > >  pools  :  pool_map . value )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											for  ( KeyValue < VkDescriptorPool ,  uint32_t >  descriptor_pool  :  pools . value )  { 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
												vkDestroyDescriptorPool ( vk_device ,  descriptor_pool . key ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_DESCRIPTOR_POOL ) ) ; 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
											} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
										} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									if  ( vk_device  ! =  VK_NULL_HANDLE )  { 
							 
						 
					
						
							
								
									
										
										
										
											2024-06-30 19:30:54 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
										vkDestroyDevice ( vk_device ,  VKC : : get_allocation_callbacks ( VK_OBJECT_TYPE_DEVICE ) ) ; 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 14:57:56 -03:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
									} 
							 
						 
					
						
							
								
									
										
										
										
											2023-12-19 12:48:02 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								}