Refactor descriptor heaps in D3D12 driver.

This commit is contained in:
Skyth 2025-11-24 18:42:11 +03:00
parent 9f5309a2a4
commit a8d3ecec13
6 changed files with 640 additions and 1138 deletions

View file

@ -1813,12 +1813,10 @@ ProjectSettings::ProjectSettings() {
GLOBAL_DEF(PropertyInfo(Variant::FLOAT, "rendering/rendering_device/pipeline_cache/save_chunk_size_mb", PROPERTY_HINT_RANGE, "0.000001,64.0,0.001,or_greater"), 3.0); GLOBAL_DEF(PropertyInfo(Variant::FLOAT, "rendering/rendering_device/pipeline_cache/save_chunk_size_mb", PROPERTY_HINT_RANGE, "0.000001,64.0,0.001,or_greater"), 3.0);
GLOBAL_DEF(PropertyInfo(Variant::INT, "rendering/rendering_device/vulkan/max_descriptors_per_pool", PROPERTY_HINT_RANGE, "1,256,1,or_greater"), 64); GLOBAL_DEF(PropertyInfo(Variant::INT, "rendering/rendering_device/vulkan/max_descriptors_per_pool", PROPERTY_HINT_RANGE, "1,256,1,or_greater"), 64);
GLOBAL_DEF_RST("rendering/rendering_device/d3d12/max_resource_descriptors_per_frame", 16384); GLOBAL_DEF_RST("rendering/rendering_device/d3d12/max_resource_descriptors", 65536);
custom_prop_info["rendering/rendering_device/d3d12/max_resource_descriptors_per_frame"] = PropertyInfo(Variant::INT, "rendering/rendering_device/d3d12/max_resource_descriptors_per_frame", PROPERTY_HINT_RANGE, "512,262144"); custom_prop_info["rendering/rendering_device/d3d12/max_resource_descriptors"] = PropertyInfo(Variant::INT, "rendering/rendering_device/d3d12/max_resource_descriptors", PROPERTY_HINT_RANGE, "512,1000000");
GLOBAL_DEF_RST("rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame", 1024); GLOBAL_DEF_RST("rendering/rendering_device/d3d12/max_sampler_descriptors", 1024);
custom_prop_info["rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame"] = PropertyInfo(Variant::INT, "rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame", PROPERTY_HINT_RANGE, "256,2048"); custom_prop_info["rendering/rendering_device/d3d12/max_sampler_descriptors"] = PropertyInfo(Variant::INT, "rendering/rendering_device/d3d12/max_sampler_descriptors", PROPERTY_HINT_RANGE, "256,2048");
GLOBAL_DEF_RST("rendering/rendering_device/d3d12/max_misc_descriptors_per_frame", 512);
custom_prop_info["rendering/rendering_device/d3d12/max_misc_descriptors_per_frame"] = PropertyInfo(Variant::INT, "rendering/rendering_device/d3d12/max_misc_descriptors_per_frame", PROPERTY_HINT_RANGE, "32,4096");
// The default value must match the minor part of the Agility SDK version // The default value must match the minor part of the Agility SDK version
// installed by the scripts provided in the repository // installed by the scripts provided in the repository

View file

@ -3256,16 +3256,12 @@
<member name="rendering/rendering_device/d3d12/agility_sdk_version" type="int" setter="" getter="" default="613"> <member name="rendering/rendering_device/d3d12/agility_sdk_version" type="int" setter="" getter="" default="613">
Version code of the [url=https://devblogs.microsoft.com/directx/directx12agility/]Direct3D 12 Agility SDK[/url] to use ([code]D3D12SDKVersion[/code]). This must match the [i]minor[/i] version that is installed next to the editor binary and in the export templates directory for the current editor version. For example, if you have [code]1.613.3[/code] installed, you need to input [code]613[/code] here. Version code of the [url=https://devblogs.microsoft.com/directx/directx12agility/]Direct3D 12 Agility SDK[/url] to use ([code]D3D12SDKVersion[/code]). This must match the [i]minor[/i] version that is installed next to the editor binary and in the export templates directory for the current editor version. For example, if you have [code]1.613.3[/code] installed, you need to input [code]613[/code] here.
</member> </member>
<member name="rendering/rendering_device/d3d12/max_misc_descriptors_per_frame" type="int" setter="" getter="" default="512"> <member name="rendering/rendering_device/d3d12/max_resource_descriptors" type="int" setter="" getter="" default="65536">
The number of entries in the miscellaneous descriptors heap the Direct3D 12 rendering driver uses each frame, used for various operations like clearing a texture. The number of entries in the resource descriptor heap the Direct3D 12 rendering driver uses for most rendering operations.
Depending on the complexity of scenes, this value may be lowered or may need to be raised. Depending on the complexity of scenes, this value may be lowered or may need to be raised.
</member> </member>
<member name="rendering/rendering_device/d3d12/max_resource_descriptors_per_frame" type="int" setter="" getter="" default="16384"> <member name="rendering/rendering_device/d3d12/max_sampler_descriptors" type="int" setter="" getter="" default="1024">
The number of entries in the resource descriptors heap the Direct3D 12 rendering driver uses each frame, used for most rendering operations. The number of entries in the sampler descriptor heap the Direct3D 12 rendering driver uses for most rendering operations.
Depending on the complexity of scenes, this value may be lowered or may need to be raised.
</member>
<member name="rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame" type="int" setter="" getter="" default="1024">
The number of entries in the sampler descriptors heap the Direct3D 12 rendering driver uses each frame, used for most rendering operations.
Depending on the complexity of scenes, this value may be lowered or may need to be raised. Depending on the complexity of scenes, this value may be lowered or may need to be raised.
</member> </member>
<member name="rendering/rendering_device/driver" type="String" setter="" getter="" default="&quot;vulkan&quot;"> <member name="rendering/rendering_device/driver" type="String" setter="" getter="" default="&quot;vulkan&quot;">

File diff suppressed because it is too large Load diff

View file

@ -33,6 +33,7 @@
#include "core/templates/a_hash_map.h" #include "core/templates/a_hash_map.h"
#include "core/templates/hash_map.h" #include "core/templates/hash_map.h"
#include "core/templates/paged_allocator.h" #include "core/templates/paged_allocator.h"
#include "core/templates/rb_map.h"
#include "core/templates/self_list.h" #include "core/templates/self_list.h"
#include "rendering_shader_container_d3d12.h" #include "rendering_shader_container_d3d12.h"
#include "servers/rendering/rendering_device_driver.h" #include "servers/rendering/rendering_device_driver.h"
@ -71,6 +72,7 @@ class RenderingContextDriverD3D12;
namespace D3D12MA { namespace D3D12MA {
class Allocation; class Allocation;
class Allocator; class Allocator;
class VirtualBlock;
}; // namespace D3D12MA }; // namespace D3D12MA
struct IDXGIAdapter; struct IDXGIAdapter;
@ -143,106 +145,51 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
String pipeline_cache_id; String pipeline_cache_id;
D3D12_HEAP_TYPE dynamic_persistent_upload_heap = D3D12_HEAP_TYPE_UPLOAD; D3D12_HEAP_TYPE dynamic_persistent_upload_heap = D3D12_HEAP_TYPE_UPLOAD;
class CPUDescriptorsHeapPool; struct DescriptorHeap {
struct Allocation {
uint64_t virtual_alloc_handle = {}; // This is the handle value in "D3D12MA::VirtualAllocation".
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle = {};
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle = {};
};
struct CPUDescriptorsHeapHandle {
Microsoft::WRL::ComPtr<ID3D12DescriptorHeap> heap; Microsoft::WRL::ComPtr<ID3D12DescriptorHeap> heap;
CPUDescriptorsHeapPool *pool = nullptr; D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle = {};
uint32_t offset = 0; D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle = {};
uint32_t base_offset = 0; uint32_t increment_size = 0;
uint32_t count = 0;
uint32_t nonce = 0;
uint32_t global_offset() const { return offset + base_offset; } Microsoft::WRL::ComPtr<D3D12MA::VirtualBlock> virtual_block;
Error initialize(ID3D12Device *p_device, D3D12_DESCRIPTOR_HEAP_TYPE p_type, uint32_t p_num_descriptors, bool p_shader_visible);
Error allocate(uint32_t p_descriptor_count, Allocation &r_allocation);
void free(const Allocation &p_allocation);
}; };
class CPUDescriptorsHeapPool { // Some IHVs do not allow creating descriptor heaps beyond a certain limit, so they must be pooled.
Mutex mutex; struct CPUDescriptorHeapPool {
struct Allocation : DescriptorHeap::Allocation {
struct FreeBlockInfo { uint32_t heap_index = UINT_MAX;
Microsoft::WRL::ComPtr<ID3D12DescriptorHeap> heap;
uint32_t global_offset = 0; // Global offset in an address space shared by all the heaps.
uint32_t base_offset = 0; // The offset inside the space of this heap.
uint32_t size = 0;
uint32_t nonce = 0;
}; };
struct FreeBlockSortIndexSort { BinaryMutex mutex;
_FORCE_INLINE_ bool operator()(const uint32_t &p_l, const uint32_t &p_r) const { LocalVector<DescriptorHeap> heaps;
return p_l > p_r;
} D3D12_DESCRIPTOR_HEAP_TYPE type = {};
uint32_t increment_size = 0;
void initialize(ID3D12Device *p_device, D3D12_DESCRIPTOR_HEAP_TYPE p_type);
Error allocate(uint32_t p_descriptor_count, ID3D12Device *p_device, Allocation &r_allocation);
void free(const Allocation &p_allocation);
}; };
typedef RBMap<uint32_t, FreeBlockInfo> OffsetTableType; DescriptorHeap resource_descriptor_heap;
typedef RBMap<uint32_t, List<uint32_t>, FreeBlockSortIndexSort> SizeTableType; DescriptorHeap sampler_descriptor_heap;
CPUDescriptorHeapPool resource_descriptor_heap_pool;
CPUDescriptorHeapPool rtv_descriptor_heap_pool;
CPUDescriptorHeapPool dsv_descriptor_heap_pool;
OffsetTableType free_blocks_by_offset; CPUDescriptorHeapPool::Allocation null_rtv_alloc;
SizeTableType free_blocks_by_size;
uint32_t current_offset = 0;
uint32_t current_nonce = 0;
void add_to_size_map(const FreeBlockInfo &p_block);
void remove_from_size_map(const FreeBlockInfo &p_block);
void verify();
public:
Error allocate(ID3D12Device *p_device, const D3D12_DESCRIPTOR_HEAP_DESC &p_desc, CPUDescriptorsHeapHandle &r_result);
Error release(const CPUDescriptorsHeapHandle &p_result);
};
class CPUDescriptorsHeapPools {
CPUDescriptorsHeapPool pools[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES];
public:
Error allocate(ID3D12Device *p_device, const D3D12_DESCRIPTOR_HEAP_DESC &p_desc, CPUDescriptorsHeapHandle &r_result);
};
struct CPUDescriptorsHeapWalker {
uint32_t handle_size = 0;
uint32_t handle_count = 0;
D3D12_CPU_DESCRIPTOR_HANDLE first_cpu_handle = {};
uint32_t handle_index = 0;
D3D12_CPU_DESCRIPTOR_HANDLE get_curr_cpu_handle();
_FORCE_INLINE_ void rewind() { handle_index = 0; }
void advance(uint32_t p_count = 1);
uint32_t get_current_handle_index() const { return handle_index; }
uint32_t get_free_handles() { return handle_count - handle_index; }
bool is_at_eof() { return handle_index == handle_count; }
};
struct GPUDescriptorsHeapWalker : CPUDescriptorsHeapWalker {
D3D12_GPU_DESCRIPTOR_HANDLE first_gpu_handle = {};
D3D12_GPU_DESCRIPTOR_HANDLE get_curr_gpu_handle();
};
class CPUDescriptorsHeap {
D3D12_DESCRIPTOR_HEAP_DESC desc = {};
CPUDescriptorsHeapHandle handle;
uint32_t handle_size = 0;
public:
CPUDescriptorsHeap() = default;
Error allocate(RenderingDeviceDriverD3D12 *p_driver, D3D12_DESCRIPTOR_HEAP_TYPE p_type, uint32_t p_descriptor_count);
uint32_t get_descriptor_count() const { return desc.NumDescriptors; }
~CPUDescriptorsHeap();
CPUDescriptorsHeapWalker make_walker() const;
};
class GPUDescriptorsHeap {
D3D12_DESCRIPTOR_HEAP_DESC desc = {};
Microsoft::WRL::ComPtr<ID3D12DescriptorHeap> heap;
uint32_t handle_size = 0;
public:
Error allocate(RenderingDeviceDriverD3D12 *p_device, D3D12_DESCRIPTOR_HEAP_TYPE p_type, uint32_t p_descriptor_count);
uint32_t get_descriptor_count() const { return desc.NumDescriptors; }
ID3D12DescriptorHeap *get_heap() const { return heap.Get(); }
GPUDescriptorsHeapWalker make_walker() const;
};
CPUDescriptorsHeapPools cpu_descriptor_pool;
struct { struct {
Microsoft::WRL::ComPtr<ID3D12CommandSignature> draw; Microsoft::WRL::ComPtr<ID3D12CommandSignature> draw;
@ -317,10 +264,10 @@ private:
/*****************/ /*****************/
struct BufferInfo : public ResourceInfo { struct BufferInfo : public ResourceInfo {
D3D12_GPU_VIRTUAL_ADDRESS gpu_virtual_address = {};
DataFormat texel_format = DATA_FORMAT_MAX; DataFormat texel_format = DATA_FORMAT_MAX;
uint64_t size = 0; uint64_t size = 0;
struct { struct {
bool usable_as_uav : 1;
bool is_dynamic : 1; // Only used for tracking (e.g. Vulkan needs these checks). bool is_dynamic : 1; // Only used for tracking (e.g. Vulkan needs these checks).
} flags = {}; } flags = {};
@ -407,6 +354,13 @@ public:
private: private:
LocalVector<D3D12_SAMPLER_DESC> samplers; LocalVector<D3D12_SAMPLER_DESC> samplers;
struct SamplerDescriptorHeapAllocation : DescriptorHeap::Allocation {
uint32_t key = 0;
uint32_t use_count = 1;
};
RBMap<uint32_t, SamplerDescriptorHeapAllocation> sampler_descriptor_heap_allocations;
public: public:
virtual SamplerID sampler_create(const SamplerState &p_state) final override; virtual SamplerID sampler_create(const SamplerState &p_state) final override;
virtual void sampler_free(SamplerID p_sampler) final override; virtual void sampler_free(SamplerID p_sampler) final override;
@ -551,6 +505,9 @@ private:
LocalVector<D3D12_RESOURCE_BARRIER> res_barriers; LocalVector<D3D12_RESOURCE_BARRIER> res_barriers;
uint32_t res_barriers_count = 0; uint32_t res_barriers_count = 0;
uint32_t res_barriers_batch = 0; uint32_t res_barriers_batch = 0;
CPUDescriptorHeapPool::Allocation uav_alloc;
CPUDescriptorHeapPool::Allocation rtv_alloc;
}; };
public: public:
@ -596,9 +553,10 @@ private:
struct FramebufferInfo { struct FramebufferInfo {
bool is_screen = false; bool is_screen = false;
Size2i size; Size2i size;
TightLocalVector<uint32_t> attachments_handle_inds; // RTV heap index for color; DSV heap index for DSV. TightLocalVector<uint32_t> attachments_handle_inds; // RTV heap index for color; DSV heap index for DSV.
CPUDescriptorsHeap rtv_heap; CPUDescriptorHeapPool::Allocation rtv_alloc;
CPUDescriptorsHeap dsv_heap; // Used only for depth-stencil attachments. CPUDescriptorHeapPool::Allocation dsv_alloc; // Used only for depth-stencil attachments.
TightLocalVector<TextureID> attachments; // Color and depth-stencil. Used if not screen. TightLocalVector<TextureID> attachments; // Color and depth-stencil. Used if not screen.
TextureID vrs_attachment; TextureID vrs_attachment;
@ -645,25 +603,18 @@ private:
ResourceClass res_class = RES_CLASS_INVALID; ResourceClass res_class = RES_CLASS_INVALID;
UniformType type = UNIFORM_TYPE_MAX; UniformType type = UNIFORM_TYPE_MAX;
uint32_t length = UINT32_MAX; uint32_t length = UINT32_MAX;
#ifdef DEV_ENABLED
bool writable = false; bool writable = false;
#endif uint32_t resource_descriptor_offset = UINT32_MAX;
struct RootSignatureLocation { uint32_t sampler_descriptor_offset = UINT32_MAX;
uint32_t root_param_idx = UINT32_MAX; uint32_t root_param_idx = UINT32_MAX;
uint32_t range_idx = UINT32_MAX;
};
struct {
RootSignatureLocation resource;
RootSignatureLocation sampler;
} root_sig_locations;
}; };
struct UniformSet { struct UniformSet {
TightLocalVector<UniformBindingInfo> bindings; TightLocalVector<UniformBindingInfo> bindings;
struct { uint32_t resource_root_param_idx = UINT32_MAX;
uint32_t resources = 0; uint32_t resource_descriptor_count = 0;
uint32_t samplers = 0; uint32_t sampler_root_param_idx = UINT32_MAX;
} num_root_params; uint32_t sampler_descriptor_count = 0;
}; };
TightLocalVector<UniformSet> sets; TightLocalVector<UniformSet> sets;
@ -701,16 +652,16 @@ public:
/*********************/ /*********************/
private: private:
struct RootDescriptorTable { struct UniformSetInfo {
uint32_t root_param_idx = UINT32_MAX; DescriptorHeap::Allocation resource_descriptor_heap_alloc;
D3D12_GPU_DESCRIPTOR_HANDLE start_gpu_handle = {}; SamplerDescriptorHeapAllocation *sampler_descriptor_heap_alloc = nullptr;
struct DynamicBuffer {
BufferDynamicInfo const *info = nullptr;
uint32_t binding = UINT_MAX;
}; };
struct UniformSetInfo { TightLocalVector<DynamicBuffer> dynamic_buffers;
struct {
CPUDescriptorsHeap resources;
CPUDescriptorsHeap samplers;
} desc_heaps;
struct StateRequirement { struct StateRequirement {
ResourceInfo *resource = nullptr; ResourceInfo *resource = nullptr;
@ -718,29 +669,8 @@ private:
D3D12_RESOURCE_STATES states = {}; D3D12_RESOURCE_STATES states = {};
uint64_t shader_uniform_idx_mask = 0; uint64_t shader_uniform_idx_mask = 0;
}; };
TightLocalVector<StateRequirement> resource_states; TightLocalVector<StateRequirement> resource_states;
struct RecentBind {
uint64_t segment_serial = 0;
uint32_t dynamic_state_mask = 0;
uint32_t root_signature_crc = 0;
struct {
TightLocalVector<RootDescriptorTable> resources;
TightLocalVector<RootDescriptorTable> samplers;
} root_tables;
int uses = 0;
} recent_binds[4]; // A better amount may be empirically found.
TightLocalVector<BufferDynamicInfo const *, uint32_t> dynamic_buffers;
#ifdef DEV_ENABLED
// Filthy, but useful for dev.
struct ResourceDescInfo {
D3D12_DESCRIPTOR_RANGE_TYPE type;
D3D12_SRV_DIMENSION srv_dimension;
};
TightLocalVector<ResourceDescInfo> resources_desc_info;
#endif
}; };
public: public:
@ -754,7 +684,7 @@ public:
private: private:
void _command_check_descriptor_sets(CommandBufferID p_cmd_buffer); void _command_check_descriptor_sets(CommandBufferID p_cmd_buffer);
void _command_bind_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index, uint32_t p_dynamic_offsets, bool p_for_compute); DescriptorHeap::Allocation _command_allocate_per_frame_descriptor();
public: public:
/******************/ /******************/
@ -942,35 +872,12 @@ public:
/********************/ /********************/
private: private:
struct FrameInfo { struct FrameInfo {
struct { LocalVector<DescriptorHeap::Allocation> descriptor_allocations;
GPUDescriptorsHeap resources; uint32_t descriptor_allocation_count = 0;
GPUDescriptorsHeap samplers;
CPUDescriptorsHeap aux;
CPUDescriptorsHeap rtv;
} desc_heaps;
struct {
GPUDescriptorsHeapWalker resources;
GPUDescriptorsHeapWalker samplers;
CPUDescriptorsHeapWalker aux;
CPUDescriptorsHeapWalker rtv;
} desc_heap_walkers;
struct {
bool resources = false;
bool samplers = false;
bool aux = false;
bool rtv = false;
} desc_heaps_exhausted_reported;
CD3DX12_CPU_DESCRIPTOR_HANDLE null_rtv_handle = {}; // For [[MANUAL_SUBPASSES]].
uint32_t segment_serial = 0;
#ifdef DEV_ENABLED
uint32_t uniform_set_reused = 0;
#endif
}; };
TightLocalVector<FrameInfo> frames; TightLocalVector<FrameInfo> frames;
uint32_t frame_idx = 0; uint32_t frame_idx = 0;
uint32_t frames_drawn = 0; uint32_t frames_drawn = 0;
uint32_t segment_serial = 0;
bool segment_begun = false; bool segment_begun = false;
HashMap<uint64_t, bool> has_comp_alpha; HashMap<uint64_t, bool> has_comp_alpha;

View file

@ -194,7 +194,11 @@ uint32_t RenderingShaderContainerD3D12::_format_version() const {
uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_extra_data(const uint8_t *p_bytes) { uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_extra_data(const uint8_t *p_bytes) {
reflection_data_d3d12 = *(const ReflectionDataD3D12 *)(p_bytes); reflection_data_d3d12 = *(const ReflectionDataD3D12 *)(p_bytes);
return sizeof(ReflectionDataD3D12); reflection_binding_set_data_d3d12.resize(reflection_data.set_count);
for (uint32_t i = 0; i < reflection_binding_set_data_d3d12.size(); i++) {
reflection_binding_set_data_d3d12.ptrw()[i] = *(const ReflectionBindingSetDataD3D12 *)(p_bytes + sizeof(ReflectionDataD3D12) + (i * sizeof(ReflectionBindingSetDataD3D12)));
}
return sizeof(ReflectionDataD3D12) + (reflection_binding_set_data_d3d12.size() * sizeof(ReflectionBindingSetDataD3D12));
} }
uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) { uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) {
@ -228,9 +232,12 @@ uint32_t RenderingShaderContainerD3D12::_from_bytes_footer_extra_data(const uint
uint32_t RenderingShaderContainerD3D12::_to_bytes_reflection_extra_data(uint8_t *p_bytes) const { uint32_t RenderingShaderContainerD3D12::_to_bytes_reflection_extra_data(uint8_t *p_bytes) const {
if (p_bytes != nullptr) { if (p_bytes != nullptr) {
*(ReflectionDataD3D12 *)(p_bytes) = reflection_data_d3d12; *(ReflectionDataD3D12 *)(p_bytes) = reflection_data_d3d12;
for (uint32_t i = 0; i < reflection_binding_set_data_d3d12.size(); i++) {
*(ReflectionBindingSetDataD3D12 *)(p_bytes + sizeof(ReflectionDataD3D12) + (i * sizeof(ReflectionBindingSetDataD3D12))) = reflection_binding_set_data_d3d12[i];
}
} }
return sizeof(ReflectionDataD3D12); return sizeof(ReflectionDataD3D12) + (reflection_binding_set_data_d3d12.size() * sizeof(ReflectionBindingSetDataD3D12));
} }
uint32_t RenderingShaderContainerD3D12::_to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const { uint32_t RenderingShaderContainerD3D12::_to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const {
@ -269,14 +276,10 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span<ReflectShaderStag
dxil_runtime_conf.push_constant_cbv.base_shader_register = ROOT_CONSTANT_REGISTER; dxil_runtime_conf.push_constant_cbv.base_shader_register = ROOT_CONSTANT_REGISTER;
dxil_runtime_conf.zero_based_vertex_instance_id = true; dxil_runtime_conf.zero_based_vertex_instance_id = true;
dxil_runtime_conf.zero_based_compute_workgroup_id = true; dxil_runtime_conf.zero_based_compute_workgroup_id = true;
dxil_runtime_conf.declared_read_only_images_as_srvs = true;
// Making this explicit to let maintainers know that in practice this didn't improve performance, // Explicitly keeping these false because converting UAV descriptors to SRVs does not seem to have real performance benefits on desktop GPUs.
// probably because data generated by one shader and consumed by another one forces the resource // It also makes it easier to implement descriptor heaps and enhanced barriers.
// to transition from UAV to SRV, and back, instead of being an UAV all the time. dxil_runtime_conf.declared_read_only_images_as_srvs = false;
// In case someone wants to try, care must be taken so in case of incompatible bindings across stages
// happen as a result, all the stages are re-translated. That can happen if, for instance, a stage only
// uses an allegedly writable resource only for reading but the next stage doesn't.
dxil_runtime_conf.inferred_read_only_images_as_srvs = false; dxil_runtime_conf.inferred_read_only_images_as_srvs = false;
// Translate SPIR-V to NIR. // Translate SPIR-V to NIR.
@ -482,7 +485,7 @@ bool RenderingShaderContainerD3D12::_generate_root_signature(BitField<RenderingD
struct TraceableDescriptorTable { struct TraceableDescriptorTable {
uint32_t stages_mask = {}; uint32_t stages_mask = {};
Vector<D3D12_DESCRIPTOR_RANGE1> ranges; Vector<D3D12_DESCRIPTOR_RANGE1> ranges;
Vector<RootSignatureLocation *> root_signature_locations; uint32_t set = UINT_MAX;
}; };
uint32_t binding_start = 0; uint32_t binding_start = 0;
@ -495,31 +498,35 @@ bool RenderingShaderContainerD3D12::_generate_root_signature(BitField<RenderingD
for (uint32_t j = 0; j < uniform_count; j++) { for (uint32_t j = 0; j < uniform_count; j++) {
const ReflectionBindingData &uniform = reflection_binding_set_uniforms_data[binding_start + j]; const ReflectionBindingData &uniform = reflection_binding_set_uniforms_data[binding_start + j];
ReflectionBindingDataD3D12 &uniform_d3d12 = reflection_binding_set_uniforms_data_d3d12.ptrw()[binding_start + j]; ReflectionBindingDataD3D12 &uniform_d3d12 = reflection_binding_set_uniforms_data_d3d12.ptrw()[binding_start + j];
bool really_used = uniform_d3d12.dxil_stages != 0;
#ifdef DEV_ENABLED #ifdef DEV_ENABLED
bool really_used = uniform_d3d12.dxil_stages != 0;
bool anybody_home = (ResourceClass)(uniform_d3d12.resource_class) != RES_CLASS_INVALID || uniform_d3d12.has_sampler; bool anybody_home = (ResourceClass)(uniform_d3d12.resource_class) != RES_CLASS_INVALID || uniform_d3d12.has_sampler;
DEV_ASSERT(anybody_home == really_used); DEV_ASSERT(anybody_home == really_used);
#endif #endif
if (!really_used) {
continue; // Existed in SPIR-V; went away in DXIL.
}
auto insert_range = [](D3D12_DESCRIPTOR_RANGE_TYPE p_range_type, auto insert_range = [i](D3D12_DESCRIPTOR_RANGE_TYPE p_range_type,
uint32_t p_num_descriptors, uint32_t p_num_descriptors,
uint32_t p_dxil_register, uint32_t p_dxil_register,
uint32_t p_dxil_stages_mask, uint32_t p_dxil_stages_mask,
RootSignatureLocation *p_root_sig_locations, uint32_t &r_descriptor_offset,
Vector<TraceableDescriptorTable> &r_tables, uint32_t &r_descriptor_count,
bool &r_first_in_set) { bool &r_first_in_set,
Vector<TraceableDescriptorTable> &r_tables) {
r_descriptor_offset = r_descriptor_count;
if (r_first_in_set) { if (r_first_in_set) {
r_tables.resize(r_tables.size() + 1); r_tables.resize(r_tables.size() + 1);
r_first_in_set = false; r_first_in_set = false;
} }
TraceableDescriptorTable &table = r_tables.write[r_tables.size() - 1]; TraceableDescriptorTable &table = r_tables.write[r_tables.size() - 1];
DEV_ASSERT(table.set == UINT_MAX || table.set == i);
table.stages_mask |= p_dxil_stages_mask; table.stages_mask |= p_dxil_stages_mask;
table.set = i;
CD3DX12_DESCRIPTOR_RANGE1 range; CD3DX12_DESCRIPTOR_RANGE1 range;
// Due to the aliasing hack for SRV-UAV of different families, // Due to the aliasing hack for SRV-UAV of different families,
// we can be causing an unintended change of data (sometimes the validation layers catch it). // we can be causing an unintended change of data (sometimes the validation layers catch it).
D3D12_DESCRIPTOR_RANGE_FLAGS flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE; D3D12_DESCRIPTOR_RANGE_FLAGS flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE;
@ -528,79 +535,130 @@ bool RenderingShaderContainerD3D12::_generate_root_signature(BitField<RenderingD
} else if (p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_CBV) { } else if (p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_CBV) {
flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE; flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE;
} }
range.Init(p_range_type, p_num_descriptors, p_dxil_register, 0, flags);
range.Init(p_range_type, p_num_descriptors, p_dxil_register, 0, flags, r_descriptor_offset);
r_descriptor_count += p_num_descriptors;
table.ranges.push_back(range); table.ranges.push_back(range);
table.root_signature_locations.push_back(p_root_sig_locations);
}; };
D3D12_DESCRIPTOR_RANGE_TYPE range_type = (D3D12_DESCRIPTOR_RANGE_TYPE)UINT_MAX;
bool has_sampler = false;
uint32_t num_descriptors = 1; uint32_t num_descriptors = 1;
D3D12_DESCRIPTOR_RANGE_TYPE resource_range_type = {};
switch ((ResourceClass)(uniform_d3d12.resource_class)) { switch (uniform.type) {
case RES_CLASS_INVALID: { case RDC::UNIFORM_TYPE_SAMPLER: {
has_sampler = true;
num_descriptors = uniform.length; num_descriptors = uniform.length;
DEV_ASSERT(uniform_d3d12.has_sampler);
} break; } break;
case RES_CLASS_CBV: { case RDC::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_CBV; range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
DEV_ASSERT(!uniform_d3d12.has_sampler); has_sampler = true;
num_descriptors = MAX(1u, uniform.length);
} break; } break;
case RES_CLASS_SRV: { case RDC::UNIFORM_TYPE_TEXTURE: {
resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV; range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
num_descriptors = MAX(1u, uniform.length); // An unbound R/O buffer is reflected as zero-size. num_descriptors = MAX(1u, uniform.length);
} break; } break;
case RES_CLASS_UAV: { case RDC::UNIFORM_TYPE_IMAGE: {
resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_UAV; range_type = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
num_descriptors = MAX(1u, uniform.length); // An unbound R/W buffer is reflected as zero-size. num_descriptors = MAX(1u, uniform.length);
DEV_ASSERT(!uniform_d3d12.has_sampler);
} break; } break;
case RDC::UNIFORM_TYPE_TEXTURE_BUFFER: {
CRASH_NOW_MSG("Unimplemented!");
} break;
case RDC::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {
CRASH_NOW_MSG("Unimplemented!");
} break;
case RDC::UNIFORM_TYPE_IMAGE_BUFFER: {
CRASH_NOW_MSG("Unimplemented!");
} break;
case RDC::UNIFORM_TYPE_UNIFORM_BUFFER: {
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
} break;
case RDC::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: {
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
} break;
case RDC::UNIFORM_TYPE_STORAGE_BUFFER: {
range_type = uniform.writable ? D3D12_DESCRIPTOR_RANGE_TYPE_UAV : D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
} break;
case RDC::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
range_type = uniform.writable ? D3D12_DESCRIPTOR_RANGE_TYPE_UAV : D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
} break;
case RDC::UNIFORM_TYPE_INPUT_ATTACHMENT: {
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
} break;
default: {
DEV_ASSERT(false);
}
} }
uint32_t dxil_register = i * GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER + uniform.binding * GODOT_NIR_BINDING_MULTIPLIER; uint32_t dxil_register = i * GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER + uniform.binding * GODOT_NIR_BINDING_MULTIPLIER;
if (uniform_d3d12.resource_class != RES_CLASS_INVALID) { if (range_type != (D3D12_DESCRIPTOR_RANGE_TYPE)UINT_MAX) {
insert_range( // Dynamic buffers are converted to root descriptors to prevent copying descriptors during command recording.
resource_range_type, // Out-of-bounds accesses are not a concern because that's already undefined behavior on Vulkan.
num_descriptors, if (uniform.type == RDC::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC || uniform.type == RDC::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC) {
dxil_register, CD3DX12_ROOT_PARAMETER1 root_param = {};
uniform_d3d12.dxil_stages, D3D12_SHADER_VISIBILITY visibility = stages_to_d3d12_visibility(uniform.stages);
&uniform_d3d12.root_signature_locations[RS_LOC_TYPE_RESOURCE],
resource_tables_maps, switch (range_type) {
first_resource_in_set); case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: {
root_param.InitAsConstantBufferView(dxil_register, 0, D3D12_ROOT_DESCRIPTOR_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE, visibility);
} break;
case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: {
root_param.InitAsShaderResourceView(dxil_register, 0, D3D12_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE, visibility);
} break;
case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: {
root_param.InitAsUnorderedAccessView(dxil_register, 0, D3D12_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE, visibility);
} break;
default: {
DEV_ASSERT(false && "Unrecognized range type.");
} break;
} }
if (uniform_d3d12.has_sampler) { uniform_d3d12.root_param_idx = root_params.size();
root_params.push_back(root_param);
} else {
insert_range(
range_type,
num_descriptors,
dxil_register,
uniform.stages,
uniform_d3d12.resource_descriptor_offset,
reflection_binding_set_data_d3d12.ptrw()[i].resource_descriptor_count,
first_resource_in_set,
resource_tables_maps);
}
}
if (has_sampler) {
insert_range( insert_range(
D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER, D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER,
num_descriptors, num_descriptors,
dxil_register, dxil_register,
uniform_d3d12.dxil_stages, uniform.stages,
&uniform_d3d12.root_signature_locations[RS_LOC_TYPE_SAMPLER], uniform_d3d12.sampler_descriptor_offset,
sampler_tables_maps, reflection_binding_set_data_d3d12.ptrw()[i].sampler_descriptor_count,
first_sampler_in_set); first_sampler_in_set,
sampler_tables_maps);
} }
} }
binding_start += uniform_count; binding_start += uniform_count;
} }
auto make_descriptor_tables = [&root_params](const Vector<TraceableDescriptorTable> &p_tables) { for (const TraceableDescriptorTable &table : resource_tables_maps) {
for (const TraceableDescriptorTable &table : p_tables) { CD3DX12_ROOT_PARAMETER1 root_table = {};
D3D12_SHADER_VISIBILITY visibility = stages_to_d3d12_visibility(table.stages_mask); root_table.InitAsDescriptorTable(table.ranges.size(), table.ranges.ptr(), stages_to_d3d12_visibility(table.stages_mask));
DEV_ASSERT(table.ranges.size() == table.root_signature_locations.size()); reflection_binding_set_data_d3d12.ptrw()[table.set].resource_root_param_idx = root_params.size();
for (int i = 0; i < table.ranges.size(); i++) {
// By now we know very well which root signature location corresponds to the pointed uniform.
table.root_signature_locations[i]->root_param_index = root_params.size();
table.root_signature_locations[i]->range_index = i;
}
CD3DX12_ROOT_PARAMETER1 root_table;
root_table.InitAsDescriptorTable(table.ranges.size(), table.ranges.ptr(), visibility);
root_params.push_back(root_table); root_params.push_back(root_table);
} }
};
make_descriptor_tables(resource_tables_maps); for (const TraceableDescriptorTable &table : sampler_tables_maps) {
make_descriptor_tables(sampler_tables_maps); CD3DX12_ROOT_PARAMETER1 root_table = {};
root_table.InitAsDescriptorTable(table.ranges.size(), table.ranges.ptr(), stages_to_d3d12_visibility(table.stages_mask));
reflection_binding_set_data_d3d12.ptrw()[table.set].sampler_root_param_idx = root_params.size();
root_params.push_back(root_table);
}
CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {}; CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {};
D3D12_ROOT_SIGNATURE_FLAGS root_sig_flags = D3D12_ROOT_SIGNATURE_FLAGS root_sig_flags =
@ -755,6 +813,7 @@ void RenderingShaderContainerD3D12::_nir_report_bitcode_bit_offset(uint64_t p_bi
#endif #endif
void RenderingShaderContainerD3D12::_set_from_shader_reflection_post(const ReflectShader &p_shader) { void RenderingShaderContainerD3D12::_set_from_shader_reflection_post(const ReflectShader &p_shader) {
reflection_binding_set_data_d3d12.resize(reflection_binding_set_uniforms_count.size());
reflection_binding_set_uniforms_data_d3d12.resize(reflection_binding_set_uniforms_data.size()); reflection_binding_set_uniforms_data_d3d12.resize(reflection_binding_set_uniforms_data.size());
reflection_specialization_data_d3d12.resize(reflection_specialization_data.size()); reflection_specialization_data_d3d12.resize(reflection_specialization_data.size());
@ -841,6 +900,7 @@ RenderingShaderContainerD3D12::ShaderReflectionD3D12 RenderingShaderContainerD3D
reflection.spirv_specialization_constants_ids_mask = reflection_data_d3d12.spirv_specialization_constants_ids_mask; reflection.spirv_specialization_constants_ids_mask = reflection_data_d3d12.spirv_specialization_constants_ids_mask;
reflection.dxil_push_constant_stages = reflection_data_d3d12.dxil_push_constant_stages; reflection.dxil_push_constant_stages = reflection_data_d3d12.dxil_push_constant_stages;
reflection.nir_runtime_data_root_param_idx = reflection_data_d3d12.nir_runtime_data_root_param_idx; reflection.nir_runtime_data_root_param_idx = reflection_data_d3d12.nir_runtime_data_root_param_idx;
reflection.reflection_binding_sets_d3d12 = reflection_binding_set_data_d3d12;
reflection.reflection_specialization_data_d3d12 = reflection_specialization_data_d3d12; reflection.reflection_specialization_data_d3d12 = reflection_specialization_data_d3d12;
reflection.root_signature_bytes = root_signature_bytes; reflection.root_signature_bytes = root_signature_bytes;
reflection.root_signature_crc = root_signature_crc; reflection.root_signature_crc = root_signature_crc;

View file

@ -86,16 +86,20 @@ public:
2, // SHADER_STAGE_COMPUTE 2, // SHADER_STAGE_COMPUTE
}; };
// Side-by-side diff rows: the left column is the removed RootSignatureLocation,
// the right column is the added ReflectionBindingSetDataD3D12.
// ReflectionBindingSetDataD3D12 carries root-signature bookkeeping for a binding set:
// the root parameter index and descriptor count of its resource and sampler descriptor tables.
struct RootSignatureLocation { struct ReflectionBindingSetDataD3D12 {
// Right column: assigned root_params.size() right before the set's resource descriptor
// table is pushed to root_params. NOTE(review): UINT32_MAX presumably means "no resource table" - confirm.
uint32_t root_param_index = UINT32_MAX; uint32_t resource_root_param_idx = UINT32_MAX;
// Right column: handed to insert_range() while resource ranges are built.
// NOTE(review): presumably accumulates the set's total resource descriptors - confirm against insert_range().
uint32_t range_index = UINT32_MAX; uint32_t resource_descriptor_count = 0;
// Assigned root_params.size() right before the set's sampler descriptor table is pushed to root_params.
uint32_t sampler_root_param_idx = UINT32_MAX;
// Handed to insert_range() while sampler ranges are built; presumably the set's total sampler descriptors - confirm.
uint32_t sampler_descriptor_count = 0;
}; };
// Side-by-side diff rows: unchanged lines appear twice (left and right column).
// ReflectionBindingDataD3D12 is the D3D12-specific record kept for a reflected uniform;
// the container holds one entry per element of reflection_binding_set_uniforms_data.
struct ReflectionBindingDataD3D12 { struct ReflectionBindingDataD3D12 {
uint32_t resource_class = 0; uint32_t resource_class = 0;
uint32_t has_sampler = 0; uint32_t has_sampler = 0;
uint32_t dxil_stages = 0; uint32_t dxil_stages = 0;
// Left column: removed per-uniform root signature locations.
// Right column: handed to insert_range() when the uniform's resource range is built.
// NOTE(review): presumably the uniform's offset inside the set's resource descriptor table - confirm.
RootSignatureLocation root_signature_locations[2]; uint32_t resource_descriptor_offset = UINT32_MAX;
// Handed to insert_range() when the uniform's sampler range is built; presumably the offset
// inside the set's sampler descriptor table - confirm.
uint32_t sampler_descriptor_offset = UINT32_MAX;
// Assigned root_params.size() right before the uniform's own root parameter is pushed.
uint32_t root_param_idx = UINT32_MAX; // Root descriptor only.
}; };
struct ReflectionSpecializationDataD3D12 { struct ReflectionSpecializationDataD3D12 {
@ -116,6 +120,7 @@ protected:
void *lib_d3d12 = nullptr; void *lib_d3d12 = nullptr;
ReflectionDataD3D12 reflection_data_d3d12; ReflectionDataD3D12 reflection_data_d3d12;
Vector<ReflectionBindingSetDataD3D12> reflection_binding_set_data_d3d12;
Vector<ReflectionBindingDataD3D12> reflection_binding_set_uniforms_data_d3d12; Vector<ReflectionBindingDataD3D12> reflection_binding_set_uniforms_data_d3d12;
Vector<ReflectionSpecializationDataD3D12> reflection_specialization_data_d3d12; Vector<ReflectionSpecializationDataD3D12> reflection_specialization_data_d3d12;
Vector<uint8_t> root_signature_bytes; Vector<uint8_t> root_signature_bytes;
@ -154,6 +159,7 @@ public:
uint32_t spirv_specialization_constants_ids_mask = 0; uint32_t spirv_specialization_constants_ids_mask = 0;
uint32_t dxil_push_constant_stages = 0; uint32_t dxil_push_constant_stages = 0;
uint32_t nir_runtime_data_root_param_idx = 0; uint32_t nir_runtime_data_root_param_idx = 0;
Vector<ReflectionBindingSetDataD3D12> reflection_binding_sets_d3d12;
Vector<Vector<ReflectionBindingDataD3D12>> reflection_binding_set_uniforms_d3d12; Vector<Vector<ReflectionBindingDataD3D12>> reflection_binding_set_uniforms_d3d12;
Vector<ReflectionSpecializationDataD3D12> reflection_specialization_data_d3d12; Vector<ReflectionSpecializationDataD3D12> reflection_specialization_data_d3d12;
Vector<uint8_t> root_signature_bytes; Vector<uint8_t> root_signature_bytes;