Refactor descriptor heaps in D3D12 driver.

This commit is contained in:
Skyth 2025-11-24 18:42:11 +03:00
parent 9f5309a2a4
commit a8d3ecec13
6 changed files with 640 additions and 1138 deletions

View file

@ -1813,12 +1813,10 @@ ProjectSettings::ProjectSettings() {
GLOBAL_DEF(PropertyInfo(Variant::FLOAT, "rendering/rendering_device/pipeline_cache/save_chunk_size_mb", PROPERTY_HINT_RANGE, "0.000001,64.0,0.001,or_greater"), 3.0);
GLOBAL_DEF(PropertyInfo(Variant::INT, "rendering/rendering_device/vulkan/max_descriptors_per_pool", PROPERTY_HINT_RANGE, "1,256,1,or_greater"), 64);
GLOBAL_DEF_RST("rendering/rendering_device/d3d12/max_resource_descriptors_per_frame", 16384);
custom_prop_info["rendering/rendering_device/d3d12/max_resource_descriptors_per_frame"] = PropertyInfo(Variant::INT, "rendering/rendering_device/d3d12/max_resource_descriptors_per_frame", PROPERTY_HINT_RANGE, "512,262144");
GLOBAL_DEF_RST("rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame", 1024);
custom_prop_info["rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame"] = PropertyInfo(Variant::INT, "rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame", PROPERTY_HINT_RANGE, "256,2048");
GLOBAL_DEF_RST("rendering/rendering_device/d3d12/max_misc_descriptors_per_frame", 512);
custom_prop_info["rendering/rendering_device/d3d12/max_misc_descriptors_per_frame"] = PropertyInfo(Variant::INT, "rendering/rendering_device/d3d12/max_misc_descriptors_per_frame", PROPERTY_HINT_RANGE, "32,4096");
GLOBAL_DEF_RST("rendering/rendering_device/d3d12/max_resource_descriptors", 65536);
custom_prop_info["rendering/rendering_device/d3d12/max_resource_descriptors"] = PropertyInfo(Variant::INT, "rendering/rendering_device/d3d12/max_resource_descriptors", PROPERTY_HINT_RANGE, "512,1000000");
GLOBAL_DEF_RST("rendering/rendering_device/d3d12/max_sampler_descriptors", 1024);
custom_prop_info["rendering/rendering_device/d3d12/max_sampler_descriptors"] = PropertyInfo(Variant::INT, "rendering/rendering_device/d3d12/max_sampler_descriptors", PROPERTY_HINT_RANGE, "256,2048");
// The default value must match the minor part of the Agility SDK version
// installed by the scripts provided in the repository

View file

@ -3256,16 +3256,12 @@
<member name="rendering/rendering_device/d3d12/agility_sdk_version" type="int" setter="" getter="" default="613">
Version code of the [url=https://devblogs.microsoft.com/directx/directx12agility/]Direct3D 12 Agility SDK[/url] to use ([code]D3D12SDKVersion[/code]). This must match the [i]minor[/i] version that is installed next to the editor binary and in the export templates directory for the current editor version. For example, if you have [code]1.613.3[/code] installed, you need to input [code]613[/code] here.
</member>
<member name="rendering/rendering_device/d3d12/max_misc_descriptors_per_frame" type="int" setter="" getter="" default="512">
The number of entries in the miscellaneous descriptors heap the Direct3D 12 rendering driver uses each frame, used for various operations like clearing a texture.
<member name="rendering/rendering_device/d3d12/max_resource_descriptors" type="int" setter="" getter="" default="65536">
The number of entries in the resource descriptor heap the Direct3D 12 rendering driver uses for most rendering operations.
Depending on the complexity of scenes, this value may be lowered or may need to be raised.
</member>
<member name="rendering/rendering_device/d3d12/max_resource_descriptors_per_frame" type="int" setter="" getter="" default="16384">
The number of entries in the resource descriptors heap the Direct3D 12 rendering driver uses each frame, used for most rendering operations.
Depending on the complexity of scenes, this value may be lowered or may need to be raised.
</member>
<member name="rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame" type="int" setter="" getter="" default="1024">
The number of entries in the sampler descriptors heap the Direct3D 12 rendering driver uses each frame, used for most rendering operations.
<member name="rendering/rendering_device/d3d12/max_sampler_descriptors" type="int" setter="" getter="" default="1024">
The number of entries in the sampler descriptor heap the Direct3D 12 rendering driver uses for most rendering operations.
Depending on the complexity of scenes, this value may be lowered or may need to be raised.
</member>
<member name="rendering/rendering_device/driver" type="String" setter="" getter="" default="&quot;vulkan&quot;">

File diff suppressed because it is too large Load diff

View file

@ -33,6 +33,7 @@
#include "core/templates/a_hash_map.h"
#include "core/templates/hash_map.h"
#include "core/templates/paged_allocator.h"
#include "core/templates/rb_map.h"
#include "core/templates/self_list.h"
#include "rendering_shader_container_d3d12.h"
#include "servers/rendering/rendering_device_driver.h"
@ -71,6 +72,7 @@ class RenderingContextDriverD3D12;
namespace D3D12MA {
class Allocation;
class Allocator;
class VirtualBlock;
}; // namespace D3D12MA
struct IDXGIAdapter;
@ -143,106 +145,51 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
String pipeline_cache_id;
D3D12_HEAP_TYPE dynamic_persistent_upload_heap = D3D12_HEAP_TYPE_UPLOAD;
class CPUDescriptorsHeapPool;
struct CPUDescriptorsHeapHandle {
Microsoft::WRL::ComPtr<ID3D12DescriptorHeap> heap;
CPUDescriptorsHeapPool *pool = nullptr;
uint32_t offset = 0;
uint32_t base_offset = 0;
uint32_t count = 0;
uint32_t nonce = 0;
uint32_t global_offset() const { return offset + base_offset; }
};
class CPUDescriptorsHeapPool {
Mutex mutex;
struct FreeBlockInfo {
Microsoft::WRL::ComPtr<ID3D12DescriptorHeap> heap;
uint32_t global_offset = 0; // Global offset in an address space shared by all the heaps.
uint32_t base_offset = 0; // The offset inside the space of this heap.
uint32_t size = 0;
uint32_t nonce = 0;
struct DescriptorHeap {
struct Allocation {
uint64_t virtual_alloc_handle = {}; // This is the handle value in "D3D12MA::VirtualAllocation".
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle = {};
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle = {};
};
struct FreeBlockSortIndexSort {
_FORCE_INLINE_ bool operator()(const uint32_t &p_l, const uint32_t &p_r) const {
return p_l > p_r;
}
Microsoft::WRL::ComPtr<ID3D12DescriptorHeap> heap;
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle = {};
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle = {};
uint32_t increment_size = 0;
Microsoft::WRL::ComPtr<D3D12MA::VirtualBlock> virtual_block;
Error initialize(ID3D12Device *p_device, D3D12_DESCRIPTOR_HEAP_TYPE p_type, uint32_t p_num_descriptors, bool p_shader_visible);
Error allocate(uint32_t p_descriptor_count, Allocation &r_allocation);
void free(const Allocation &p_allocation);
};
// Some IHVs do not allow creating descriptor heaps beyond a certain limit, so they must be pooled.
struct CPUDescriptorHeapPool {
struct Allocation : DescriptorHeap::Allocation {
uint32_t heap_index = UINT_MAX;
};
typedef RBMap<uint32_t, FreeBlockInfo> OffsetTableType;
typedef RBMap<uint32_t, List<uint32_t>, FreeBlockSortIndexSort> SizeTableType;
BinaryMutex mutex;
LocalVector<DescriptorHeap> heaps;
OffsetTableType free_blocks_by_offset;
SizeTableType free_blocks_by_size;
uint32_t current_offset = 0;
uint32_t current_nonce = 0;
D3D12_DESCRIPTOR_HEAP_TYPE type = {};
uint32_t increment_size = 0;
void add_to_size_map(const FreeBlockInfo &p_block);
void remove_from_size_map(const FreeBlockInfo &p_block);
void verify();
void initialize(ID3D12Device *p_device, D3D12_DESCRIPTOR_HEAP_TYPE p_type);
public:
Error allocate(ID3D12Device *p_device, const D3D12_DESCRIPTOR_HEAP_DESC &p_desc, CPUDescriptorsHeapHandle &r_result);
Error release(const CPUDescriptorsHeapHandle &p_result);
Error allocate(uint32_t p_descriptor_count, ID3D12Device *p_device, Allocation &r_allocation);
void free(const Allocation &p_allocation);
};
class CPUDescriptorsHeapPools {
CPUDescriptorsHeapPool pools[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES];
DescriptorHeap resource_descriptor_heap;
DescriptorHeap sampler_descriptor_heap;
CPUDescriptorHeapPool resource_descriptor_heap_pool;
CPUDescriptorHeapPool rtv_descriptor_heap_pool;
CPUDescriptorHeapPool dsv_descriptor_heap_pool;
public:
Error allocate(ID3D12Device *p_device, const D3D12_DESCRIPTOR_HEAP_DESC &p_desc, CPUDescriptorsHeapHandle &r_result);
};
struct CPUDescriptorsHeapWalker {
uint32_t handle_size = 0;
uint32_t handle_count = 0;
D3D12_CPU_DESCRIPTOR_HANDLE first_cpu_handle = {};
uint32_t handle_index = 0;
D3D12_CPU_DESCRIPTOR_HANDLE get_curr_cpu_handle();
_FORCE_INLINE_ void rewind() { handle_index = 0; }
void advance(uint32_t p_count = 1);
uint32_t get_current_handle_index() const { return handle_index; }
uint32_t get_free_handles() { return handle_count - handle_index; }
bool is_at_eof() { return handle_index == handle_count; }
};
struct GPUDescriptorsHeapWalker : CPUDescriptorsHeapWalker {
D3D12_GPU_DESCRIPTOR_HANDLE first_gpu_handle = {};
D3D12_GPU_DESCRIPTOR_HANDLE get_curr_gpu_handle();
};
class CPUDescriptorsHeap {
D3D12_DESCRIPTOR_HEAP_DESC desc = {};
CPUDescriptorsHeapHandle handle;
uint32_t handle_size = 0;
public:
CPUDescriptorsHeap() = default;
Error allocate(RenderingDeviceDriverD3D12 *p_driver, D3D12_DESCRIPTOR_HEAP_TYPE p_type, uint32_t p_descriptor_count);
uint32_t get_descriptor_count() const { return desc.NumDescriptors; }
~CPUDescriptorsHeap();
CPUDescriptorsHeapWalker make_walker() const;
};
class GPUDescriptorsHeap {
D3D12_DESCRIPTOR_HEAP_DESC desc = {};
Microsoft::WRL::ComPtr<ID3D12DescriptorHeap> heap;
uint32_t handle_size = 0;
public:
Error allocate(RenderingDeviceDriverD3D12 *p_device, D3D12_DESCRIPTOR_HEAP_TYPE p_type, uint32_t p_descriptor_count);
uint32_t get_descriptor_count() const { return desc.NumDescriptors; }
ID3D12DescriptorHeap *get_heap() const { return heap.Get(); }
GPUDescriptorsHeapWalker make_walker() const;
};
CPUDescriptorsHeapPools cpu_descriptor_pool;
CPUDescriptorHeapPool::Allocation null_rtv_alloc;
struct {
Microsoft::WRL::ComPtr<ID3D12CommandSignature> draw;
@ -317,10 +264,10 @@ private:
/*****************/
struct BufferInfo : public ResourceInfo {
D3D12_GPU_VIRTUAL_ADDRESS gpu_virtual_address = {};
DataFormat texel_format = DATA_FORMAT_MAX;
uint64_t size = 0;
struct {
bool usable_as_uav : 1;
bool is_dynamic : 1; // Only used for tracking (e.g. Vulkan needs these checks).
} flags = {};
@ -407,6 +354,13 @@ public:
private:
LocalVector<D3D12_SAMPLER_DESC> samplers;
// A descriptor heap allocation extended with a lookup key and a use counter.
// Instances are stored in `sampler_descriptor_heap_allocations` (an RBMap keyed by uint32_t)
// and are referenced by pointer from `UniformSetInfo::sampler_descriptor_heap_alloc`.
// NOTE(review): presumably one allocation is shared by every uniform set that binds the same
// samplers and `use_count` tracks how many sets reference it; confirm against the implementation.
struct SamplerDescriptorHeapAllocation : DescriptorHeap::Allocation {
// Presumably the key this allocation is registered under in the map; TODO confirm.
uint32_t key = 0;
// Starts at 1, i.e. a freshly created allocation already counts one user.
uint32_t use_count = 1;
};
RBMap<uint32_t, SamplerDescriptorHeapAllocation> sampler_descriptor_heap_allocations;
public:
virtual SamplerID sampler_create(const SamplerState &p_state) final override;
virtual void sampler_free(SamplerID p_sampler) final override;
@ -551,6 +505,9 @@ private:
LocalVector<D3D12_RESOURCE_BARRIER> res_barriers;
uint32_t res_barriers_count = 0;
uint32_t res_barriers_batch = 0;
CPUDescriptorHeapPool::Allocation uav_alloc;
CPUDescriptorHeapPool::Allocation rtv_alloc;
};
public:
@ -596,9 +553,10 @@ private:
struct FramebufferInfo {
bool is_screen = false;
Size2i size;
TightLocalVector<uint32_t> attachments_handle_inds; // RTV heap index for color; DSV heap index for DSV.
CPUDescriptorsHeap rtv_heap;
CPUDescriptorsHeap dsv_heap; // Used only for depth-stencil attachments.
CPUDescriptorHeapPool::Allocation rtv_alloc;
CPUDescriptorHeapPool::Allocation dsv_alloc; // Used only for depth-stencil attachments.
TightLocalVector<TextureID> attachments; // Color and depth-stencil. Used if not screen.
TextureID vrs_attachment;
@ -645,25 +603,18 @@ private:
ResourceClass res_class = RES_CLASS_INVALID;
UniformType type = UNIFORM_TYPE_MAX;
uint32_t length = UINT32_MAX;
#ifdef DEV_ENABLED
bool writable = false;
#endif
struct RootSignatureLocation {
uint32_t root_param_idx = UINT32_MAX;
uint32_t range_idx = UINT32_MAX;
};
struct {
RootSignatureLocation resource;
RootSignatureLocation sampler;
} root_sig_locations;
uint32_t resource_descriptor_offset = UINT32_MAX;
uint32_t sampler_descriptor_offset = UINT32_MAX;
uint32_t root_param_idx = UINT32_MAX;
};
struct UniformSet {
TightLocalVector<UniformBindingInfo> bindings;
struct {
uint32_t resources = 0;
uint32_t samplers = 0;
} num_root_params;
uint32_t resource_root_param_idx = UINT32_MAX;
uint32_t resource_descriptor_count = 0;
uint32_t sampler_root_param_idx = UINT32_MAX;
uint32_t sampler_descriptor_count = 0;
};
TightLocalVector<UniformSet> sets;
@ -701,16 +652,16 @@ public:
/*********************/
private:
struct RootDescriptorTable {
uint32_t root_param_idx = UINT32_MAX;
D3D12_GPU_DESCRIPTOR_HANDLE start_gpu_handle = {};
};
struct UniformSetInfo {
struct {
CPUDescriptorsHeap resources;
CPUDescriptorsHeap samplers;
} desc_heaps;
DescriptorHeap::Allocation resource_descriptor_heap_alloc;
SamplerDescriptorHeapAllocation *sampler_descriptor_heap_alloc = nullptr;
struct DynamicBuffer {
BufferDynamicInfo const *info = nullptr;
uint32_t binding = UINT_MAX;
};
TightLocalVector<DynamicBuffer> dynamic_buffers;
struct StateRequirement {
ResourceInfo *resource = nullptr;
@ -718,29 +669,8 @@ private:
D3D12_RESOURCE_STATES states = {};
uint64_t shader_uniform_idx_mask = 0;
};
TightLocalVector<StateRequirement> resource_states;
struct RecentBind {
uint64_t segment_serial = 0;
uint32_t dynamic_state_mask = 0;
uint32_t root_signature_crc = 0;
struct {
TightLocalVector<RootDescriptorTable> resources;
TightLocalVector<RootDescriptorTable> samplers;
} root_tables;
int uses = 0;
} recent_binds[4]; // A better amount may be empirically found.
TightLocalVector<BufferDynamicInfo const *, uint32_t> dynamic_buffers;
#ifdef DEV_ENABLED
// Filthy, but useful for dev.
struct ResourceDescInfo {
D3D12_DESCRIPTOR_RANGE_TYPE type;
D3D12_SRV_DIMENSION srv_dimension;
};
TightLocalVector<ResourceDescInfo> resources_desc_info;
#endif
};
public:
@ -754,7 +684,7 @@ public:
private:
void _command_check_descriptor_sets(CommandBufferID p_cmd_buffer);
void _command_bind_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index, uint32_t p_dynamic_offsets, bool p_for_compute);
DescriptorHeap::Allocation _command_allocate_per_frame_descriptor();
public:
/******************/
@ -942,35 +872,12 @@ public:
/********************/
private:
struct FrameInfo {
struct {
GPUDescriptorsHeap resources;
GPUDescriptorsHeap samplers;
CPUDescriptorsHeap aux;
CPUDescriptorsHeap rtv;
} desc_heaps;
struct {
GPUDescriptorsHeapWalker resources;
GPUDescriptorsHeapWalker samplers;
CPUDescriptorsHeapWalker aux;
CPUDescriptorsHeapWalker rtv;
} desc_heap_walkers;
struct {
bool resources = false;
bool samplers = false;
bool aux = false;
bool rtv = false;
} desc_heaps_exhausted_reported;
CD3DX12_CPU_DESCRIPTOR_HANDLE null_rtv_handle = {}; // For [[MANUAL_SUBPASSES]].
uint32_t segment_serial = 0;
#ifdef DEV_ENABLED
uint32_t uniform_set_reused = 0;
#endif
LocalVector<DescriptorHeap::Allocation> descriptor_allocations;
uint32_t descriptor_allocation_count = 0;
};
TightLocalVector<FrameInfo> frames;
uint32_t frame_idx = 0;
uint32_t frames_drawn = 0;
uint32_t segment_serial = 0;
bool segment_begun = false;
HashMap<uint64_t, bool> has_comp_alpha;

View file

@ -194,7 +194,11 @@ uint32_t RenderingShaderContainerD3D12::_format_version() const {
// Reads the D3D12-specific reflection payload from `p_bytes`.
//
// Layout: one `ReflectionDataD3D12` header immediately followed by
// `reflection_data.set_count` tightly packed `ReflectionBindingSetDataD3D12` records.
// This must mirror what `_to_bytes_reflection_extra_data()` writes.
//
// Returns the number of bytes consumed (header plus all per-set records).
uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_extra_data(const uint8_t *p_bytes) {
	reflection_data_d3d12 = *(const ReflectionDataD3D12 *)(p_bytes);

	// No early return here: the per-set records that follow the header must be read as well,
	// otherwise the caller would resume parsing in the middle of this payload.
	reflection_binding_set_data_d3d12.resize(reflection_data.set_count);
	for (uint32_t i = 0; i < reflection_binding_set_data_d3d12.size(); i++) {
		reflection_binding_set_data_d3d12.ptrw()[i] = *(const ReflectionBindingSetDataD3D12 *)(p_bytes + sizeof(ReflectionDataD3D12) + (i * sizeof(ReflectionBindingSetDataD3D12)));
	}

	return sizeof(ReflectionDataD3D12) + (reflection_binding_set_data_d3d12.size() * sizeof(ReflectionBindingSetDataD3D12));
}
uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) {
@ -228,9 +232,12 @@ uint32_t RenderingShaderContainerD3D12::_from_bytes_footer_extra_data(const uint
// Writes the D3D12-specific reflection payload to `p_bytes`.
//
// Layout: one `ReflectionDataD3D12` header immediately followed by one tightly packed
// `ReflectionBindingSetDataD3D12` record per entry of `reflection_binding_set_data_d3d12`.
// This must mirror what `_from_bytes_reflection_extra_data()` reads.
//
// When `p_bytes` is null nothing is written and only the required size is computed.
// Returns the number of bytes the payload occupies (header plus all per-set records).
uint32_t RenderingShaderContainerD3D12::_to_bytes_reflection_extra_data(uint8_t *p_bytes) const {
	if (p_bytes != nullptr) {
		*(ReflectionDataD3D12 *)(p_bytes) = reflection_data_d3d12;

		for (uint32_t i = 0; i < reflection_binding_set_data_d3d12.size(); i++) {
			*(ReflectionBindingSetDataD3D12 *)(p_bytes + sizeof(ReflectionDataD3D12) + (i * sizeof(ReflectionBindingSetDataD3D12))) = reflection_binding_set_data_d3d12[i];
		}
	}

	// The reported size has to include the per-set records written above; returning only the
	// header size would make the caller under-allocate and misplace the data that follows.
	return sizeof(ReflectionDataD3D12) + (reflection_binding_set_data_d3d12.size() * sizeof(ReflectionBindingSetDataD3D12));
}
uint32_t RenderingShaderContainerD3D12::_to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const {
@ -269,14 +276,10 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span<ReflectShaderStag
dxil_runtime_conf.push_constant_cbv.base_shader_register = ROOT_CONSTANT_REGISTER;
dxil_runtime_conf.zero_based_vertex_instance_id = true;
dxil_runtime_conf.zero_based_compute_workgroup_id = true;
dxil_runtime_conf.declared_read_only_images_as_srvs = true;
// Making this explicit to let maintainers know that in practice this didn't improve performance,
// probably because data generated by one shader and consumed by another one forces the resource
// to transition from UAV to SRV, and back, instead of being an UAV all the time.
// In case someone wants to try, care must be taken so in case of incompatible bindings across stages
// happen as a result, all the stages are re-translated. That can happen if, for instance, a stage only
// uses an allegedly writable resource only for reading but the next stage doesn't.
// Explicitly keeping these false because converting UAV descriptors to SRVs does not seem to have real performance benefits on desktop GPUs.
// It also makes it easier to implement descriptor heaps and enhanced barriers.
dxil_runtime_conf.declared_read_only_images_as_srvs = false;
dxil_runtime_conf.inferred_read_only_images_as_srvs = false;
// Translate SPIR-V to NIR.
@ -482,7 +485,7 @@ bool RenderingShaderContainerD3D12::_generate_root_signature(BitField<RenderingD
struct TraceableDescriptorTable {
uint32_t stages_mask = {};
Vector<D3D12_DESCRIPTOR_RANGE1> ranges;
Vector<RootSignatureLocation *> root_signature_locations;
uint32_t set = UINT_MAX;
};
uint32_t binding_start = 0;
@ -495,31 +498,35 @@ bool RenderingShaderContainerD3D12::_generate_root_signature(BitField<RenderingD
for (uint32_t j = 0; j < uniform_count; j++) {
const ReflectionBindingData &uniform = reflection_binding_set_uniforms_data[binding_start + j];
ReflectionBindingDataD3D12 &uniform_d3d12 = reflection_binding_set_uniforms_data_d3d12.ptrw()[binding_start + j];
bool really_used = uniform_d3d12.dxil_stages != 0;
#ifdef DEV_ENABLED
bool really_used = uniform_d3d12.dxil_stages != 0;
bool anybody_home = (ResourceClass)(uniform_d3d12.resource_class) != RES_CLASS_INVALID || uniform_d3d12.has_sampler;
DEV_ASSERT(anybody_home == really_used);
#endif
if (!really_used) {
continue; // Existed in SPIR-V; went away in DXIL.
}
auto insert_range = [](D3D12_DESCRIPTOR_RANGE_TYPE p_range_type,
auto insert_range = [i](D3D12_DESCRIPTOR_RANGE_TYPE p_range_type,
uint32_t p_num_descriptors,
uint32_t p_dxil_register,
uint32_t p_dxil_stages_mask,
RootSignatureLocation *p_root_sig_locations,
Vector<TraceableDescriptorTable> &r_tables,
bool &r_first_in_set) {
uint32_t &r_descriptor_offset,
uint32_t &r_descriptor_count,
bool &r_first_in_set,
Vector<TraceableDescriptorTable> &r_tables) {
r_descriptor_offset = r_descriptor_count;
if (r_first_in_set) {
r_tables.resize(r_tables.size() + 1);
r_first_in_set = false;
}
TraceableDescriptorTable &table = r_tables.write[r_tables.size() - 1];
DEV_ASSERT(table.set == UINT_MAX || table.set == i);
table.stages_mask |= p_dxil_stages_mask;
table.set = i;
CD3DX12_DESCRIPTOR_RANGE1 range;
// Due to the aliasing hack for SRV-UAV of different families,
// we can be causing an unintended change of data (sometimes the validation layers catch it).
D3D12_DESCRIPTOR_RANGE_FLAGS flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE;
@ -528,79 +535,130 @@ bool RenderingShaderContainerD3D12::_generate_root_signature(BitField<RenderingD
} else if (p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_CBV) {
flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE;
}
range.Init(p_range_type, p_num_descriptors, p_dxil_register, 0, flags);
range.Init(p_range_type, p_num_descriptors, p_dxil_register, 0, flags, r_descriptor_offset);
r_descriptor_count += p_num_descriptors;
table.ranges.push_back(range);
table.root_signature_locations.push_back(p_root_sig_locations);
};
D3D12_DESCRIPTOR_RANGE_TYPE range_type = (D3D12_DESCRIPTOR_RANGE_TYPE)UINT_MAX;
bool has_sampler = false;
uint32_t num_descriptors = 1;
D3D12_DESCRIPTOR_RANGE_TYPE resource_range_type = {};
switch ((ResourceClass)(uniform_d3d12.resource_class)) {
case RES_CLASS_INVALID: {
switch (uniform.type) {
case RDC::UNIFORM_TYPE_SAMPLER: {
has_sampler = true;
num_descriptors = uniform.length;
DEV_ASSERT(uniform_d3d12.has_sampler);
} break;
case RES_CLASS_CBV: {
resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
DEV_ASSERT(!uniform_d3d12.has_sampler);
case RDC::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
has_sampler = true;
num_descriptors = MAX(1u, uniform.length);
} break;
case RES_CLASS_SRV: {
resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
num_descriptors = MAX(1u, uniform.length); // An unbound R/O buffer is reflected as zero-size.
case RDC::UNIFORM_TYPE_TEXTURE: {
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
num_descriptors = MAX(1u, uniform.length);
} break;
case RES_CLASS_UAV: {
resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
num_descriptors = MAX(1u, uniform.length); // An unbound R/W buffer is reflected as zero-size.
DEV_ASSERT(!uniform_d3d12.has_sampler);
case RDC::UNIFORM_TYPE_IMAGE: {
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
num_descriptors = MAX(1u, uniform.length);
} break;
case RDC::UNIFORM_TYPE_TEXTURE_BUFFER: {
CRASH_NOW_MSG("Unimplemented!");
} break;
case RDC::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {
CRASH_NOW_MSG("Unimplemented!");
} break;
case RDC::UNIFORM_TYPE_IMAGE_BUFFER: {
CRASH_NOW_MSG("Unimplemented!");
} break;
case RDC::UNIFORM_TYPE_UNIFORM_BUFFER: {
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
} break;
case RDC::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: {
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
} break;
case RDC::UNIFORM_TYPE_STORAGE_BUFFER: {
range_type = uniform.writable ? D3D12_DESCRIPTOR_RANGE_TYPE_UAV : D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
} break;
case RDC::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
range_type = uniform.writable ? D3D12_DESCRIPTOR_RANGE_TYPE_UAV : D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
} break;
case RDC::UNIFORM_TYPE_INPUT_ATTACHMENT: {
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
} break;
default: {
DEV_ASSERT(false);
}
}
uint32_t dxil_register = i * GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER + uniform.binding * GODOT_NIR_BINDING_MULTIPLIER;
if (uniform_d3d12.resource_class != RES_CLASS_INVALID) {
insert_range(
resource_range_type,
num_descriptors,
dxil_register,
uniform_d3d12.dxil_stages,
&uniform_d3d12.root_signature_locations[RS_LOC_TYPE_RESOURCE],
resource_tables_maps,
first_resource_in_set);
if (range_type != (D3D12_DESCRIPTOR_RANGE_TYPE)UINT_MAX) {
// Dynamic buffers are converted to root descriptors to prevent copying descriptors during command recording.
// Out of bounds accesses are not a concern because that's already undefined behavior on Vulkan.
if (uniform.type == RDC::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC || uniform.type == RDC::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC) {
CD3DX12_ROOT_PARAMETER1 root_param = {};
D3D12_SHADER_VISIBILITY visibility = stages_to_d3d12_visibility(uniform.stages);
switch (range_type) {
case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: {
root_param.InitAsConstantBufferView(dxil_register, 0, D3D12_ROOT_DESCRIPTOR_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE, visibility);
} break;
case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: {
root_param.InitAsShaderResourceView(dxil_register, 0, D3D12_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE, visibility);
} break;
case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: {
root_param.InitAsUnorderedAccessView(dxil_register, 0, D3D12_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE, visibility);
} break;
default: {
DEV_ASSERT(false && "Unrecognized range type.");
} break;
}
uniform_d3d12.root_param_idx = root_params.size();
root_params.push_back(root_param);
} else {
insert_range(
range_type,
num_descriptors,
dxil_register,
uniform.stages,
uniform_d3d12.resource_descriptor_offset,
reflection_binding_set_data_d3d12.ptrw()[i].resource_descriptor_count,
first_resource_in_set,
resource_tables_maps);
}
}
if (uniform_d3d12.has_sampler) {
if (has_sampler) {
insert_range(
D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER,
num_descriptors,
dxil_register,
uniform_d3d12.dxil_stages,
&uniform_d3d12.root_signature_locations[RS_LOC_TYPE_SAMPLER],
sampler_tables_maps,
first_sampler_in_set);
uniform.stages,
uniform_d3d12.sampler_descriptor_offset,
reflection_binding_set_data_d3d12.ptrw()[i].sampler_descriptor_count,
first_sampler_in_set,
sampler_tables_maps);
}
}
binding_start += uniform_count;
}
auto make_descriptor_tables = [&root_params](const Vector<TraceableDescriptorTable> &p_tables) {
for (const TraceableDescriptorTable &table : p_tables) {
D3D12_SHADER_VISIBILITY visibility = stages_to_d3d12_visibility(table.stages_mask);
DEV_ASSERT(table.ranges.size() == table.root_signature_locations.size());
for (int i = 0; i < table.ranges.size(); i++) {
// By now we know very well which root signature location corresponds to the pointed uniform.
table.root_signature_locations[i]->root_param_index = root_params.size();
table.root_signature_locations[i]->range_index = i;
}
for (const TraceableDescriptorTable &table : resource_tables_maps) {
CD3DX12_ROOT_PARAMETER1 root_table = {};
root_table.InitAsDescriptorTable(table.ranges.size(), table.ranges.ptr(), stages_to_d3d12_visibility(table.stages_mask));
reflection_binding_set_data_d3d12.ptrw()[table.set].resource_root_param_idx = root_params.size();
root_params.push_back(root_table);
}
CD3DX12_ROOT_PARAMETER1 root_table;
root_table.InitAsDescriptorTable(table.ranges.size(), table.ranges.ptr(), visibility);
root_params.push_back(root_table);
}
};
make_descriptor_tables(resource_tables_maps);
make_descriptor_tables(sampler_tables_maps);
for (const TraceableDescriptorTable &table : sampler_tables_maps) {
CD3DX12_ROOT_PARAMETER1 root_table = {};
root_table.InitAsDescriptorTable(table.ranges.size(), table.ranges.ptr(), stages_to_d3d12_visibility(table.stages_mask));
reflection_binding_set_data_d3d12.ptrw()[table.set].sampler_root_param_idx = root_params.size();
root_params.push_back(root_table);
}
CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {};
D3D12_ROOT_SIGNATURE_FLAGS root_sig_flags =
@ -755,6 +813,7 @@ void RenderingShaderContainerD3D12::_nir_report_bitcode_bit_offset(uint64_t p_bi
#endif
void RenderingShaderContainerD3D12::_set_from_shader_reflection_post(const ReflectShader &p_shader) {
reflection_binding_set_data_d3d12.resize(reflection_binding_set_uniforms_count.size());
reflection_binding_set_uniforms_data_d3d12.resize(reflection_binding_set_uniforms_data.size());
reflection_specialization_data_d3d12.resize(reflection_specialization_data.size());
@ -841,6 +900,7 @@ RenderingShaderContainerD3D12::ShaderReflectionD3D12 RenderingShaderContainerD3D
reflection.spirv_specialization_constants_ids_mask = reflection_data_d3d12.spirv_specialization_constants_ids_mask;
reflection.dxil_push_constant_stages = reflection_data_d3d12.dxil_push_constant_stages;
reflection.nir_runtime_data_root_param_idx = reflection_data_d3d12.nir_runtime_data_root_param_idx;
reflection.reflection_binding_sets_d3d12 = reflection_binding_set_data_d3d12;
reflection.reflection_specialization_data_d3d12 = reflection_specialization_data_d3d12;
reflection.root_signature_bytes = root_signature_bytes;
reflection.root_signature_crc = root_signature_crc;

View file

@ -86,16 +86,20 @@ public:
2, // SHADER_STAGE_COMPUTE
};
struct RootSignatureLocation {
uint32_t root_param_index = UINT32_MAX;
uint32_t range_index = UINT32_MAX;
// Per-descriptor-set D3D12 reflection data, one record per set.
// Records are copied as raw structs to and from the serialized shader container right after
// `ReflectionDataD3D12`, so changing the fields or their order changes the binary layout.
struct ReflectionBindingSetDataD3D12 {
// Root parameter index of this set's resource (CBV/SRV/UAV) descriptor table, assigned while
// the root signature is generated. Stays UINT32_MAX if no such table is emitted for the set.
uint32_t resource_root_param_idx = UINT32_MAX;
// Running total of descriptors added to the set's resource table; the value before each
// addition is used as that range's offset from the start of the table.
uint32_t resource_descriptor_count = 0;
// Root parameter index of this set's sampler descriptor table. Stays UINT32_MAX if no such
// table is emitted for the set.
uint32_t sampler_root_param_idx = UINT32_MAX;
// Running total of descriptors added to the set's sampler table; used for range offsets the
// same way as `resource_descriptor_count`.
uint32_t sampler_descriptor_count = 0;
};
struct ReflectionBindingDataD3D12 {
uint32_t resource_class = 0;
uint32_t has_sampler = 0;
uint32_t dxil_stages = 0;
RootSignatureLocation root_signature_locations[2];
uint32_t resource_descriptor_offset = UINT32_MAX;
uint32_t sampler_descriptor_offset = UINT32_MAX;
uint32_t root_param_idx = UINT32_MAX; // Root descriptor only.
};
struct ReflectionSpecializationDataD3D12 {
@ -116,6 +120,7 @@ protected:
void *lib_d3d12 = nullptr;
ReflectionDataD3D12 reflection_data_d3d12;
Vector<ReflectionBindingSetDataD3D12> reflection_binding_set_data_d3d12;
Vector<ReflectionBindingDataD3D12> reflection_binding_set_uniforms_data_d3d12;
Vector<ReflectionSpecializationDataD3D12> reflection_specialization_data_d3d12;
Vector<uint8_t> root_signature_bytes;
@ -154,6 +159,7 @@ public:
uint32_t spirv_specialization_constants_ids_mask = 0;
uint32_t dxil_push_constant_stages = 0;
uint32_t nir_runtime_data_root_param_idx = 0;
Vector<ReflectionBindingSetDataD3D12> reflection_binding_sets_d3d12;
Vector<Vector<ReflectionBindingDataD3D12>> reflection_binding_set_uniforms_d3d12;
Vector<ReflectionSpecializationDataD3D12> reflection_specialization_data_d3d12;
Vector<uint8_t> root_signature_bytes;