mirror of
https://github.com/godotengine/godot.git
synced 2025-12-07 22:00:10 +00:00
Refactor descriptor heaps in D3D12 driver.
This commit is contained in:
parent
9f5309a2a4
commit
a8d3ecec13
6 changed files with 640 additions and 1138 deletions
|
|
@ -1813,12 +1813,10 @@ ProjectSettings::ProjectSettings() {
|
|||
GLOBAL_DEF(PropertyInfo(Variant::FLOAT, "rendering/rendering_device/pipeline_cache/save_chunk_size_mb", PROPERTY_HINT_RANGE, "0.000001,64.0,0.001,or_greater"), 3.0);
|
||||
GLOBAL_DEF(PropertyInfo(Variant::INT, "rendering/rendering_device/vulkan/max_descriptors_per_pool", PROPERTY_HINT_RANGE, "1,256,1,or_greater"), 64);
|
||||
|
||||
GLOBAL_DEF_RST("rendering/rendering_device/d3d12/max_resource_descriptors_per_frame", 16384);
|
||||
custom_prop_info["rendering/rendering_device/d3d12/max_resource_descriptors_per_frame"] = PropertyInfo(Variant::INT, "rendering/rendering_device/d3d12/max_resource_descriptors_per_frame", PROPERTY_HINT_RANGE, "512,262144");
|
||||
GLOBAL_DEF_RST("rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame", 1024);
|
||||
custom_prop_info["rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame"] = PropertyInfo(Variant::INT, "rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame", PROPERTY_HINT_RANGE, "256,2048");
|
||||
GLOBAL_DEF_RST("rendering/rendering_device/d3d12/max_misc_descriptors_per_frame", 512);
|
||||
custom_prop_info["rendering/rendering_device/d3d12/max_misc_descriptors_per_frame"] = PropertyInfo(Variant::INT, "rendering/rendering_device/d3d12/max_misc_descriptors_per_frame", PROPERTY_HINT_RANGE, "32,4096");
|
||||
GLOBAL_DEF_RST("rendering/rendering_device/d3d12/max_resource_descriptors", 65536);
|
||||
custom_prop_info["rendering/rendering_device/d3d12/max_resource_descriptors"] = PropertyInfo(Variant::INT, "rendering/rendering_device/d3d12/max_resource_descriptors", PROPERTY_HINT_RANGE, "512,1000000");
|
||||
GLOBAL_DEF_RST("rendering/rendering_device/d3d12/max_sampler_descriptors", 1024);
|
||||
custom_prop_info["rendering/rendering_device/d3d12/max_sampler_descriptors"] = PropertyInfo(Variant::INT, "rendering/rendering_device/d3d12/max_sampler_descriptors", PROPERTY_HINT_RANGE, "256,2048");
|
||||
|
||||
// The default value must match the minor part of the Agility SDK version
|
||||
// installed by the scripts provided in the repository
|
||||
|
|
|
|||
|
|
@ -3256,16 +3256,12 @@
|
|||
<member name="rendering/rendering_device/d3d12/agility_sdk_version" type="int" setter="" getter="" default="613">
|
||||
Version code of the [url=https://devblogs.microsoft.com/directx/directx12agility/]Direct3D 12 Agility SDK[/url] to use ([code]D3D12SDKVersion[/code]). This must match the [i]minor[/i] version that is installed next to the editor binary and in the export templates directory for the current editor version. For example, if you have [code]1.613.3[/code] installed, you need to input [code]613[/code] here.
|
||||
</member>
|
||||
<member name="rendering/rendering_device/d3d12/max_misc_descriptors_per_frame" type="int" setter="" getter="" default="512">
|
||||
The number of entries in the miscellaneous descriptors heap the Direct3D 12 rendering driver uses each frame, used for various operations like clearing a texture.
|
||||
<member name="rendering/rendering_device/d3d12/max_resource_descriptors" type="int" setter="" getter="" default="65536">
|
||||
The number of entries in the resource descriptor heap the Direct3D 12 rendering driver uses for most rendering operations.
|
||||
Depending on the complexity of scenes, this value may be lowered or may need to be raised.
|
||||
</member>
|
||||
<member name="rendering/rendering_device/d3d12/max_resource_descriptors_per_frame" type="int" setter="" getter="" default="16384">
|
||||
The number of entries in the resource descriptors heap the Direct3D 12 rendering driver uses each frame, used for most rendering operations.
|
||||
Depending on the complexity of scenes, this value may be lowered or may need to be raised.
|
||||
</member>
|
||||
<member name="rendering/rendering_device/d3d12/max_sampler_descriptors_per_frame" type="int" setter="" getter="" default="1024">
|
||||
The number of entries in the sampler descriptors heap the Direct3D 12 rendering driver uses each frame, used for most rendering operations.
|
||||
<member name="rendering/rendering_device/d3d12/max_sampler_descriptors" type="int" setter="" getter="" default="1024">
|
||||
The number of entries in the sampler descriptor heap the Direct3D 12 rendering driver uses for most rendering operations.
|
||||
Depending on the complexity of scenes, this value may be lowered or may need to be raised.
|
||||
</member>
|
||||
<member name="rendering/rendering_device/driver" type="String" setter="" getter="" default="&quot;vulkan&quot;">
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -33,6 +33,7 @@
|
|||
#include "core/templates/a_hash_map.h"
|
||||
#include "core/templates/hash_map.h"
|
||||
#include "core/templates/paged_allocator.h"
|
||||
#include "core/templates/rb_map.h"
|
||||
#include "core/templates/self_list.h"
|
||||
#include "rendering_shader_container_d3d12.h"
|
||||
#include "servers/rendering/rendering_device_driver.h"
|
||||
|
|
@ -71,6 +72,7 @@ class RenderingContextDriverD3D12;
|
|||
namespace D3D12MA {
|
||||
class Allocation;
|
||||
class Allocator;
|
||||
class VirtualBlock;
|
||||
}; // namespace D3D12MA
|
||||
|
||||
struct IDXGIAdapter;
|
||||
|
|
@ -143,106 +145,51 @@ class RenderingDeviceDriverD3D12 : public RenderingDeviceDriver {
|
|||
String pipeline_cache_id;
|
||||
D3D12_HEAP_TYPE dynamic_persistent_upload_heap = D3D12_HEAP_TYPE_UPLOAD;
|
||||
|
||||
class CPUDescriptorsHeapPool;
|
||||
|
||||
struct CPUDescriptorsHeapHandle {
|
||||
Microsoft::WRL::ComPtr<ID3D12DescriptorHeap> heap;
|
||||
CPUDescriptorsHeapPool *pool = nullptr;
|
||||
uint32_t offset = 0;
|
||||
uint32_t base_offset = 0;
|
||||
uint32_t count = 0;
|
||||
uint32_t nonce = 0;
|
||||
|
||||
uint32_t global_offset() const { return offset + base_offset; }
|
||||
};
|
||||
|
||||
class CPUDescriptorsHeapPool {
|
||||
Mutex mutex;
|
||||
|
||||
struct FreeBlockInfo {
|
||||
Microsoft::WRL::ComPtr<ID3D12DescriptorHeap> heap;
|
||||
uint32_t global_offset = 0; // Global offset in an address space shared by all the heaps.
|
||||
uint32_t base_offset = 0; // The offset inside the space of this heap.
|
||||
uint32_t size = 0;
|
||||
uint32_t nonce = 0;
|
||||
struct DescriptorHeap {
|
||||
struct Allocation {
|
||||
uint64_t virtual_alloc_handle = {}; // This is the handle value in "D3D12MA::VirtualAllocation".
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle = {};
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle = {};
|
||||
};
|
||||
|
||||
struct FreeBlockSortIndexSort {
|
||||
_FORCE_INLINE_ bool operator()(const uint32_t &p_l, const uint32_t &p_r) const {
|
||||
return p_l > p_r;
|
||||
}
|
||||
Microsoft::WRL::ComPtr<ID3D12DescriptorHeap> heap;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE cpu_handle = {};
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE gpu_handle = {};
|
||||
uint32_t increment_size = 0;
|
||||
|
||||
Microsoft::WRL::ComPtr<D3D12MA::VirtualBlock> virtual_block;
|
||||
|
||||
Error initialize(ID3D12Device *p_device, D3D12_DESCRIPTOR_HEAP_TYPE p_type, uint32_t p_num_descriptors, bool p_shader_visible);
|
||||
|
||||
Error allocate(uint32_t p_descriptor_count, Allocation &r_allocation);
|
||||
void free(const Allocation &p_allocation);
|
||||
};
|
||||
|
||||
// Some IHVs do not allow creating descriptor heaps beyond a certain limit, so they must be pooled.
|
||||
struct CPUDescriptorHeapPool {
|
||||
struct Allocation : DescriptorHeap::Allocation {
|
||||
uint32_t heap_index = UINT_MAX;
|
||||
};
|
||||
|
||||
typedef RBMap<uint32_t, FreeBlockInfo> OffsetTableType;
|
||||
typedef RBMap<uint32_t, List<uint32_t>, FreeBlockSortIndexSort> SizeTableType;
|
||||
BinaryMutex mutex;
|
||||
LocalVector<DescriptorHeap> heaps;
|
||||
|
||||
OffsetTableType free_blocks_by_offset;
|
||||
SizeTableType free_blocks_by_size;
|
||||
uint32_t current_offset = 0;
|
||||
uint32_t current_nonce = 0;
|
||||
D3D12_DESCRIPTOR_HEAP_TYPE type = {};
|
||||
uint32_t increment_size = 0;
|
||||
|
||||
void add_to_size_map(const FreeBlockInfo &p_block);
|
||||
void remove_from_size_map(const FreeBlockInfo &p_block);
|
||||
void verify();
|
||||
void initialize(ID3D12Device *p_device, D3D12_DESCRIPTOR_HEAP_TYPE p_type);
|
||||
|
||||
public:
|
||||
Error allocate(ID3D12Device *p_device, const D3D12_DESCRIPTOR_HEAP_DESC &p_desc, CPUDescriptorsHeapHandle &r_result);
|
||||
Error release(const CPUDescriptorsHeapHandle &p_result);
|
||||
Error allocate(uint32_t p_descriptor_count, ID3D12Device *p_device, Allocation &r_allocation);
|
||||
void free(const Allocation &p_allocation);
|
||||
};
|
||||
|
||||
class CPUDescriptorsHeapPools {
|
||||
CPUDescriptorsHeapPool pools[D3D12_DESCRIPTOR_HEAP_TYPE_NUM_TYPES];
|
||||
DescriptorHeap resource_descriptor_heap;
|
||||
DescriptorHeap sampler_descriptor_heap;
|
||||
CPUDescriptorHeapPool resource_descriptor_heap_pool;
|
||||
CPUDescriptorHeapPool rtv_descriptor_heap_pool;
|
||||
CPUDescriptorHeapPool dsv_descriptor_heap_pool;
|
||||
|
||||
public:
|
||||
Error allocate(ID3D12Device *p_device, const D3D12_DESCRIPTOR_HEAP_DESC &p_desc, CPUDescriptorsHeapHandle &r_result);
|
||||
};
|
||||
|
||||
struct CPUDescriptorsHeapWalker {
|
||||
uint32_t handle_size = 0;
|
||||
uint32_t handle_count = 0;
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE first_cpu_handle = {};
|
||||
uint32_t handle_index = 0;
|
||||
|
||||
D3D12_CPU_DESCRIPTOR_HANDLE get_curr_cpu_handle();
|
||||
_FORCE_INLINE_ void rewind() { handle_index = 0; }
|
||||
void advance(uint32_t p_count = 1);
|
||||
uint32_t get_current_handle_index() const { return handle_index; }
|
||||
uint32_t get_free_handles() { return handle_count - handle_index; }
|
||||
bool is_at_eof() { return handle_index == handle_count; }
|
||||
};
|
||||
|
||||
struct GPUDescriptorsHeapWalker : CPUDescriptorsHeapWalker {
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE first_gpu_handle = {};
|
||||
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE get_curr_gpu_handle();
|
||||
};
|
||||
|
||||
class CPUDescriptorsHeap {
|
||||
D3D12_DESCRIPTOR_HEAP_DESC desc = {};
|
||||
CPUDescriptorsHeapHandle handle;
|
||||
uint32_t handle_size = 0;
|
||||
|
||||
public:
|
||||
CPUDescriptorsHeap() = default;
|
||||
Error allocate(RenderingDeviceDriverD3D12 *p_driver, D3D12_DESCRIPTOR_HEAP_TYPE p_type, uint32_t p_descriptor_count);
|
||||
uint32_t get_descriptor_count() const { return desc.NumDescriptors; }
|
||||
~CPUDescriptorsHeap();
|
||||
CPUDescriptorsHeapWalker make_walker() const;
|
||||
};
|
||||
|
||||
class GPUDescriptorsHeap {
|
||||
D3D12_DESCRIPTOR_HEAP_DESC desc = {};
|
||||
Microsoft::WRL::ComPtr<ID3D12DescriptorHeap> heap;
|
||||
uint32_t handle_size = 0;
|
||||
|
||||
public:
|
||||
Error allocate(RenderingDeviceDriverD3D12 *p_device, D3D12_DESCRIPTOR_HEAP_TYPE p_type, uint32_t p_descriptor_count);
|
||||
uint32_t get_descriptor_count() const { return desc.NumDescriptors; }
|
||||
ID3D12DescriptorHeap *get_heap() const { return heap.Get(); }
|
||||
GPUDescriptorsHeapWalker make_walker() const;
|
||||
};
|
||||
|
||||
CPUDescriptorsHeapPools cpu_descriptor_pool;
|
||||
CPUDescriptorHeapPool::Allocation null_rtv_alloc;
|
||||
|
||||
struct {
|
||||
Microsoft::WRL::ComPtr<ID3D12CommandSignature> draw;
|
||||
|
|
@ -317,10 +264,10 @@ private:
|
|||
/*****************/
|
||||
|
||||
struct BufferInfo : public ResourceInfo {
|
||||
D3D12_GPU_VIRTUAL_ADDRESS gpu_virtual_address = {};
|
||||
DataFormat texel_format = DATA_FORMAT_MAX;
|
||||
uint64_t size = 0;
|
||||
struct {
|
||||
bool usable_as_uav : 1;
|
||||
bool is_dynamic : 1; // Only used for tracking (e.g. Vulkan needs these checks).
|
||||
} flags = {};
|
||||
|
||||
|
|
@ -407,6 +354,13 @@ public:
|
|||
private:
|
||||
LocalVector<D3D12_SAMPLER_DESC> samplers;
|
||||
|
||||
struct SamplerDescriptorHeapAllocation : DescriptorHeap::Allocation {
|
||||
uint32_t key = 0;
|
||||
uint32_t use_count = 1;
|
||||
};
|
||||
|
||||
RBMap<uint32_t, SamplerDescriptorHeapAllocation> sampler_descriptor_heap_allocations;
|
||||
|
||||
public:
|
||||
virtual SamplerID sampler_create(const SamplerState &p_state) final override;
|
||||
virtual void sampler_free(SamplerID p_sampler) final override;
|
||||
|
|
@ -551,6 +505,9 @@ private:
|
|||
LocalVector<D3D12_RESOURCE_BARRIER> res_barriers;
|
||||
uint32_t res_barriers_count = 0;
|
||||
uint32_t res_barriers_batch = 0;
|
||||
|
||||
CPUDescriptorHeapPool::Allocation uav_alloc;
|
||||
CPUDescriptorHeapPool::Allocation rtv_alloc;
|
||||
};
|
||||
|
||||
public:
|
||||
|
|
@ -596,9 +553,10 @@ private:
|
|||
struct FramebufferInfo {
|
||||
bool is_screen = false;
|
||||
Size2i size;
|
||||
|
||||
TightLocalVector<uint32_t> attachments_handle_inds; // RTV heap index for color; DSV heap index for DSV.
|
||||
CPUDescriptorsHeap rtv_heap;
|
||||
CPUDescriptorsHeap dsv_heap; // Used only for depth-stencil attachments.
|
||||
CPUDescriptorHeapPool::Allocation rtv_alloc;
|
||||
CPUDescriptorHeapPool::Allocation dsv_alloc; // Used only for depth-stencil attachments.
|
||||
|
||||
TightLocalVector<TextureID> attachments; // Color and depth-stencil. Used if not screen.
|
||||
TextureID vrs_attachment;
|
||||
|
|
@ -645,25 +603,18 @@ private:
|
|||
ResourceClass res_class = RES_CLASS_INVALID;
|
||||
UniformType type = UNIFORM_TYPE_MAX;
|
||||
uint32_t length = UINT32_MAX;
|
||||
#ifdef DEV_ENABLED
|
||||
bool writable = false;
|
||||
#endif
|
||||
struct RootSignatureLocation {
|
||||
uint32_t root_param_idx = UINT32_MAX;
|
||||
uint32_t range_idx = UINT32_MAX;
|
||||
};
|
||||
struct {
|
||||
RootSignatureLocation resource;
|
||||
RootSignatureLocation sampler;
|
||||
} root_sig_locations;
|
||||
uint32_t resource_descriptor_offset = UINT32_MAX;
|
||||
uint32_t sampler_descriptor_offset = UINT32_MAX;
|
||||
uint32_t root_param_idx = UINT32_MAX;
|
||||
};
|
||||
|
||||
struct UniformSet {
|
||||
TightLocalVector<UniformBindingInfo> bindings;
|
||||
struct {
|
||||
uint32_t resources = 0;
|
||||
uint32_t samplers = 0;
|
||||
} num_root_params;
|
||||
uint32_t resource_root_param_idx = UINT32_MAX;
|
||||
uint32_t resource_descriptor_count = 0;
|
||||
uint32_t sampler_root_param_idx = UINT32_MAX;
|
||||
uint32_t sampler_descriptor_count = 0;
|
||||
};
|
||||
|
||||
TightLocalVector<UniformSet> sets;
|
||||
|
|
@ -701,16 +652,16 @@ public:
|
|||
/*********************/
|
||||
|
||||
private:
|
||||
struct RootDescriptorTable {
|
||||
uint32_t root_param_idx = UINT32_MAX;
|
||||
D3D12_GPU_DESCRIPTOR_HANDLE start_gpu_handle = {};
|
||||
};
|
||||
|
||||
struct UniformSetInfo {
|
||||
struct {
|
||||
CPUDescriptorsHeap resources;
|
||||
CPUDescriptorsHeap samplers;
|
||||
} desc_heaps;
|
||||
DescriptorHeap::Allocation resource_descriptor_heap_alloc;
|
||||
SamplerDescriptorHeapAllocation *sampler_descriptor_heap_alloc = nullptr;
|
||||
|
||||
struct DynamicBuffer {
|
||||
BufferDynamicInfo const *info = nullptr;
|
||||
uint32_t binding = UINT_MAX;
|
||||
};
|
||||
|
||||
TightLocalVector<DynamicBuffer> dynamic_buffers;
|
||||
|
||||
struct StateRequirement {
|
||||
ResourceInfo *resource = nullptr;
|
||||
|
|
@ -718,29 +669,8 @@ private:
|
|||
D3D12_RESOURCE_STATES states = {};
|
||||
uint64_t shader_uniform_idx_mask = 0;
|
||||
};
|
||||
|
||||
TightLocalVector<StateRequirement> resource_states;
|
||||
|
||||
struct RecentBind {
|
||||
uint64_t segment_serial = 0;
|
||||
uint32_t dynamic_state_mask = 0;
|
||||
uint32_t root_signature_crc = 0;
|
||||
struct {
|
||||
TightLocalVector<RootDescriptorTable> resources;
|
||||
TightLocalVector<RootDescriptorTable> samplers;
|
||||
} root_tables;
|
||||
int uses = 0;
|
||||
} recent_binds[4]; // A better amount may be empirically found.
|
||||
|
||||
TightLocalVector<BufferDynamicInfo const *, uint32_t> dynamic_buffers;
|
||||
|
||||
#ifdef DEV_ENABLED
|
||||
// Filthy, but useful for dev.
|
||||
struct ResourceDescInfo {
|
||||
D3D12_DESCRIPTOR_RANGE_TYPE type;
|
||||
D3D12_SRV_DIMENSION srv_dimension;
|
||||
};
|
||||
TightLocalVector<ResourceDescInfo> resources_desc_info;
|
||||
#endif
|
||||
};
|
||||
|
||||
public:
|
||||
|
|
@ -754,7 +684,7 @@ public:
|
|||
|
||||
private:
|
||||
void _command_check_descriptor_sets(CommandBufferID p_cmd_buffer);
|
||||
void _command_bind_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index, uint32_t p_dynamic_offsets, bool p_for_compute);
|
||||
DescriptorHeap::Allocation _command_allocate_per_frame_descriptor();
|
||||
|
||||
public:
|
||||
/******************/
|
||||
|
|
@ -942,35 +872,12 @@ public:
|
|||
/********************/
|
||||
private:
|
||||
struct FrameInfo {
|
||||
struct {
|
||||
GPUDescriptorsHeap resources;
|
||||
GPUDescriptorsHeap samplers;
|
||||
CPUDescriptorsHeap aux;
|
||||
CPUDescriptorsHeap rtv;
|
||||
} desc_heaps;
|
||||
struct {
|
||||
GPUDescriptorsHeapWalker resources;
|
||||
GPUDescriptorsHeapWalker samplers;
|
||||
CPUDescriptorsHeapWalker aux;
|
||||
CPUDescriptorsHeapWalker rtv;
|
||||
} desc_heap_walkers;
|
||||
struct {
|
||||
bool resources = false;
|
||||
bool samplers = false;
|
||||
bool aux = false;
|
||||
bool rtv = false;
|
||||
} desc_heaps_exhausted_reported;
|
||||
CD3DX12_CPU_DESCRIPTOR_HANDLE null_rtv_handle = {}; // For [[MANUAL_SUBPASSES]].
|
||||
uint32_t segment_serial = 0;
|
||||
|
||||
#ifdef DEV_ENABLED
|
||||
uint32_t uniform_set_reused = 0;
|
||||
#endif
|
||||
LocalVector<DescriptorHeap::Allocation> descriptor_allocations;
|
||||
uint32_t descriptor_allocation_count = 0;
|
||||
};
|
||||
TightLocalVector<FrameInfo> frames;
|
||||
uint32_t frame_idx = 0;
|
||||
uint32_t frames_drawn = 0;
|
||||
uint32_t segment_serial = 0;
|
||||
bool segment_begun = false;
|
||||
HashMap<uint64_t, bool> has_comp_alpha;
|
||||
|
||||
|
|
|
|||
|
|
@ -194,7 +194,11 @@ uint32_t RenderingShaderContainerD3D12::_format_version() const {
|
|||
|
||||
uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_extra_data(const uint8_t *p_bytes) {
|
||||
reflection_data_d3d12 = *(const ReflectionDataD3D12 *)(p_bytes);
|
||||
return sizeof(ReflectionDataD3D12);
|
||||
reflection_binding_set_data_d3d12.resize(reflection_data.set_count);
|
||||
for (uint32_t i = 0; i < reflection_binding_set_data_d3d12.size(); i++) {
|
||||
reflection_binding_set_data_d3d12.ptrw()[i] = *(const ReflectionBindingSetDataD3D12 *)(p_bytes + sizeof(ReflectionDataD3D12) + (i * sizeof(ReflectionBindingSetDataD3D12)));
|
||||
}
|
||||
return sizeof(ReflectionDataD3D12) + (reflection_binding_set_data_d3d12.size() * sizeof(ReflectionBindingSetDataD3D12));
|
||||
}
|
||||
|
||||
uint32_t RenderingShaderContainerD3D12::_from_bytes_reflection_binding_uniform_extra_data_start(const uint8_t *p_bytes) {
|
||||
|
|
@ -228,9 +232,12 @@ uint32_t RenderingShaderContainerD3D12::_from_bytes_footer_extra_data(const uint
|
|||
uint32_t RenderingShaderContainerD3D12::_to_bytes_reflection_extra_data(uint8_t *p_bytes) const {
|
||||
if (p_bytes != nullptr) {
|
||||
*(ReflectionDataD3D12 *)(p_bytes) = reflection_data_d3d12;
|
||||
for (uint32_t i = 0; i < reflection_binding_set_data_d3d12.size(); i++) {
|
||||
*(ReflectionBindingSetDataD3D12 *)(p_bytes + sizeof(ReflectionDataD3D12) + (i * sizeof(ReflectionBindingSetDataD3D12))) = reflection_binding_set_data_d3d12[i];
|
||||
}
|
||||
}
|
||||
|
||||
return sizeof(ReflectionDataD3D12);
|
||||
return sizeof(ReflectionDataD3D12) + (reflection_binding_set_data_d3d12.size() * sizeof(ReflectionBindingSetDataD3D12));
|
||||
}
|
||||
|
||||
uint32_t RenderingShaderContainerD3D12::_to_bytes_reflection_binding_uniform_extra_data(uint8_t *p_bytes, uint32_t p_index) const {
|
||||
|
|
@ -269,14 +276,10 @@ bool RenderingShaderContainerD3D12::_convert_spirv_to_nir(Span<ReflectShaderStag
|
|||
dxil_runtime_conf.push_constant_cbv.base_shader_register = ROOT_CONSTANT_REGISTER;
|
||||
dxil_runtime_conf.zero_based_vertex_instance_id = true;
|
||||
dxil_runtime_conf.zero_based_compute_workgroup_id = true;
|
||||
dxil_runtime_conf.declared_read_only_images_as_srvs = true;
|
||||
|
||||
// Making this explicit to let maintainers know that in practice this didn't improve performance,
|
||||
// probably because data generated by one shader and consumed by another one forces the resource
|
||||
// to transition from UAV to SRV, and back, instead of being an UAV all the time.
|
||||
// In case someone wants to try, care must be taken so that, if incompatible bindings across stages
|
||||
// happen as a result, all the stages are re-translated. That can happen if, for instance, a stage
|
||||
// uses an allegedly writable resource only for reading but the next stage doesn't.
|
||||
// Explicitly keeping these false because converting UAV descriptors to SRVs does not seem to have real performance benefits on desktop GPUs.
|
||||
// It also makes it easier to implement descriptor heaps and enhanced barriers.
|
||||
dxil_runtime_conf.declared_read_only_images_as_srvs = false;
|
||||
dxil_runtime_conf.inferred_read_only_images_as_srvs = false;
|
||||
|
||||
// Translate SPIR-V to NIR.
|
||||
|
|
@ -482,7 +485,7 @@ bool RenderingShaderContainerD3D12::_generate_root_signature(BitField<RenderingD
|
|||
struct TraceableDescriptorTable {
|
||||
uint32_t stages_mask = {};
|
||||
Vector<D3D12_DESCRIPTOR_RANGE1> ranges;
|
||||
Vector<RootSignatureLocation *> root_signature_locations;
|
||||
uint32_t set = UINT_MAX;
|
||||
};
|
||||
|
||||
uint32_t binding_start = 0;
|
||||
|
|
@ -495,31 +498,35 @@ bool RenderingShaderContainerD3D12::_generate_root_signature(BitField<RenderingD
|
|||
for (uint32_t j = 0; j < uniform_count; j++) {
|
||||
const ReflectionBindingData &uniform = reflection_binding_set_uniforms_data[binding_start + j];
|
||||
ReflectionBindingDataD3D12 &uniform_d3d12 = reflection_binding_set_uniforms_data_d3d12.ptrw()[binding_start + j];
|
||||
bool really_used = uniform_d3d12.dxil_stages != 0;
|
||||
#ifdef DEV_ENABLED
|
||||
bool really_used = uniform_d3d12.dxil_stages != 0;
|
||||
bool anybody_home = (ResourceClass)(uniform_d3d12.resource_class) != RES_CLASS_INVALID || uniform_d3d12.has_sampler;
|
||||
DEV_ASSERT(anybody_home == really_used);
|
||||
#endif
|
||||
if (!really_used) {
|
||||
continue; // Existed in SPIR-V; went away in DXIL.
|
||||
}
|
||||
|
||||
auto insert_range = [](D3D12_DESCRIPTOR_RANGE_TYPE p_range_type,
|
||||
auto insert_range = [i](D3D12_DESCRIPTOR_RANGE_TYPE p_range_type,
|
||||
uint32_t p_num_descriptors,
|
||||
uint32_t p_dxil_register,
|
||||
uint32_t p_dxil_stages_mask,
|
||||
RootSignatureLocation *p_root_sig_locations,
|
||||
Vector<TraceableDescriptorTable> &r_tables,
|
||||
bool &r_first_in_set) {
|
||||
uint32_t &r_descriptor_offset,
|
||||
uint32_t &r_descriptor_count,
|
||||
bool &r_first_in_set,
|
||||
Vector<TraceableDescriptorTable> &r_tables) {
|
||||
r_descriptor_offset = r_descriptor_count;
|
||||
|
||||
if (r_first_in_set) {
|
||||
r_tables.resize(r_tables.size() + 1);
|
||||
r_first_in_set = false;
|
||||
}
|
||||
|
||||
TraceableDescriptorTable &table = r_tables.write[r_tables.size() - 1];
|
||||
DEV_ASSERT(table.set == UINT_MAX || table.set == i);
|
||||
|
||||
table.stages_mask |= p_dxil_stages_mask;
|
||||
table.set = i;
|
||||
|
||||
CD3DX12_DESCRIPTOR_RANGE1 range;
|
||||
|
||||
// Due to the aliasing hack for SRV-UAV of different families,
|
||||
// we may cause an unintended change of data (sometimes the validation layers catch it).
|
||||
D3D12_DESCRIPTOR_RANGE_FLAGS flags = D3D12_DESCRIPTOR_RANGE_FLAG_NONE;
|
||||
|
|
@ -528,79 +535,130 @@ bool RenderingShaderContainerD3D12::_generate_root_signature(BitField<RenderingD
|
|||
} else if (p_range_type == D3D12_DESCRIPTOR_RANGE_TYPE_CBV) {
|
||||
flags = D3D12_DESCRIPTOR_RANGE_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE;
|
||||
}
|
||||
range.Init(p_range_type, p_num_descriptors, p_dxil_register, 0, flags);
|
||||
|
||||
range.Init(p_range_type, p_num_descriptors, p_dxil_register, 0, flags, r_descriptor_offset);
|
||||
r_descriptor_count += p_num_descriptors;
|
||||
table.ranges.push_back(range);
|
||||
table.root_signature_locations.push_back(p_root_sig_locations);
|
||||
};
|
||||
|
||||
D3D12_DESCRIPTOR_RANGE_TYPE range_type = (D3D12_DESCRIPTOR_RANGE_TYPE)UINT_MAX;
|
||||
bool has_sampler = false;
|
||||
uint32_t num_descriptors = 1;
|
||||
D3D12_DESCRIPTOR_RANGE_TYPE resource_range_type = {};
|
||||
switch ((ResourceClass)(uniform_d3d12.resource_class)) {
|
||||
case RES_CLASS_INVALID: {
|
||||
|
||||
switch (uniform.type) {
|
||||
case RDC::UNIFORM_TYPE_SAMPLER: {
|
||||
has_sampler = true;
|
||||
num_descriptors = uniform.length;
|
||||
DEV_ASSERT(uniform_d3d12.has_sampler);
|
||||
} break;
|
||||
case RES_CLASS_CBV: {
|
||||
resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
|
||||
DEV_ASSERT(!uniform_d3d12.has_sampler);
|
||||
case RDC::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
|
||||
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
||||
has_sampler = true;
|
||||
num_descriptors = MAX(1u, uniform.length);
|
||||
} break;
|
||||
case RES_CLASS_SRV: {
|
||||
resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
||||
num_descriptors = MAX(1u, uniform.length); // An unbound R/O buffer is reflected as zero-size.
|
||||
case RDC::UNIFORM_TYPE_TEXTURE: {
|
||||
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
||||
num_descriptors = MAX(1u, uniform.length);
|
||||
} break;
|
||||
case RES_CLASS_UAV: {
|
||||
resource_range_type = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
|
||||
num_descriptors = MAX(1u, uniform.length); // An unbound R/W buffer is reflected as zero-size.
|
||||
DEV_ASSERT(!uniform_d3d12.has_sampler);
|
||||
case RDC::UNIFORM_TYPE_IMAGE: {
|
||||
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_UAV;
|
||||
num_descriptors = MAX(1u, uniform.length);
|
||||
} break;
|
||||
case RDC::UNIFORM_TYPE_TEXTURE_BUFFER: {
|
||||
CRASH_NOW_MSG("Unimplemented!");
|
||||
} break;
|
||||
case RDC::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {
|
||||
CRASH_NOW_MSG("Unimplemented!");
|
||||
} break;
|
||||
case RDC::UNIFORM_TYPE_IMAGE_BUFFER: {
|
||||
CRASH_NOW_MSG("Unimplemented!");
|
||||
} break;
|
||||
case RDC::UNIFORM_TYPE_UNIFORM_BUFFER: {
|
||||
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
|
||||
} break;
|
||||
case RDC::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: {
|
||||
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_CBV;
|
||||
} break;
|
||||
case RDC::UNIFORM_TYPE_STORAGE_BUFFER: {
|
||||
range_type = uniform.writable ? D3D12_DESCRIPTOR_RANGE_TYPE_UAV : D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
||||
} break;
|
||||
case RDC::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
|
||||
range_type = uniform.writable ? D3D12_DESCRIPTOR_RANGE_TYPE_UAV : D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
||||
} break;
|
||||
case RDC::UNIFORM_TYPE_INPUT_ATTACHMENT: {
|
||||
range_type = D3D12_DESCRIPTOR_RANGE_TYPE_SRV;
|
||||
} break;
|
||||
default: {
|
||||
DEV_ASSERT(false);
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t dxil_register = i * GODOT_NIR_DESCRIPTOR_SET_MULTIPLIER + uniform.binding * GODOT_NIR_BINDING_MULTIPLIER;
|
||||
if (uniform_d3d12.resource_class != RES_CLASS_INVALID) {
|
||||
insert_range(
|
||||
resource_range_type,
|
||||
num_descriptors,
|
||||
dxil_register,
|
||||
uniform_d3d12.dxil_stages,
|
||||
&uniform_d3d12.root_signature_locations[RS_LOC_TYPE_RESOURCE],
|
||||
resource_tables_maps,
|
||||
first_resource_in_set);
|
||||
if (range_type != (D3D12_DESCRIPTOR_RANGE_TYPE)UINT_MAX) {
|
||||
// Dynamic buffers are converted to root descriptors to prevent copying descriptors during command recording.
|
||||
// Out-of-bounds accesses are not a concern because that's already undefined behavior on Vulkan.
|
||||
if (uniform.type == RDC::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC || uniform.type == RDC::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC) {
|
||||
CD3DX12_ROOT_PARAMETER1 root_param = {};
|
||||
D3D12_SHADER_VISIBILITY visibility = stages_to_d3d12_visibility(uniform.stages);
|
||||
|
||||
switch (range_type) {
|
||||
case D3D12_DESCRIPTOR_RANGE_TYPE_CBV: {
|
||||
root_param.InitAsConstantBufferView(dxil_register, 0, D3D12_ROOT_DESCRIPTOR_FLAG_DATA_STATIC_WHILE_SET_AT_EXECUTE, visibility);
|
||||
} break;
|
||||
case D3D12_DESCRIPTOR_RANGE_TYPE_SRV: {
|
||||
root_param.InitAsShaderResourceView(dxil_register, 0, D3D12_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE, visibility);
|
||||
} break;
|
||||
case D3D12_DESCRIPTOR_RANGE_TYPE_UAV: {
|
||||
root_param.InitAsUnorderedAccessView(dxil_register, 0, D3D12_ROOT_DESCRIPTOR_FLAG_DATA_VOLATILE, visibility);
|
||||
} break;
|
||||
default: {
|
||||
DEV_ASSERT(false && "Unrecognized range type.");
|
||||
} break;
|
||||
}
|
||||
|
||||
uniform_d3d12.root_param_idx = root_params.size();
|
||||
root_params.push_back(root_param);
|
||||
} else {
|
||||
insert_range(
|
||||
range_type,
|
||||
num_descriptors,
|
||||
dxil_register,
|
||||
uniform.stages,
|
||||
uniform_d3d12.resource_descriptor_offset,
|
||||
reflection_binding_set_data_d3d12.ptrw()[i].resource_descriptor_count,
|
||||
first_resource_in_set,
|
||||
resource_tables_maps);
|
||||
}
|
||||
}
|
||||
|
||||
if (uniform_d3d12.has_sampler) {
|
||||
if (has_sampler) {
|
||||
insert_range(
|
||||
D3D12_DESCRIPTOR_RANGE_TYPE_SAMPLER,
|
||||
num_descriptors,
|
||||
dxil_register,
|
||||
uniform_d3d12.dxil_stages,
|
||||
&uniform_d3d12.root_signature_locations[RS_LOC_TYPE_SAMPLER],
|
||||
sampler_tables_maps,
|
||||
first_sampler_in_set);
|
||||
uniform.stages,
|
||||
uniform_d3d12.sampler_descriptor_offset,
|
||||
reflection_binding_set_data_d3d12.ptrw()[i].sampler_descriptor_count,
|
||||
first_sampler_in_set,
|
||||
sampler_tables_maps);
|
||||
}
|
||||
}
|
||||
|
||||
binding_start += uniform_count;
|
||||
}
|
||||
|
||||
auto make_descriptor_tables = [&root_params](const Vector<TraceableDescriptorTable> &p_tables) {
|
||||
for (const TraceableDescriptorTable &table : p_tables) {
|
||||
D3D12_SHADER_VISIBILITY visibility = stages_to_d3d12_visibility(table.stages_mask);
|
||||
DEV_ASSERT(table.ranges.size() == table.root_signature_locations.size());
|
||||
for (int i = 0; i < table.ranges.size(); i++) {
|
||||
// By now we know very well which root signature location corresponds to the pointed uniform.
|
||||
table.root_signature_locations[i]->root_param_index = root_params.size();
|
||||
table.root_signature_locations[i]->range_index = i;
|
||||
}
|
||||
for (const TraceableDescriptorTable &table : resource_tables_maps) {
|
||||
CD3DX12_ROOT_PARAMETER1 root_table = {};
|
||||
root_table.InitAsDescriptorTable(table.ranges.size(), table.ranges.ptr(), stages_to_d3d12_visibility(table.stages_mask));
|
||||
reflection_binding_set_data_d3d12.ptrw()[table.set].resource_root_param_idx = root_params.size();
|
||||
root_params.push_back(root_table);
|
||||
}
|
||||
|
||||
CD3DX12_ROOT_PARAMETER1 root_table;
|
||||
root_table.InitAsDescriptorTable(table.ranges.size(), table.ranges.ptr(), visibility);
|
||||
root_params.push_back(root_table);
|
||||
}
|
||||
};
|
||||
|
||||
make_descriptor_tables(resource_tables_maps);
|
||||
make_descriptor_tables(sampler_tables_maps);
|
||||
for (const TraceableDescriptorTable &table : sampler_tables_maps) {
|
||||
CD3DX12_ROOT_PARAMETER1 root_table = {};
|
||||
root_table.InitAsDescriptorTable(table.ranges.size(), table.ranges.ptr(), stages_to_d3d12_visibility(table.stages_mask));
|
||||
reflection_binding_set_data_d3d12.ptrw()[table.set].sampler_root_param_idx = root_params.size();
|
||||
root_params.push_back(root_table);
|
||||
}
|
||||
|
||||
CD3DX12_VERSIONED_ROOT_SIGNATURE_DESC root_sig_desc = {};
|
||||
D3D12_ROOT_SIGNATURE_FLAGS root_sig_flags =
|
||||
|
|
@ -755,6 +813,7 @@ void RenderingShaderContainerD3D12::_nir_report_bitcode_bit_offset(uint64_t p_bi
|
|||
#endif
|
||||
|
||||
void RenderingShaderContainerD3D12::_set_from_shader_reflection_post(const ReflectShader &p_shader) {
|
||||
reflection_binding_set_data_d3d12.resize(reflection_binding_set_uniforms_count.size());
|
||||
reflection_binding_set_uniforms_data_d3d12.resize(reflection_binding_set_uniforms_data.size());
|
||||
reflection_specialization_data_d3d12.resize(reflection_specialization_data.size());
|
||||
|
||||
|
|
@ -841,6 +900,7 @@ RenderingShaderContainerD3D12::ShaderReflectionD3D12 RenderingShaderContainerD3D
|
|||
reflection.spirv_specialization_constants_ids_mask = reflection_data_d3d12.spirv_specialization_constants_ids_mask;
|
||||
reflection.dxil_push_constant_stages = reflection_data_d3d12.dxil_push_constant_stages;
|
||||
reflection.nir_runtime_data_root_param_idx = reflection_data_d3d12.nir_runtime_data_root_param_idx;
|
||||
reflection.reflection_binding_sets_d3d12 = reflection_binding_set_data_d3d12;
|
||||
reflection.reflection_specialization_data_d3d12 = reflection_specialization_data_d3d12;
|
||||
reflection.root_signature_bytes = root_signature_bytes;
|
||||
reflection.root_signature_crc = root_signature_crc;
|
||||
|
|
|
|||
|
|
@ -86,16 +86,20 @@ public:
|
|||
2, // SHADER_STAGE_COMPUTE
|
||||
};
|
||||
|
||||
struct RootSignatureLocation {
|
||||
uint32_t root_param_index = UINT32_MAX;
|
||||
uint32_t range_index = UINT32_MAX;
|
||||
struct ReflectionBindingSetDataD3D12 {
|
||||
uint32_t resource_root_param_idx = UINT32_MAX;
|
||||
uint32_t resource_descriptor_count = 0;
|
||||
uint32_t sampler_root_param_idx = UINT32_MAX;
|
||||
uint32_t sampler_descriptor_count = 0;
|
||||
};
|
||||
|
||||
struct ReflectionBindingDataD3D12 {
|
||||
uint32_t resource_class = 0;
|
||||
uint32_t has_sampler = 0;
|
||||
uint32_t dxil_stages = 0;
|
||||
RootSignatureLocation root_signature_locations[2];
|
||||
uint32_t resource_descriptor_offset = UINT32_MAX;
|
||||
uint32_t sampler_descriptor_offset = UINT32_MAX;
|
||||
uint32_t root_param_idx = UINT32_MAX; // Root descriptor only.
|
||||
};
|
||||
|
||||
struct ReflectionSpecializationDataD3D12 {
|
||||
|
|
@ -116,6 +120,7 @@ protected:
|
|||
|
||||
void *lib_d3d12 = nullptr;
|
||||
ReflectionDataD3D12 reflection_data_d3d12;
|
||||
Vector<ReflectionBindingSetDataD3D12> reflection_binding_set_data_d3d12;
|
||||
Vector<ReflectionBindingDataD3D12> reflection_binding_set_uniforms_data_d3d12;
|
||||
Vector<ReflectionSpecializationDataD3D12> reflection_specialization_data_d3d12;
|
||||
Vector<uint8_t> root_signature_bytes;
|
||||
|
|
@ -154,6 +159,7 @@ public:
|
|||
uint32_t spirv_specialization_constants_ids_mask = 0;
|
||||
uint32_t dxil_push_constant_stages = 0;
|
||||
uint32_t nir_runtime_data_root_param_idx = 0;
|
||||
Vector<ReflectionBindingSetDataD3D12> reflection_binding_sets_d3d12;
|
||||
Vector<Vector<ReflectionBindingDataD3D12>> reflection_binding_set_uniforms_d3d12;
|
||||
Vector<ReflectionSpecializationDataD3D12> reflection_specialization_data_d3d12;
|
||||
Vector<uint8_t> root_signature_bytes;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue