Mirror of https://github.com/godotengine/godot.git, synced 2025-12-08 06:09:55 +00:00

Merge pull request #111183 from stuartcarnie/matias-uma-pc-pr
Add Persistent Buffers utilizing UMA

Commit edbfb7a6ec: 38 changed files with 2848 additions and 1466 deletions
servers/rendering/multi_uma_buffer.h (new file, 384 lines added)

@@ -0,0 +1,384 @@
/**************************************************************************/
/*  multi_uma_buffer.h                                                    */
/**************************************************************************/
/* This file is part of:                                                  */
/*                             GODOT ENGINE                               */
/*                        https://godotengine.org                         */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
/*                                                                        */
/* Permission is hereby granted, free of charge, to any person obtaining  */
/* a copy of this software and associated documentation files (the        */
/* "Software"), to deal in the Software without restriction, including    */
/* without limitation the rights to use, copy, modify, merge, publish,    */
/* distribute, sublicense, and/or sell copies of the Software, and to     */
/* permit persons to whom the Software is furnished to do so, subject to  */
/* the following conditions:                                              */
/*                                                                        */
/* The above copyright notice and this permission notice shall be         */
/* included in all copies or substantial portions of the Software.        */
/*                                                                        */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
/**************************************************************************/

#pragma once

#include "servers/rendering/rendering_server.h"

class MultiUmaBufferBase {
protected:
    LocalVector<RID> buffers;
    uint32_t curr_idx = UINT32_MAX;
    uint64_t last_frame_mapped = UINT64_MAX;
    const uint32_t max_extra_buffers;
#ifdef DEBUG_ENABLED
    const char *debug_name;
#endif

    MultiUmaBufferBase(uint32_t p_max_extra_buffers, const char *p_debug_name) :
            max_extra_buffers(p_max_extra_buffers)
#ifdef DEBUG_ENABLED
            ,
            debug_name(p_debug_name)
#endif
    {
    }

#ifdef DEV_ENABLED
    ~MultiUmaBufferBase() {
        DEV_ASSERT(buffers.is_empty() && "Forgot to call uninit()!");
    }
#endif

public:
    void uninit() {
        if (is_print_verbose_enabled()) {
            print_line("MultiUmaBuffer '"
#ifdef DEBUG_ENABLED
                    + String(debug_name) +
#else
                    "{DEBUG_ENABLED unavailable}"
#endif
                    "' used a total of " + itos(buffers.size()) +
                    " buffers. A large number may indicate a waste of VRAM and can be brought down by tweaking MAX_EXTRA_BUFFERS for this buffer.");
        }

        RenderingDevice *rd = RD::RenderingDevice::get_singleton();

        for (RID buffer : buffers) {
            if (buffer.is_valid()) {
                rd->free_rid(buffer);
            }
        }

        buffers.clear();
    }

    void shrink_to_max_extra_buffers() {
        DEV_ASSERT(curr_idx == 0u && "This function can only be called after reset and before upload_and_advance is called again!");

        RenderingDevice *rd = RD::RenderingDevice::get_singleton();

        uint32_t elem_count = buffers.size();

        if (elem_count > max_extra_buffers) {
            if (is_print_verbose_enabled()) {
                print_line("MultiUmaBuffer '"
#ifdef DEBUG_ENABLED
                        + String(debug_name) +
#else
                        "{DEBUG_ENABLED unavailable}"
#endif
                        "' peaked at " + itos(elem_count) + " elements; shrinking it to " + itos(max_extra_buffers) +
                        ". If you see this message often, then something is wrong with rendering or MAX_EXTRA_BUFFERS needs to be increased.");
            }
        }

        while (elem_count > max_extra_buffers) {
            --elem_count;
            if (buffers[elem_count].is_valid()) {
                rd->free_rid(buffers[elem_count]);
            }
            buffers.remove_at(elem_count);
        }
    }
};

/// Interface for making it easier to work with UMA.
///
/// # What is UMA?
///
/// It stands for Unified Memory Architecture. There are two kinds of UMA:
/// 1. HW UMA. This is the case of iGPUs (especially Android, iOS, Apple ARM-based macOS, PS4 & PS5).
///    The CPU and GPU share the same die and same memory, so regular RAM and VRAM are internally the
///    same thing. There may be some differences between them in practice due to cache synchronization
///    behaviors, or regular RAM bandwidth may be purposely throttled (as is the case of PS4 & PS5).
/// 2. "Pretended UMA". On PC, desktop GPUs with ReBAR enabled can pretend VRAM behaves like normal
///    RAM, while internally the data is moved across the PCIe bus. This can cause differences
///    in execution time of the routines that write to GPU buffers, as the region is often uncached
///    (i.e. write-combined) and PCIe latency and bandwidth are vastly different from regular RAM.
///    Without ReBAR, the amount of UMA memory is limited to 256MB (shared by the entire system).
///
/// Since this type of memory is often uncached, it is not well-suited for downloading GPU -> CPU,
/// but rather for uploading CPU -> GPU.
///
/// # When to use UMA buffers?
///
/// UMA buffers have various caveats and improper usage might lead to visual glitches. Therefore they
/// should be used sparingly, where it makes a difference. Do all of the following checks pass?
/// 1. Data is uploaded from CPU to GPU every (or almost every) frame.
/// 2. Data is always uploaded from scratch. Partial uploads are unsupported.
/// 3. If uploading multiple times per frame (e.g. for multiple passes), the number of uploads
///    per frame is relatively stable (occasional spikes are fine if using MAX_EXTRA_BUFFERS).
///
/// # Why the caveats?
///
/// This is due to our inability to detect race conditions. If you write to a UMA buffer, submit
/// GPU commands and then write more data to it, we can't guarantee that you won't be writing to a
/// region the GPU is currently reading from. Tools like the validation layers cannot detect this
/// race condition at all, making it very hard to troubleshoot.
///
/// Therefore the safest approach is to use an interface that forces users to upload everything at once.
/// There is one exception for performance: map_raw_for_upload() will return a pointer, and it is your
/// responsibility to make sure you don't use that pointer again after submitting.
/// USE THIS API CALL SPARINGLY AND WITH CARE.
///
/// Since we forbid uploading more data to a buffer after we've uploaded to it, this interface will
/// create more buffers. This means users will need more UniformSets (i.e. uniform_set_create).
///
/// # How to use
///
/// Example code 01:
///     MultiUmaBuffer<1> uma_buffer = MultiUmaBuffer<1>("Debug name displayed if run with --verbose");
///     uma_buffer.set_size(0, max_size_bytes, false);
///
///     for (uint32_t i = 0u; i < num_passes; ++i) {
///         uma_buffer.prepare_for_upload(); // Creates a new buffer (if none exists already)
///                                          // of max_size_bytes. Must be called.
///         uma_buffer.upload(0, src_data, size_bytes);
///
///         if (!uniform_set[i]) {
///             RD::Uniform u;
///             u.binding = 1;
///             u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC;
///             u.append_id(uma_buffer._get(0u));
///             uniform_set[i] = rd->uniform_set_create( ... );
///         }
///     }
///
///     // On shutdown (or if you need to call set_size again).
///     uma_buffer.uninit();
///
/// Example code 02:
///
///     uma_buffer.prepare_for_upload();
///     RID rid = uma_buffer.get_for_upload(0u);
///     rd->buffer_update(rid, 0, sizeof(BakeParameters), &bake_parameters);
///     RD::Uniform u; // Skipping full initialization of u. See Example 01.
///     u.append_id(rid);
///
/// Example code 03:
///
///     uma_buffer.prepare_for_upload();
///     void *dst_data = uma_buffer.map_raw_for_upload(0u);
///     memcpy(dst_data, src_data, size_bytes);
///     rd->buffer_flush(uma_buffer._get(0u));
///     RD::Uniform u; // Skipping full initialization of u. See Example 01.
///     u.append_id(uma_buffer._get(0u));
///
/// # Tricks
///
/// Godot's shadow mapping code calls scene_state.uniform_buffers._get(0u - p_pass_offset) (i.e. a
/// negative offset) because for various reasons its shadow mapping code was written like this:
///
///     for (uint32_t i = 0u; i < num_passes; ++i) {
///         uma_buffer.prepare_for_upload();
///         uma_buffer.upload(0, src_data, size_bytes);
///     }
///     for (uint32_t i = 0u; i < num_passes; ++i) {
///         RD::Uniform u;
///         u.binding = 1;
///         u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC;
///         u.append_id(uma_buffer._get(-(num_passes - 1u - i)));
///         uniform_set[i] = rd->uniform_set_create( ... );
///     }
///
/// Every time prepare_for_upload() is called, uma_buffer._get(-idx) will return a different RID(*).
/// Thus with a negative value we can address previous ones. This is fine as long as the value idx
/// doesn't exceed the number of times the user called prepare_for_upload() for this frame.
///
/// (*) This RID will be returned again on the next frame after the same amount of prepare_for_upload()
/// calls; unless the number of times it was called exceeded MAX_EXTRA_BUFFERS.
///
/// # Template parameters
///
/// ## NUM_BUFFERS
///
/// How many buffers we should track. E.g. instead of doing this:
///     MultiUmaBuffer<1> omni_lights = /*...*/;
///     MultiUmaBuffer<1> spot_lights = /*...*/;
///     MultiUmaBuffer<1> directional_lights = /*...*/;
///
///     omni_lights.set_size(0u, omni_size, false);
///     spot_lights.set_size(0u, spot_size, false);
///     directional_lights.set_size(0u, dir_size, false);
///
///     omni_lights.prepare_for_upload();
///     spot_lights.prepare_for_upload();
///     directional_lights.prepare_for_upload();
///
/// You can do this:
///
///     MultiUmaBuffer<3> lights = /*...*/;
///
///     lights.set_size(0u, omni_size, false);
///     lights.set_size(1u, spot_size, false);
///     lights.set_size(2u, dir_size, false);
///
///     lights.prepare_for_upload();
///
/// This approach works as long as all buffers call prepare_for_upload() at the same time.
/// It saves some overhead.
///
/// ## MAX_EXTRA_BUFFERS
///
/// Upper limit on the number of buffers per frame.
///
/// There are times where rendering might spike for exceptional reasons, calling prepare_for_upload()
/// too many times, never to do so again. This would cause an increase in memory usage that is
/// never reclaimed until shutdown.
///
/// MAX_EXTRA_BUFFERS can be used to handle such spikes, by deallocating the extra buffers.
/// Example:
///     MultiUmaBuffer<1, 6> buffer;
///
///     // Normal frame (assuming up to 6 passes is considered normal):
///     for (uint32_t i = 0u; i < 6u; ++i) {
///         buffer.prepare_for_upload();
///         ...
///         buffer.upload(...);
///     }
///
///     // Exceptional frame:
///     for (uint32_t i = 0u; i < 24u; ++i) {
///         buffer.prepare_for_upload();
///         ...
///         buffer.upload(...);
///     }
///
/// After the frame is done, those extra 18 buffers will be deleted.
/// Launching Godot with --verbose will print diagnostic information.
template <uint32_t NUM_BUFFERS, uint32_t MAX_EXTRA_BUFFERS = UINT32_MAX>
class MultiUmaBuffer : public MultiUmaBufferBase {
    uint32_t buffer_sizes[NUM_BUFFERS] = {};
#ifdef DEV_ENABLED
    bool can_upload[NUM_BUFFERS] = {};
#endif

    void push() {
        RenderingDevice *rd = RD::RenderingDevice::get_singleton();
        for (uint32_t i = 0u; i < NUM_BUFFERS; ++i) {
            const bool is_storage = buffer_sizes[i] & 0x80000000u;
            const uint32_t size_bytes = buffer_sizes[i] & ~0x80000000u;
            RID buffer;
            if (is_storage) {
                buffer = rd->storage_buffer_create(size_bytes, Vector<uint8_t>(), 0, RD::BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT);
            } else {
                buffer = rd->uniform_buffer_create(size_bytes, Vector<uint8_t>(), RD::BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT);
            }
            buffers.push_back(buffer);
        }
    }

public:
    MultiUmaBuffer(const char *p_debug_name) :
            MultiUmaBufferBase(MAX_EXTRA_BUFFERS, p_debug_name) {}

    uint32_t get_curr_idx() const { return curr_idx; }

    void set_size(uint32_t p_idx, uint32_t p_size_bytes, bool p_is_storage) {
        DEV_ASSERT(buffers.is_empty());
        buffer_sizes[p_idx] = p_size_bytes | (p_is_storage ? 0x80000000u : 0u);
        curr_idx = UINT32_MAX;
        last_frame_mapped = UINT64_MAX;
    }

    uint32_t get_size(uint32_t p_idx) const { return buffer_sizes[p_idx] & ~0x80000000u; }

    // Gets the raw buffer. Use with care.
    // If you call this function, make sure to have called prepare_for_upload() first.
    // Do not call _get() and then prepare_for_upload().
    RID _get(uint32_t p_idx) {
        return buffers[curr_idx * NUM_BUFFERS + p_idx];
    }

    /**
     * @param p_append True if you wish to append more data to the existing buffer.
     * @return True if the internal buffer changed and the caller must re-map and upload from
     * scratch. False if appending to the current buffer is possible.
     */
    bool prepare_for_map(bool p_append) {
        RenderingDevice *rd = RD::RenderingDevice::get_singleton();
        const uint64_t frames_drawn = rd->get_frames_drawn();

        if (last_frame_mapped == frames_drawn) {
            if (!p_append) {
                ++curr_idx;
            }
        } else {
            p_append = false;
            curr_idx = 0u;
            if (max_extra_buffers != UINT32_MAX) {
                shrink_to_max_extra_buffers();
            }
        }
        last_frame_mapped = frames_drawn;
        if (curr_idx * NUM_BUFFERS >= buffers.size()) {
            push();
        }

#ifdef DEV_ENABLED
        if (!p_append) {
            for (size_t i = 0u; i < NUM_BUFFERS; ++i) {
                can_upload[i] = true;
            }
        }
#endif
        return !p_append;
    }

    void prepare_for_upload() {
        prepare_for_map(false);
    }

    void *map_raw_for_upload(uint32_t p_idx) {
#ifdef DEV_ENABLED
        DEV_ASSERT(can_upload[p_idx] && "Forgot to prepare_for_upload first! Or called get_for_upload/upload() twice.");
        can_upload[p_idx] = false;
#endif
        RenderingDevice *rd = RD::RenderingDevice::get_singleton();
        return rd->buffer_persistent_map_advance(buffers[curr_idx * NUM_BUFFERS + p_idx]);
    }

    RID get_for_upload(uint32_t p_idx) {
#ifdef DEV_ENABLED
        DEV_ASSERT(can_upload[p_idx] && "Forgot to prepare_for_upload first! Or called get_for_upload/upload() twice.");
        can_upload[p_idx] = false;
#endif
        return buffers[curr_idx * NUM_BUFFERS + p_idx];
    }

    void upload(uint32_t p_idx, const void *p_src_data, uint32_t p_size_bytes) {
#ifdef DEV_ENABLED
        DEV_ASSERT(can_upload[p_idx] && "Forgot to prepare_for_upload first! Or called get_for_upload/upload() twice.");
        can_upload[p_idx] = false;
#endif
        RenderingDevice *rd = RD::RenderingDevice::get_singleton();
        rd->buffer_update(buffers[curr_idx * NUM_BUFFERS + p_idx], 0, p_size_bytes, p_src_data, true);
    }
};
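A side note on the packing used by set_size(), get_size() and push() above: the buffer type lives in the top bit and the byte size in the low 31 bits of one uint32_t. A minimal standalone sketch of the same encoding (the helper names here are hypothetical, not part of the engine):

    #include <cassert>
    #include <cstdint>

    constexpr uint32_t STORAGE_BIT = 0x80000000u; // Top bit: storage vs. uniform buffer.

    // Equivalent of set_size()'s packing: the size must fit in the low 31 bits.
    inline uint32_t pack_buffer_size(uint32_t p_size_bytes, bool p_is_storage) {
        assert(p_size_bytes < STORAGE_BIT);
        return p_size_bytes | (p_is_storage ? STORAGE_BIT : 0u);
    }

    // Equivalent of get_size(): strip the type bit.
    inline uint32_t unpack_buffer_size(uint32_t p_packed) {
        return p_packed & ~STORAGE_BIT;
    }

    // Equivalent of the is_storage check in push().
    inline bool unpack_is_storage(uint32_t p_packed) {
        return (p_packed & STORAGE_BIT) != 0u;
    }

This is also why get_size() returns 0 for a slot that was never set: buffer_sizes[] is zero-initialized.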
servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp

@@ -767,26 +767,38 @@ void RenderForwardClustered::_setup_environment(const RenderDataRD *p_render_dat
 	RD::get_singleton()->buffer_update(scene_state.implementation_uniform_buffers[p_index], 0, sizeof(SceneState::UBO), &scene_state.ubo);
 }
 
-void RenderForwardClustered::_update_instance_data_buffer(RenderListType p_render_list) {
-	if (scene_state.instance_data[p_render_list].size() > 0) {
-		if (scene_state.instance_buffer[p_render_list] == RID() || scene_state.instance_buffer_size[p_render_list] < scene_state.instance_data[p_render_list].size()) {
-			if (scene_state.instance_buffer[p_render_list] != RID()) {
-				RD::get_singleton()->free_rid(scene_state.instance_buffer[p_render_list]);
-			}
-			uint32_t new_size = nearest_power_of_2_templated(MAX(uint64_t(INSTANCE_DATA_BUFFER_MIN_SIZE), scene_state.instance_data[p_render_list].size()));
-			scene_state.instance_buffer[p_render_list] = RD::get_singleton()->storage_buffer_create(new_size * sizeof(SceneState::InstanceData));
-			scene_state.instance_buffer_size[p_render_list] = new_size;
+void RenderForwardClustered::SceneState::grow_instance_buffer(RenderListType p_render_list, uint32_t p_req_element_count, bool p_append) {
+	if (p_req_element_count > 0) {
+		if (instance_buffer[p_render_list].get_size(0u) < p_req_element_count * sizeof(SceneState::InstanceData)) {
+			instance_buffer[p_render_list].uninit();
+			uint32_t new_size = nearest_power_of_2_templated(MAX(uint64_t(INSTANCE_DATA_BUFFER_MIN_SIZE), p_req_element_count));
+			instance_buffer[p_render_list].set_size(0u, new_size * sizeof(SceneState::InstanceData), true);
+			curr_gpu_ptr[p_render_list] = nullptr;
 		}
+
+		const bool must_remap = instance_buffer[p_render_list].prepare_for_map(p_append);
+		if (must_remap) {
+			curr_gpu_ptr[p_render_list] = nullptr;
+		}
-		RD::get_singleton()->buffer_update(scene_state.instance_buffer[p_render_list], 0, sizeof(SceneState::InstanceData) * scene_state.instance_data[p_render_list].size(), scene_state.instance_data[p_render_list].ptr());
 	}
 }
 
 void RenderForwardClustered::_fill_instance_data(RenderListType p_render_list, int *p_render_info, uint32_t p_offset, int32_t p_max_elements, bool p_update_buffer) {
 	RenderList *rl = &render_list[p_render_list];
 	uint32_t element_total = p_max_elements >= 0 ? uint32_t(p_max_elements) : rl->elements.size();
 
-	scene_state.instance_data[p_render_list].resize(p_offset + element_total);
 	rl->element_info.resize(p_offset + element_total);
 
+	// If p_offset == 0, grow_instance_buffer resets and increments the buffer.
+	// If this behavior ever changes, _render_shadow_begin may need to change.
+	scene_state.grow_instance_buffer(p_render_list, p_offset + element_total, p_offset != 0u);
+	if (!scene_state.curr_gpu_ptr[p_render_list] && element_total > 0u) {
+		// The old buffer was replaced by another, larger one. We must start copying from scratch.
+		element_total += p_offset;
+		p_offset = 0u;
+		scene_state.curr_gpu_ptr[p_render_list] = reinterpret_cast<SceneState::InstanceData *>(scene_state.instance_buffer[p_render_list].map_raw_for_upload(0u));
+	}
 
 	if (p_render_info) {
 		p_render_info[RS::VIEWPORT_RENDER_INFO_OBJECTS_IN_FRAME] += element_total;
 	}

@@ -797,7 +809,7 @@ void RenderForwardClustered::_fill_instance_data(RenderListType p_render_list, i
 		GeometryInstanceSurfaceDataCache *surface = rl->elements[i + p_offset];
 		GeometryInstanceForwardClustered *inst = surface->owner;
 
-		SceneState::InstanceData &instance_data = scene_state.instance_data[p_render_list][i + p_offset];
+		SceneState::InstanceData instance_data;
 
 		if (likely(inst->store_transform_cache)) {
 			RendererRD::MaterialStorage::store_transform_transposed_3x4(inst->transform, instance_data.transform);

@@ -836,7 +848,9 @@ void RenderForwardClustered::_fill_instance_data(RenderListType p_render_list, i
 		instance_data.set_compressed_aabb(surface_aabb);
 		instance_data.set_uv_scale(uv_scale);
 
-		bool cant_repeat = instance_data.flags & INSTANCE_DATA_FLAG_MULTIMESH || inst->mesh_instance.is_valid();
+		scene_state.curr_gpu_ptr[p_render_list][i + p_offset] = instance_data;
+
+		const bool cant_repeat = instance_data.flags & INSTANCE_DATA_FLAG_MULTIMESH || inst->mesh_instance.is_valid();
 
 		if (prev_surface != nullptr && !cant_repeat && prev_surface->sort.sort_key1 == surface->sort.sort_key1 && prev_surface->sort.sort_key2 == surface->sort.sort_key2 && inst->mirror == prev_surface->owner->mirror && repeats < RenderElementInfo::MAX_REPEATS) {
 			//this element is the same as the previous one, count repeats to draw it using instancing

@@ -870,8 +884,8 @@ void RenderForwardClustered::_fill_instance_data(RenderListType p_render_list, i
 		}
 	}
 
-	if (p_update_buffer) {
-		_update_instance_data_buffer(p_render_list);
+	if (p_update_buffer && element_total > 0u) {
+		RenderingDevice::get_singleton()->buffer_flush(scene_state.instance_buffer[p_render_list]._get(0u));
 	}
 }

@@ -2722,7 +2736,8 @@ void RenderForwardClustered::_render_shadow_begin() {
 	_update_render_base_uniform_set();
 
 	render_list[RENDER_LIST_SECONDARY].clear();
-	scene_state.instance_data[RENDER_LIST_SECONDARY].clear();
+	// No need to reset scene_state.curr_gpu_ptr or scene_state.instance_buffer[RENDER_LIST_SECONDARY]
+	// because _fill_instance_data will do that if it detects p_offset == 0u.
 }
 
 void RenderForwardClustered::_render_shadow_append(RID p_framebuffer, const PagedArray<RenderGeometryInstance *> &p_instances, const Projection &p_projection, const Transform3D &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_reverse_cull_face, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, float p_lod_distance_multiplier, float p_screen_mesh_lod_threshold, const Rect2i &p_rect, bool p_flip_y, bool p_clear_region, bool p_begin, bool p_end, RenderingMethod::RenderInfo *p_render_info, const Size2i &p_viewport_size, const Transform3D &p_main_cam_transform) {

@@ -2797,7 +2812,11 @@ void RenderForwardClustered::_render_shadow_append(RID p_framebuffer, const Page
 }
 
 void RenderForwardClustered::_render_shadow_process() {
-	_update_instance_data_buffer(RENDER_LIST_SECONDARY);
+	RenderingDevice *rd = RenderingDevice::get_singleton();
+	if (scene_state.instance_buffer[RENDER_LIST_SECONDARY].get_size(0u) > 0u) {
+		rd->buffer_flush(scene_state.instance_buffer[RENDER_LIST_SECONDARY]._get(0u));
+	}
+
 	//render shadows one after the other, so this can be done un-barriered and the driver can optimize (as well as allow us to run compute at the same time)
 
 	for (uint32_t i = 0; i < scene_state.shadow_passes.size(); i++) {

@@ -3258,11 +3277,14 @@ RID RenderForwardClustered::_setup_render_pass_uniform_set(RenderListType p_rend
 	{
 		RD::Uniform u;
 		u.binding = 2;
-		u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
-		RID instance_buffer = scene_state.instance_buffer[p_render_list];
-		if (instance_buffer == RID()) {
-			instance_buffer = scene_shader.default_vec4_xform_buffer; // any buffer will do since its not used
+		u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC;
+		if (scene_state.instance_buffer[p_render_list].get_size(0u) == 0u) {
+			// Any buffer will do since it's not used, so just create one.
+			// We can't use scene_shader.default_vec4_xform_buffer because it's not dynamic.
+			scene_state.instance_buffer[p_render_list].set_size(0u, INSTANCE_DATA_BUFFER_MIN_SIZE * sizeof(SceneState::InstanceData), true);
+			scene_state.instance_buffer[p_render_list].prepare_for_upload();
 		}
+		RID instance_buffer = scene_state.instance_buffer[p_render_list]._get(0u);
 		u.append_id(instance_buffer);
 		uniforms.push_back(u);
 	}

@@ -3624,11 +3646,14 @@ RID RenderForwardClustered::_setup_sdfgi_render_pass_uniform_set(RID p_albedo_te
 	{
 		RD::Uniform u;
 		u.binding = 2;
-		u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
-		RID instance_buffer = scene_state.instance_buffer[RENDER_LIST_SECONDARY];
-		if (instance_buffer == RID()) {
-			instance_buffer = scene_shader.default_vec4_xform_buffer; // any buffer will do since its not used
+		u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC;
+		if (scene_state.instance_buffer[RENDER_LIST_SECONDARY].get_size(0u) == 0u) {
+			// Any buffer will do since it's not used, so just create one.
+			// We can't use scene_shader.default_vec4_xform_buffer because it's not dynamic.
+			scene_state.instance_buffer[RENDER_LIST_SECONDARY].set_size(0u, INSTANCE_DATA_BUFFER_MIN_SIZE * sizeof(SceneState::InstanceData), true);
+			scene_state.instance_buffer[RENDER_LIST_SECONDARY].prepare_for_upload();
 		}
+		RID instance_buffer = scene_state.instance_buffer[RENDER_LIST_SECONDARY]._get(0u);
 		u.append_id(instance_buffer);
 		uniforms.push_back(u);
 	}

@@ -5125,9 +5150,7 @@ RenderForwardClustered::~RenderForwardClustered() {
 	RD::get_singleton()->free_rid(scene_state.lightmap_buffer);
 	RD::get_singleton()->free_rid(scene_state.lightmap_capture_buffer);
 	for (uint32_t i = 0; i < RENDER_LIST_MAX; i++) {
-		if (scene_state.instance_buffer[i] != RID()) {
-			RD::get_singleton()->free_rid(scene_state.instance_buffer[i]);
-		}
+		scene_state.instance_buffer[i].uninit();
 	}
 	memdelete_arr(scene_state.lightmap_captures);
 }
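The grow/map/flush dance above follows a fixed contract: prepare_for_map() returns true when the mapping was invalidated (new frame, buffer advance, or a growth-triggered uninit()), and the caller must then re-map and rewrite everything from offset 0, not just the newly appended range. A condensed sketch of that caller pattern; the helper and the all_instances parameter are hypothetical stand-ins for the render list's element array, distilled from grow_instance_buffer() and _fill_instance_data() above:

    // Hypothetical helper: 'buf' is a MultiUmaBuffer<1u>; 'gpu_ptr' caches the
    // mapped pointer between appends within the same frame.
    void write_instances(MultiUmaBuffer<1u> &buf, SceneState::InstanceData *&gpu_ptr,
            const SceneState::InstanceData *all_instances, uint32_t offset, uint32_t count) {
        const bool append = offset != 0u; // The frame's first write starts from scratch.
        if (buf.prepare_for_map(append)) {
            gpu_ptr = nullptr; // Buffer advanced or was recreated: old mapping is invalid.
        }
        if (!gpu_ptr && count > 0u) {
            // Must rewrite everything written so far this frame, not just the new range.
            count += offset;
            offset = 0u;
            gpu_ptr = reinterpret_cast<SceneState::InstanceData *>(buf.map_raw_for_upload(0u));
        }
        for (uint32_t i = 0u; i < count; i++) {
            gpu_ptr[offset + i] = all_instances[offset + i]; // Sequential writes only: memory is write-combined.
        }
        RD::get_singleton()->buffer_flush(buf._get(0u)); // Make the CPU writes visible to the GPU.
    }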
servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h

@@ -31,6 +31,7 @@
 #pragma once
 
 #include "core/templates/paged_allocator.h"
+#include "servers/rendering/multi_uma_buffer.h"
 #include "servers/rendering/renderer_rd/cluster_builder_rd.h"
 #include "servers/rendering/renderer_rd/effects/fsr2.h"
 #ifdef METAL_ENABLED

@@ -398,9 +399,8 @@ private:
 		uint32_t max_lightmaps;
 		RID lightmap_buffer;
 
-		RID instance_buffer[RENDER_LIST_MAX];
-		uint32_t instance_buffer_size[RENDER_LIST_MAX] = { 0, 0, 0 };
-		LocalVector<InstanceData> instance_data[RENDER_LIST_MAX];
+		MultiUmaBuffer<1u> instance_buffer[RENDER_LIST_MAX] = { MultiUmaBuffer<1u>("RENDER_LIST_OPAQUE"), MultiUmaBuffer<1u>("RENDER_LIST_MOTION"), MultiUmaBuffer<1u>("RENDER_LIST_ALPHA"), MultiUmaBuffer<1u>("RENDER_LIST_SECONDARY") };
+		InstanceData *curr_gpu_ptr[RENDER_LIST_MAX] = {};
 
 		LightmapCaptureData *lightmap_captures = nullptr;
 		uint32_t max_lightmap_captures;

@@ -433,6 +433,7 @@ private:
 
 		LocalVector<ShadowPass> shadow_passes;
 
+		void grow_instance_buffer(RenderListType p_render_list, uint32_t p_req_element_count, bool p_append);
 	} scene_state;
 
 	static RenderForwardClustered *singleton;

@@ -464,7 +465,6 @@ private:
 	void _render_list(RenderingDevice::DrawListID p_draw_list, RenderingDevice::FramebufferFormatID p_framebuffer_Format, RenderListParameters *p_params, uint32_t p_from_element, uint32_t p_to_element);
 	void _render_list_with_draw_list(RenderListParameters *p_params, RID p_framebuffer, BitField<RD::DrawFlags> p_draw_flags = RD::DRAW_DEFAULT_ALL, const Vector<Color> &p_clear_color_values = Vector<Color>(), float p_clear_depth_value = 0.0, uint32_t p_clear_stencil_value = 0, const Rect2 &p_region = Rect2());
 
-	void _update_instance_data_buffer(RenderListType p_render_list);
 	void _fill_instance_data(RenderListType p_render_list, int *p_render_info = nullptr, uint32_t p_offset = 0, int32_t p_max_elements = -1, bool p_update_buffer = true);
 	void _fill_render_list(RenderListType p_render_list, const RenderDataRD *p_render_data, PassMode p_pass_mode, bool p_using_sdfgi = false, bool p_using_opaque_gi = false, bool p_using_motion_pass = false, bool p_append = false);
servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp

@@ -667,7 +667,9 @@ void SceneShaderForwardClustered::init(const String p_defines) {
 		shader_versions.push_back(ShaderRD::VariantDefine(group, version, false));
 	}
 
-	shader.initialize(shader_versions, p_defines);
+	Vector<uint64_t> dynamic_buffers;
+	dynamic_buffers.push_back(ShaderRD::DynamicBuffer::encode(RenderForwardClustered::RENDER_PASS_UNIFORM_SET, 2));
+	shader.initialize(shader_versions, p_defines, Vector<RD::PipelineImmutableSampler>(), dynamic_buffers);
 
 	if (RendererCompositorRD::get_singleton()->is_xr_enabled()) {
 		shader.enable_group(SHADER_GROUP_MULTIVIEW);
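Worth noting why the *_DYNAMIC uniform types pair well with MultiUmaBuffer: per the header's documentation, _get() yields the same RID on the next frame after the same number of prepare_for_upload() calls, so a uniform set built against that RID can usually be cached instead of rebuilt every frame. A hedged sketch of that caching pattern; cached_uniform_set, instance_buffer and shader_rid are illustrative names, not code from this PR:

    // Build the uniform once against the buffer's current RID; reuse the set while valid.
    RD::Uniform u;
    u.binding = 2;
    u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC;
    u.append_id(instance_buffer._get(0u)); // Stable across frames if the pass count is stable.

    Vector<RD::Uniform> uniforms;
    uniforms.push_back(u);
    if (cached_uniform_set.is_null() || !RD::get_singleton()->uniform_set_is_valid(cached_uniform_set)) {
        cached_uniform_set = RD::get_singleton()->uniform_set_create(uniforms, shader_rid, RenderForwardClustered::RENDER_PASS_UNIFORM_SET);
    }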
servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp

@@ -424,13 +424,10 @@ bool RenderForwardMobile::_render_buffers_can_be_storage() {
 	return false;
 }
 
-RID RenderForwardMobile::_setup_render_pass_uniform_set(RenderListType p_render_list, const RenderDataRD *p_render_data, RID p_radiance_texture, const RendererRD::MaterialStorage::Samplers &p_samplers, bool p_use_directional_shadow_atlas, int p_index) {
+RID RenderForwardMobile::_setup_render_pass_uniform_set(RenderListType p_render_list, const RenderDataRD *p_render_data, RID p_radiance_texture, const RendererRD::MaterialStorage::Samplers &p_samplers, bool p_use_directional_shadow_atlas, uint32_t p_pass_offset) {
 	RendererRD::LightStorage *light_storage = RendererRD::LightStorage::get_singleton();
 	RendererRD::TextureStorage *texture_storage = RendererRD::TextureStorage::get_singleton();
 
-	//there should always be enough uniform buffers for render passes, otherwise bugs
-	ERR_FAIL_INDEX_V(p_index, (int)scene_state.uniform_buffers.size(), RID());
-
 	bool is_multiview = false;
 
 	Ref<RenderBufferDataForwardMobile> rb_data;

@@ -454,19 +451,26 @@ RID RenderForwardMobile::_setup_render_pass_uniform_set(RenderListType p_render_
 	{
 		RD::Uniform u;
 		u.binding = 0;
-		u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER;
-		u.append_id(scene_state.uniform_buffers[p_index]);
+		u.uniform_type = RD::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC;
+		// Negative on purpose. We've created multiple uniform_buffers by calling prepare_for_upload()
+		// many times in a row, and now we must reference those.
+		// We use 0u - p_pass_offset instead of -p_pass_offset to make MSVC warnings shut up.
+		// See the "Tricks" section of the MultiUmaBuffer documentation.
+		u.append_id(scene_state.uniform_buffers._get(uint32_t(0u - p_pass_offset)));
 		uniforms.push_back(u);
 	}
 
 	{
 		RD::Uniform u;
 		u.binding = 1;
-		u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
-		RID instance_buffer = scene_state.instance_buffer[p_render_list];
-		if (instance_buffer == RID()) {
-			instance_buffer = scene_shader.default_vec4_xform_buffer; // Any buffer will do since its not used.
+		u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC;
+		if (scene_state.instance_buffer[p_render_list].get_size(0u) == 0u) {
+			// Any buffer will do since it's not used, so just create one.
+			// We can't use scene_shader.default_vec4_xform_buffer because it's not dynamic.
+			scene_state.instance_buffer[p_render_list].set_size(0u, INSTANCE_DATA_BUFFER_MIN_SIZE * sizeof(SceneState::InstanceData), true);
+			scene_state.instance_buffer[p_render_list].prepare_for_upload();
 		}
+		RID instance_buffer = scene_state.instance_buffer[p_render_list]._get(0u);
 		u.append_id(instance_buffer);
 		uniforms.push_back(u);
 	}

@@ -886,6 +890,7 @@ void RenderForwardMobile::_render_scene(RenderDataRD *p_render_data, const Color
 		render_list[RENDER_LIST_OPAQUE].sort_by_key();
 	}
 	render_list[RENDER_LIST_ALPHA].sort_by_reverse_depth_and_priority();
+
 	_fill_instance_data(RENDER_LIST_OPAQUE);
 	_fill_instance_data(RENDER_LIST_ALPHA);

@@ -1507,12 +1512,9 @@ void RenderForwardMobile::_render_shadow_begin() {
 	_update_render_base_uniform_set();
 
 	render_list[RENDER_LIST_SECONDARY].clear();
-	scene_state.instance_data[RENDER_LIST_SECONDARY].clear();
 }
 
 void RenderForwardMobile::_render_shadow_append(RID p_framebuffer, const PagedArray<RenderGeometryInstance *> &p_instances, const Projection &p_projection, const Transform3D &p_transform, float p_zfar, float p_bias, float p_normal_bias, bool p_use_dp, bool p_use_dp_flip, bool p_use_pancake, float p_lod_distance_multiplier, float p_screen_mesh_lod_threshold, const Rect2i &p_rect, bool p_flip_y, bool p_clear_region, bool p_begin, bool p_end, RenderingMethod::RenderInfo *p_render_info, const Transform3D &p_main_cam_transform) {
 	uint32_t shadow_pass_index = scene_state.shadow_passes.size();
 
 	SceneState::ShadowPass shadow_pass;
 
 	if (p_render_info) {

@@ -1539,7 +1541,7 @@ void RenderForwardMobile::_render_shadow_append(RID p_framebuffer, const PagedAr
 	render_data.instances = &p_instances;
 	render_data.render_info = p_render_info;
 
-	_setup_environment(&render_data, true, Vector2(1, 1), Color(), false, p_use_pancake, shadow_pass_index);
+	_setup_environment(&render_data, true, Vector2(1, 1), Color(), false, p_use_pancake);
 
 	if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_DISABLE_LOD) {
 		scene_data.screen_mesh_lod_threshold = 0.0;

@@ -1580,13 +1582,17 @@ void RenderForwardMobile::_render_shadow_append(RID p_framebuffer, const PagedAr
 }
 
 void RenderForwardMobile::_render_shadow_process() {
-	_update_instance_data_buffer(RENDER_LIST_SECONDARY);
+	RenderingDevice *rd = RenderingDevice::get_singleton();
+	if (scene_state.instance_buffer[RENDER_LIST_SECONDARY].get_size(0u) > 0u) {
+		rd->buffer_flush(scene_state.instance_buffer[RENDER_LIST_SECONDARY]._get(0u));
+	}
+
 	//render shadows one after the other, so this can be done un-barriered and the driver can optimize (as well as allow us to run compute at the same time)
 
 	for (uint32_t i = 0; i < scene_state.shadow_passes.size(); i++) {
 		//render passes need to be configured after instance buffer is done, since they need the latest version
 		SceneState::ShadowPass &shadow_pass = scene_state.shadow_passes[i];
-		shadow_pass.rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, nullptr, RID(), RendererRD::MaterialStorage::get_singleton()->samplers_rd_get_default(), false, i);
+		shadow_pass.rp_uniform_set = _setup_render_pass_uniform_set(RENDER_LIST_SECONDARY, nullptr, RID(), RendererRD::MaterialStorage::get_singleton()->samplers_rd_get_default(), false, scene_state.shadow_passes.size() - 1u - i);
 	}
 
 	RD::get_singleton()->draw_command_end_label();

@@ -1899,17 +1905,19 @@ RID RenderForwardMobile::_render_buffers_get_velocity_texture(Ref<RenderSceneBuf
 	return RID();
 }
 
-void RenderForwardMobile::_update_instance_data_buffer(RenderListType p_render_list) {
-	if (scene_state.instance_data[p_render_list].size() > 0) {
-		if (scene_state.instance_buffer[p_render_list] == RID() || scene_state.instance_buffer_size[p_render_list] < scene_state.instance_data[p_render_list].size()) {
-			if (scene_state.instance_buffer[p_render_list] != RID()) {
-				RD::get_singleton()->free_rid(scene_state.instance_buffer[p_render_list]);
-			}
-			uint32_t new_size = nearest_power_of_2_templated(MAX(uint64_t(INSTANCE_DATA_BUFFER_MIN_SIZE), scene_state.instance_data[p_render_list].size()));
-			scene_state.instance_buffer[p_render_list] = RD::get_singleton()->storage_buffer_create(new_size * sizeof(SceneState::InstanceData));
-			scene_state.instance_buffer_size[p_render_list] = new_size;
+void RenderForwardMobile::SceneState::grow_instance_buffer(RenderListType p_render_list, uint32_t p_req_element_count, bool p_append) {
+	if (p_req_element_count > 0) {
+		if (instance_buffer[p_render_list].get_size(0u) < p_req_element_count * sizeof(SceneState::InstanceData)) {
+			instance_buffer[p_render_list].uninit();
+			uint32_t new_size = nearest_power_of_2_templated(MAX(uint64_t(INSTANCE_DATA_BUFFER_MIN_SIZE), p_req_element_count));
+			instance_buffer[p_render_list].set_size(0u, new_size * sizeof(SceneState::InstanceData), true);
+			curr_gpu_ptr[p_render_list] = nullptr;
 		}
+
+		const bool must_remap = instance_buffer[p_render_list].prepare_for_map(p_append);
+		if (must_remap) {
+			curr_gpu_ptr[p_render_list] = nullptr;
+		}
-		RD::get_singleton()->buffer_update(scene_state.instance_buffer[p_render_list], 0, sizeof(SceneState::InstanceData) * scene_state.instance_data[p_render_list].size(), scene_state.instance_data[p_render_list].ptr());
 	}
 }

@@ -1917,16 +1925,22 @@ void RenderForwardMobile::_fill_instance_data(RenderListType p_render_list, uint
 	RenderList *rl = &render_list[p_render_list];
 	uint32_t element_total = p_max_elements >= 0 ? uint32_t(p_max_elements) : rl->elements.size();
 
-	scene_state.instance_data[p_render_list].resize(p_offset + element_total);
 	rl->element_info.resize(p_offset + element_total);
 
 	uint64_t frame = RSG::rasterizer->get_frame_number();
 
+	scene_state.grow_instance_buffer(p_render_list, p_offset + element_total, p_offset != 0u);
+	if (!scene_state.curr_gpu_ptr[p_render_list] && element_total > 0u) {
+		// The old buffer was replaced by another, larger one. We must start copying from scratch.
+		element_total += p_offset;
+		p_offset = 0u;
+		scene_state.curr_gpu_ptr[p_render_list] = reinterpret_cast<SceneState::InstanceData *>(scene_state.instance_buffer[p_render_list].map_raw_for_upload(0u));
+	}
 	for (uint32_t i = 0; i < element_total; i++) {
 		GeometryInstanceSurfaceDataCache *surface = rl->elements[i + p_offset];
 		GeometryInstanceForwardMobile *inst = surface->owner;
 
-		SceneState::InstanceData &instance_data = scene_state.instance_data[p_render_list][i + p_offset];
+		SceneState::InstanceData instance_data;
 
 		if (inst->prev_transform_dirty && frame > inst->prev_transform_change_frame + 1 && inst->prev_transform_change_frame) {
 			inst->prev_transform = inst->transform;

@@ -1972,14 +1986,16 @@ void RenderForwardMobile::_fill_instance_data(RenderListType p_render_list, uint
 		instance_data.set_compressed_aabb(surface_aabb);
 		instance_data.set_uv_scale(uv_scale);
 
+		scene_state.curr_gpu_ptr[p_render_list][i + p_offset] = instance_data;
+
 		RenderElementInfo &element_info = rl->element_info[p_offset + i];
 
 		// Sets lod_index and uses_lightmap at once.
 		element_info.value = uint32_t(surface->sort.sort_key1 & 0x1FF);
 	}
 
-	if (p_update_buffer) {
-		_update_instance_data_buffer(p_render_list);
+	if (p_update_buffer && element_total > 0u) {
+		RenderingDevice::get_singleton()->buffer_flush(scene_state.instance_buffer[p_render_list]._get(0u));
 	}
 }

@@ -2182,22 +2198,20 @@ void RenderForwardMobile::_fill_render_list(RenderListType p_render_list, const
 	}
 }
 
-void RenderForwardMobile::_setup_environment(const RenderDataRD *p_render_data, bool p_no_fog, const Size2i &p_screen_size, const Color &p_default_bg_color, bool p_opaque_render_buffers, bool p_pancake_shadows, int p_index) {
+void RenderForwardMobile::_setup_environment(const RenderDataRD *p_render_data, bool p_no_fog, const Size2i &p_screen_size, const Color &p_default_bg_color, bool p_opaque_render_buffers, bool p_pancake_shadows) {
 	RID env = is_environment(p_render_data->environment) ? p_render_data->environment : RID();
 	RID reflection_probe_instance = p_render_data->reflection_probe.is_valid() ? RendererRD::LightStorage::get_singleton()->reflection_probe_instance_get_probe(p_render_data->reflection_probe) : RID();
 
 	// May do this earlier in RenderSceneRenderRD::render_scene
-	if (p_index >= (int)scene_state.uniform_buffers.size()) {
-		uint32_t from = scene_state.uniform_buffers.size();
-		scene_state.uniform_buffers.resize(p_index + 1);
-		for (uint32_t i = from; i < scene_state.uniform_buffers.size(); i++) {
-			scene_state.uniform_buffers[i] = p_render_data->scene_data->create_uniform_buffer();
-		}
+	if (scene_state.uniform_buffers.get_size(0u) == 0u) {
+		scene_state.uniform_buffers.set_size(0u, p_render_data->scene_data->get_uniform_buffer_size_bytes(), false);
 	}
 
 	float luminance_multiplier = p_render_data->render_buffers.is_valid() ? p_render_data->render_buffers->get_luminance_multiplier() : 1.0;
 
-	p_render_data->scene_data->update_ubo(scene_state.uniform_buffers[p_index], get_debug_draw_mode(), env, reflection_probe_instance, p_render_data->camera_attributes, p_pancake_shadows, p_screen_size, p_default_bg_color, luminance_multiplier, p_opaque_render_buffers, false);
+	// Start a new setup.
+	scene_state.uniform_buffers.prepare_for_upload();
+	p_render_data->scene_data->update_ubo(scene_state.uniform_buffers.get_for_upload(0u), get_debug_draw_mode(), env, reflection_probe_instance, p_render_data->camera_attributes, p_pancake_shadows, p_screen_size, p_default_bg_color, luminance_multiplier, p_opaque_render_buffers, false);
 }
 
 /// RENDERING ///

@@ -3395,13 +3409,9 @@ RenderForwardMobile::~RenderForwardMobile() {
 	RSG::light_storage->directional_shadow_atlas_set_size(0);
 
 	{
-		for (const RID &rid : scene_state.uniform_buffers) {
-			RD::get_singleton()->free_rid(rid);
-		}
+		scene_state.uniform_buffers.uninit();
 		for (uint32_t i = 0; i < RENDER_LIST_MAX; i++) {
-			if (scene_state.instance_buffer[i].is_valid()) {
-				RD::get_singleton()->free_rid(scene_state.instance_buffer[i]);
-			}
+			scene_state.instance_buffer[i].uninit();
 		}
 		RD::get_singleton()->free_rid(scene_state.lightmap_buffer);
 		RD::get_singleton()->free_rid(scene_state.lightmap_capture_buffer);
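The `0u - p_pass_offset` trick above leans on well-defined unsigned wraparound: _get() indexes buffers[curr_idx * NUM_BUFFERS + p_idx], so a wrapped-around p_idx effectively subtracts from curr_idx. A small self-contained illustration of just the arithmetic, with made-up values:

    #include <cstdint>
    #include <cstdio>

    int main() {
        // Suppose prepare_for_upload() was called 4 times this frame: curr_idx == 3.
        const uint32_t curr_idx = 3u;
        const uint32_t num_buffers = 1u; // NUM_BUFFERS == 1 for the shadow-pass UBOs.

        for (uint32_t pass_offset = 0u; pass_offset < 4u; ++pass_offset) {
            // Same arithmetic as MultiUmaBuffer::_get(0u - pass_offset):
            // unsigned wraparound makes the flat index walk backwards from curr_idx.
            const uint32_t p_idx = 0u - pass_offset;
            const uint32_t flat_index = curr_idx * num_buffers + p_idx; // 3, 2, 1, 0
            printf("pass_offset=%u -> buffers[%u]\n", pass_offset, flat_index);
        }
        return 0;
    }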
servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h

@@ -31,6 +31,7 @@
 #pragma once
 
 #include "core/templates/paged_allocator.h"
+#include "servers/rendering/multi_uma_buffer.h"
 #include "servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.h"
 #include "servers/rendering/renderer_rd/renderer_scene_render_rd.h"

@@ -161,18 +162,17 @@ private:
 
 	/* Render Scene */
 
-	RID _setup_render_pass_uniform_set(RenderListType p_render_list, const RenderDataRD *p_render_data, RID p_radiance_texture, const RendererRD::MaterialStorage::Samplers &p_samplers, bool p_use_directional_shadow_atlas = false, int p_index = 0);
+	RID _setup_render_pass_uniform_set(RenderListType p_render_list, const RenderDataRD *p_render_data, RID p_radiance_texture, const RendererRD::MaterialStorage::Samplers &p_samplers, bool p_use_directional_shadow_atlas = false, uint32_t p_pass_offset = 0u);
 	void _pre_opaque_render(RenderDataRD *p_render_data);
 
 	uint64_t lightmap_texture_array_version = 0xFFFFFFFF;
 
 	void _update_render_base_uniform_set();
 
-	void _update_instance_data_buffer(RenderListType p_render_list);
 	void _fill_instance_data(RenderListType p_render_list, uint32_t p_offset = 0, int32_t p_max_elements = -1, bool p_update_buffer = true);
 	void _fill_render_list(RenderListType p_render_list, const RenderDataRD *p_render_data, PassMode p_pass_mode, bool p_append = false);
 
-	void _setup_environment(const RenderDataRD *p_render_data, bool p_no_fog, const Size2i &p_screen_size, const Color &p_default_bg_color, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false, int p_index = 0);
+	void _setup_environment(const RenderDataRD *p_render_data, bool p_no_fog, const Size2i &p_screen_size, const Color &p_default_bg_color, bool p_opaque_render_buffers = false, bool p_pancake_shadows = false);
 	void _setup_lightmaps(const RenderDataRD *p_render_data, const PagedArray<RID> &p_lightmaps, const Transform3D &p_cam_transform);
 
 	RID render_base_uniform_set;

@@ -193,7 +193,7 @@ private:
 	/* Scene state */
 
 	struct SceneState {
-		LocalVector<RID> uniform_buffers;
+		MultiUmaBuffer<1u> uniform_buffers = MultiUmaBuffer<1u>("SceneState::uniform_buffers");
 
 		struct PushConstantUbershader {
 			SceneShaderForwardMobile::ShaderSpecialization specialization;

@@ -274,9 +274,8 @@ private:
 		static_assert(std::is_trivially_destructible_v<InstanceData>);
 		static_assert(std::is_trivially_constructible_v<InstanceData>);
 
-		RID instance_buffer[RENDER_LIST_MAX];
-		uint32_t instance_buffer_size[RENDER_LIST_MAX] = { 0, 0, 0 };
-		LocalVector<InstanceData> instance_data[RENDER_LIST_MAX];
+		MultiUmaBuffer<1u> instance_buffer[RENDER_LIST_MAX] = { MultiUmaBuffer<1u>("RENDER_LIST_OPAQUE"), MultiUmaBuffer<1u>("RENDER_LIST_ALPHA"), MultiUmaBuffer<1u>("RENDER_LIST_SECONDARY") };
+		InstanceData *curr_gpu_ptr[RENDER_LIST_MAX] = {};
 
 		// !BAS! We need to change lightmaps, we're not going to do this with a buffer but pushing the used lightmap in
 		LightmapData lightmaps[MAX_LIGHTMAPS];

@@ -311,6 +310,8 @@ private:
 		};
 
 		LocalVector<ShadowPass> shadow_passes;
 
+		void grow_instance_buffer(RenderListType p_render_list, uint32_t p_req_element_count, bool p_append);
 	} scene_state;
 
 	/* Render List */
servers/rendering/renderer_rd/forward_mobile/scene_shader_forward_mobile.cpp

@@ -601,7 +601,10 @@ void SceneShaderForwardMobile::init(const String p_defines) {
 	immutable_shadow_sampler.append_id(shadow_sampler);
 	immutable_shadow_sampler.uniform_type = RenderingDeviceCommons::UNIFORM_TYPE_SAMPLER;
 	immutable_samplers.push_back(immutable_shadow_sampler);
-	shader.initialize(shader_versions, p_defines, immutable_samplers);
+	Vector<uint64_t> dynamic_buffers;
+	dynamic_buffers.push_back(ShaderRD::DynamicBuffer::encode(RenderForwardMobile::RENDER_PASS_UNIFORM_SET, 0));
+	dynamic_buffers.push_back(ShaderRD::DynamicBuffer::encode(RenderForwardMobile::RENDER_PASS_UNIFORM_SET, 1));
+	shader.initialize(shader_versions, p_defines, immutable_samplers, dynamic_buffers);
 
 	if (RendererCompositorRD::get_singleton()->is_xr_enabled()) {
 		enable_multiview_shader_group();
servers/rendering/renderer_rd/renderer_canvas_render_rd.cpp

@@ -748,8 +748,6 @@ void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p
 	Item *canvas_group_owner = nullptr;
 	bool skip_item = false;
 
-	state.last_instance_index = 0;
-
 	bool update_skeletons = false;
 	bool time_used = false;

@@ -916,8 +914,13 @@ void RendererCanvasRenderRD::canvas_render_items(RID p_to_render_target, Item *p
 	}
 
 	texture_info_map.clear();
-	state.current_data_buffer_index = (state.current_data_buffer_index + 1) % BATCH_DATA_BUFFER_COUNT;
-	state.current_instance_buffer_index = 0;
 	state.instance_data = nullptr;
+	if (state.instance_data_index > 0) {
+		// If there was any remaining instance data, it must be flushed.
+		RID buf = state.instance_buffers._get(0);
+		RD::get_singleton()->buffer_flush(buf);
+		state.instance_data_index = 0;
+	}
 }
 
 RID RendererCanvasRenderRD::light_create() {
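The canvas renderer follows the same persistent-buffer discipline as Example 03 in multi_uma_buffer.h: write instance data straight into the mapped pointer, then buffer_flush() once before the GPU consumes it. A condensed sketch of that lifecycle, pieced together from the diff above and hedged accordingly (the exact call sites are not all shown in this commit view):

    // Start of the frame's batching: acquire a fresh persistent mapping.
    state.instance_buffers.prepare_for_upload();
    state.instance_data = reinterpret_cast<InstanceData *>(state.instance_buffers.map_raw_for_upload(0));
    state.instance_data_index = 0;

    // Per recorded command: write directly into the write-combined mapping.
    state.instance_data[state.instance_data_index++] = template_instance;

    // Before the GPU reads it (or at frame end, as above): flush pending writes once.
    if (state.instance_data_index > 0) {
        RD::get_singleton()->buffer_flush(state.instance_buffers._get(0));
        state.instance_data_index = 0;
    }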
@@ -1747,7 +1750,10 @@ RendererCanvasRenderRD::RendererCanvasRenderRD() {
 		variants.push_back(base_define + "#define USE_ATTRIBUTES\n#define USE_POINT_SIZE\n"); // SHADER_VARIANT_ATTRIBUTES_POINTS
 	}
 
-	shader.canvas_shader.initialize(variants, global_defines);
+	Vector<uint64_t> dynamic_buffers;
+	dynamic_buffers.push_back(ShaderRD::DynamicBuffer::encode(BATCH_UNIFORM_SET, 4));
+
+	shader.canvas_shader.initialize(variants, global_defines, {}, dynamic_buffers);
 
 	shader.default_version_data = memnew(CanvasShaderData);
 	shader.default_version_data->version = shader.canvas_shader.version_create();

@@ -2058,12 +2064,7 @@ void fragment() {
 		state.max_instances_per_buffer = uint32_t(GLOBAL_GET("rendering/2d/batching/item_buffer_size"));
 		state.max_instance_buffer_size = state.max_instances_per_buffer * sizeof(InstanceData);
 		state.canvas_instance_batches.reserve(200);
 
-		for (uint32_t i = 0; i < BATCH_DATA_BUFFER_COUNT; i++) {
-			DataBuffer &db = state.canvas_instance_data_buffers[i];
-			db.instance_buffers.push_back(RD::get_singleton()->storage_buffer_create(state.max_instance_buffer_size));
-		}
-		state.instance_data_array = memnew_arr(InstanceData, state.max_instances_per_buffer);
+		state.instance_buffers.set_size(0, state.max_instance_buffer_size, true);
 	}
 }

@@ -2122,7 +2123,6 @@ uint32_t RendererCanvasRenderRD::get_pipeline_compilations(RS::PipelineSource p_
 
 void RendererCanvasRenderRD::_render_batch_items(RenderTarget p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, bool &r_sdf_used, bool p_to_backbuffer, RenderingMethod::RenderInfo *r_render_info) {
 	// Record batches
-	uint32_t instance_index = 0;
 	{
 		RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton();
 		Item *current_clip = nullptr;

@@ -2132,7 +2132,7 @@ void RendererCanvasRenderRD::_render_batch_items(RenderTarget p_to_render_target
 		bool batch_broken = false;
 		Batch *current_batch = _new_batch(batch_broken);
 		// Override the start position and index as we want to start from where we finished off last time.
-		current_batch->start = state.last_instance_index;
+		current_batch->start = state.instance_data_index;
 
 		for (int i = 0; i < p_item_count; i++) {
 			Item *ci = items[i];

@@ -2173,7 +2173,7 @@ void RendererCanvasRenderRD::_render_batch_items(RenderTarget p_to_render_target
 
 			if (ci->repeat_source_item == nullptr || ci->repeat_size == Vector2()) {
 				Transform2D base_transform = p_canvas_transform_inverse * ci->final_transform;
-				_record_item_commands(ci, p_to_render_target, base_transform, current_clip, p_lights, instance_index, batch_broken, r_sdf_used, current_batch);
+				_record_item_commands(ci, p_to_render_target, base_transform, current_clip, p_lights, batch_broken, r_sdf_used, current_batch);
 			} else {
 				Point2 start_pos = ci->repeat_size * -(ci->repeat_times / 2);
 				Point2 offset;

@@ -2186,20 +2186,11 @@ void RendererCanvasRenderRD::_render_batch_items(RenderTarget p_to_render_target
 					Transform2D base_transform = ci->final_transform;
 					base_transform.columns[2] += ci->repeat_source_item->final_transform.basis_xform(offset);
 					base_transform = p_canvas_transform_inverse * base_transform;
-					_record_item_commands(ci, p_to_render_target, base_transform, current_clip, p_lights, instance_index, batch_broken, r_sdf_used, current_batch);
+					_record_item_commands(ci, p_to_render_target, base_transform, current_clip, p_lights, batch_broken, r_sdf_used, current_batch);
 				}
 			}
 		}
 	}
 
-	// Copy over remaining data needed for rendering.
-	if (instance_index > 0) {
-		RD::get_singleton()->buffer_update(
-				state.canvas_instance_data_buffers[state.current_data_buffer_index].instance_buffers[state.current_instance_buffer_index],
-				state.last_instance_index * sizeof(InstanceData),
-				instance_index * sizeof(InstanceData),
-				state.instance_data_array);
-	}
-
 	if (state.canvas_instance_batches.is_empty()) {

@@ -2284,63 +2275,28 @@ void RendererCanvasRenderRD::_render_batch_items(RenderTarget p_to_render_target
 
 	state.current_batch_index = 0;
 	state.canvas_instance_batches.clear();
-	state.last_instance_index += instance_index;
 }
 
-RendererCanvasRenderRD::InstanceData *RendererCanvasRenderRD::new_instance_data(float *p_world, uint32_t *p_lights, uint32_t p_base_flags, uint32_t p_index, uint32_t p_uniforms_ofs, TextureInfo *p_info) {
-	InstanceData *instance_data = &state.instance_data_array[p_index];
-	// Zero out most fields.
-	for (int i = 0; i < 4; i++) {
-		instance_data->modulation[i] = 0.0;
-		instance_data->ninepatch_margins[i] = 0.0;
-		instance_data->src_rect[i] = 0.0;
-		instance_data->dst_rect[i] = 0.0;
-	}
-
-	instance_data->pad[0] = 0.0;
-	instance_data->pad[1] = 0.0;
-
-	instance_data->lights[0] = p_lights[0];
-	instance_data->lights[1] = p_lights[1];
-	instance_data->lights[2] = p_lights[2];
-	instance_data->lights[3] = p_lights[3];
-
-	for (int i = 0; i < 6; i++) {
-		instance_data->world[i] = p_world[i];
-	}
-
-	instance_data->flags = p_base_flags; // Reset on each command for safety.
-
-	instance_data->color_texture_pixel_size[0] = p_info->texpixel_size.width;
-	instance_data->color_texture_pixel_size[1] = p_info->texpixel_size.height;
-
-	instance_data->instance_uniforms_ofs = p_uniforms_ofs;
-
-	return instance_data;
-}
-
-void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTarget p_render_target, const Transform2D &p_base_transform, Item *&r_current_clip, Light *p_lights, uint32_t &r_index, bool &r_batch_broken, bool &r_sdf_used, Batch *&r_current_batch) {
+void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTarget p_render_target, const Transform2D &p_base_transform, Item *&r_current_clip, Light *p_lights, bool &r_batch_broken, bool &r_sdf_used, Batch *&r_current_batch) {
 	const RenderingServer::CanvasItemTextureFilter texture_filter = p_item->texture_filter == RS::CANVAS_ITEM_TEXTURE_FILTER_DEFAULT ? default_filter : p_item->texture_filter;
 	const RenderingServer::CanvasItemTextureRepeat texture_repeat = p_item->texture_repeat == RS::CANVAS_ITEM_TEXTURE_REPEAT_DEFAULT ? default_repeat : p_item->texture_repeat;
 
 	Transform2D base_transform = p_base_transform;
 
-	float world[6];
+	InstanceData template_instance;
+	memset(&template_instance, 0, sizeof(InstanceData));
 
 	Transform2D draw_transform; // Used by transform command
-	_update_transform_2d_to_mat2x3(base_transform, world);
+	_update_transform_2d_to_mat2x3(base_transform, template_instance.world);
 
 	Color base_color = p_item->final_modulate;
 	bool use_linear_colors = p_render_target.use_linear_colors;
-	uint32_t base_flags = 0;
-	uint32_t uniforms_ofs = static_cast<uint32_t>(p_item->instance_allocated_shader_uniforms_offset);
+	template_instance.instance_uniforms_ofs = static_cast<uint32_t>(p_item->instance_allocated_shader_uniforms_offset);
 
 	bool reclip = false;
 
 	bool skipping = false;
 
-	// TODO: consider making lights a per-batch property and then baking light operations in the shader for better performance.
-	uint32_t lights[4] = { 0, 0, 0, 0 };
-
 	uint16_t light_count = 0;
 	uint16_t shadow_mask = 0;
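The new_instance_data() rewrite above replaces per-field initialization of roughly twenty fields per command with one zeroed template that is filled once per item and copied per command. That matters for UMA: the mapped region is typically write-combined, so building the instance on cached CPU stack memory and writing it out whole avoids slow reads from (and scattered writes to) uncached memory. The diff does not show the new body of new_instance_data(), so the following one-argument version is an assumption about what it plausibly reduces to:

    // Hypothetical body: the old field-by-field setup collapses into a single
    // copy of the caller-prepared template into the persistently mapped buffer.
    RendererCanvasRenderRD::InstanceData *RendererCanvasRenderRD::new_instance_data(const InstanceData &p_template) {
        InstanceData *instance_data = &state.instance_data[state.instance_data_index];
        *instance_data = p_template; // One forward write, no read-back from write-combined memory.
        return instance_data;
    }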
@ -2350,7 +2306,8 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
|
|||
while (light) {
|
||||
if (light->render_index_cache >= 0 && p_item->light_mask & light->item_mask && p_item->z_final >= light->z_min && p_item->z_final <= light->z_max && p_item->global_rect_cache.intersects(light->rect_cache)) {
|
||||
uint32_t light_index = light->render_index_cache;
|
||||
lights[light_count >> 2] |= light_index << ((light_count & 3) * 8);
|
||||
// TODO: consider making lights a per-batch property and then baking light operations in the shader for better performance.
|
||||
template_instance.lights[light_count >> 2] |= light_index << ((light_count & 3) * 8);
|
||||
|
||||
if (p_item->light_mask & light->item_shadow_mask) {
|
||||
shadow_mask |= 1 << light_count;
|
||||
|
|
@ -2365,8 +2322,8 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
|
|||
light = light->next_ptr;
|
||||
}
|
||||
|
||||
base_flags |= light_count << INSTANCE_FLAGS_LIGHT_COUNT_SHIFT;
|
||||
base_flags |= shadow_mask << INSTANCE_FLAGS_SHADOW_MASKED_SHIFT;
|
||||
template_instance.flags |= light_count << INSTANCE_FLAGS_LIGHT_COUNT_SHIFT;
|
||||
template_instance.flags |= shadow_mask << INSTANCE_FLAGS_SHADOW_MASKED_SHIFT;
|
||||
}
|
||||
|
||||
bool use_lighting = (light_count > 0 || using_directional_lights);
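// Editor's sketch (illustration only, not part of the patch): the packing above stores up to
// 16 eight-bit light indices in the four uint32_t slots of lights[] / template_instance.lights.
// Index i lands in word (i >> 2), byte (i & 3). The matching unpack, with hypothetical names:
uint32_t unpack_light_index(const uint32_t p_lights[4], uint16_t p_i) {
	return (p_lights[p_i >> 2] >> ((p_i & 3) * 8)) & 0xFFu;
}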

@@ -2430,9 +2387,11 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
if (r_current_batch->tex_info != tex_info) {
r_current_batch = _new_batch(r_batch_broken);
r_current_batch->tex_info = tex_info;
template_instance.color_texture_pixel_size[0] = tex_info->texpixel_size.width;
template_instance.color_texture_pixel_size[1] = tex_info->texpixel_size.height;
}

InstanceData *instance_data = new_instance_data(world, lights, base_flags, r_index, uniforms_ofs, tex_info);
InstanceData *instance_data = new_instance_data(template_instance);
Rect2 src_rect;
Rect2 dst_rect;

@@ -2505,7 +2464,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
instance_data->dst_rect[2] = dst_rect.size.width;
instance_data->dst_rect[3] = dst_rect.size.height;

_add_to_batch(r_index, r_batch_broken, r_current_batch);
_add_to_batch(r_batch_broken, r_current_batch);
} break;

case Item::Command::TYPE_NINEPATCH: {

@@ -2531,9 +2490,11 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
if (r_current_batch->tex_info != tex_info) {
r_current_batch = _new_batch(r_batch_broken);
r_current_batch->tex_info = tex_info;
template_instance.color_texture_pixel_size[0] = tex_info->texpixel_size.width;
template_instance.color_texture_pixel_size[1] = tex_info->texpixel_size.height;
}

InstanceData *instance_data = new_instance_data(world, lights, base_flags, r_index, uniforms_ofs, tex_info);
InstanceData *instance_data = new_instance_data(template_instance);

Rect2 src_rect;
Rect2 dst_rect(np->rect.position.x, np->rect.position.y, np->rect.size.x, np->rect.size.y);

@@ -2582,7 +2543,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
instance_data->ninepatch_margins[2] = np->margin[SIDE_RIGHT];
instance_data->ninepatch_margins[3] = np->margin[SIDE_BOTTOM];

_add_to_batch(r_index, r_batch_broken, r_current_batch);
_add_to_batch(r_batch_broken, r_current_batch);
} break;

case Item::Command::TYPE_POLYGON: {

@@ -2606,6 +2567,8 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
if (r_current_batch->tex_info != tex_info) {
r_current_batch = _new_batch(r_batch_broken);
r_current_batch->tex_info = tex_info;
template_instance.color_texture_pixel_size[0] = tex_info->texpixel_size.width;
template_instance.color_texture_pixel_size[1] = tex_info->texpixel_size.height;
}

// pipeline variant

@@ -2615,7 +2578,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
r_current_batch->render_primitive = _primitive_type_to_render_primitive(polygon->primitive);
}

InstanceData *instance_data = new_instance_data(world, lights, base_flags, r_index, uniforms_ofs, tex_info);
InstanceData *instance_data = new_instance_data(template_instance);

Color color = base_color;
if (use_linear_colors) {

@@ -2627,7 +2590,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
instance_data->modulation[2] = color.b;
instance_data->modulation[3] = color.a;

_add_to_batch(r_index, r_batch_broken, r_current_batch);
_add_to_batch(r_batch_broken, r_current_batch);
} break;

case Item::Command::TYPE_PRIMITIVE: {

@@ -2673,9 +2636,11 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
if (r_current_batch->tex_info != tex_info) {
r_current_batch = _new_batch(r_batch_broken);
r_current_batch->tex_info = tex_info;
template_instance.color_texture_pixel_size[0] = tex_info->texpixel_size.width;
template_instance.color_texture_pixel_size[1] = tex_info->texpixel_size.height;
}

InstanceData *instance_data = new_instance_data(world, lights, base_flags, r_index, uniforms_ofs, tex_info);
InstanceData *instance_data = new_instance_data(template_instance);

for (uint32_t j = 0; j < MIN(3u, primitive->point_count); j++) {
instance_data->points[j * 2 + 0] = primitive->points[j].x;

@@ -2690,10 +2655,10 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
instance_data->colors[j * 2 + 1] = (uint32_t(Math::make_half_float(col.a)) << 16) | Math::make_half_float(col.b);
}

_add_to_batch(r_index, r_batch_broken, r_current_batch);
_add_to_batch(r_batch_broken, r_current_batch);

if (primitive->point_count == 4) {
instance_data = new_instance_data(world, lights, base_flags, r_index, uniforms_ofs, tex_info);
instance_data = new_instance_data(template_instance);

for (uint32_t j = 0; j < 3; j++) {
int offset = j == 0 ? 0 : 1;

@@ -2710,7 +2675,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
instance_data->colors[j * 2 + 1] = (uint32_t(Math::make_half_float(col.a)) << 16) | Math::make_half_float(col.b);
}

_add_to_batch(r_index, r_batch_broken, r_current_batch);
_add_to_batch(r_batch_broken, r_current_batch);
}
} break;

@@ -2736,7 +2701,9 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
_prepare_batch_texture_info(m->texture, tex_state, tex_info);
}
r_current_batch->tex_info = tex_info;
instance_data = new_instance_data(world, lights, base_flags, r_index, uniforms_ofs, tex_info);
template_instance.color_texture_pixel_size[0] = tex_info->texpixel_size.width;
template_instance.color_texture_pixel_size[1] = tex_info->texpixel_size.height;
instance_data = new_instance_data(template_instance);

r_current_batch->mesh_instance_count = 1;
_update_transform_2d_to_mat2x3(base_transform * draw_transform * m->transform, instance_data->world);

@@ -2763,7 +2730,9 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
_prepare_batch_texture_info(mm->texture, tex_state, tex_info);
}
r_current_batch->tex_info = tex_info;
instance_data = new_instance_data(world, lights, base_flags, r_index, uniforms_ofs, tex_info);
template_instance.color_texture_pixel_size[0] = tex_info->texpixel_size.width;
template_instance.color_texture_pixel_size[1] = tex_info->texpixel_size.height;
instance_data = new_instance_data(template_instance);

r_current_batch->flags |= 1; // multimesh, trails disabled

@@ -2785,7 +2754,9 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
_prepare_batch_texture_info(pt->texture, tex_state, tex_info);
}
r_current_batch->tex_info = tex_info;
instance_data = new_instance_data(world, lights, base_flags, r_index, uniforms_ofs, tex_info);
template_instance.color_texture_pixel_size[0] = tex_info->texpixel_size.width;
template_instance.color_texture_pixel_size[1] = tex_info->texpixel_size.height;
instance_data = new_instance_data(template_instance);

uint32_t divisor = 1;
r_current_batch->mesh_instance_count = particles_storage->particles_get_amount(pt->particles, divisor);

@@ -2828,13 +2799,13 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
instance_data->modulation[2] = modulated.b;
instance_data->modulation[3] = modulated.a;

_add_to_batch(r_index, r_batch_broken, r_current_batch);
_add_to_batch(r_batch_broken, r_current_batch);
} break;

case Item::Command::TYPE_TRANSFORM: {
const Item::CommandTransform *transform = static_cast<const Item::CommandTransform *>(c);
draw_transform = transform->xform;
_update_transform_2d_to_mat2x3(base_transform * transform->xform, world);
_update_transform_2d_to_mat2x3(base_transform * transform->xform, template_instance.world);
} break;

case Item::Command::TYPE_CLIP_IGNORE: {

@@ -2906,10 +2877,12 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
if (r_current_batch->tex_info != tex_info) {
r_current_batch = _new_batch(r_batch_broken);
r_current_batch->tex_info = tex_info;
template_instance.color_texture_pixel_size[0] = tex_info->texpixel_size.width;
template_instance.color_texture_pixel_size[1] = tex_info->texpixel_size.height;
}

_update_transform_2d_to_mat2x3(base_transform, world);
InstanceData *instance_data = new_instance_data(world, lights, base_flags, r_index, uniforms_ofs, tex_info);
_update_transform_2d_to_mat2x3(base_transform, template_instance.world);
InstanceData *instance_data = new_instance_data(template_instance);

Rect2 src_rect;
Rect2 dst_rect;

@@ -2941,7 +2914,7 @@ void RendererCanvasRenderRD::_record_item_commands(const Item *p_item, RenderTar
instance_data->dst_rect[2] = dst_rect.size.width;
instance_data->dst_rect[3] = dst_rect.size.height;

_add_to_batch(r_index, r_batch_broken, r_current_batch);
_add_to_batch(r_batch_broken, r_current_batch);

p_item->debug_redraw_time -= RSG::rasterizer->get_frame_delta_time();

@@ -2984,9 +2957,7 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha
{
RendererRD::TextureStorage *ts = RendererRD::TextureStorage::get_singleton();

RIDSetKey key(
p_batch->tex_info->state,
state.canvas_instance_data_buffers[state.current_data_buffer_index].instance_buffers[p_batch->instance_buffer_index]);
RIDSetKey key(p_batch->tex_info->state, p_batch->instance_buffer);

const RID *uniform_set = rid_set_to_uniform_set.getptr(key);
if (uniform_set == nullptr) {

@@ -2995,7 +2966,7 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha
uniform_ptrw[1] = RD::Uniform(RD::UNIFORM_TYPE_TEXTURE, 1, p_batch->tex_info->normal);
uniform_ptrw[2] = RD::Uniform(RD::UNIFORM_TYPE_TEXTURE, 2, p_batch->tex_info->specular);
uniform_ptrw[3] = RD::Uniform(RD::UNIFORM_TYPE_SAMPLER, 3, p_batch->tex_info->sampler);
uniform_ptrw[4] = RD::Uniform(RD::UNIFORM_TYPE_STORAGE_BUFFER, 4, state.canvas_instance_data_buffers[state.current_data_buffer_index].instance_buffers[p_batch->instance_buffer_index]);
uniform_ptrw[4] = RD::Uniform(RD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC, 4, p_batch->instance_buffer);

RID rid = RD::get_singleton()->uniform_set_create(state.batch_texture_uniforms, shader.default_version_rd_shader, BATCH_UNIFORM_SET);
ERR_FAIL_COND_MSG(rid.is_null(), "Failed to create uniform set for batch.");

@@ -3194,10 +3165,24 @@ void RendererCanvasRenderRD::_render_batch(RD::DrawListID p_draw_list, CanvasSha
}
}

RendererCanvasRenderRD::InstanceData *RendererCanvasRenderRD::new_instance_data(const InstanceData &template_instance) {
DEV_ASSERT(state.instance_data != nullptr);

InstanceData *instance_data = &state.instance_data[state.instance_data_index];
memcpy(instance_data, &template_instance, sizeof(InstanceData));
return instance_data;
}

RendererCanvasRenderRD::Batch *RendererCanvasRenderRD::_new_batch(bool &r_batch_broken) {
if (state.canvas_instance_batches.is_empty()) {
Batch new_batch;
new_batch.instance_buffer_index = state.current_instance_buffer_index;
// This will still be a valid pointer when multiple calls to _render_batch_items
// are made in the same draw call.
if (state.instance_data == nullptr) {
// If there is no existing instance buffer, we must allocate a new one.
_allocate_instance_buffer();
}
new_batch.instance_buffer = state.instance_buffers._get(0);
state.canvas_instance_batches.push_back(new_batch);
return state.canvas_instance_batches.ptr();
}

@@ -3212,43 +3197,30 @@ RendererCanvasRenderRD::Batch *RendererCanvasRenderRD::_new_batch(bool &r_batch_
Batch new_batch = state.canvas_instance_batches[state.current_batch_index];
new_batch.instance_count = 0;
new_batch.start = state.canvas_instance_batches[state.current_batch_index].start + state.canvas_instance_batches[state.current_batch_index].instance_count;
new_batch.instance_buffer_index = state.current_instance_buffer_index;
state.current_batch_index++;
state.canvas_instance_batches.push_back(new_batch);
return &state.canvas_instance_batches[state.current_batch_index];
}

void RendererCanvasRenderRD::_add_to_batch(uint32_t &r_index, bool &r_batch_broken, Batch *&r_current_batch) {
void RendererCanvasRenderRD::_add_to_batch(bool &r_batch_broken, Batch *&r_current_batch) {
r_current_batch->instance_count++;
r_index++;
if (r_index + state.last_instance_index >= state.max_instances_per_buffer) {
// Copy over all data needed for rendering right away
// then go back to recording item commands.
RD::get_singleton()->buffer_update(
state.canvas_instance_data_buffers[state.current_data_buffer_index].instance_buffers[state.current_instance_buffer_index],
state.last_instance_index * sizeof(InstanceData),
r_index * sizeof(InstanceData),
state.instance_data_array);
state.instance_data_index++;
if (state.instance_data_index >= state.max_instances_per_buffer) {
RD::get_singleton()->buffer_flush(r_current_batch->instance_buffer);
state.instance_data = nullptr;
_allocate_instance_buffer();
r_index = 0;
state.last_instance_index = 0;
state.instance_data_index = 0;
r_batch_broken = false; // Force a new batch to be created
r_current_batch = _new_batch(r_batch_broken);
r_current_batch->start = 0;
r_current_batch->instance_buffer = state.instance_buffers._get(0);
}
}

void RendererCanvasRenderRD::_allocate_instance_buffer() {
state.current_instance_buffer_index++;

if (state.current_instance_buffer_index < state.canvas_instance_data_buffers[state.current_data_buffer_index].instance_buffers.size()) {
// We already allocated another buffer in a previous frame, so we can just use it.
return;
}

// Allocate a new buffer.
RID buf = RD::get_singleton()->storage_buffer_create(state.max_instance_buffer_size);
state.canvas_instance_data_buffers[state.current_data_buffer_index].instance_buffers.push_back(buf);
state.instance_buffers.prepare_for_upload();
state.instance_data = reinterpret_cast<InstanceData *>(state.instance_buffers.map_raw_for_upload(0));
}

void RendererCanvasRenderRD::_prepare_batch_texture_info(RID p_texture, TextureState &p_state, TextureInfo *p_info) {

@@ -3337,12 +3309,7 @@ RendererCanvasRenderRD::~RendererCanvasRenderRD() {
RD::get_singleton()->free_rid(state.shadow_occluder_buffer);
}

memdelete_arr(state.instance_data_array);
for (uint32_t i = 0; i < BATCH_DATA_BUFFER_COUNT; i++) {
for (uint32_t j = 0; j < state.canvas_instance_data_buffers[i].instance_buffers.size(); j++) {
RD::get_singleton()->free_rid(state.canvas_instance_data_buffers[i].instance_buffers[j]);
}
}
state.instance_buffers.uninit();

// Disable the callback, as we're tearing everything down
texture_storage->canvas_texture_set_invalidation_callback(default_canvas_texture, nullptr, nullptr);

@@ -31,6 +31,7 @@
#pragma once

#include "core/templates/lru.h"
#include "servers/rendering/multi_uma_buffer.h"
#include "servers/rendering/renderer_canvas_render.h"
#include "servers/rendering/renderer_rd/pipeline_hash_map_rd.h"
#include "servers/rendering/renderer_rd/shaders/canvas.glsl.gen.h"

@@ -495,10 +496,12 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
HashMap<RID, TightLocalVector<RID>> canvas_texture_to_uniform_set;

struct Batch {
// Position in the UBO measured in bytes
/// First instance index into the instance buffer for this batch.
uint32_t start = 0;
/// Number of instances in this batch.
uint32_t instance_count = 0;
uint32_t instance_buffer_index = 0;
/// Resource ID of the instance buffer for this batch.
RID instance_buffer; // UMA

TextureInfo *tex_info;

@@ -528,11 +531,6 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
HashMap<TextureState, TextureInfo, HashMapHasherDefault, HashMapComparatorDefault<TextureState>, PagedAllocator<HashMapElement<TextureState, TextureInfo>>> texture_info_map;

// per-frame buffers
struct DataBuffer {
LocalVector<RID> instance_buffers;
};

struct State {
//state buffer
struct Buffer {

@@ -555,13 +553,17 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
uint32_t flags;
};

DataBuffer canvas_instance_data_buffers[BATCH_DATA_BUFFER_COUNT];
LocalVector<Batch> canvas_instance_batches;
uint32_t current_data_buffer_index = 0;
uint32_t current_instance_buffer_index = 0;
uint32_t current_batch_index = 0;
uint32_t last_instance_index = 0;
InstanceData *instance_data_array = nullptr;

static_assert(std::is_trivially_destructible_v<InstanceData>);
static_assert(std::is_trivially_constructible_v<InstanceData>);

MultiUmaBuffer<1u> instance_buffers = MultiUmaBuffer<1u>("CANVAS_INSTANCE_DATA");
/// A pointer to the current instance buffer retrieved from <c>instance_buffers</c>.
InstanceData *instance_data = nullptr;
/// The index of the next instance to be added to <c>instance_data</c>.
uint32_t instance_data_index = 0;

uint32_t max_instances_per_buffer = 16384;
uint32_t max_instance_buffer_size = 16384 * sizeof(InstanceData);

@@ -619,12 +621,14 @@ class RendererCanvasRenderRD : public RendererCanvasRender {
inline RID _get_pipeline_specialization_or_ubershader(CanvasShaderData *p_shader_data, PipelineKey &r_pipeline_key, PushConstant &r_push_constant, RID p_mesh_instance = RID(), void *p_surface = nullptr, uint32_t p_surface_index = 0, RID *r_vertex_array = nullptr);
void _render_batch_items(RenderTarget p_to_render_target, int p_item_count, const Transform2D &p_canvas_transform_inverse, Light *p_lights, bool &r_sdf_used, bool p_to_backbuffer = false, RenderingMethod::RenderInfo *r_render_info = nullptr);
void _record_item_commands(const Item *p_item, RenderTarget p_render_target, const Transform2D &p_base_transform, Item *&r_current_clip, Light *p_lights, uint32_t &r_index, bool &r_batch_broken, bool &r_sdf_used, Batch *&r_current_batch);
void _record_item_commands(const Item *p_item, RenderTarget p_render_target, const Transform2D &p_base_transform, Item *&r_current_clip, Light *p_lights, bool &r_batch_broken, bool &r_sdf_used, Batch *&r_current_batch);
void _render_batch(RD::DrawListID p_draw_list, CanvasShaderData *p_shader_data, RenderingDevice::FramebufferFormatID p_framebuffer_format, Light *p_lights, Batch const *p_batch, RenderingMethod::RenderInfo *r_render_info = nullptr);
void _prepare_batch_texture_info(RID p_texture, TextureState &p_state, TextureInfo *p_info);
InstanceData *new_instance_data(float *p_world, uint32_t *p_lights, uint32_t p_base_flags, uint32_t p_index, uint32_t p_uniforms_ofs, TextureInfo *p_info);

// non-UMA
InstanceData *new_instance_data(const InstanceData &template_instance);
[[nodiscard]] Batch *_new_batch(bool &r_batch_broken);
void _add_to_batch(uint32_t &r_index, bool &r_batch_broken, Batch *&r_current_batch);
void _add_to_batch(bool &r_batch_broken, Batch *&r_current_batch);
void _allocate_instance_buffer();

_FORCE_INLINE_ void _update_transform_2d_to_mat2x4(const Transform2D &p_transform, float *p_mat2x4);

@@ -302,7 +302,7 @@ void ShaderRD::_compile_variant(uint32_t p_variant, CompileData p_data) {
}

Vector<String> variant_stage_sources = _build_variant_stage_sources(variant, p_data);
Vector<RD::ShaderStageSPIRVData> variant_stages = compile_stages(variant_stage_sources);
Vector<RD::ShaderStageSPIRVData> variant_stages = compile_stages(variant_stage_sources, dynamic_buffers);
ERR_FAIL_COND(variant_stages.is_empty());

Vector<uint8_t> shader_data = RD::get_singleton()->shader_compile_binary_from_spirv(variant_stages, name + ":" + itos(variant));

@@ -783,6 +783,10 @@ const String &ShaderRD::get_name() const {
return name;
}

const Vector<uint64_t> &ShaderRD::get_dynamic_buffers() const {
return dynamic_buffers;
}

bool ShaderRD::shader_cache_cleanup_on_start = false;

ShaderRD::ShaderRD() {

@@ -801,12 +805,13 @@ ShaderRD::ShaderRD() {
base_compute_defines = base_compute_define_text.ascii();
}

void ShaderRD::initialize(const Vector<String> &p_variant_defines, const String &p_general_defines, const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers) {
void ShaderRD::initialize(const Vector<String> &p_variant_defines, const String &p_general_defines, const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers, const Vector<uint64_t> &p_dynamic_buffers) {
ERR_FAIL_COND(variant_defines.size());
ERR_FAIL_COND(p_variant_defines.is_empty());

general_defines = p_general_defines.utf8();
immutable_samplers = p_immutable_samplers;
dynamic_buffers = p_dynamic_buffers;

// When initialized this way, there is just one group and it's always enabled.
group_to_variant_map.insert(0, LocalVector<int>{});

@@ -846,6 +851,11 @@ void ShaderRD::_initialize_cache() {
hash_build.append(variant_defines[E.value[i]].text.get_data());
}

for (const uint64_t dyn_buffer : dynamic_buffers) {
hash_build.append("[dynamic_buffer]");
hash_build.append(uitos(dyn_buffer));
}

group_sha256[E.key] = hash_build.as_string().sha256_text();

if (!shader_cache_user_dir.is_empty()) {

@@ -880,12 +890,13 @@ void ShaderRD::_initialize_cache() {
}

// Same as above, but allows specifying shader compilation groups.
void ShaderRD::initialize(const Vector<VariantDefine> &p_variant_defines, const String &p_general_defines, const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers) {
void ShaderRD::initialize(const Vector<VariantDefine> &p_variant_defines, const String &p_general_defines, const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers, const Vector<uint64_t> &p_dynamic_buffers) {
ERR_FAIL_COND(variant_defines.size());
ERR_FAIL_COND(p_variant_defines.is_empty());

general_defines = p_general_defines.utf8();
immutable_samplers = p_immutable_samplers;
dynamic_buffers = p_dynamic_buffers;

int max_group_id = 0;

@@ -962,7 +973,7 @@ void ShaderRD::set_shader_cache_save_debug(bool p_enable) {
shader_cache_save_debug = p_enable;
}

Vector<RD::ShaderStageSPIRVData> ShaderRD::compile_stages(const Vector<String> &p_stage_sources) {
Vector<RD::ShaderStageSPIRVData> ShaderRD::compile_stages(const Vector<String> &p_stage_sources, const Vector<uint64_t> &p_dynamic_buffers) {
RD::ShaderStageSPIRVData stage;
Vector<RD::ShaderStageSPIRVData> stages;
String error;

@@ -974,6 +985,7 @@ Vector<RD::ShaderStageSPIRVData> ShaderRD::compile_stages(const Vector<String> &
}

stage.spirv = RD::get_singleton()->shader_compile_spirv_from_source(RD::ShaderStage(i), p_stage_sources[i], RD::SHADER_LANGUAGE_GLSL, &error);
stage.dynamic_buffers = p_dynamic_buffers;
stage.shader_stage = RD::ShaderStage(i);
if (!stage.spirv.is_empty()) {
stages.push_back(stage);

@@ -65,6 +65,7 @@ private:
Vector<bool> group_enabled;

Vector<RD::PipelineImmutableSampler> immutable_samplers;
Vector<uint64_t> dynamic_buffers;

struct Version {
Mutex *mutex = nullptr;

@@ -225,6 +226,8 @@ public:

const String &get_name() const;

const Vector<uint64_t> &get_dynamic_buffers() const;

static void shaders_embedded_set_lock();
static const ShaderVersionPairSet &shaders_embedded_set_get();
static void shaders_embedded_set_unlock();

@@ -237,15 +240,26 @@ public:
static void set_shader_cache_save_compressed_zstd(bool p_enable);
static void set_shader_cache_save_debug(bool p_enable);

static Vector<RD::ShaderStageSPIRVData> compile_stages(const Vector<String> &p_stage_sources);
static Vector<RD::ShaderStageSPIRVData> compile_stages(const Vector<String> &p_stage_sources, const Vector<uint64_t> &p_dynamic_buffers);
static PackedByteArray save_shader_cache_bytes(const LocalVector<int> &p_variants, const Vector<Vector<uint8_t>> &p_variant_data);

Vector<String> version_build_variant_stage_sources(RID p_version, int p_variant);
RS::ShaderNativeSourceCode version_get_native_source_code(RID p_version);
String version_get_cache_file_relative_path(RID p_version, int p_group, const String &p_api_name);

void initialize(const Vector<String> &p_variant_defines, const String &p_general_defines = "", const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers = Vector<RD::PipelineImmutableSampler>());
void initialize(const Vector<VariantDefine> &p_variant_defines, const String &p_general_defines = "", const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers = Vector<RD::PipelineImmutableSampler>());
struct DynamicBuffer {
static uint64_t encode(uint32_t p_set_id, uint32_t p_binding) {
return uint64_t(p_set_id) << 32ul | uint64_t(p_binding);
}
};

// Dynamic buffers specify which buffers will be persistent/dynamic when used.
// See DynamicBuffer::encode. We need this argument because SPIR-V does not distinguish between a
// uniform buffer and a dynamic uniform buffer. At shader level they're the same thing, but the PSO
// is created slightly differently and they're bound differently.
// On D3D12 the Root Layout is also different.
void initialize(const Vector<String> &p_variant_defines, const String &p_general_defines = "", const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers = Vector<RD::PipelineImmutableSampler>(), const Vector<uint64_t> &p_dynamic_buffers = Vector<uint64_t>());
void initialize(const Vector<VariantDefine> &p_variant_defines, const String &p_general_defines = "", const Vector<RD::PipelineImmutableSampler> &p_immutable_samplers = Vector<RD::PipelineImmutableSampler>(), const Vector<uint64_t> &p_dynamic_buffers = Vector<uint64_t>());
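// Editor's sketch (illustration only, not from this patch): registering a binding as dynamic
// at initialize() time. The set/binding pair mirrors the canvas batch buffer bound earlier in
// this PR (BATCH_UNIFORM_SET, binding 4); treat the variable names here as assumptions.
Vector<uint64_t> dynamic_buffers;
dynamic_buffers.push_back(ShaderRD::DynamicBuffer::encode(BATCH_UNIFORM_SET, 4));
shader.initialize(variant_defines, general_defines, immutable_samplers, dynamic_buffers);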

virtual ~ShaderRD();
};

@@ -94,6 +94,8 @@ public:
void update_ubo(RID p_uniform_buffer, RS::ViewportDebugDraw p_debug_mode, RID p_env, RID p_reflection_probe_instance, RID p_camera_attributes, bool p_pancake_shadows, const Size2i &p_screen_size, const Color &p_default_bg_color, float p_luminance_multiplier, bool p_opaque_render_buffers, bool p_apply_alpha_multiplier);
virtual RID get_uniform_buffer() const override;

static uint32_t get_uniform_buffer_size_bytes() { return sizeof(UBODATA); }

private:
RID uniform_buffer; // loaded into this uniform buffer (supplied externally)

@@ -267,7 +267,7 @@ Error RenderingDevice::_buffer_initialize(Buffer *p_buffer, Span<uint8_t> p_data
Error RenderingDevice::_insert_staging_block(StagingBuffers &p_staging_buffers) {
StagingBufferBlock block;

block.driver_id = driver->buffer_create(p_staging_buffers.block_size, p_staging_buffers.usage_bits, RDD::MEMORY_ALLOCATION_TYPE_CPU);
block.driver_id = driver->buffer_create(p_staging_buffers.block_size, p_staging_buffers.usage_bits, RDD::MEMORY_ALLOCATION_TYPE_CPU, frames_drawn);
ERR_FAIL_COND_V(!block.driver_id, ERR_CANT_CREATE);

block.frame_used = 0;

@@ -455,19 +455,29 @@ Error RenderingDevice::buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t
return OK;
}

Error RenderingDevice::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data) {
Error RenderingDevice::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, bool p_skip_check) {
ERR_RENDER_THREAD_GUARD_V(ERR_UNAVAILABLE);

copy_bytes_count += p_size;
ERR_FAIL_COND_V_MSG(draw_list.active, ERR_INVALID_PARAMETER,
ERR_FAIL_COND_V_MSG(draw_list.active && !p_skip_check, ERR_INVALID_PARAMETER,
"Updating buffers is forbidden during creation of a draw list");
ERR_FAIL_COND_V_MSG(compute_list.active, ERR_INVALID_PARAMETER,
ERR_FAIL_COND_V_MSG(compute_list.active && !p_skip_check, ERR_INVALID_PARAMETER,
"Updating buffers is forbidden during creation of a compute list");

Buffer *buffer = _get_buffer_from_owner(p_buffer);
ERR_FAIL_NULL_V_MSG(buffer, ERR_INVALID_PARAMETER, "Buffer argument is not a valid buffer of any type.");
ERR_FAIL_COND_V_MSG(p_offset + p_size > buffer->size, ERR_INVALID_PARAMETER, "Attempted to write buffer (" + itos((p_offset + p_size) - buffer->size) + " bytes) past the end.");

if (buffer->usage.has_flag(RDD::BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
uint8_t *dst_data = driver->buffer_persistent_map_advance(buffer->driver_id, frames_drawn);

memcpy(dst_data + p_offset, p_data, p_size);
direct_copy_count++;
buffer_flush(p_buffer);
return OK;
}

_check_transfer_worker_buffer(buffer);

// Submitting may get chunked for various reasons, so convert this to a task.

@@ -597,8 +607,9 @@ Error RenderingDevice::driver_callback_add(RDD::DriverCallback p_callback, void

String RenderingDevice::get_perf_report() const {
String perf_report_text;
perf_report_text += " gpu:" + String::num_int64(prev_gpu_copy_count);
perf_report_text += " bytes:" + String::num_int64(prev_copy_bytes_count);
perf_report_text += " gpu:" + String::num_int64(gpu_copy_count);
perf_report_text += " direct:" + String::num_int64(direct_copy_count);
perf_report_text += " bytes:" + String::num_int64(copy_bytes_count);

perf_report_text += " lazily alloc:" + String::num_int64(driver->get_lazily_memory_used());
return perf_report_text;

@@ -608,6 +619,7 @@ void RenderingDevice::update_perf_report() {
prev_gpu_copy_count = gpu_copy_count;
prev_copy_bytes_count = copy_bytes_count;
gpu_copy_count = 0;
direct_copy_count = 0;
copy_bytes_count = 0;
}

@@ -659,7 +671,7 @@ Vector<uint8_t> RenderingDevice::buffer_get_data(RID p_buffer, uint32_t p_offset

_check_transfer_worker_buffer(buffer);

RDD::BufferID tmp_buffer = driver->buffer_create(buffer->size, RDD::BUFFER_USAGE_TRANSFER_TO_BIT, RDD::MEMORY_ALLOCATION_TYPE_CPU);
RDD::BufferID tmp_buffer = driver->buffer_create(buffer->size, RDD::BUFFER_USAGE_TRANSFER_TO_BIT, RDD::MEMORY_ALLOCATION_TYPE_CPU, frames_drawn);
ERR_FAIL_COND_V(!tmp_buffer, Vector<uint8_t>());

RDD::BufferCopyRegion region;

@@ -784,12 +796,38 @@ uint64_t RenderingDevice::buffer_get_device_address(RID p_buffer) {
return driver->buffer_get_device_address(buffer->driver_id);
}

uint8_t *RenderingDevice::buffer_persistent_map_advance(RID p_buffer) {
ERR_RENDER_THREAD_GUARD_V(0);

Buffer *buffer = _get_buffer_from_owner(p_buffer);
ERR_FAIL_NULL_V_MSG(buffer, nullptr, "Buffer argument is not a valid buffer of any type.");
direct_copy_count++;
return driver->buffer_persistent_map_advance(buffer->driver_id, frames_drawn);
}

void RenderingDevice::buffer_flush(RID p_buffer) {
ERR_RENDER_THREAD_GUARD();

Buffer *buffer = _get_buffer_from_owner(p_buffer);
ERR_FAIL_NULL_MSG(buffer, "Buffer argument is not a valid buffer of any type.");
driver->buffer_flush(buffer->driver_id);
}

RID RenderingDevice::storage_buffer_create(uint32_t p_size_bytes, Span<uint8_t> p_data, BitField<StorageBufferUsage> p_usage, BitField<BufferCreationBits> p_creation_bits) {
ERR_FAIL_COND_V(p_data.size() && (uint32_t)p_data.size() != p_size_bytes, RID());

Buffer buffer;
buffer.size = p_size_bytes;
buffer.usage = (RDD::BUFFER_USAGE_TRANSFER_FROM_BIT | RDD::BUFFER_USAGE_TRANSFER_TO_BIT | RDD::BUFFER_USAGE_STORAGE_BIT);
if (p_creation_bits.has_flag(BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT)) {
buffer.usage.set_flag(RDD::BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT);

// This is a precaution: Persistent buffers are meant for frequent CPU -> GPU transfers.
// Writing to this buffer from GPU might cause sync issues if both CPU & GPU try to write at the
// same time. It's probably fine (since CPU always advances the pointer before writing) but let's
// stick to the known/intended use cases and scream if we deviate from it.
buffer.usage.clear_flag(RDD::BUFFER_USAGE_TRANSFER_TO_BIT);
}
if (p_usage.has_flag(STORAGE_BUFFER_USAGE_DISPATCH_INDIRECT)) {
buffer.usage.set_flag(RDD::BUFFER_USAGE_INDIRECT_BIT);
}
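// Editor's sketch (hedged, not part of the patch): creating a persistent storage buffer and
// refreshing it each frame. With BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT set, buffer_update()
// takes the direct map/memcpy/flush path above instead of scheduling a GPU copy.
// size_bytes and cpu_data are hypothetical.
RenderingDevice *rd = RD::get_singleton();
RID buf = rd->storage_buffer_create(size_bytes, Span<uint8_t>(), 0, RenderingDevice::BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT);
rd->buffer_update(buf, 0, size_bytes, cpu_data); // Maps this frame's region, copies, flushes.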

@@ -801,7 +839,7 @@ RID RenderingDevice::storage_buffer_create(uint32_t p_size_bytes, Span<uint8_t>

buffer.usage.set_flag(RDD::BUFFER_USAGE_DEVICE_ADDRESS_BIT);
}
buffer.driver_id = driver->buffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU);
buffer.driver_id = driver->buffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU, frames_drawn);
ERR_FAIL_COND_V(!buffer.driver_id, RID());

// Storage buffers are assumed to be mutable.

@@ -833,7 +871,7 @@ RID RenderingDevice::texture_buffer_create(uint32_t p_size_elements, DataFormat
Buffer texture_buffer;
texture_buffer.size = size_bytes;
BitField<RDD::BufferUsageBits> usage = (RDD::BUFFER_USAGE_TRANSFER_FROM_BIT | RDD::BUFFER_USAGE_TRANSFER_TO_BIT | RDD::BUFFER_USAGE_TEXEL_BIT);
texture_buffer.driver_id = driver->buffer_create(size_bytes, usage, RDD::MEMORY_ALLOCATION_TYPE_GPU);
texture_buffer.driver_id = driver->buffer_create(size_bytes, usage, RDD::MEMORY_ALLOCATION_TYPE_GPU, frames_drawn);
ERR_FAIL_COND_V(!texture_buffer.driver_id, RID());

// Texture buffers are assumed to be immutable unless they don't have initial data.

@@ -1884,7 +1922,7 @@ void RenderingDevice::_texture_create_reinterpret_buffer(Texture *p_texture) {
uint32_t pixel_bytes = get_image_format_pixel_size(p_texture->format);
uint32_t row_pitch = STEPIFY(p_texture->width * pixel_bytes, row_pitch_step);
uint64_t buffer_size = STEPIFY(pixel_bytes * row_pitch * p_texture->height * p_texture->depth, transfer_alignment);
p_texture->shared_fallback->buffer = driver->buffer_create(buffer_size, RDD::BUFFER_USAGE_TRANSFER_FROM_BIT | RDD::BUFFER_USAGE_TRANSFER_TO_BIT, RDD::MEMORY_ALLOCATION_TYPE_GPU);
p_texture->shared_fallback->buffer = driver->buffer_create(buffer_size, RDD::BUFFER_USAGE_TRANSFER_FROM_BIT | RDD::BUFFER_USAGE_TRANSFER_TO_BIT, RDD::MEMORY_ALLOCATION_TYPE_GPU, frames_drawn);
buffer_memory += driver->buffer_get_allocation_size(p_texture->shared_fallback->buffer);

RDG::ResourceTracker *tracker = RDG::resource_tracker_create();

@@ -1938,7 +1976,7 @@ Vector<uint8_t> RenderingDevice::texture_get_data(RID p_texture, uint32_t p_laye
work_buffer_size = STEPIFY(work_buffer_size, work_mip_alignment) + mip_layouts[i].size;
}

RDD::BufferID tmp_buffer = driver->buffer_create(work_buffer_size, RDD::BUFFER_USAGE_TRANSFER_TO_BIT, RDD::MEMORY_ALLOCATION_TYPE_CPU);
RDD::BufferID tmp_buffer = driver->buffer_create(work_buffer_size, RDD::BUFFER_USAGE_TRANSFER_TO_BIT, RDD::MEMORY_ALLOCATION_TYPE_CPU, frames_drawn);
ERR_FAIL_COND_V(!tmp_buffer, Vector<uint8_t>());

thread_local LocalVector<RDD::BufferTextureCopyRegion> command_buffer_texture_copy_regions_vector;

@@ -3052,7 +3090,7 @@ RID RenderingDevice::vertex_buffer_create(uint32_t p_size_bytes, Span<uint8_t> p
if (p_creation_bits.has_flag(BUFFER_CREATION_DEVICE_ADDRESS_BIT)) {
buffer.usage.set_flag(RDD::BUFFER_USAGE_DEVICE_ADDRESS_BIT);
}
buffer.driver_id = driver->buffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU);
buffer.driver_id = driver->buffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU, frames_drawn);
ERR_FAIL_COND_V(!buffer.driver_id, RID());

// Vertex buffers are assumed to be immutable unless they don't have initial data or they've been marked for storage explicitly.

@@ -3224,7 +3262,7 @@ RID RenderingDevice::index_buffer_create(uint32_t p_index_count, IndexBufferForm
if (p_creation_bits.has_flag(BUFFER_CREATION_DEVICE_ADDRESS_BIT)) {
index_buffer.usage.set_flag(RDD::BUFFER_USAGE_DEVICE_ADDRESS_BIT);
}
index_buffer.driver_id = driver->buffer_create(index_buffer.size, index_buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU);
index_buffer.driver_id = driver->buffer_create(index_buffer.size, index_buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU, frames_drawn);
ERR_FAIL_COND_V(!index_buffer.driver_id, RID());

// Index buffers are assumed to be immutable unless they don't have initial data.

@@ -3279,7 +3317,7 @@ RID RenderingDevice::index_array_create(RID p_index_buffer, uint32_t p_index_off
/****************/

static const char *SHADER_UNIFORM_NAMES[RenderingDevice::UNIFORM_TYPE_MAX] = {
"Sampler", "CombinedSampler", "Texture", "Image", "TextureBuffer", "SamplerTextureBuffer", "ImageBuffer", "UniformBuffer", "StorageBuffer", "InputAttachment"
"Sampler", "CombinedSampler", "Texture", "Image", "TextureBuffer", "SamplerTextureBuffer", "ImageBuffer", "UniformBuffer", "UniformBufferDynamic", "StorageBuffer", "StorageBufferDynamic", "InputAttachment"
};

String RenderingDevice::_shader_uniform_debug(RID p_shader, int p_set) {

@@ -3450,7 +3488,16 @@ RID RenderingDevice::uniform_buffer_create(uint32_t p_size_bytes, Span<uint8_t>
if (p_creation_bits.has_flag(BUFFER_CREATION_DEVICE_ADDRESS_BIT)) {
buffer.usage.set_flag(RDD::BUFFER_USAGE_DEVICE_ADDRESS_BIT);
}
buffer.driver_id = driver->buffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU);
if (p_creation_bits.has_flag(BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT)) {
buffer.usage.set_flag(RDD::BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT);

// This is a precaution: Persistent buffers are meant for frequent CPU -> GPU transfers.
// Writing to this buffer from GPU might cause sync issues if both CPU & GPU try to write at the
// same time. It's probably fine (since CPU always advances the pointer before writing) but let's
// stick to the known/intended use cases and scream if we deviate from it.
buffer.usage.clear_flag(RDD::BUFFER_USAGE_TRANSFER_TO_BIT);
}
buffer.driver_id = driver->buffer_create(buffer.size, buffer.usage, RDD::MEMORY_ALLOCATION_TYPE_GPU, frames_drawn);
ERR_FAIL_COND_V(!buffer.driver_id, RID());

// Uniform buffers are assumed to be immutable unless they don't have initial data.

@@ -3527,8 +3574,7 @@ RID RenderingDevice::uniform_set_create(const VectorView<RD::Uniform> &p_uniform
const Uniform &uniform = uniforms[uniform_idx];

ERR_FAIL_INDEX_V(uniform.uniform_type, RD::UNIFORM_TYPE_MAX, RID());
ERR_FAIL_COND_V_MSG(uniform.uniform_type != set_uniform.type, RID(),
"Mismatch uniform type for binding (" + itos(set_uniform.binding) + "), set (" + itos(p_shader_set) + "). Expected '" + SHADER_UNIFORM_NAMES[set_uniform.type] + "', supplied: '" + SHADER_UNIFORM_NAMES[uniform.uniform_type] + "'.");
ERR_FAIL_COND_V_MSG(uniform.uniform_type != set_uniform.type, RID(), "Shader '" + shader->name + "' Mismatch uniform type for binding (" + itos(set_uniform.binding) + "), set (" + itos(p_shader_set) + "). Expected '" + SHADER_UNIFORM_NAMES[set_uniform.type] + "', supplied: '" + SHADER_UNIFORM_NAMES[uniform.uniform_type] + "'.");

RDD::BoundUniform &driver_uniform = driver_uniforms[i];
driver_uniform.type = uniform.uniform_type;

@@ -3759,7 +3805,8 @@ RID RenderingDevice::uniform_set_create(const VectorView<RD::Uniform> &p_uniform
case UNIFORM_TYPE_IMAGE_BUFFER: {
// Todo.
} break;
case UNIFORM_TYPE_UNIFORM_BUFFER: {
case UNIFORM_TYPE_UNIFORM_BUFFER:
case UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: {
ERR_FAIL_COND_V_MSG(uniform.get_id_count() != 1, RID(),
"Uniform buffer supplied (binding: " + itos(uniform.binding) + ") must provide one ID (" + itos(uniform.get_id_count()) + " provided).");

@@ -3780,7 +3827,8 @@ RID RenderingDevice::uniform_set_create(const VectorView<RD::Uniform> &p_uniform
driver_uniform.ids.push_back(buffer->driver_id);
_check_transfer_worker_buffer(buffer);
} break;
case UNIFORM_TYPE_STORAGE_BUFFER: {
case UNIFORM_TYPE_STORAGE_BUFFER:
case UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
ERR_FAIL_COND_V_MSG(uniform.get_id_count() != 1, RID(),
"Storage buffer supplied (binding: " + itos(uniform.binding) + ") must provide one ID (" + itos(uniform.get_id_count()) + " provided).");

@@ -5630,7 +5678,7 @@ RenderingDevice::TransferWorker *RenderingDevice::_acquire_transfer_worker(uint3

uint32_t new_staging_buffer_size = next_power_of_2(expected_buffer_size);
transfer_worker->staging_buffer_size_allocated = new_staging_buffer_size;
transfer_worker->staging_buffer = driver->buffer_create(new_staging_buffer_size, RDD::BUFFER_USAGE_TRANSFER_FROM_BIT, RDD::MEMORY_ALLOCATION_TYPE_CPU);
transfer_worker->staging_buffer = driver->buffer_create(new_staging_buffer_size, RDD::BUFFER_USAGE_TRANSFER_FROM_BIT, RDD::MEMORY_ALLOCATION_TYPE_CPU, frames_drawn);
}
}

@@ -7788,6 +7836,8 @@ void RenderingDevice::_bind_methods() {

BIND_BITFIELD_FLAG(BUFFER_CREATION_DEVICE_ADDRESS_BIT);
BIND_BITFIELD_FLAG(BUFFER_CREATION_AS_STORAGE_BIT);
// Not exposed on purpose. This flag is too dangerous to be exposed to regular GD users.
//BIND_BITFIELD_FLAG(BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT);

BIND_ENUM_CONSTANT(UNIFORM_TYPE_SAMPLER); // For sampling only (sampler GLSL type).
BIND_ENUM_CONSTANT(UNIFORM_TYPE_SAMPLER_WITH_TEXTURE); // For sampling only, but includes a texture (samplerXX GLSL type); first a sampler, then a texture.

@@ -7799,6 +7849,8 @@ void RenderingDevice::_bind_methods() {
BIND_ENUM_CONSTANT(UNIFORM_TYPE_UNIFORM_BUFFER); // Regular uniform buffer (or UBO).
BIND_ENUM_CONSTANT(UNIFORM_TYPE_STORAGE_BUFFER); // Storage buffer ("buffer" qualifier) like UBO, but supports storage; for compute mostly.
BIND_ENUM_CONSTANT(UNIFORM_TYPE_INPUT_ATTACHMENT); // Used for sub-pass read/write; for mobile mostly.
BIND_ENUM_CONSTANT(UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC); // Exposed in case a BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT buffer created by C++ makes it into GD users.
BIND_ENUM_CONSTANT(UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC); // Exposed in case a BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT buffer created by C++ makes it into GD users.
BIND_ENUM_CONSTANT(UNIFORM_TYPE_MAX);

BIND_ENUM_CONSTANT(RENDER_PRIMITIVE_POINTS);

@@ -189,6 +189,7 @@ private:
// swapchain semaphore to be signaled (which causes bubbles).
bool split_swapchain_into_its_own_cmd_buffer = true;
uint32_t gpu_copy_count = 0;
uint32_t direct_copy_count = 0;
uint32_t copy_bytes_count = 0;
uint32_t prev_gpu_copy_count = 0;
uint32_t prev_copy_bytes_count = 0;

@@ -206,11 +207,55 @@ private:

public:
Error buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t p_src_offset, uint32_t p_dst_offset, uint32_t p_size);
Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data);
/**
 * @brief Updates the given GPU buffer at offset and size with the given CPU data.
 * @remarks
 * Buffer update is queued into the render graph. The render graph will reorder this operation so
 * that it happens together with other buffer_update() calls in bulk and before rendering operations
 * (or compute dispatches) that need it.
 *
 * This means that the following will not work as intended:
 * @code
 * buffer_update(buffer_a, ..., data_source_x, ...);
 * draw_list_draw(buffer_a); // render data_source_x.
 * buffer_update(buffer_a, ..., data_source_y, ...);
 * draw_list_draw(buffer_a); // render data_source_y.
 * @endcode
 *
 * Because it will be *reordered* to become the following:
 * @code
 * buffer_update(buffer_a, ..., data_source_x, ...);
 * buffer_update(buffer_a, ..., data_source_y, ...);
 * draw_list_draw(buffer_a); // render data_source_y. <-- Oops! Should be data_source_x.
 * draw_list_draw(buffer_a); // render data_source_y.
 * @endcode
 *
 * When p_skip_check = false, we will perform checks to prevent this situation from happening
 * (buffer_update must not be called while creating a draw or compute list).
 * Do NOT set it to true for the user-facing public API, because users had trouble understanding
 * this problem when manually creating draw lists.
 *
 * Godot internally can set p_skip_check = true when it believes it will only update
 * the buffer once and it needs to be done while a draw/compute list is being created.
 *
 * Important: The Vulkan & Metal APIs do not allow issuing copies while inside a RenderPass.
 * We can do it because Godot's render graph will reorder them.
 *
 * @param p_buffer GPU buffer to update.
 * @param p_offset Offset in bytes (relative to p_buffer).
 * @param p_size Size in bytes of the data.
 * @param p_data CPU data to transfer to GPU.
 * Pointer can be deleted after buffer_update returns.
 * @param p_skip_check Must always be false for the user-facing public API. See remarks.
 * @return Status result of the operation.
 */
Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, bool p_skip_check = false);
Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size);
Vector<uint8_t> buffer_get_data(RID p_buffer, uint32_t p_offset = 0, uint32_t p_size = 0); // This causes a stall; only use it to retrieve large buffers for saving.
Error buffer_get_data_async(RID p_buffer, const Callable &p_callback, uint32_t p_offset = 0, uint32_t p_size = 0);
uint64_t buffer_get_device_address(RID p_buffer);
uint8_t *buffer_persistent_map_advance(RID p_buffer);
void buffer_flush(RID p_buffer);
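// Editor's sketch (hedged): the lower-level write path that buffer_update() uses internally for
// BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT buffers. The map advances to this frame's region of the
// buffer, so rewrite everything you need each frame. rd, persistent_buf, cpu_data, and
// size_bytes are hypothetical.
uint8_t *dst = rd->buffer_persistent_map_advance(persistent_buf);
memcpy(dst, cpu_data, size_bytes);
rd->buffer_flush(persistent_buf);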

private:
/******************/

@@ -788,6 +833,7 @@ public:
enum BufferCreationBits {
BUFFER_CREATION_DEVICE_ADDRESS_BIT = (1 << 0),
BUFFER_CREATION_AS_STORAGE_BIT = (1 << 1),
BUFFER_CREATION_DYNAMIC_PERSISTENT_BIT = (1 << 2),
};

enum StorageBufferUsage {

@@ -1656,6 +1702,8 @@ public:
String get_device_api_version() const;
String get_device_pipeline_cache_uuid() const;

uint64_t get_frames_drawn() const { return frames_drawn; }

bool is_composite_alpha_supported() const;

uint64_t get_driver_resource(DriverResource p_resource, RID p_rid = RID(), uint64_t p_index = 0);

@@ -607,6 +607,7 @@ public:
struct ShaderStageSPIRVData {
ShaderStage shader_stage = SHADER_STAGE_MAX;
Vector<uint8_t> spirv;
Vector<uint64_t> dynamic_buffers;
};

/*********************/

@@ -626,6 +627,8 @@ public:
UNIFORM_TYPE_UNIFORM_BUFFER, // Regular uniform buffer (or UBO).
UNIFORM_TYPE_STORAGE_BUFFER, // Storage buffer ("buffer" qualifier) like UBO, but supports storage, for compute mostly.
UNIFORM_TYPE_INPUT_ATTACHMENT, // Used for sub-pass read/write, for mobile mostly.
UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC, // Same as UNIFORM but created with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT.
UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC, // Same as STORAGE but created with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT.
UNIFORM_TYPE_MAX
};
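// Editor's sketch (hedged): binding a persistent buffer in a uniform set uses the matching
// _DYNAMIC uniform type, as the canvas batch code earlier in this PR does at binding 4.
// persistent_buf is hypothetical.
RD::Uniform u(RD::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC, 4, persistent_buf);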

@@ -1062,6 +1065,7 @@ public:
uint32_t fragment_output_mask = 0;
bool is_compute = false;
bool has_multiview = false;
bool has_dynamic_buffers = false;
uint32_t compute_local_size[3] = {};
uint32_t push_constant_size = 0;

@@ -171,19 +171,30 @@ public:
BUFFER_USAGE_VERTEX_BIT = (1 << 7),
BUFFER_USAGE_INDIRECT_BIT = (1 << 8),
BUFFER_USAGE_DEVICE_ADDRESS_BIT = (1 << 17),
// There is no Vulkan equivalent. Try to use unused/unclaimed bits.
BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT = (1 << 31),
};

enum {
BUFFER_WHOLE_SIZE = ~0ULL
};

virtual BufferID buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type) = 0;
/** Allocates a new GPU buffer. Must be destroyed with buffer_free().
 * @param p_size The size in bytes of the buffer.
 * @param p_usage Usage flags.
 * @param p_allocation_type See MemoryAllocationType.
 * @param p_frames_drawn Used for debug checks when BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT is set.
 * @return The buffer.
 */
virtual BufferID buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type, uint64_t p_frames_drawn) = 0;
// Only for a buffer with BUFFER_USAGE_TEXEL_BIT.
virtual bool buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) = 0;
virtual void buffer_free(BufferID p_buffer) = 0;
virtual uint64_t buffer_get_allocation_size(BufferID p_buffer) = 0;
virtual uint8_t *buffer_map(BufferID p_buffer) = 0;
virtual void buffer_unmap(BufferID p_buffer) = 0;
virtual uint8_t *buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) = 0;
virtual void buffer_flush(BufferID p_buffer) {}
// Only for a buffer with BUFFER_USAGE_DEVICE_ADDRESS_BIT.
virtual uint64_t buffer_get_device_address(BufferID p_buffer) = 0;

@@ -499,12 +510,17 @@ public:
// Flag to indicate that this is an immutable sampler so it is skipped when creating uniform
// sets, as it would be set previously when creating the pipeline layout.
bool immutable_sampler = false;

_FORCE_INLINE_ bool is_dynamic() const {
return type == UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC || type == UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC;
}
};

virtual UniformSetID uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index, int p_linear_pool_index) = 0;
virtual void linear_uniform_set_pools_reset(int p_linear_pool_index) {}
virtual void uniform_set_free(UniformSetID p_uniform_set) = 0;
virtual bool uniform_sets_have_linear_pools() const { return false; }
virtual uint32_t uniform_sets_get_dynamic_offsets(VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) const = 0;
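// Editor's sketch (hedged interpretation): the render graph asks the driver which of the bound
// sets carry dynamic buffers and forwards the resulting mask when binding, as the
// dynamic_offsets_mask plumbing further below does. Variable names are assumptions.
uint32_t mask = driver->uniform_sets_get_dynamic_offsets(sets, shader, first_set_index, set_count);
driver->command_bind_render_uniform_sets(cmd_buffer, sets, shader, first_set_index, set_count, mask);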
|
||||
|
||||
// ----- COMMANDS -----
|
||||
|
||||
|
|
@ -646,8 +662,7 @@ public:
|
|||
|
||||
// Binding.
|
||||
virtual void command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) = 0;
|
||||
virtual void command_bind_render_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) = 0;
|
||||
virtual void command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) = 0;
|
||||
virtual void command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) = 0;
|
||||
|
||||
// Drawing.
|
||||
virtual void command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) = 0;
|
||||
|
|
@ -689,8 +704,7 @@ public:
|
|||
|
||||
// Binding.
|
||||
virtual void command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) = 0;
|
||||
virtual void command_bind_compute_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) = 0;
|
||||
virtual void command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) = 0;
|
||||
virtual void command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) = 0;
|
||||
|
||||
// Dispatching.
|
||||
virtual void command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) = 0;
|
||||
|
|
|
|||
|
|
@@ -772,7 +772,7 @@ void RenderingDeviceGraph::_run_compute_list_command(RDD::CommandBufferID p_comm
 			} break;
 			case ComputeListInstruction::TYPE_BIND_UNIFORM_SETS: {
 				const ComputeListBindUniformSetsInstruction *bind_uniform_sets_instruction = reinterpret_cast<const ComputeListBindUniformSetsInstruction *>(instruction);
-				driver->command_bind_compute_uniform_sets(p_command_buffer, VectorView<RDD::UniformSetID>(bind_uniform_sets_instruction->uniform_set_ids(), bind_uniform_sets_instruction->set_count), bind_uniform_sets_instruction->shader, bind_uniform_sets_instruction->first_set_index, bind_uniform_sets_instruction->set_count);
+				driver->command_bind_compute_uniform_sets(p_command_buffer, VectorView<RDD::UniformSetID>(bind_uniform_sets_instruction->uniform_set_ids(), bind_uniform_sets_instruction->set_count), bind_uniform_sets_instruction->shader, bind_uniform_sets_instruction->first_set_index, bind_uniform_sets_instruction->set_count, bind_uniform_sets_instruction->dynamic_offsets_mask);
 				instruction_data_cursor += sizeof(ComputeListBindUniformSetsInstruction) + sizeof(RDD::UniformSetID) * bind_uniform_sets_instruction->set_count;
 			} break;
 			case ComputeListInstruction::TYPE_DISPATCH: {
@@ -865,7 +865,7 @@ void RenderingDeviceGraph::_run_draw_list_command(RDD::CommandBufferID p_command
 			} break;
 			case DrawListInstruction::TYPE_BIND_UNIFORM_SETS: {
 				const DrawListBindUniformSetsInstruction *bind_uniform_sets_instruction = reinterpret_cast<const DrawListBindUniformSetsInstruction *>(instruction);
-				driver->command_bind_render_uniform_sets(p_command_buffer, VectorView<RDD::UniformSetID>(bind_uniform_sets_instruction->uniform_set_ids(), bind_uniform_sets_instruction->set_count), bind_uniform_sets_instruction->shader, bind_uniform_sets_instruction->first_set_index, bind_uniform_sets_instruction->set_count);
+				driver->command_bind_render_uniform_sets(p_command_buffer, VectorView<RDD::UniformSetID>(bind_uniform_sets_instruction->uniform_set_ids(), bind_uniform_sets_instruction->set_count), bind_uniform_sets_instruction->shader, bind_uniform_sets_instruction->first_set_index, bind_uniform_sets_instruction->set_count, bind_uniform_sets_instruction->dynamic_offsets_mask);
 				instruction_data_cursor += sizeof(DrawListBindUniformSetsInstruction) + sizeof(RDD::UniformSetID) * bind_uniform_sets_instruction->set_count;
 			} break;
 			case DrawListInstruction::TYPE_BIND_VERTEX_BUFFERS: {
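
Note: both replay paths forward `dynamic_offsets_mask` untouched; it is the backend that walks the bits. One conventional way to iterate such a mask, lowest set first (illustrative only; `__builtin_ctz` assumes GCC/Clang):

    // Sketch: visit each set index whose bit is set in the mask.
    void for_each_dynamic_set(uint32_t mask) {
    	while (mask != 0u) {
    		uint32_t set_index = __builtin_ctz(mask); // index of lowest set bit
    		mask &= mask - 1u; // clear that bit
    		// ...apply the dynamic offset recorded for `set_index`...
    	}
    }
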
@@ -1430,7 +1430,7 @@ void RenderingDeviceGraph::_print_draw_list(const uint8_t *p_instruction_data, u
 			const DrawListBindUniformSetsInstruction *bind_uniform_sets_instruction = reinterpret_cast<const DrawListBindUniformSetsInstruction *>(instruction);
 			print_line("\tBIND UNIFORM SETS COUNT", bind_uniform_sets_instruction->set_count);
 			for (uint32_t i = 0; i < bind_uniform_sets_instruction->set_count; i++) {
-				print_line("\tBIND UNIFORM SET ID", itos(bind_uniform_sets_instruction->uniform_set_ids()[i].id), "START INDEX", bind_uniform_sets_instruction->first_set_index);
+				print_line("\tBIND UNIFORM SET ID", itos(bind_uniform_sets_instruction->uniform_set_ids()[i].id), "START INDEX", bind_uniform_sets_instruction->first_set_index, "DYNAMIC_OFFSETS", bind_uniform_sets_instruction->dynamic_offsets_mask);
 			}
 			instruction_data_cursor += sizeof(DrawListBindUniformSetsInstruction) + sizeof(RDD::UniformSetID) * bind_uniform_sets_instruction->set_count;
 		} break;
@@ -1532,7 +1532,7 @@ void RenderingDeviceGraph::_print_compute_list(const uint8_t *p_instruction_data
 			const ComputeListBindUniformSetsInstruction *bind_uniform_sets_instruction = reinterpret_cast<const ComputeListBindUniformSetsInstruction *>(instruction);
 			print_line("\tBIND UNIFORM SETS COUNT", bind_uniform_sets_instruction->set_count);
 			for (uint32_t i = 0; i < bind_uniform_sets_instruction->set_count; i++) {
-				print_line("\tBIND UNIFORM SET ID", itos(bind_uniform_sets_instruction->uniform_set_ids()[i].id), "START INDEX", bind_uniform_sets_instruction->first_set_index);
+				print_line("\tBIND UNIFORM SET ID", itos(bind_uniform_sets_instruction->uniform_set_ids()[i].id), "START INDEX", bind_uniform_sets_instruction->first_set_index, "DYNAMIC_OFFSETS", bind_uniform_sets_instruction->dynamic_offsets_mask);
 			}
 			instruction_data_cursor += sizeof(ComputeListBindUniformSetsInstruction) + sizeof(RDD::UniformSetID) * bind_uniform_sets_instruction->set_count;
 		} break;
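
Note: in these debug dumps `DYNAMIC_OFFSETS` prints as a raw bitmask. A small helper for turning it into set indices when reading logs (hypothetical convenience, not part of the patch):

    // Hypothetical: format a dynamic-offsets mask as "0,2"-style set indices.
    #include <cstdint>
    #include <string>

    static std::string mask_to_set_indices(uint32_t mask) {
    	std::string out;
    	for (uint32_t i = 0; i < 32u; i++) {
    		if (mask & (1u << i)) {
    			if (!out.empty()) {
    				out += ",";
    			}
    			out += std::to_string(i);
    		}
    	}
    	return out.empty() ? "none" : out;
    }
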
@@ -1746,6 +1746,7 @@ void RenderingDeviceGraph::add_compute_list_bind_uniform_sets(RDD::ShaderID p_sh
 	instruction->shader = p_shader;
 	instruction->first_set_index = p_first_set_index;
 	instruction->set_count = p_set_count;
+	instruction->dynamic_offsets_mask = driver->uniform_sets_get_dynamic_offsets(p_uniform_sets, p_shader, p_first_set_index, p_set_count);
 
 	RDD::UniformSetID *ids = instruction->uniform_set_ids();
 	for (uint32_t i = 0; i < p_set_count; i++) {
@@ -1864,6 +1865,7 @@ void RenderingDeviceGraph::add_draw_list_bind_uniform_sets(RDD::ShaderID p_shade
 	instruction->shader = p_shader;
 	instruction->first_set_index = p_first_index;
 	instruction->set_count = p_set_count;
+	instruction->dynamic_offsets_mask = driver->uniform_sets_get_dynamic_offsets(p_uniform_sets, p_shader, p_first_index, p_set_count);
 
 	for (uint32_t i = 0; i < p_set_count; i++) {
 		instruction->uniform_set_ids()[i] = p_uniform_sets[i];
@@ -489,6 +489,7 @@ private:
 		RDD::ShaderID shader;
 		uint32_t first_set_index = 0;
 		uint32_t set_count = 0;
+		uint32_t dynamic_offsets_mask = 0u;
 
 		_FORCE_INLINE_ RDD::UniformSetID *uniform_set_ids() {
 			return reinterpret_cast<RDD::UniformSetID *>(&this[1]);
@@ -620,6 +621,7 @@ private:
 		RDD::ShaderID shader;
 		uint32_t first_set_index = 0;
 		uint32_t set_count = 0;
+		uint32_t dynamic_offsets_mask = 0u;
 
 		_FORCE_INLINE_ RDD::UniformSetID *uniform_set_ids() {
 			return reinterpret_cast<RDD::UniformSetID *>(&this[1]);
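
Note: both instruction structs store their `UniformSetID` array immediately after the struct itself; `&this[1]` points one struct past `this`, i.e. at the trailing payload. That is why the replay loops advance the cursor by `sizeof(...Instruction) + sizeof(RDD::UniformSetID) * set_count`. A reduced sketch of the pattern (hypothetical names, for illustration only):

    // Sketch: header-plus-trailing-array layout, sized and indexed manually.
    struct BindSetsSketch {
    	uint32_t set_count = 0;
    	uint32_t dynamic_offsets_mask = 0u;
    	uint64_t *set_ids() { return reinterpret_cast<uint64_t *>(&this[1]); }
    };
    // The allocation must cover the header plus `count` trailing IDs:
    //   alloc(sizeof(BindSetsSketch) + sizeof(uint64_t) * count);
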
@@ -32,6 +32,7 @@
 
 #include "core/io/compression.h"
+#include "servers/rendering/renderer_rd/shader_rd.h"
 #include "thirdparty/spirv-reflect/spirv_reflect.h"
 
 static inline uint32_t aligned_to(uint32_t p_size, uint32_t p_alignment) {
@@ -138,6 +139,8 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span<
 		r_refl[i].shader_stage = p_spirv[i].shader_stage;
 		r_refl[i]._spirv_data = p_spirv[i].spirv;
+
+		const Vector<uint64_t> &dynamic_buffers = p_spirv[i].dynamic_buffers;
 
 		if (p_spirv[i].shader_stage == RDC::SHADER_STAGE_COMPUTE) {
 			reflection.is_compute = true;
 			ERR_FAIL_COND_V_MSG(spirv_size != 1, FAILED,
@@ -217,11 +220,23 @@ Error RenderingShaderContainer::reflect_spirv(const String &p_shader_name, Span<
 						may_be_writable = true;
 					} break;
 					case SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER: {
-						uniform.type = RDC::UNIFORM_TYPE_UNIFORM_BUFFER;
+						const uint64_t key = ShaderRD::DynamicBuffer::encode(binding.set, binding.binding);
+						if (dynamic_buffers.has(key)) {
+							uniform.type = RDC::UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC;
+							reflection.has_dynamic_buffers = true;
+						} else {
+							uniform.type = RDC::UNIFORM_TYPE_UNIFORM_BUFFER;
+						}
 						need_block_size = true;
 					} break;
 					case SPV_REFLECT_DESCRIPTOR_TYPE_STORAGE_BUFFER: {
-						uniform.type = RDC::UNIFORM_TYPE_STORAGE_BUFFER;
+						const uint64_t key = ShaderRD::DynamicBuffer::encode(binding.set, binding.binding);
+						if (dynamic_buffers.has(key)) {
+							uniform.type = RDC::UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC;
+							reflection.has_dynamic_buffers = true;
+						} else {
+							uniform.type = RDC::UNIFORM_TYPE_STORAGE_BUFFER;
+						}
 						need_block_size = true;
 						may_be_writable = true;
 					} break;
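
Note: `ShaderRD::DynamicBuffer::encode(set, binding)` packs a (set, binding) pair into a single `uint64_t`, so testing membership in `dynamic_buffers` is one lookup per descriptor. The exact packing is internal to ShaderRD; one plausible encoding, shown purely as an assumption, would be:

    // Assumption: any injective packing works; high word = set, low word = binding.
    static inline uint64_t encode_set_binding(uint32_t set, uint32_t binding) {
    	return (uint64_t(set) << 32) | uint64_t(binding);
    }
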
@@ -486,6 +501,7 @@ void RenderingShaderContainer::set_from_shader_reflection(const RenderingDeviceC
 	reflection_data.specialization_constants_count = p_reflection.specialization_constants.size();
 	reflection_data.is_compute = p_reflection.is_compute;
 	reflection_data.has_multiview = p_reflection.has_multiview;
+	reflection_data.has_dynamic_buffers = p_reflection.has_dynamic_buffers;
 	reflection_data.compute_local_size[0] = p_reflection.compute_local_size[0];
 	reflection_data.compute_local_size[1] = p_reflection.compute_local_size[1];
 	reflection_data.compute_local_size[2] = p_reflection.compute_local_size[2];
@@ -542,6 +558,7 @@ RenderingDeviceCommons::ShaderReflection RenderingShaderContainer::get_shader_re
 	shader_refl.fragment_output_mask = reflection_data.fragment_output_mask;
 	shader_refl.is_compute = reflection_data.is_compute;
 	shader_refl.has_multiview = reflection_data.has_multiview;
+	shader_refl.has_dynamic_buffers = reflection_data.has_dynamic_buffers;
 	shader_refl.compute_local_size[0] = reflection_data.compute_local_size[0];
 	shader_refl.compute_local_size[1] = reflection_data.compute_local_size[1];
 	shader_refl.compute_local_size[2] = reflection_data.compute_local_size[2];
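
Note: `has_dynamic_buffers` now round-trips through the serialized reflection data, so consumers can branch on it without re-reflecting the SPIR-V. A hedged usage sketch (the function and its surroundings are illustrative, not code from the patch):

    // Sketch: with the reflection struct in hand (obtained via the accessor in
    // the hunk above), take the dynamic-offset bind path only when needed.
    void choose_bind_path(const RenderingDeviceCommons::ShaderReflection &refl) {
    	if (refl.has_dynamic_buffers) {
    		// ...record binds that carry a dynamic_offsets_mask...
    	} else {
    		// ...plain bind path; the mask is always zero...
    	}
    }
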
@@ -57,6 +57,7 @@ protected:
 		uint32_t specialization_constants_count = 0;
 		uint32_t is_compute = 0;
 		uint32_t has_multiview = 0;
+		uint32_t has_dynamic_buffers = 0;
 		uint32_t compute_local_size[3] = {};
 		uint32_t set_count = 0;
 		uint32_t push_constant_size = 0;