mirror of https://github.com/godotengine/godot.git
synced 2025-10-19 07:53:26 +00:00

Apple: Use image atomic operations on supported Apple hardware

Co-authored-by: A Thousand Ships <96648715+AThousandShips@users.noreply.github.com>
parent 9b22b41531
commit 5230f6c60c

32 changed files with 5354 additions and 719 deletions
@@ -2490,6 +2490,9 @@
    <constant name="SUPPORTS_BUFFER_DEVICE_ADDRESS" value="6" enum="Features">
        Features support for buffer device address extension.
    </constant>
    <constant name="SUPPORTS_IMAGE_ATOMIC_32_BIT" value="7" enum="Features">
        Support for 32-bit image atomic operations.
    </constant>
    <constant name="LIMIT_MAX_BOUND_UNIFORM_SETS" value="0" enum="Limit">
        Maximum number of uniform sets that can be bound at a given time.
    </constant>
drivers/apple/foundation_helpers.h (new file, 56 lines)
@@ -0,0 +1,56 @@
/**************************************************************************/
/*  foundation_helpers.h                                                  */
/**************************************************************************/
/*                         This file is part of:                          */
/*                             GODOT ENGINE                               */
/*                        https://godotengine.org                         */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
/*                                                                        */
/* Permission is hereby granted, free of charge, to any person obtaining  */
/* a copy of this software and associated documentation files (the        */
/* "Software"), to deal in the Software without restriction, including    */
/* without limitation the rights to use, copy, modify, merge, publish,    */
/* distribute, sublicense, and/or sell copies of the Software, and to     */
/* permit persons to whom the Software is furnished to do so, subject to  */
/* the following conditions:                                              */
/*                                                                        */
/* The above copyright notice and this permission notice shall be         */
/* included in all copies or substantial portions of the Software.        */
/*                                                                        */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
/**************************************************************************/

#pragma once

#import <Foundation/NSString.h>

class String;
template <typename T>
class CharStringT;

using CharString = CharStringT<char>;

namespace conv {

/**
 * Converts a Godot String to an NSString without allocating an intermediate UTF-8 buffer.
 */
NSString *to_nsstring(const String &p_str);
/**
 * Converts a Godot CharString to an NSString without allocating an intermediate UTF-8 buffer.
 */
NSString *to_nsstring(const CharString &p_str);
/**
 * Converts an NSString to a Godot String without allocating intermediate buffers.
 */
String to_string(NSString *p_str);

} //namespace conv
drivers/apple/foundation_helpers.mm (new file, 85 lines)
@@ -0,0 +1,85 @@
/**************************************************************************/
/*  foundation_helpers.mm                                                 */
/**************************************************************************/
/*                         This file is part of:                          */
/*                             GODOT ENGINE                               */
/*                        https://godotengine.org                         */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
/*                                                                        */
/* Permission is hereby granted, free of charge, to any person obtaining  */
/* a copy of this software and associated documentation files (the        */
/* "Software"), to deal in the Software without restriction, including    */
/* without limitation the rights to use, copy, modify, merge, publish,    */
/* distribute, sublicense, and/or sell copies of the Software, and to     */
/* permit persons to whom the Software is furnished to do so, subject to  */
/* the following conditions:                                              */
/*                                                                        */
/* The above copyright notice and this permission notice shall be         */
/* included in all copies or substantial portions of the Software.        */
/*                                                                        */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
/**************************************************************************/

#import "foundation_helpers.h"

#import "core/string/ustring.h"

#import <CoreFoundation/CFString.h>

namespace conv {

NSString *to_nsstring(const String &p_str) {
    return [[NSString alloc] initWithBytes:(const void *)p_str.ptr()
                                    length:p_str.length() * sizeof(char32_t)
                                  encoding:NSUTF32LittleEndianStringEncoding];
}

NSString *to_nsstring(const CharString &p_str) {
    return [[NSString alloc] initWithBytes:(const void *)p_str.ptr()
                                    length:p_str.length()
                                  encoding:NSUTF8StringEncoding];
}

String to_string(NSString *p_str) {
    CFStringRef str = (__bridge CFStringRef)p_str;
    CFStringEncoding fastest = CFStringGetFastestEncoding(str);
    // Sometimes, CFString will return a pointer to its encoded data,
    // so we can create the string without allocating intermediate buffers.
    const char *p = CFStringGetCStringPtr(str, fastest);
    if (p) {
        switch (fastest) {
            case kCFStringEncodingASCII:
                return String::ascii(Span(p, CFStringGetLength(str)));
            case kCFStringEncodingUTF8:
                return String::utf8(p);
            case kCFStringEncodingUTF32LE:
                return String::utf32(Span((char32_t *)p, CFStringGetLength(str)));
            default:
                break;
        }
    }

    CFRange range = CFRangeMake(0, CFStringGetLength(str));
    CFIndex byte_len = 0;
    // Try to losslessly convert the string directly into a String's buffer to avoid intermediate allocations.
    CFIndex n = CFStringGetBytes(str, range, kCFStringEncodingUTF32LE, 0, NO, nil, 0, &byte_len);
    if (n == range.length) {
        String res;
        res.resize_uninitialized((byte_len / sizeof(char32_t)) + 1);
        res[n] = 0;
        n = CFStringGetBytes(str, range, kCFStringEncodingUTF32LE, 0, NO, (UInt8 *)res.ptrw(), res.length() * sizeof(char32_t), nil);
        return res;
    }

    return String::utf8(p_str.UTF8String);
}

} //namespace conv
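For orientation, a hypothetical round trip through these helpers (an illustrative snippet, not part of the commit):

// Hypothetical usage (Objective-C++): to_nsstring() hands the UTF-32 buffer to
// Foundation directly, and to_string() takes the zero-copy path whenever
// CFString exposes its internal storage.
String label = U"Fog density map";
NSString *ns = conv::to_nsstring(label);
String back = conv::to_string(ns);
DEV_ASSERT(back == label);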
@@ -5586,6 +5586,8 @@ bool RenderingDeviceDriverD3D12::has_feature(Features p_feature) {
            return true;
        case SUPPORTS_BUFFER_DEVICE_ADDRESS:
            return true;
        case SUPPORTS_IMAGE_ATOMIC_32_BIT:
            return true;
        default:
            return false;
    }
@@ -12,7 +12,6 @@ thirdparty_obj = []
thirdparty_dir = "#thirdparty/spirv-cross/"
thirdparty_sources = [
    "spirv_cfg.cpp",
    "spirv_cross_util.cpp",
    "spirv_cross.cpp",
    "spirv_parser.cpp",
    "spirv_msl.cpp",
@@ -94,6 +94,8 @@ struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures {
    bool metal_fx_spatial = false; /**< If true, Metal FX spatial functions are supported. */
    bool metal_fx_temporal = false; /**< If true, Metal FX temporal functions are supported. */
    bool supports_gpu_address = false; /**< If true, referencing a GPU address in a shader is supported. */
    bool supports_image_atomic_32_bit = false; /**< If true, 32-bit atomic operations on images are supported. */
    bool supports_image_atomic_64_bit = false; /**< If true, 64-bit atomic operations on images are supported. */
};

struct MetalLimits {
@@ -121,6 +121,12 @@ void MetalDeviceProperties::init_features(id<MTLDevice> p_device) {
    features.simdPermute = [p_device supportsFamily:MTLGPUFamilyApple6];
    features.simdReduction = [p_device supportsFamily:MTLGPUFamilyApple7];
    features.argument_buffers_tier = p_device.argumentBuffersSupport;
    features.supports_image_atomic_32_bit = [p_device supportsFamily:MTLGPUFamilyApple6];
    features.supports_image_atomic_64_bit = [p_device supportsFamily:MTLGPUFamilyApple8];
    if (OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_IMAGE_ATOMICS") == "1") {
        features.supports_image_atomic_32_bit = false;
        features.supports_image_atomic_64_bit = false;
    }

    if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) {
        features.needs_arg_encoders = !([p_device supportsFamily:MTLGPUFamilyMetal3] && features.argument_buffers_tier == MTLArgumentBuffersTier2);
@@ -309,9 +309,23 @@ public:

class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer {
private:
#pragma mark - Common State

    // From RenderingDevice
    static constexpr uint32_t MAX_PUSH_CONSTANT_SIZE = 128;

    RenderingDeviceDriverMetal *device_driver = nullptr;
    id<MTLCommandQueue> queue = nil;
    id<MTLCommandBuffer> commandBuffer = nil;
    bool state_begin = false;

    _FORCE_INLINE_ id<MTLCommandBuffer> command_buffer() {
        DEV_ASSERT(state_begin);
        if (commandBuffer == nil) {
            commandBuffer = queue.commandBuffer;
        }
        return commandBuffer;
    }

    void _end_compute_dispatch();
    void _end_blit();

@@ -326,6 +340,11 @@ private:
    void _end_render_pass();
    void _render_clear_render_area();

#pragma mark - Compute

    void _compute_set_dirty_state();
    void _compute_bind_uniform_sets();

public:
    MDCommandBufferStateType type = MDCommandBufferStateType::None;

@@ -349,18 +368,18 @@ public:
    LocalVector<NSUInteger> vertex_offsets;
    ResourceUsageMap resource_usage;
    // clang-format off
    enum DirtyFlag: uint8_t {
        DIRTY_NONE     = 0b0000'0000,
        DIRTY_PIPELINE = 0b0000'0001, //! pipeline state
        DIRTY_UNIFORMS = 0b0000'0010, //! uniform sets
        DIRTY_DEPTH    = 0b0000'0100, //! depth / stencil state
        DIRTY_VERTEX   = 0b0000'1000, //! vertex buffers
        DIRTY_VIEWPORT = 0b0001'0000, //! viewport rectangles
        DIRTY_SCISSOR  = 0b0010'0000, //! scissor rectangles
        DIRTY_BLEND    = 0b0100'0000, //! blend state
        DIRTY_RASTER   = 0b1000'0000, //! encoder state like cull mode

        DIRTY_ALL      = 0xff,
    enum DirtyFlag: uint16_t {
        DIRTY_NONE     = 0,
        DIRTY_PIPELINE = 1 << 0, //! pipeline state
        DIRTY_UNIFORMS = 1 << 1, //! uniform sets
        DIRTY_PUSH     = 1 << 2, //! push constants
        DIRTY_DEPTH    = 1 << 3, //! depth / stencil state
        DIRTY_VERTEX   = 1 << 4, //! vertex buffers
        DIRTY_VIEWPORT = 1 << 5, //! viewport rectangles
        DIRTY_SCISSOR  = 1 << 6, //! scissor rectangles
        DIRTY_BLEND    = 1 << 7, //! blend state
        DIRTY_RASTER   = 1 << 8, //! encoder state like cull mode
        DIRTY_ALL      = (1 << 9) - 1,
    };
    // clang-format on
    BitField<DirtyFlag> dirty = DIRTY_NONE;
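The old 8-bit mask was full: adding DIRTY_PUSH brings the flag count to nine, which is why the enum widens to uint16_t and DIRTY_ALL becomes (1 << 9) - 1. A standalone sketch of the bookkeeping pattern (plain C++, using a raw integer in place of the engine's BitField):

#include <cassert>
#include <cstdint>

enum DirtyFlag : uint16_t {
    DIRTY_NONE = 0,
    DIRTY_PIPELINE = 1 << 0,
    DIRTY_UNIFORMS = 1 << 1,
    DIRTY_PUSH = 1 << 2,
    // ...remaining flags elided...
    DIRTY_ALL = (1 << 9) - 1,
};

int main() {
    uint16_t dirty = DIRTY_NONE;
    dirty |= DIRTY_PUSH; // encode_push_constant_data() defers the upload
    assert((dirty & DIRTY_PUSH) != 0);
    dirty &= uint16_t(~DIRTY_PUSH); // cleared once the encoder has the bytes
    assert(dirty == DIRTY_NONE);
    return 0;
}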
|
@ -368,6 +387,9 @@ public:
|
|||
LocalVector<MDUniformSet *> uniform_sets;
|
||||
// Bit mask of the uniform sets that are dirty, to prevent redundant binding.
|
||||
uint64_t uniform_set_mask = 0;
|
||||
uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
|
||||
uint32_t push_constant_data_len = 0;
|
||||
uint32_t push_constant_bindings[2] = { 0 };
|
||||
|
||||
_FORCE_INLINE_ void reset();
|
||||
void end_encoding();
|
||||
|
@@ -422,6 +444,13 @@ public:
        dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
    }

    _FORCE_INLINE_ void mark_push_constants_dirty() {
        if (push_constant_data_len == 0) {
            return;
        }
        dirty.set_flag(DirtyFlag::DIRTY_PUSH);
    }

    _FORCE_INLINE_ void mark_blend_dirty() {
        if (!blend_constants.has_value()) {
            return;
@@ -464,16 +493,46 @@ public:
        MDComputePipeline *pipeline = nullptr;
        id<MTLComputeCommandEncoder> encoder = nil;
        ResourceUsageMap resource_usage;
        _FORCE_INLINE_ void reset() {
            pipeline = nil;
            encoder = nil;
            // Keep the keys, as they are likely to be used again.
            for (KeyValue<StageResourceUsage, LocalVector<__unsafe_unretained id<MTLResource>>> &kv : resource_usage) {
                kv.value.clear();
        // clang-format off
        enum DirtyFlag: uint16_t {
            DIRTY_NONE     = 0,
            DIRTY_PIPELINE = 1 << 0, //! pipeline state
            DIRTY_UNIFORMS = 1 << 1, //! uniform sets
            DIRTY_PUSH     = 1 << 2, //! push constants
            DIRTY_ALL      = (1 << 3) - 1,
        };
        // clang-format on
        BitField<DirtyFlag> dirty = DIRTY_NONE;

        LocalVector<MDUniformSet *> uniform_sets;
        // Bit mask of the uniform sets that are dirty, to prevent redundant binding.
        uint64_t uniform_set_mask = 0;
        uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
        uint32_t push_constant_data_len = 0;
        uint32_t push_constant_bindings[1] = { 0 };

        _FORCE_INLINE_ void reset();
        void end_encoding();

        _FORCE_INLINE_ void mark_uniforms_dirty(void) {
            if (uniform_sets.is_empty()) {
                return;
            }
            for (uint32_t i = 0; i < uniform_sets.size(); i++) {
                if (uniform_sets[i] != nullptr) {
                    uniform_set_mask |= 1 << i;
                }
            }
            dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
        }

        _FORCE_INLINE_ void mark_push_constants_dirty() {
            if (push_constant_data_len == 0) {
                return;
            }
            dirty.set_flag(DirtyFlag::DIRTY_PUSH);
        }

        void end_encoding();
    } compute;

    // State specific to a blit pass.
@@ -496,6 +555,7 @@ public:
    void encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label);

    void bind_pipeline(RDD::PipelineID p_pipeline);
    void encode_push_constant_data(RDD::ShaderID p_shader, VectorView<uint32_t> p_data);

#pragma mark - Render Commands

@@ -661,8 +721,6 @@ public:
    Vector<UniformSet> sets;
    bool uses_argument_buffers = true;

    virtual void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) = 0;

    MDShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers) :
            name(p_name), sets(p_sets), uses_argument_buffers(p_uses_argument_buffers) {}
    virtual ~MDShader() = default;
@@ -671,15 +729,13 @@ public:
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDComputeShader final : public MDShader {
public:
    struct {
        uint32_t binding = -1;
        int32_t binding = -1;
        uint32_t size = 0;
    } push_constants;
    MTLSize local = {};

    MDLibrary *kernel;

    void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;

    MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers, MDLibrary *p_kernel);
};

@@ -700,8 +756,6 @@ public:
    MDLibrary *vert;
    MDLibrary *frag;

    void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;

    MDRenderShader(CharString p_name,
            Vector<UniformSet> p_sets,
            bool p_needs_view_mask_buffer,
@@ -62,8 +62,8 @@
#undef MAX

void MDCommandBuffer::begin() {
    DEV_ASSERT(commandBuffer == nil);
    commandBuffer = queue.commandBuffer;
    DEV_ASSERT(commandBuffer == nil && !state_begin);
    state_begin = true;
}

void MDCommandBuffer::end() {
@@ -83,6 +83,7 @@ void MDCommandBuffer::commit() {
    end();
    [commandBuffer commit];
    commandBuffer = nil;
    state_begin = false;
}

void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) {
@@ -136,7 +137,7 @@ void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) {
            render.desc.colorAttachments[0].resolveTexture = res_tex;
        }
#endif
        render.encoder = [commandBuffer renderCommandEncoderWithDescriptor:render.desc];
        render.encoder = [command_buffer() renderCommandEncoderWithDescriptor:render.desc];
    }

    if (render.pipeline != rp) {
@@ -160,9 +161,44 @@ void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) {
        DEV_ASSERT(type == MDCommandBufferStateType::None);
        type = MDCommandBufferStateType::Compute;

        compute.pipeline = (MDComputePipeline *)p;
        compute.encoder = commandBuffer.computeCommandEncoder;
        [compute.encoder setComputePipelineState:compute.pipeline->state];
        if (compute.pipeline != p) {
            compute.dirty.set_flag(ComputeState::DIRTY_PIPELINE);
            compute.mark_uniforms_dirty();
            compute.pipeline = (MDComputePipeline *)p;
        }
    }
}

void MDCommandBuffer::encode_push_constant_data(RDD::ShaderID p_shader, VectorView<uint32_t> p_data) {
    switch (type) {
        case MDCommandBufferStateType::Render: {
            MDRenderShader *shader = (MDRenderShader *)(p_shader.id);
            if (shader->push_constants.vert.binding == -1 && shader->push_constants.frag.binding == -1) {
                return;
            }
            render.push_constant_bindings[0] = shader->push_constants.vert.binding;
            render.push_constant_bindings[1] = shader->push_constants.frag.binding;
            void const *ptr = p_data.ptr();
            render.push_constant_data_len = p_data.size() * sizeof(uint32_t);
            DEV_ASSERT(render.push_constant_data_len <= sizeof(RenderState::push_constant_data));
            memcpy(render.push_constant_data, ptr, render.push_constant_data_len);
            render.mark_push_constants_dirty();
        } break;
        case MDCommandBufferStateType::Compute: {
            MDComputeShader *shader = (MDComputeShader *)(p_shader.id);
            if (shader->push_constants.binding == -1) {
                return;
            }
            compute.push_constant_bindings[0] = shader->push_constants.binding;
            void const *ptr = p_data.ptr();
            compute.push_constant_data_len = p_data.size() * sizeof(uint32_t);
            DEV_ASSERT(compute.push_constant_data_len <= sizeof(ComputeState::push_constant_data));
            memcpy(compute.push_constant_data, ptr, compute.push_constant_data_len);
            compute.mark_push_constants_dirty();
        } break;
        case MDCommandBufferStateType::Blit:
        case MDCommandBufferStateType::None:
            return;
    }
}

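The effect of this change: push constants are no longer written to an encoder at bind time. The bytes and their stage bindings are cached on the render/compute state and flushed by _render_set_dirty_state() / _compute_set_dirty_state() just before a draw or dispatch. A minimal model of that deferred flow (plain C++; the types and upload() call are placeholders, not Godot API):

#include <cstdint>
#include <cstring>

struct PushState {
    uint8_t data[128]; // mirrors MAX_PUSH_CONSTANT_SIZE
    uint32_t len = 0;
    bool dirty = false;

    void set(const void *p, uint32_t n) {
        memcpy(data, p, n);
        len = n;
        dirty = true; // nothing touches the encoder yet
    }

    template <typename F>
    void flush(F upload) {
        if (dirty && len > 0) {
            upload(data, len); // e.g. the setBytes:length:atIndex: calls above
            dirty = false;
        }
    }
};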
@@ -181,7 +217,7 @@ id<MTLBlitCommandEncoder> MDCommandBuffer::blit_command_encoder() {
    }

    type = MDCommandBufferStateType::Blit;
    blit.encoder = commandBuffer.blitCommandEncoder;
    blit.encoder = command_buffer().blitCommandEncoder;
    return blit.encoder;
}

@@ -200,7 +236,7 @@ void MDCommandBuffer::encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDesc
            break;
    }

    id<MTLRenderCommandEncoder> enc = [commandBuffer renderCommandEncoderWithDescriptor:p_desc];
    id<MTLRenderCommandEncoder> enc = [command_buffer() renderCommandEncoderWithDescriptor:p_desc];
    if (p_label != nil) {
        [enc pushDebugGroup:p_label];
        [enc popDebugGroup];
@@ -344,6 +380,19 @@ void MDCommandBuffer::render_clear_attachments(VectorView<RDD::AttachmentClear>
void MDCommandBuffer::_render_set_dirty_state() {
    _render_bind_uniform_sets();

    if (render.dirty.has_flag(RenderState::DIRTY_PUSH)) {
        if (render.push_constant_bindings[0] != (uint32_t)-1) {
            [render.encoder setVertexBytes:render.push_constant_data
                                    length:render.push_constant_data_len
                                   atIndex:render.push_constant_bindings[0]];
        }
        if (render.push_constant_bindings[1] != (uint32_t)-1) {
            [render.encoder setFragmentBytes:render.push_constant_data
                                      length:render.push_constant_data_len
                                     atIndex:render.push_constant_bindings[1]];
        }
    }

    MDSubpass const &subpass = render.get_subpass();
    if (subpass.view_count > 1) {
        uint32_t view_range[2] = { 0, subpass.view_count };
@@ -552,7 +601,7 @@ uint32_t MDCommandBuffer::_populate_vertices(simd::float4 *p_vertices, uint32_t
}

void MDCommandBuffer::render_begin_pass(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_frameBuffer, RDD::CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RDD::RenderPassClearValue> p_clear_values) {
    DEV_ASSERT(commandBuffer != nil);
    DEV_ASSERT(command_buffer() != nil);
    end();

    MDRenderPass *pass = (MDRenderPass *)(p_render_pass.id);
@@ -639,7 +688,7 @@ void MDCommandBuffer::_render_clear_render_area() {
}

void MDCommandBuffer::render_next_subpass() {
    DEV_ASSERT(commandBuffer != nil);
    DEV_ASSERT(command_buffer() != nil);

    if (render.current_subpass == UINT32_MAX) {
        render.current_subpass = 0;
@@ -726,7 +775,7 @@ void MDCommandBuffer::render_next_subpass() {
        // the defaultRasterSampleCount from the pipeline's sample count.
        render.desc = desc;
    } else {
        render.encoder = [commandBuffer renderCommandEncoderWithDescriptor:desc];
        render.encoder = [command_buffer() renderCommandEncoderWithDescriptor:desc];

        if (!render.is_rendering_entire_area) {
            _render_clear_render_area();
@@ -895,6 +944,7 @@ void MDCommandBuffer::RenderState::reset() {
    dirty = DIRTY_NONE;
    uniform_sets.clear();
    uniform_set_mask = 0;
    push_constant_data_len = 0;
    clear_values.clear();
    viewports.clear();
    scissors.clear();
@@ -960,29 +1010,108 @@ void MDCommandBuffer::ComputeState::end_encoding() {

#pragma mark - Compute

void MDCommandBuffer::_compute_set_dirty_state() {
    if (compute.dirty.has_flag(ComputeState::DIRTY_PIPELINE)) {
        compute.encoder = [command_buffer() computeCommandEncoderWithDispatchType:MTLDispatchTypeConcurrent];
        [compute.encoder setComputePipelineState:compute.pipeline->state];
    }

    _compute_bind_uniform_sets();

    if (compute.dirty.has_flag(ComputeState::DIRTY_PUSH)) {
        if (compute.push_constant_bindings[0] != (uint32_t)-1) {
            [compute.encoder setBytes:compute.push_constant_data
                               length:compute.push_constant_data_len
                              atIndex:compute.push_constant_bindings[0]];
        }
    }

    compute.dirty.clear();
}

void MDCommandBuffer::_compute_bind_uniform_sets() {
    DEV_ASSERT(type == MDCommandBufferStateType::Compute);
    if (!compute.dirty.has_flag(ComputeState::DIRTY_UNIFORMS)) {
        return;
    }

    compute.dirty.clear_flag(ComputeState::DIRTY_UNIFORMS);
    uint64_t set_uniforms = compute.uniform_set_mask;
    compute.uniform_set_mask = 0;

    MDComputeShader *shader = compute.pipeline->shader;

    while (set_uniforms != 0) {
        // Find the index of the next set bit.
        uint32_t index = (uint32_t)__builtin_ctzll(set_uniforms);
        // Clear the set bit.
        set_uniforms &= (set_uniforms - 1);
        MDUniformSet *set = compute.uniform_sets[index];
        if (set == nullptr || index >= (uint32_t)shader->sets.size()) {
            continue;
        }
        set->bind_uniforms(shader, compute, index);
    }
}
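The while loop above walks the 64-bit dirty mask one set bit at a time: __builtin_ctzll finds the lowest set bit and mask &= (mask - 1) clears it, so only the sets that actually changed get rebound. The idiom in isolation (plain C++, GCC/Clang builtin):

#include <cstdint>
#include <cstdio>

int main() {
    uint64_t mask = 0b101001; // uniform sets 0, 3 and 5 are dirty
    while (mask != 0) {
        uint32_t index = (uint32_t)__builtin_ctzll(mask); // index of lowest set bit
        mask &= (mask - 1); // clear that bit
        printf("rebinding uniform set %u\n", index); // prints 0, then 3, then 5
    }
    return 0;
}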

void MDCommandBuffer::ComputeState::reset() {
    pipeline = nil;
    encoder = nil;
    dirty = DIRTY_NONE;
    uniform_sets.clear();
    uniform_set_mask = 0;
    push_constant_data_len = 0;
    // Keep the keys, as they are likely to be used again.
    for (KeyValue<StageResourceUsage, LocalVector<__unsafe_unretained id<MTLResource>>> &kv : resource_usage) {
        kv.value.clear();
    }
}

void MDCommandBuffer::compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index) {
    DEV_ASSERT(type == MDCommandBufferStateType::Compute);

    MDShader *shader = (MDShader *)(p_shader.id);
    MDUniformSet *set = (MDUniformSet *)(p_uniform_set.id);
    set->bind_uniforms(shader, compute, p_set_index);
    if (compute.uniform_sets.size() <= p_set_index) {
        uint32_t s = render.uniform_sets.size();
        compute.uniform_sets.resize(p_set_index + 1);
        // Set intermediate values to null.
        std::fill(&compute.uniform_sets[s], &compute.uniform_sets[p_set_index] + 1, nullptr);
    }

    if (compute.uniform_sets[p_set_index] != set) {
        compute.dirty.set_flag(ComputeState::DIRTY_UNIFORMS);
        compute.uniform_set_mask |= 1ULL << p_set_index;
        compute.uniform_sets[p_set_index] = set;
    }
}

void MDCommandBuffer::compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
    DEV_ASSERT(type == MDCommandBufferStateType::Compute);

    MDShader *shader = (MDShader *)(p_shader.id);

    // TODO(sgc): Bind multiple buffers using [encoder setBuffers:offsets:withRange:]
    for (size_t i = 0u; i < p_set_count; ++i) {
    for (size_t i = 0; i < p_set_count; ++i) {
        MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id);
        set->bind_uniforms(shader, compute, p_first_set_index + i);

        uint32_t index = p_first_set_index + i;
        if (compute.uniform_sets.size() <= index) {
            uint32_t s = compute.uniform_sets.size();
            compute.uniform_sets.resize(index + 1);
            // Set intermediate values to null.
            std::fill(&compute.uniform_sets[s], &compute.uniform_sets[index] + 1, nullptr);
        }

        if (compute.uniform_sets[index] != set) {
            compute.dirty.set_flag(ComputeState::DIRTY_UNIFORMS);
            compute.uniform_set_mask |= 1ULL << index;
            compute.uniform_sets[index] = set;
        }
    }
}

void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
    DEV_ASSERT(type == MDCommandBufferStateType::Compute);

    _compute_set_dirty_state();

    MTLRegion region = MTLRegionMake3D(0, 0, 0, p_x_groups, p_y_groups, p_z_groups);

    id<MTLComputeCommandEncoder> enc = compute.encoder;
@@ -992,6 +1121,8 @@ void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups,
void MDCommandBuffer::compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset) {
    DEV_ASSERT(type == MDCommandBufferStateType::Compute);

    _compute_set_dirty_state();

    id<MTLBuffer> indirectBuffer = rid::get(p_indirect_buffer);

    id<MTLComputeCommandEncoder> enc = compute.encoder;
@@ -1021,20 +1152,6 @@ MDComputeShader::MDComputeShader(CharString p_name,
        MDShader(p_name, p_sets, p_uses_argument_buffers), kernel(p_kernel) {
}

void MDComputeShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) {
    DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Compute);
    if (push_constants.binding == (uint32_t)-1) {
        return;
    }

    id<MTLComputeCommandEncoder> enc = p_cb->compute.encoder;

    void const *ptr = p_data.ptr();
    size_t length = p_data.size() * sizeof(uint32_t);

    [enc setBytes:ptr length:length atIndex:push_constants.binding];
}

MDRenderShader::MDRenderShader(CharString p_name,
        Vector<UniformSet> p_sets,
        bool p_needs_view_mask_buffer,
@@ -1046,22 +1163,6 @@ MDRenderShader::MDRenderShader(CharString p_name,
        frag(p_frag) {
}

void MDRenderShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) {
    DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Render);
    id<MTLRenderCommandEncoder> __unsafe_unretained enc = p_cb->render.encoder;

    void const *ptr = p_data.ptr();
    size_t length = p_data.size() * sizeof(uint32_t);

    if (push_constants.vert.binding > -1) {
        [enc setVertexBytes:ptr length:length atIndex:push_constants.vert.binding];
    }

    if (push_constants.frag.binding > -1) {
        [enc setFragmentBytes:ptr length:length atIndex:push_constants.frag.binding];
    }
}

void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index) {
    DEV_ASSERT(p_shader->uses_argument_buffers);
    DEV_ASSERT(p_state.encoder != nil);
@@ -58,6 +58,7 @@
#include "core/io/marshalls.h"
#include "core/string/ustring.h"
#include "core/templates/hash_map.h"
#include "drivers/apple/foundation_helpers.h"

#import <Metal/MTLTexture.h>
#import <Metal/Metal.h>
@@ -317,12 +318,6 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p
        desc.usage |= MTLTextureUsageShaderWrite;
    }

    if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) {
        if (format_caps & kMTLFmtCapsAtomic) {
            desc.usage |= MTLTextureUsageShaderAtomic;
        }
    }

    bool can_be_attachment = flags::any(format_caps, (kMTLFmtCapsColorAtt | kMTLFmtCapsDSAtt));

    if (flags::any(p_format.usage_bits, TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
@@ -334,6 +329,18 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p
        desc.usage |= MTLTextureUsageShaderRead;
    }

    if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_ATOMIC_BIT) {
        if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) {
            if (format_caps & kMTLFmtCapsAtomic) {
                desc.usage |= MTLTextureUsageShaderAtomic;
            } else {
                ERR_FAIL_V_MSG(RDD::TextureID(), "Atomic operations on this texture format are not supported.");
            }
        } else {
            ERR_FAIL_V_MSG(RDD::TextureID(), "Atomic texture operations not supported on this OS version.");
        }
    }

    if (p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) {
        ERR_FAIL_V_MSG(RDD::TextureID(), "unsupported: TEXTURE_USAGE_VRS_ATTACHMENT_BIT");
    }
@@ -363,7 +370,29 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p

    // Check if it is a linear format for atomic operations and therefore needs a buffer,
    // as generally Metal does not support atomic operations on textures.
    bool needs_buffer = is_linear || (p_format.array_layers == 1 && p_format.mipmaps == 1 && p_format.texture_type == TEXTURE_TYPE_2D && flags::any(p_format.usage_bits, TEXTURE_USAGE_STORAGE_BIT) && (p_format.format == DATA_FORMAT_R32_UINT || p_format.format == DATA_FORMAT_R32_SINT || p_format.format == DATA_FORMAT_R32G32_UINT || p_format.format == DATA_FORMAT_R32G32_SINT));
    bool needs_buffer = is_linear;

    // Check for atomic requirements.
    if (flags::any(p_format.usage_bits, TEXTURE_USAGE_STORAGE_BIT) && p_format.array_layers == 1 && p_format.mipmaps == 1 && p_format.texture_type == TEXTURE_TYPE_2D) {
        switch (p_format.format) {
            case RenderingDeviceCommons::DATA_FORMAT_R32_SINT:
            case RenderingDeviceCommons::DATA_FORMAT_R32_UINT: {
                if (!device_properties->features.supports_image_atomic_32_bit) {
                    // We can emulate 32-bit atomic operations on textures.
                    needs_buffer = true;
                }
            } break;
            case RenderingDeviceCommons::DATA_FORMAT_R32G32_SINT:
            case RenderingDeviceCommons::DATA_FORMAT_R32G32_UINT: {
                if (!device_properties->features.supports_image_atomic_64_bit) {
                    // No emulation for 64-bit atomics.
                    ERR_FAIL_V_MSG(TextureID(), "64-bit atomic operations are not supported.");
                }
            } break;
            default:
                break;
        }
    }

    id<MTLTexture> obj = nil;
    if (needs_buffer) {
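Seen from the caller's side, the new checks guard requests like the following sketch (field values illustrative; texture_create and RD::TextureFormat are the API used elsewhere in this diff). On hardware without 32-bit image atomics the R32_UINT texture is transparently backed by a buffer; an R32G32 (64-bit) request fails outright:

RD::TextureFormat tf;
tf.format = RD::DATA_FORMAT_R32_UINT;
tf.texture_type = RD::TEXTURE_TYPE_2D;
tf.width = 256;
tf.height = 256;
tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_STORAGE_ATOMIC_BIT;
RID atomic_tex = RD::get_singleton()->texture_create(tf, RD::TextureView());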
@@ -900,9 +929,15 @@ Error RenderingDeviceDriverMetal::command_queue_execute_and_present(CommandQueue
    MDCommandBuffer *cmd_buffer = (MDCommandBuffer *)(p_cmd_buffers[size - 1].id);
    Fence *fence = (Fence *)(p_cmd_fence.id);
    if (fence != nullptr) {
        [cmd_buffer->get_command_buffer() addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
        id<MTLCommandBuffer> cb = cmd_buffer->get_command_buffer();
        if (cb == nil) {
            // If there is nothing to do, signal the fence immediately.
            dispatch_semaphore_signal(fence->semaphore);
        }];
        } else {
            [cb addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
                dispatch_semaphore_signal(fence->semaphore);
            }];
        }
    }

    for (uint32_t i = 0; i < p_swap_chains.size(); i++) {
@@ -1730,8 +1765,7 @@ void RenderingDeviceDriverMetal::pipeline_free(PipelineID p_pipeline_id) {

void RenderingDeviceDriverMetal::command_bind_push_constants(CommandBufferID p_cmd_buffer, ShaderID p_shader, uint32_t p_dst_first_index, VectorView<uint32_t> p_data) {
    MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id);
    MDShader *shader = (MDShader *)(p_shader.id);
    shader->encode_push_constant_data(p_data, cb);
    cb->encode_push_constant_data(p_shader, p_data);
}

// ----- CACHE -----
@@ -2417,6 +2451,7 @@ RDD::PipelineID RenderingDeviceDriverMetal::compute_pipeline_create(ShaderID p_s

    MTLComputePipelineDescriptor *desc = [MTLComputePipelineDescriptor new];
    desc.computeFunction = function;
    desc.label = conv::to_nsstring(shader->name);
    if (archive) {
        desc.binaryArchives = @[ archive ];
    }
@@ -2735,6 +2770,8 @@ bool RenderingDeviceDriverMetal::has_feature(Features p_feature) {
            return device_properties->features.metal_fx_spatial;
        case SUPPORTS_METALFX_TEMPORAL:
            return device_properties->features.metal_fx_temporal;
        case SUPPORTS_IMAGE_ATOMIC_32_BIT:
            return device_properties->features.supports_image_atomic_32_bit;
        default:
            return false;
    }
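Engine code can then branch on the capability instead of on the platform, which is exactly what the VolumetricFog changes below do. A condensed sketch:

// Capability query replacing the old #if defined(MACOS_ENABLED) checks.
if (RD::get_singleton()->has_feature(RD::SUPPORTS_IMAGE_ATOMIC_32_BIT)) {
    // Bind an R32_UINT storage image and use imageAtomic*() in the shader.
} else {
    // Fall back to a uint storage buffer, as VolumetricFog does below.
}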
@@ -199,6 +199,8 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<RenderingD
        set_indexes[set_indexes_size - 1] = offset;
    }
    CompilerMSL::Options msl_options{};
    // MAJOR * 10000 + MINOR * 100
    uint32_t msl_version = CompilerMSL::Options::make_msl_version(device_profile->features.mslVersionMajor, device_profile->features.mslVersionMinor);
    msl_options.set_msl_version(device_profile->features.mslVersionMajor, device_profile->features.mslVersionMinor);
    mtl_reflection_data.msl_version = msl_options.msl_version;
    msl_options.platform = device_profile->platform == MetalDeviceProfile::Platform::macOS ? CompilerMSL::Options::macOS : CompilerMSL::Options::iOS;
@@ -209,7 +211,7 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<RenderingD
    }

    bool disable_argument_buffers = false;
    if (String v = OS::get_singleton()->get_environment(U"GODOT_DISABLE_ARGUMENT_BUFFERS"); v == U"1") {
    if (String v = OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_ARGUMENT_BUFFERS"); v == "1") {
        disable_argument_buffers = true;
    }

@@ -236,6 +238,10 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<RenderingD
        msl_options.multiview_layered_rendering = true;
        msl_options.view_mask_buffer_index = VIEW_MASK_BUFFER_INDEX;
    }
    if (msl_version >= CompilerMSL::Options::make_msl_version(3, 2)) {
        // All 3.2+ versions support device coherence, so we can disable texture fences.
        msl_options.readwrite_texture_fences = false;
    }

    CompilerGLSL::Options options{};
    options.vertex.flip_vert_y = true;
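The version comparison works because make_msl_version packs the version into a single integer (MAJOR * 10000 + MINOR * 100, as the comment above notes, plus a patch component). A sketch of the encoding:

#include <cstdint>

constexpr uint32_t make_msl_version(uint32_t major, uint32_t minor, uint32_t patch = 0) {
    return major * 10000 + minor * 100 + patch;
}
static_assert(make_msl_version(3, 2) == 30200, "MSL 3.2 encodes as 30200");
static_assert(make_msl_version(2, 1) < make_msl_version(3, 2), "ordering is preserved");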
@@ -417,6 +423,10 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<RenderingD
            // VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER
            primary.texture_type = MTLTextureTypeTextureBuffer;
        } break;
        case spv::DimTileImageDataEXT: {
            // Godot does not use this extension.
            // See: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_EXT_shader_tile_image.html
        } break;
        case spv::DimMax: {
            // Add all enumerations to silence the compiler warning
            // and generate future warnings, should a new one be added.
@@ -5894,6 +5894,13 @@ bool RenderingDeviceDriverVulkan::has_feature(Features p_feature) {
            return true;
        case SUPPORTS_BUFFER_DEVICE_ADDRESS:
            return buffer_device_address_support;
        case SUPPORTS_IMAGE_ATOMIC_32_BIT:
#if (defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED))
            // MoltenVK has previously had issues with 32-bit atomics on images.
            return false;
#else
            return true;
#endif
        default:
            return false;
    }
@@ -417,6 +417,7 @@ void Fog::VolumetricFog::init(const Vector3i &fog_size, RID p_sky_shader) {
    width = fog_size.x;
    height = fog_size.y;
    depth = fog_size.z;
    atomic_type = RD::get_singleton()->has_feature(RD::SUPPORTS_IMAGE_ATOMIC_32_BIT) ? RD::UNIFORM_TYPE_IMAGE : RD::UNIFORM_TYPE_STORAGE_BUFFER;

    RD::TextureFormat tf;
    tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
@@ -440,29 +441,29 @@ void Fog::VolumetricFog::init(const Vector3i &fog_size, RID p_sky_shader) {
    fog_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
    RD::get_singleton()->set_resource_name(fog_map, "Fog map");

#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
    Vector<uint8_t> dm;
    dm.resize_initialized(fog_size.x * fog_size.y * fog_size.z * 4);
    if (atomic_type == RD::UNIFORM_TYPE_STORAGE_BUFFER) {
        Vector<uint8_t> dm;
        dm.resize_initialized(fog_size.x * fog_size.y * fog_size.z * 4);

    density_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm);
    RD::get_singleton()->set_resource_name(density_map, "Fog density map");
    light_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm);
    RD::get_singleton()->set_resource_name(light_map, "Fog light map");
    emissive_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm);
    RD::get_singleton()->set_resource_name(emissive_map, "Fog emissive map");
#else
    tf.format = RD::DATA_FORMAT_R32_UINT;
    tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
    density_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
    RD::get_singleton()->set_resource_name(density_map, "Fog density map");
    RD::get_singleton()->texture_clear(density_map, Color(0, 0, 0, 0), 0, 1, 0, 1);
    light_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
    RD::get_singleton()->set_resource_name(light_map, "Fog light map");
    RD::get_singleton()->texture_clear(light_map, Color(0, 0, 0, 0), 0, 1, 0, 1);
    emissive_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
    RD::get_singleton()->set_resource_name(emissive_map, "Fog emissive map");
    RD::get_singleton()->texture_clear(emissive_map, Color(0, 0, 0, 0), 0, 1, 0, 1);
#endif
        density_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm);
        RD::get_singleton()->set_resource_name(density_map, "Fog density map");
        light_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm);
        RD::get_singleton()->set_resource_name(light_map, "Fog light map");
        emissive_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm);
        RD::get_singleton()->set_resource_name(emissive_map, "Fog emissive map");
    } else {
        tf.format = RD::DATA_FORMAT_R32_UINT;
        tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_STORAGE_ATOMIC_BIT;
        density_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
        RD::get_singleton()->set_resource_name(density_map, "Fog density map");
        RD::get_singleton()->texture_clear(density_map, Color(0, 0, 0, 0), 0, 1, 0, 1);
        light_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
        RD::get_singleton()->set_resource_name(light_map, "Fog light map");
        RD::get_singleton()->texture_clear(light_map, Color(0, 0, 0, 0), 0, 1, 0, 1);
        emissive_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
        RD::get_singleton()->set_resource_name(emissive_map, "Fog emissive map");
        RD::get_singleton()->texture_clear(emissive_map, Color(0, 0, 0, 0), 0, 1, 0, 1);
    }

    Vector<RD::Uniform> uniforms;
    {
@@ -579,11 +580,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P

    {
        RD::Uniform u;
#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
        u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
#else
        u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
#endif
        u.uniform_type = fog->atomic_type;
        u.binding = 1;
        u.append_id(fog->emissive_map);
        uniforms.push_back(u);
@@ -599,11 +596,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P

    {
        RD::Uniform u;
#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
        u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
#else
        u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
#endif
        u.uniform_type = fog->atomic_type;
        u.binding = 3;
        u.append_id(fog->density_map);
        uniforms.push_back(u);
@@ -611,11 +604,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P

    {
        RD::Uniform u;
#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
        u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
#else
        u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
#endif
        u.uniform_type = fog->atomic_type;
        u.binding = 4;
        u.append_id(fog->light_map);
        uniforms.push_back(u);
@@ -918,22 +907,14 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P
        }
        {
            RD::Uniform u;
#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
            u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
#else
            u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
#endif
            u.uniform_type = fog->atomic_type;
            u.binding = 16;
            u.append_id(fog->density_map);
            uniforms.push_back(u);
        }
        {
            RD::Uniform u;
#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
            u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
#else
            u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
#endif
            u.uniform_type = fog->atomic_type;
            u.binding = 17;
            u.append_id(fog->light_map);
            uniforms.push_back(u);
@@ -941,11 +922,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P

        {
            RD::Uniform u;
#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
            u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
#else
            u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
#endif
            u.uniform_type = fog->atomic_type;
            u.binding = 18;
            u.append_id(fog->emissive_map);
            uniforms.push_back(u);
@@ -316,6 +316,9 @@ public:

        int last_shadow_filter = -1;

        // If the device doesn't support image atomics, use storage buffers instead.
        RD::UniformType atomic_type = RD::UNIFORM_TYPE_IMAGE;

        virtual void configure(RenderSceneBuffersRD *p_render_buffers) override {}
        virtual void free_data() override {}

@@ -234,11 +234,13 @@ void ShaderRD::_build_variant_code(StringBuilder &builder, uint32_t p_variant, c
        builder.append(String("#define ") + String(E.key) + "_CODE_USED\n");
    }
#if (defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED))
    if (RD::get_singleton()->get_device_capabilities().device_family == RDD::DEVICE_VULKAN) {
    RenderingDevice *rd = RD::get_singleton();
    if (rd->get_device_capabilities().device_family == RDD::DEVICE_VULKAN) {
        builder.append("#define MOLTENVK_USED\n");
    }
    // Image atomics are supported on Metal 3.1 but no support in MoltenVK or SPIRV-Cross yet.
    builder.append("#define NO_IMAGE_ATOMICS\n");
    if (!rd->has_feature(RD::SUPPORTS_IMAGE_ATOMIC_32_BIT)) {
        builder.append("#define NO_IMAGE_ATOMICS\n");
    }
#endif

    builder.append(String("#define RENDER_DRIVER_") + OS::get_singleton()->get_current_rendering_driver_name().to_upper() + "\n");
@@ -2,6 +2,8 @@

#version 450

#pragma use_vulkan_memory_model

#VERSION_DEFINES

layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;
@@ -2,6 +2,8 @@

#version 450

#pragma use_vulkan_memory_model

#VERSION_DEFINES

#ifdef MODE_DENSITY
@@ -7979,6 +7979,7 @@ void RenderingDevice::_bind_methods() {
    BIND_ENUM_CONSTANT(SUPPORTS_METALFX_SPATIAL);
    BIND_ENUM_CONSTANT(SUPPORTS_METALFX_TEMPORAL);
    BIND_ENUM_CONSTANT(SUPPORTS_BUFFER_DEVICE_ADDRESS);
    BIND_ENUM_CONSTANT(SUPPORTS_IMAGE_ATOMIC_32_BIT);

    BIND_ENUM_CONSTANT(LIMIT_MAX_BOUND_UNIFORM_SETS);
    BIND_ENUM_CONSTANT(LIMIT_MAX_FRAMEBUFFER_COLOR_ATTACHMENTS);
@@ -952,6 +952,7 @@ public:
        // If not supported, a fragment shader with only side effects (i.e., writes to buffers, but doesn't output to attachments), may be optimized down to no-op by the GPU driver.
        SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS,
        SUPPORTS_BUFFER_DEVICE_ADDRESS,
        SUPPORTS_IMAGE_ATOMIC_32_BIT,
    };

    enum SubgroupOperations {
thirdparty/README.md (vendored, 2 lines changed)
@ -978,7 +978,7 @@ Its version and license is described in this file under `hidapi`.
|
|||
## spirv-cross
|
||||
|
||||
- Upstream: https://github.com/KhronosGroup/SPIRV-Cross
|
||||
- Version: git (6173e24b31f09a0c3217103a130e74c4ddec14a6, 2024)
|
||||
- Version: git (d7440cbc6c50332600fdf21c45e6a5df0b07e54c, 2025)
|
||||
- License: Apache 2.0
|
||||
|
||||
Files extracted from upstream source:
|
||||
|
|
thirdparty/spirv-cross/spirv.hpp (vendored, 2739 lines changed): file diff suppressed because it is too large.
thirdparty/spirv-cross/spirv_common.hpp (vendored, 66 lines changed)
@@ -580,7 +580,10 @@ struct SPIRType : IVariant
        Interpolant,
        Char,
        // MSL specific type, that is used by 'object'(analog of 'task' from glsl) shader.
        MeshGridProperties
        MeshGridProperties,
        BFloat16,
        FloatE4M3,
        FloatE5M2
    };

    // Scalar/vector/matrix support.
@@ -605,6 +608,14 @@ struct SPIRType : IVariant
    bool pointer = false;
    bool forward_pointer = false;

    struct
    {
        uint32_t use_id = 0;
        uint32_t rows_id = 0;
        uint32_t columns_id = 0;
        uint32_t scope_id = 0;
    } cooperative;

    spv::StorageClass storage = spv::StorageClassGeneric;

    SmallVector<TypeID> member_types;
@@ -686,6 +697,7 @@ struct SPIREntryPoint
    FunctionID self = 0;
    std::string name;
    std::string orig_name;
    std::unordered_map<uint32_t, uint32_t> fp_fast_math_defaults;
    SmallVector<VariableID> interface_variables;

    Bitset flags;
@@ -1026,6 +1038,9 @@ struct SPIRFunction : IVariant
    // consider arrays value types.
    SmallVector<ID> constant_arrays_needed_on_stack;

    // Does this function (or any function called by it), emit geometry?
    bool emits_geometry = false;

    bool active = false;
    bool flush_undeclared = true;
    bool do_combined_parameters = true;
@@ -1226,6 +1241,26 @@ struct SPIRConstant : IVariant
        return u.f32;
    }

    static inline float fe4m3_to_f32(uint8_t v)
    {
        if ((v & 0x7f) == 0x7f)
        {
            union
            {
                float f32;
                uint32_t u32;
            } u;

            u.u32 = (v & 0x80) ? 0xffffffffu : 0x7fffffffu;
            return u.f32;
        }
        else
        {
            // Reuse the FP16 to FP32 code. Cute bit-hackery.
            return f16_to_f32((int16_t(int8_t(v)) << 7) & (0xffff ^ 0x4000)) * 256.0f;
        }
    }

    inline uint32_t specialization_constant_id(uint32_t col, uint32_t row) const
    {
        return m.c[col].id[row];
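fe4m3_to_f32 folds the 8-bit E4M3 payload onto the existing FP16 decoder. For comparison, a direct decode of the format (1 sign bit, 4 exponent bits with bias 7, 3 mantissa bits; the all-ones magnitude pattern is NaN and there are no infinities) would look like this hypothetical reference, not upstream code:

#include <cmath>
#include <cstdint>

// Reference E4M3 decode, for checking the bit-hack above.
inline float fe4m3_reference(uint8_t v)
{
    if ((v & 0x7f) == 0x7f)
        return (v & 0x80) ? -NAN : NAN; // E4M3 reserves S.1111.111 for NaN
    uint32_t exp = (v >> 3) & 0xf;
    float man = float(v & 0x7) / 8.0f;
    float mag = (exp == 0) ? std::ldexp(man, -6) // subnormal: no implicit leading 1
                           : std::ldexp(1.0f + man, int(exp) - 7);
    return (v & 0x80) ? -mag : mag;
}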
@@ -1266,6 +1301,24 @@ struct SPIRConstant : IVariant
        return f16_to_f32(scalar_u16(col, row));
    }

    inline float scalar_bf16(uint32_t col = 0, uint32_t row = 0) const
    {
        uint32_t v = scalar_u16(col, row) << 16;
        float fp32;
        memcpy(&fp32, &v, sizeof(float));
        return fp32;
    }

    inline float scalar_floate4m3(uint32_t col = 0, uint32_t row = 0) const
    {
        return fe4m3_to_f32(scalar_u8(col, row));
    }

    inline float scalar_bf8(uint32_t col = 0, uint32_t row = 0) const
    {
        return f16_to_f32(scalar_u8(col, row) << 8);
    }

    inline float scalar_f32(uint32_t col = 0, uint32_t row = 0) const
    {
        return m.c[col].r[row].f32;
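scalar_bf16 relies on bfloat16 being exactly the upper half of an IEEE-754 binary32, so decoding is a 16-bit shift plus a bit copy. A self-contained restatement of that conversion:

#include <cstdint>
#include <cstring>

inline float bf16_to_f32(uint16_t v)
{
    uint32_t bits = uint32_t(v) << 16; // bf16 occupies the high 16 bits of binary32
    float f;
    memcpy(&f, &bits, sizeof(f));
    return f; // example: 0x3f80 << 16 == 0x3f800000, i.e. 1.0f
}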
@@ -1336,9 +1389,10 @@ struct SPIRConstant : IVariant

    SPIRConstant() = default;

    SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized)
    SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized, bool replicated_ = false)
        : constant_type(constant_type_)
        , specialization(specialized)
        , replicated(replicated_)
    {
        subconstants.reserve(num_elements);
        for (uint32_t i = 0; i < num_elements; i++)
@@ -1410,9 +1464,16 @@ struct SPIRConstant : IVariant
    // If true, this is a LUT, and should always be declared in the outer scope.
    bool is_used_as_lut = false;

    // If this is a null constant of array type with specialized length.
    // May require special handling in initializer
    bool is_null_array_specialized_length = false;

    // For composites which are constant arrays, etc.
    SmallVector<ConstantID> subconstants;

    // Whether the subconstants are intended to be replicated (e.g. OpConstantCompositeReplicateEXT)
    bool replicated = false;

    // Non-Vulkan GLSL, HLSL and sometimes MSL emits defines for each specialization constant,
    // and uses them to initialize the constant. This allows the user
    // to still be able to specialize the value by supplying corresponding
@@ -1708,6 +1769,7 @@ struct Meta
        uint32_t spec_id = 0;
        uint32_t index = 0;
        spv::FPRoundingMode fp_rounding_mode = spv::FPRoundingModeMax;
        spv::FPFastMathModeMask fp_fast_math_mode = spv::FPFastMathModeMaskNone;
        bool builtin = false;
        bool qualified_alias_explicit_override = false;

thirdparty/spirv-cross/spirv_cross.cpp (vendored, 85 lines changed)
@@ -82,7 +82,7 @@ bool Compiler::variable_storage_is_aliased(const SPIRVariable &v)
                ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
    bool image = type.basetype == SPIRType::Image;
    bool counter = type.basetype == SPIRType::AtomicCounter;
    bool buffer_reference = type.storage == StorageClassPhysicalStorageBufferEXT;
    bool buffer_reference = type.storage == StorageClassPhysicalStorageBuffer;

    bool is_restrict;
    if (ssbo)
@@ -171,6 +171,7 @@ bool Compiler::block_is_control_dependent(const SPIRBlock &block)
    case OpGroupNonUniformLogicalXor:
    case OpGroupNonUniformQuadBroadcast:
    case OpGroupNonUniformQuadSwap:
    case OpGroupNonUniformRotateKHR:

    // Control barriers
    case OpControlBarrier:
@@ -210,6 +211,7 @@ bool Compiler::block_is_pure(const SPIRBlock &block)

    case OpCopyMemory:
    case OpStore:
    case OpCooperativeMatrixStoreKHR:
    {
        auto &type = expression_type(ops[0]);
        if (type.storage != StorageClassFunction)
@@ -370,6 +372,7 @@ void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_
    }

    case OpLoad:
    case OpCooperativeMatrixLoadKHR:
    case OpImageRead:
    {
        // If we're in a storage class which does not get invalidated, adding dependencies here is no big deal.
@@ -481,7 +484,7 @@ void Compiler::register_write(uint32_t chain)
        }
    }

    if (type.storage == StorageClassPhysicalStorageBufferEXT || variable_storage_is_aliased(*var))
    if (type.storage == StorageClassPhysicalStorageBuffer || variable_storage_is_aliased(*var))
        flush_all_aliased_variables();
    else if (var)
        flush_dependees(*var);
@@ -587,6 +590,7 @@ const SPIRType &Compiler::expression_type(uint32_t id) const
bool Compiler::expression_is_lvalue(uint32_t id) const
{
    auto &type = expression_type(id);

    switch (type.basetype)
    {
    case SPIRType::SampledImage:
@@ -818,6 +822,7 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t

    case OpAtomicStore:
    case OpStore:
    case OpCooperativeMatrixStoreKHR:
        // Invalid SPIR-V.
        if (length < 1)
            return false;
@@ -910,6 +915,7 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t
    case OpInBoundsAccessChain:
    case OpPtrAccessChain:
    case OpLoad:
    case OpCooperativeMatrixLoadKHR:
    case OpCopyObject:
    case OpImageTexelPointer:
    case OpAtomicLoad:
@@ -2364,6 +2370,10 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar
        execution.output_primitives = arg0;
        break;

    case ExecutionModeFPFastMathDefault:
        execution.fp_fast_math_defaults[arg0] = arg1;
        break;

    default:
        break;
    }
@@ -3461,6 +3471,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
    switch (op)
    {
    case OpStore:
    case OpCooperativeMatrixStoreKHR:
    {
        if (length < 2)
            return false;
@@ -3581,6 +3592,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
    }

    case OpLoad:
    case OpCooperativeMatrixLoadKHR:
    {
        if (length < 3)
            return false;
@@ -3800,6 +3812,7 @@ bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t
    switch (op)
    {
    case OpStore:
    case OpCooperativeMatrixStoreKHR:
        if (length < 2)
            return false;
        if (args[0] == variable_id)
@ -3810,6 +3823,7 @@ bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t
|
|||
break;
|
||||
|
||||
case OpLoad:
|
||||
case OpCooperativeMatrixLoadKHR:
|
||||
if (length < 3)
|
||||
return false;
|
||||
if (args[2] == variable_id && static_expression == 0) // Tried to read from variable before it was initialized.
|
||||
|
@ -4285,6 +4299,7 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint
|
|||
switch (op.op)
|
||||
{
|
||||
case OpStore:
|
||||
case OpCooperativeMatrixStoreKHR:
|
||||
case OpCopyMemory:
|
||||
if (ops[0] == var)
|
||||
return false;
|
||||
|
@ -4323,6 +4338,7 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint
|
|||
|
||||
case OpCopyObject:
|
||||
case OpLoad:
|
||||
case OpCooperativeMatrixLoadKHR:
|
||||
if (ops[2] == var)
|
||||
return true;
|
||||
break;
|
||||
|
@ -4350,6 +4366,39 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint
|
|||
return true;
|
||||
}
|
||||
|
||||
bool Compiler::GeometryEmitDisocveryHandler::handle(spv::Op opcode, const uint32_t *, uint32_t)
|
||||
{
|
||||
if (opcode == OpEmitVertex || opcode == OpEndPrimitive)
|
||||
{
|
||||
for (auto *func : function_stack)
|
||||
func->emits_geometry = true;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Compiler::GeometryEmitDisocveryHandler::begin_function_scope(const uint32_t *stream, uint32_t)
|
||||
{
|
||||
auto &callee = compiler.get<SPIRFunction>(stream[2]);
|
||||
function_stack.push_back(&callee);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Compiler::GeometryEmitDisocveryHandler::end_function_scope([[maybe_unused]] const uint32_t *stream, uint32_t)
|
||||
{
|
||||
assert(function_stack.back() == &compiler.get<SPIRFunction>(stream[2]));
|
||||
function_stack.pop_back();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void Compiler::discover_geometry_emitters()
|
||||
{
|
||||
GeometryEmitDisocveryHandler handler(*this);
|
||||
|
||||
traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
|
||||
}
|
||||
|
||||
Bitset Compiler::get_buffer_block_flags(VariableID id) const
|
||||
{
|
||||
return ir.get_buffer_block_flags(get<SPIRVariable>(id));
|
||||
|
@ -4462,6 +4511,7 @@ bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args
|
|||
switch (opcode)
|
||||
{
|
||||
case OpStore:
|
||||
case OpCooperativeMatrixStoreKHR:
|
||||
if (length < 1)
|
||||
return false;
|
||||
|
||||
|
@ -4478,6 +4528,7 @@ bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args
|
|||
|
||||
case OpCopyObject:
|
||||
case OpLoad:
|
||||
case OpCooperativeMatrixLoadKHR:
|
||||
if (length < 3)
|
||||
return false;
|
||||
|
||||
|
@ -4910,13 +4961,16 @@ void Compiler::make_constant_null(uint32_t id, uint32_t type)
|
|||
uint32_t parent_id = ir.increase_bound_by(1);
|
||||
make_constant_null(parent_id, constant_type.parent_type);
|
||||
|
||||
if (!constant_type.array_size_literal.back())
|
||||
SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal.");
|
||||
|
||||
SmallVector<uint32_t> elements(constant_type.array.back());
|
||||
for (uint32_t i = 0; i < constant_type.array.back(); i++)
|
||||
// The array size of OpConstantNull can be either literal or specialization constant.
|
||||
// In the latter case, we cannot take the value as-is, as it can be changed to anything.
|
||||
// Rather, we assume it to be *one* for the sake of initializer.
|
||||
bool is_literal_array_size = constant_type.array_size_literal.back();
|
||||
uint32_t count = is_literal_array_size ? constant_type.array.back() : 1;
|
||||
SmallVector<uint32_t> elements(count);
|
||||
for (uint32_t i = 0; i < count; i++)
|
||||
elements[i] = parent_id;
|
||||
set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false);
|
||||
auto &constant = set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false);
|
||||
constant.is_null_array_specialized_length = !is_literal_array_size;
|
||||
}
|
||||
else if (!constant_type.member_types.empty())
|
||||
{
|
||||
|
@ -5177,7 +5231,7 @@ bool Compiler::PhysicalStorageBufferPointerHandler::type_is_bda_block_entry(uint
|
|||
|
||||
uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_minimum_scalar_alignment(const SPIRType &type) const
|
||||
{
|
||||
if (type.storage == spv::StorageClassPhysicalStorageBufferEXT)
|
||||
if (type.storage == spv::StorageClassPhysicalStorageBuffer)
|
||||
return 8;
|
||||
else if (type.basetype == SPIRType::Struct)
|
||||
{
|
||||
|
@ -5252,6 +5306,13 @@ bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t
|
|||
break;
|
||||
}
|
||||
|
||||
case OpCooperativeMatrixLoadKHR:
|
||||
case OpCooperativeMatrixStoreKHR:
|
||||
{
|
||||
// TODO: Can we meaningfully deal with this?
|
||||
break;
|
||||
}
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
@ -5274,6 +5335,10 @@ uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_base_non_block_type_
|
|||
|
||||
void Compiler::PhysicalStorageBufferPointerHandler::analyze_non_block_types_from_block(const SPIRType &type)
|
||||
{
|
||||
if (analyzed_type_ids.count(type.self))
|
||||
return;
|
||||
analyzed_type_ids.insert(type.self);
|
||||
|
||||
for (auto &member : type.member_types)
|
||||
{
|
||||
auto &subtype = compiler.get<SPIRType>(member);
|
||||
|
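The guard added to analyze_non_block_types_from_block() above is a standard memoized traversal: remember every type already walked so shared subtrees are visited once and self-referential types (common with buffer-device-address pointer chains) cannot recurse forever. A minimal self-contained sketch of the idea follows; Type and analyze() are illustrative stand-ins, not the SPIRV-Cross API.

#include <cstdint>
#include <unordered_set>
#include <vector>

// Toy stand-in for a SPIR-V struct type with member types.
struct Type
{
	uint32_t self = 0;
	std::vector<const Type *> members;
};

// Visit each type at most once; the set doubles as a recursion breaker.
static void analyze(const Type &type, std::unordered_set<uint32_t> &analyzed)
{
	if (analyzed.count(type.self))
		return;
	analyzed.insert(type.self);

	for (const Type *member : type.members)
		analyze(*member, analyzed);
}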
@@ -5407,6 +5472,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
 	switch (opcode)
 	{
 	case OpLoad:
+	case OpCooperativeMatrixLoadKHR:
 	{
 		if (length < 3)
 			return false;
@@ -5484,6 +5550,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
 	case OpStore:
 	case OpImageWrite:
 	case OpAtomicStore:
+	case OpCooperativeMatrixStoreKHR:
 	{
 		if (length < 1)
 			return false;
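The GeometryEmitDisocveryHandler introduced above marks every function on the current call stack once it sees OpEmitVertex or OpEndPrimitive, so emits_geometry propagates to indirect callers and not just the leaf function. A self-contained sketch of that propagation under simplified assumptions (Function, walk and OP_EMIT_VERTEX are invented names, not SPIRV-Cross types):

#include <cstdio>
#include <vector>

struct Function
{
	const char *name;
	bool emits_geometry = false;
	std::vector<Function *> calls; // callees invoked by this function
	std::vector<int> ops;          // opcodes executed directly in this function
};

constexpr int OP_EMIT_VERTEX = 1;

static void walk(Function &fn, std::vector<Function *> &stack)
{
	stack.push_back(&fn);
	for (int op : fn.ops)
		if (op == OP_EMIT_VERTEX)
			for (Function *f : stack) // mark the whole stack, not just the leaf
				f->emits_geometry = true;
	for (Function *callee : fn.calls)
		walk(*callee, stack);
	stack.pop_back();
}

int main()
{
	Function leaf{ "emit_strip" }, mid{ "helper" }, entry{ "main" };
	leaf.ops = { OP_EMIT_VERTEX };
	mid.calls = { &leaf };
	entry.calls = { &mid };
	std::vector<Function *> stack;
	walk(entry, stack);
	std::printf("%s emits geometry: %d\n", entry.name, entry.emits_geometry); // prints 1
	return 0;
}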
thirdparty/spirv-cross/spirv_cross.hpp (vendored): 17 changed lines

@@ -1054,6 +1054,7 @@ protected:
 		std::unordered_set<uint32_t> non_block_types;
 		std::unordered_map<uint32_t, PhysicalBlockMeta> physical_block_type_meta;
 		std::unordered_map<uint32_t, PhysicalBlockMeta *> access_chain_to_physical_block;
+		std::unordered_set<uint32_t> analyzed_type_ids;
 
 		void mark_aligned_access(uint32_t id, const uint32_t *args, uint32_t length);
 		PhysicalBlockMeta *find_block_meta(uint32_t id) const;
@@ -1072,6 +1073,22 @@ protected:
 	                                  bool single_function);
 	bool may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var);
 
+	struct GeometryEmitDisocveryHandler : OpcodeHandler
+	{
+		explicit GeometryEmitDisocveryHandler(Compiler &compiler_)
+		    : compiler(compiler_)
+		{
+		}
+		Compiler &compiler;
+
+		bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override;
+		bool begin_function_scope(const uint32_t *, uint32_t) override;
+		bool end_function_scope(const uint32_t *, uint32_t) override;
+		SmallVector<SPIRFunction *> function_stack;
+	};
+
+	void discover_geometry_emitters();
+
 	// Finds all resources that are written to from inside the critical section, if present.
 	// The critical section is delimited by OpBeginInvocationInterlockEXT and
 	// OpEndInvocationInterlockEXT instructions. In MSL and HLSL, any resources written
thirdparty/spirv-cross/spirv_cross_parsed_ir.cpp (vendored): 25 changed lines

@@ -452,6 +452,10 @@ void ParsedIR::set_decoration(ID id, Decoration decoration, uint32_t argument)
 		dec.fp_rounding_mode = static_cast<FPRoundingMode>(argument);
 		break;
 
+	case DecorationFPFastMathMode:
+		dec.fp_fast_math_mode = static_cast<FPFastMathModeMask>(argument);
+		break;
+
 	default:
 		break;
 	}
@@ -643,6 +647,8 @@ uint32_t ParsedIR::get_decoration(ID id, Decoration decoration) const
 		return dec.index;
 	case DecorationFPRoundingMode:
 		return dec.fp_rounding_mode;
+	case DecorationFPFastMathMode:
+		return dec.fp_fast_math_mode;
 	default:
 		return 1;
 	}
@@ -730,6 +736,10 @@ void ParsedIR::unset_decoration(ID id, Decoration decoration)
 		dec.fp_rounding_mode = FPRoundingModeMax;
 		break;
 
+	case DecorationFPFastMathMode:
+		dec.fp_fast_math_mode = FPFastMathModeMaskNone;
+		break;
+
 	case DecorationHlslCounterBufferGOOGLE:
 	{
 		auto &counter = meta[id].hlsl_magic_counter_buffer;
@@ -1050,16 +1060,21 @@ void ParsedIR::make_constant_null(uint32_t id, uint32_t type, bool add_to_typed_
 		uint32_t parent_id = increase_bound_by(1);
 		make_constant_null(parent_id, constant_type.parent_type, add_to_typed_id_set);
 
-		if (!constant_type.array_size_literal.back())
-			SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal.");
+		// The array size of OpConstantNull can be either literal or specialization constant.
+		// In the latter case, we cannot take the value as-is, as it can be changed to anything.
+		// Rather, we assume it to be *one* for the sake of initializer.
+		bool is_literal_array_size = constant_type.array_size_literal.back();
+		uint32_t count = is_literal_array_size ? constant_type.array.back() : 1;
 
-		SmallVector<uint32_t> elements(constant_type.array.back());
-		for (uint32_t i = 0; i < constant_type.array.back(); i++)
+		SmallVector<uint32_t> elements(count);
+		for (uint32_t i = 0; i < count; i++)
 			elements[i] = parent_id;
 
 		if (add_to_typed_id_set)
 			add_typed_id(TypeConstant, id);
-		variant_set<SPIRConstant>(ids[id], type, elements.data(), uint32_t(elements.size()), false).self = id;
+		auto &constant = variant_set<SPIRConstant>(ids[id], type, elements.data(), uint32_t(elements.size()), false);
+		constant.self = id;
+		constant.is_null_array_specialized_length = !is_literal_array_size;
 	}
 	else if (!constant_type.member_types.empty())
 	{
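Both make_constant_null() overloads now fall back to a single element when the array length is a specialization constant, since the real length can be overridden at pipeline-compile time and the null initializer must stay valid. A hedged toy version of just that fallback (NullArrayConstant and make_null_array are invented names for illustration):

#include <cstdint>
#include <vector>

// A literal length builds that many null elements; a specialization-constant
// length materializes one element and flags the constant, mirroring
// is_null_array_specialized_length in the real code.
struct NullArrayConstant
{
	std::vector<uint32_t> element_ids;
	bool specialized_length = false;
};

static NullArrayConstant make_null_array(uint32_t parent_id, uint32_t literal_len, bool is_literal)
{
	uint32_t count = is_literal ? literal_len : 1; // assume *one* for the initializer
	NullArrayConstant c;
	c.element_ids.assign(count, parent_id);
	c.specialized_length = !is_literal;
	return c;
}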
thirdparty/spirv-cross/spirv_cross_util.cpp (vendored): 77 lines removed

@@ -1,77 +0,0 @@
-/*
- * Copyright 2015-2021 Arm Limited
- * SPDX-License-Identifier: Apache-2.0 OR MIT
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * At your option, you may choose to accept this material under either:
- * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
- * 2. The MIT License, found at <http://opensource.org/licenses/MIT>.
- */
-
-#include "spirv_cross_util.hpp"
-#include "spirv_common.hpp"
-
-using namespace spv;
-using namespace SPIRV_CROSS_NAMESPACE;
-
-namespace spirv_cross_util
-{
-void rename_interface_variable(Compiler &compiler, const SmallVector<Resource> &resources, uint32_t location,
-                               const std::string &name)
-{
-	for (auto &v : resources)
-	{
-		if (!compiler.has_decoration(v.id, spv::DecorationLocation))
-			continue;
-
-		auto loc = compiler.get_decoration(v.id, spv::DecorationLocation);
-		if (loc != location)
-			continue;
-
-		auto &type = compiler.get_type(v.base_type_id);
-
-		// This is more of a friendly variant. If we need to rename interface variables, we might have to rename
-		// structs as well and make sure all the names match up.
-		if (type.basetype == SPIRType::Struct)
-		{
-			compiler.set_name(v.base_type_id, join("SPIRV_Cross_Interface_Location", location));
-			for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
-				compiler.set_member_name(v.base_type_id, i, join("InterfaceMember", i));
-		}
-
-		compiler.set_name(v.id, name);
-	}
-}
-
-void inherit_combined_sampler_bindings(Compiler &compiler)
-{
-	auto &samplers = compiler.get_combined_image_samplers();
-	for (auto &s : samplers)
-	{
-		if (compiler.has_decoration(s.image_id, spv::DecorationDescriptorSet))
-		{
-			uint32_t set = compiler.get_decoration(s.image_id, spv::DecorationDescriptorSet);
-			compiler.set_decoration(s.combined_id, spv::DecorationDescriptorSet, set);
-		}
-
-		if (compiler.has_decoration(s.image_id, spv::DecorationBinding))
-		{
-			uint32_t binding = compiler.get_decoration(s.image_id, spv::DecorationBinding);
-			compiler.set_decoration(s.combined_id, spv::DecorationBinding, binding);
-		}
-	}
-}
-} // namespace spirv_cross_util
thirdparty/spirv-cross/spirv_cross_util.hpp (vendored): 37 lines removed

@@ -1,37 +0,0 @@
-/*
- * Copyright 2015-2021 Arm Limited
- * SPDX-License-Identifier: Apache-2.0 OR MIT
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * At your option, you may choose to accept this material under either:
- * 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
- * 2. The MIT License, found at <http://opensource.org/licenses/MIT>.
- */
-
-#ifndef SPIRV_CROSS_UTIL_HPP
-#define SPIRV_CROSS_UTIL_HPP
-
-#include "spirv_cross.hpp"
-
-namespace spirv_cross_util
-{
-void rename_interface_variable(SPIRV_CROSS_NAMESPACE::Compiler &compiler,
-                               const SPIRV_CROSS_NAMESPACE::SmallVector<SPIRV_CROSS_NAMESPACE::Resource> &resources,
-                               uint32_t location, const std::string &name);
-void inherit_combined_sampler_bindings(SPIRV_CROSS_NAMESPACE::Compiler &compiler);
-} // namespace spirv_cross_util
-
-#endif
thirdparty/spirv-cross/spirv_glsl.cpp (vendored): 736 changed lines
File diff suppressed because it is too large.
thirdparty/spirv-cross/spirv_glsl.hpp (vendored): 16 changed lines

@@ -297,6 +297,9 @@ public:
 		float_formatter = formatter;
 	}
 
+	// Returns the macro name corresponding to constant id
+	std::string constant_value_macro_name(uint32_t id) const;
+
 protected:
 	struct ShaderSubgroupSupportHelper
 	{
@@ -450,6 +453,7 @@ protected:
 	virtual std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0);
 	virtual bool variable_decl_is_remapped_storage(const SPIRVariable &var, spv::StorageClass storage) const;
 	virtual std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id);
+	virtual void emit_workgroup_initialization(const SPIRVariable &var);
 
 	struct TextureFunctionBaseArguments
 	{
@@ -622,6 +626,8 @@ protected:
 	const char *uint16_t_literal_suffix = "us";
 	const char *nonuniform_qualifier = "nonuniformEXT";
 	const char *boolean_mix_function = "mix";
+	const char *printf_function = "debugPrintfEXT";
+	std::string constant_null_initializer = "";
 	SPIRType::BaseType boolean_in_struct_remapped_type = SPIRType::Boolean;
 	bool swizzle_is_function = false;
 	bool shared_is_implied = false;
@@ -629,6 +635,7 @@ protected:
 	bool explicit_struct_type = false;
 	bool use_initializer_list = false;
 	bool use_typed_initializer_list = false;
+	bool requires_matching_array_initializer = false;
 	bool can_declare_struct_inline = true;
 	bool can_declare_arrays_inline = true;
 	bool native_row_major_matrix = true;
@@ -679,7 +686,6 @@ protected:
 	                          const SmallVector<uint32_t> &indices);
 	void emit_block_chain(SPIRBlock &block);
 	void emit_hoisted_temporaries(SmallVector<std::pair<TypeID, ID>> &temporaries);
-	std::string constant_value_macro_name(uint32_t id);
 	int get_constant_mapping_to_workgroup_component(const SPIRConstant &constant) const;
 	void emit_constant(const SPIRConstant &constant);
 	void emit_specialization_constant_op(const SPIRConstantOp &constant);
@@ -695,6 +701,7 @@ protected:
 	void emit_variable_temporary_copies(const SPIRVariable &var);
 
 	bool should_dereference(uint32_t id);
+	bool should_dereference_caller_param(uint32_t id);
 	bool should_forward(uint32_t id) const;
 	bool should_suppress_usage_tracking(uint32_t id) const;
 	void emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp);
@@ -762,7 +769,7 @@ protected:
 	spv::StorageClass get_expression_effective_storage_class(uint32_t ptr);
 	virtual bool access_chain_needs_stage_io_builtin_translation(uint32_t base);
 
-	virtual void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type);
+	virtual bool check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type);
 	virtual bool prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type,
 	                                                    spv::StorageClass storage, bool &is_packed);
 
@@ -792,8 +799,9 @@ protected:
 	std::string declare_temporary(uint32_t type, uint32_t id);
 	void emit_uninitialized_temporary(uint32_t type, uint32_t id);
 	SPIRExpression &emit_uninitialized_temporary_expression(uint32_t type, uint32_t id);
-	void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<std::string> &arglist);
+	virtual void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<std::string> &arglist);
 	std::string to_non_uniform_aware_expression(uint32_t id);
+	std::string to_atomic_ptr_expression(uint32_t id);
 	std::string to_expression(uint32_t id, bool register_expression_read = true);
 	std::string to_composite_constructor_expression(const SPIRType &parent_type, uint32_t id, bool block_like_type);
 	std::string to_rerolled_array_expression(const SPIRType &parent_type, const std::string &expr, const SPIRType &type);
@@ -1009,6 +1017,8 @@ protected:
 	const Instruction *get_next_instruction_in_block(const Instruction &instr);
 	static uint32_t mask_relevant_memory_semantics(uint32_t semantics);
 
+	std::string convert_floate4m3_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
+	std::string convert_floate5m2_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
 	std::string convert_half_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
 	std::string convert_float_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
 	std::string convert_double_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
thirdparty/spirv-cross/spirv_msl.cpp (vendored): 1480 changed lines
File diff suppressed because it is too large.
thirdparty/spirv-cross/spirv_msl.hpp (vendored): 83 changed lines

@@ -324,6 +324,8 @@ public:
 		// of the shader with the additional fixed sample mask.
 		uint32_t additional_fixed_sample_mask = 0xffffffff;
 		bool enable_point_size_builtin = true;
+		bool enable_point_size_default = false;
+		float default_point_size = 1.0f;
 		bool enable_frag_depth_builtin = true;
 		bool enable_frag_stencil_ref_builtin = true;
 		bool disable_rasterization = false;
@@ -536,6 +538,14 @@ public:
 		// if the fragment does not modify the depth value.
 		bool input_attachment_is_ds_attachment = false;
 
+		// If BuiltInPosition is not written, automatically disable rasterization.
+		// The result can be queried with get_is_rasterization_disabled.
+		bool auto_disable_rasterization = false;
+
+		// Use Fast Math pragmas in MSL code, based on SPIR-V float controls and FP ExecutionModes.
+		// Requires MSL 3.2 or above, and has no effect with earlier MSL versions.
+		bool use_fast_math_pragmas = false;
+
 		bool is_ios() const
 		{
 			return platform == iOS;
@@ -756,6 +766,19 @@ public:
 	void set_combined_sampler_suffix(const char *suffix);
 	const char *get_combined_sampler_suffix() const;
 
+	// Information about specialization constants that are translated into MSL macros
+	// instead of using function constant
+	// These must only be called after a successful call to CompilerMSL::compile().
+	bool specialization_constant_is_macro(uint32_t constant_id) const;
+
+	// Returns a mask of SPIR-V FP Fast Math Mode flags, that represents the set of flags that can be applied
+	// across all floating-point types. Each FPFastMathDefault execution mode operation identifies the flags
+	// for one floating-point type, and the value returned here is a bitwise-AND combination across all types.
+	// If incl_ops is enabled, the FPFastMathMode of any SPIR-V operations are also included in the bitwise-AND
+	// to determine the minimal fast-math that applies to all default execution modes and all operations.
+	// The returned value is also affected by execution modes SignedZeroInfNanPreserve and ContractionOff.
+	uint32_t get_fp_fast_math_flags(bool incl_ops);
+
 protected:
 	// An enum of SPIR-V functions that are implemented in additional
 	// source code that is added to the shader if necessary.
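The comment on get_fp_fast_math_flags() describes a bitwise-AND reduction: only a relaxation that every per-type FPFastMathDefault grants can be applied globally, for example via an MSL fast-math pragma. A small sketch under that reading follows; the enum values mirror SPIR-V's FPFastMathMode bits, but the helper itself is illustrative and not part of the SPIRV-Cross API.

#include <cstdint>
#include <initializer_list>

enum FPFastMathModeBits : uint32_t
{
	FP_FAST_MATH_NOT_NAN = 0x1,
	FP_FAST_MATH_NOT_INF = 0x2,
	FP_FAST_MATH_NSZ = 0x4,
	FP_FAST_MATH_ALLOW_RECIP = 0x8,
	FP_FAST_MATH_FAST = 0x10,
};

// AND the per-type flag sets together; a bit survives only if every type allows it.
static uint32_t combine_fp_fast_math_flags(std::initializer_list<uint32_t> per_type_flags)
{
	uint32_t combined = ~0u;
	for (uint32_t flags : per_type_flags)
		combined &= flags;
	return combined;
}

For instance, combining NotNaN|NotInf for fp32 with only NotNaN for fp16 yields just NotNaN, the minimal fast-math set that is safe for both types.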
@@ -763,6 +786,7 @@ protected:
 	{
 		SPVFuncImplNone,
 		SPVFuncImplMod,
+		SPVFuncImplSMod,
 		SPVFuncImplRadians,
 		SPVFuncImplDegrees,
 		SPVFuncImplFindILsb,
@@ -784,12 +808,11 @@ protected:
 		SPVFuncImplInverse4x4,
 		SPVFuncImplInverse3x3,
 		SPVFuncImplInverse2x2,
-		// It is very important that this come before *Swizzle and ChromaReconstruct*, to ensure it's
-		// emitted before them.
-		SPVFuncImplForwardArgs,
-		// Likewise, this must come before *Swizzle.
+		// It is very important that this come before *Swizzle, to ensure it's emitted before them.
 		SPVFuncImplGetSwizzle,
 		SPVFuncImplTextureSwizzle,
+		SPVFuncImplGatherReturn,
+		SPVFuncImplGatherCompareReturn,
 		SPVFuncImplGatherSwizzle,
 		SPVFuncImplGatherCompareSwizzle,
 		SPVFuncImplGatherConstOffsets,
@@ -806,6 +829,30 @@ protected:
 		SPVFuncImplSubgroupShuffleXor,
 		SPVFuncImplSubgroupShuffleUp,
 		SPVFuncImplSubgroupShuffleDown,
+		SPVFuncImplSubgroupRotate,
+		SPVFuncImplSubgroupClusteredAdd,
+		SPVFuncImplSubgroupClusteredFAdd = SPVFuncImplSubgroupClusteredAdd,
+		SPVFuncImplSubgroupClusteredIAdd = SPVFuncImplSubgroupClusteredAdd,
+		SPVFuncImplSubgroupClusteredMul,
+		SPVFuncImplSubgroupClusteredFMul = SPVFuncImplSubgroupClusteredMul,
+		SPVFuncImplSubgroupClusteredIMul = SPVFuncImplSubgroupClusteredMul,
+		SPVFuncImplSubgroupClusteredMin,
+		SPVFuncImplSubgroupClusteredFMin = SPVFuncImplSubgroupClusteredMin,
+		SPVFuncImplSubgroupClusteredSMin = SPVFuncImplSubgroupClusteredMin,
+		SPVFuncImplSubgroupClusteredUMin = SPVFuncImplSubgroupClusteredMin,
+		SPVFuncImplSubgroupClusteredMax,
+		SPVFuncImplSubgroupClusteredFMax = SPVFuncImplSubgroupClusteredMax,
+		SPVFuncImplSubgroupClusteredSMax = SPVFuncImplSubgroupClusteredMax,
+		SPVFuncImplSubgroupClusteredUMax = SPVFuncImplSubgroupClusteredMax,
+		SPVFuncImplSubgroupClusteredAnd,
+		SPVFuncImplSubgroupClusteredBitwiseAnd = SPVFuncImplSubgroupClusteredAnd,
+		SPVFuncImplSubgroupClusteredLogicalAnd = SPVFuncImplSubgroupClusteredAnd,
+		SPVFuncImplSubgroupClusteredOr,
+		SPVFuncImplSubgroupClusteredBitwiseOr = SPVFuncImplSubgroupClusteredOr,
+		SPVFuncImplSubgroupClusteredLogicalOr = SPVFuncImplSubgroupClusteredOr,
+		SPVFuncImplSubgroupClusteredXor,
+		SPVFuncImplSubgroupClusteredBitwiseXor = SPVFuncImplSubgroupClusteredXor,
+		SPVFuncImplSubgroupClusteredLogicalXor = SPVFuncImplSubgroupClusteredXor,
 		SPVFuncImplQuadBroadcast,
 		SPVFuncImplQuadSwap,
 		SPVFuncImplReflectScalar,
@@ -841,6 +888,7 @@ protected:
 		SPVFuncImplTextureCast,
 		SPVFuncImplMulExtended,
 		SPVFuncImplSetMeshOutputsEXT,
+		SPVFuncImplAssume,
 	};
 
 	// If the underlying resource has been used for comparison then duplicate loads of that resource must be too
@@ -858,6 +906,11 @@ protected:
 	void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override;
 	void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override;
 	void emit_subgroup_op(const Instruction &i) override;
+	void emit_subgroup_cluster_op(uint32_t result_type, uint32_t result_id, uint32_t cluster_size, uint32_t op0,
+	                              const char *op);
+	void emit_subgroup_cluster_op_cast(uint32_t result_type, uint32_t result_id, uint32_t cluster_size, uint32_t op0,
+	                                   const char *op, SPIRType::BaseType input_type,
+	                                   SPIRType::BaseType expected_result_type);
 	std::string to_texture_op(const Instruction &i, bool sparse, bool *forward,
 	                          SmallVector<uint32_t> &inherited_expressions) override;
 	void emit_fixup() override;
@@ -872,6 +925,7 @@ protected:
 	void emit_mesh_entry_point();
 	void emit_mesh_outputs();
 	void emit_mesh_tasks(SPIRBlock &block) override;
+	void emit_workgroup_initialization(const SPIRVariable &var) override;
 
 	// Allow Metal to use the array<T> template to make arrays a value type
 	std::string type_to_array_glsl(const SPIRType &type, uint32_t variable_id) override;
@@ -985,6 +1039,7 @@ protected:
 	void add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var);
 	void add_tess_level_input(const std::string &base_ref, const std::string &mbr_name, SPIRVariable &var);
 
+	void ensure_struct_members_valid_vecsizes(SPIRType &struct_type, uint32_t &location);
 	void fix_up_interface_member_indices(spv::StorageClass storage, uint32_t ib_type_id);
 
 	void mark_location_as_used_by_shader(uint32_t location, const SPIRType &type,
@@ -1069,7 +1124,8 @@ protected:
 	bool validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const;
 	std::string get_argument_address_space(const SPIRVariable &argument);
 	std::string get_type_address_space(const SPIRType &type, uint32_t id, bool argument = false);
-	static bool decoration_flags_signal_volatile(const Bitset &flags);
+	bool decoration_flags_signal_volatile(const Bitset &flags) const;
+	bool decoration_flags_signal_coherent(const Bitset &flags) const;
 	const char *to_restrict(uint32_t id, bool space);
 	SPIRType &get_stage_in_struct_type();
 	SPIRType &get_stage_out_struct_type();
@@ -1082,7 +1138,7 @@ protected:
 	                          uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0,
 	                          bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0);
 	const char *get_memory_order(uint32_t spv_mem_sem);
-	void add_pragma_line(const std::string &line);
+	void add_pragma_line(const std::string &line, bool recompile_on_unique);
 	void add_typedef_line(const std::string &line);
 	void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
 	bool emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id,
@@ -1133,12 +1189,13 @@ protected:
 	void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) override;
 
 	void analyze_sampled_image_usage();
+	void analyze_workgroup_variables();
 
 	bool access_chain_needs_stage_io_builtin_translation(uint32_t base) override;
 	bool prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage,
 	                                            bool &is_packed) override;
 	void fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length);
-	void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) override;
+	bool check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) override;
 
 	bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length);
 	bool emit_tessellation_io_load(uint32_t result_type, uint32_t id, uint32_t ptr);
@@ -1164,9 +1221,10 @@ protected:
 	std::unordered_map<uint32_t, uint32_t> fragment_output_components;
 	std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_input_location;
 	std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_output_location;
-	std::set<std::string> pragma_lines;
-	std::set<std::string> typedef_lines;
+	std::vector<std::string> pragma_lines;
+	std::vector<std::string> typedef_lines;
 	SmallVector<uint32_t> vars_needing_early_declaration;
+	std::unordered_set<uint32_t> constant_macro_ids;
 
 	std::unordered_map<StageSetBinding, std::pair<MSLResourceBinding, bool>, InternalHasher> resource_bindings;
 	std::unordered_map<StageSetBinding, uint32_t, InternalHasher> resource_arg_buff_idx_to_binding_number;
@@ -1210,11 +1268,14 @@ protected:
 	bool needs_swizzle_buffer_def = false;
 	bool used_swizzle_buffer = false;
 	bool added_builtin_tess_level = false;
+	bool needs_local_invocation_index = false;
 	bool needs_subgroup_invocation_id = false;
 	bool needs_subgroup_size = false;
 	bool needs_sample_id = false;
 	bool needs_helper_invocation = false;
+	bool needs_workgroup_zero_init = false;
 	bool writes_to_depth = false;
+	bool writes_to_point_size = false;
 	std::string qual_pos_var_name;
 	std::string stage_in_var_name = "in";
 	std::string stage_out_var_name = "out";
@@ -1276,6 +1337,7 @@ protected:
 
 	bool suppress_missing_prototypes = false;
 	bool suppress_incompatible_pointer_types_discard_qualifiers = false;
+	bool suppress_sometimes_unitialized = false;
 
 	void add_spv_func_and_recompile(SPVFuncImpl spv_func);
 
@@ -1308,7 +1370,7 @@ protected:
 		}
 
 		bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override;
-		CompilerMSL::SPVFuncImpl get_spv_func_impl(spv::Op opcode, const uint32_t *args);
+		CompilerMSL::SPVFuncImpl get_spv_func_impl(spv::Op opcode, const uint32_t *args, uint32_t length);
 		void check_resource_write(uint32_t var_id);
 
 		CompilerMSL &compiler;
@@ -1319,6 +1381,7 @@ protected:
 		bool uses_image_write = false;
 		bool uses_buffer_write = false;
 		bool uses_discard = false;
+		bool needs_local_invocation_index = false;
 		bool needs_subgroup_invocation_id = false;
 		bool needs_subgroup_size = false;
 		bool needs_sample_id = false;
|
76
thirdparty/spirv-cross/spirv_parser.cpp
vendored
76
thirdparty/spirv-cross/spirv_parser.cpp
vendored
|
@ -305,6 +305,7 @@ void Parser::parse(const Instruction &instruction)
|
|||
}
|
||||
|
||||
case OpExtInst:
|
||||
case OpExtInstWithForwardRefsKHR:
|
||||
{
|
||||
// The SPIR-V debug information extended instructions might come at global scope.
|
||||
if (current_block)
|
||||
|
@ -380,13 +381,21 @@ void Parser::parse(const Instruction &instruction)
|
|||
auto mode = static_cast<ExecutionMode>(ops[1]);
|
||||
execution.flags.set(mode);
|
||||
|
||||
if (mode == ExecutionModeLocalSizeId)
|
||||
switch (mode)
|
||||
{
|
||||
case ExecutionModeLocalSizeId:
|
||||
execution.workgroup_size.id_x = ops[2];
|
||||
execution.workgroup_size.id_y = ops[3];
|
||||
execution.workgroup_size.id_z = ops[4];
|
||||
}
|
||||
break;
|
||||
|
||||
case ExecutionModeFPFastMathDefault:
|
||||
execution.fp_fast_math_defaults[ops[2]] = ops[3];
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -536,12 +545,37 @@ void Parser::parse(const Instruction &instruction)
|
|||
uint32_t id = ops[0];
|
||||
uint32_t width = ops[1];
|
||||
auto &type = set<SPIRType>(id, op);
|
||||
|
||||
if (width != 16 && width != 8 && length > 2)
|
||||
SPIRV_CROSS_THROW("Unrecognized FP encoding mode for OpTypeFloat.");
|
||||
|
||||
if (width == 64)
|
||||
type.basetype = SPIRType::Double;
|
||||
else if (width == 32)
|
||||
type.basetype = SPIRType::Float;
|
||||
else if (width == 16)
|
||||
type.basetype = SPIRType::Half;
|
||||
{
|
||||
if (length > 2)
|
||||
{
|
||||
if (ops[2] == spv::FPEncodingBFloat16KHR)
|
||||
type.basetype = SPIRType::BFloat16;
|
||||
else
|
||||
SPIRV_CROSS_THROW("Unrecognized encoding for OpTypeFloat 16.");
|
||||
}
|
||||
else
|
||||
type.basetype = SPIRType::Half;
|
||||
}
|
||||
else if (width == 8)
|
||||
{
|
||||
if (length < 2)
|
||||
SPIRV_CROSS_THROW("Missing encoding for OpTypeFloat 8.");
|
||||
else if (ops[2] == spv::FPEncodingFloat8E4M3EXT)
|
||||
type.basetype = SPIRType::FloatE4M3;
|
||||
else if (ops[2] == spv::FPEncodingFloat8E5M2EXT)
|
||||
type.basetype = SPIRType::FloatE5M2;
|
||||
else
|
||||
SPIRV_CROSS_THROW("Invalid encoding for OpTypeFloat 8.");
|
||||
}
|
||||
else
|
||||
SPIRV_CROSS_THROW("Unrecognized bit-width of floating point type.");
|
||||
type.width = width;
|
||||
|
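The OpTypeFloat case above dispatches on bit width first and on the optional encoding operand second: 16-bit defaults to half unless bfloat16 is requested, while 8-bit floats always need an explicit E4M3 or E5M2 encoding. A compact sketch of the same mapping with local stand-in enums (BaseType, Encoding), not the SPIRV-Cross types:

#include <cstdint>
#include <stdexcept>

enum class BaseType { Double, Float, Half, BFloat16, FloatE4M3, FloatE5M2 };
enum class Encoding { None, BFloat16KHR, Float8E4M3EXT, Float8E5M2EXT };

static BaseType float_base_type(uint32_t width, Encoding enc)
{
	switch (width)
	{
	case 64: return BaseType::Double;
	case 32: return BaseType::Float;
	case 16: // optional encoding operand selects bfloat16
		return enc == Encoding::BFloat16KHR ? BaseType::BFloat16 : BaseType::Half;
	case 8: // 8-bit floats always require an explicit encoding
		if (enc == Encoding::Float8E4M3EXT)
			return BaseType::FloatE4M3;
		if (enc == Encoding::Float8E5M2EXT)
			return BaseType::FloatE5M2;
		throw std::runtime_error("Missing or invalid encoding for OpTypeFloat 8.");
	default:
		throw std::runtime_error("Unrecognized bit-width of floating point type.");
	}
}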
@@ -592,6 +626,22 @@ void Parser::parse(const Instruction &instruction)
 		break;
 	}
 
+	case OpTypeCooperativeMatrixKHR:
+	{
+		uint32_t id = ops[0];
+		auto &base = get<SPIRType>(ops[1]);
+		auto &matrixbase = set<SPIRType>(id, base);
+
+		matrixbase.op = op;
+		matrixbase.cooperative.scope_id = ops[2];
+		matrixbase.cooperative.rows_id = ops[3];
+		matrixbase.cooperative.columns_id = ops[4];
+		matrixbase.cooperative.use_id = ops[5];
+		matrixbase.self = id;
+		matrixbase.parent_type = ops[1];
+		break;
+	}
+
 	case OpTypeArray:
 	{
 		uint32_t id = ops[0];
@@ -835,17 +885,27 @@ void Parser::parse(const Instruction &instruction)
 		break;
 	}
 
 	// Constants
 	case OpSpecConstant:
 	case OpConstant:
+	case OpConstantCompositeReplicateEXT:
+	case OpSpecConstantCompositeReplicateEXT:
 	{
 		uint32_t id = ops[1];
 		auto &type = get<SPIRType>(ops[0]);
 
-		if (type.width > 32)
-			set<SPIRConstant>(id, ops[0], ops[2] | (uint64_t(ops[3]) << 32), op == OpSpecConstant);
+		if (op == OpConstantCompositeReplicateEXT || op == OpSpecConstantCompositeReplicateEXT)
+		{
+			auto subconstant = uint32_t(ops[2]);
+			set<SPIRConstant>(id, ops[0], &subconstant, 1, op == OpSpecConstantCompositeReplicateEXT, true);
+		}
 		else
-			set<SPIRConstant>(id, ops[0], ops[2], op == OpSpecConstant);
+		{
+			if (type.width > 32)
+				set<SPIRConstant>(id, ops[0], ops[2] | (uint64_t(ops[3]) << 32), op == OpSpecConstant);
+			else
+				set<SPIRConstant>(id, ops[0], ops[2], op == OpSpecConstant);
+		}
 		break;
 	}
 
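Note how wide constants are assembled in the hunk above: a literal wider than 32 bits arrives as two SPIR-V words, low word first, and is recombined as ops[2] | (uint64_t(ops[3]) << 32). A minimal check of that word order (assemble_u64 is an illustrative helper, not SPIRV-Cross code):

#include <cassert>
#include <cstdint>

// Reassemble a 64-bit literal from two 32-bit SPIR-V words (low word first).
static uint64_t assemble_u64(uint32_t low_word, uint32_t high_word)
{
	return low_word | (uint64_t(high_word) << 32);
}

int main()
{
	// 0x0123456789abcdef is encoded as low = 0x89abcdef, high = 0x01234567.
	assert(assemble_u64(0x89abcdefu, 0x01234567u) == 0x0123456789abcdefull);
	return 0;
}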