Apple: Use image atomic operations on supported Apple hardware

Co-authored-by: A Thousand Ships <96648715+AThousandShips@users.noreply.github.com>
Stuart Carnie 2025-06-27 09:59:21 +10:00
parent 9b22b41531
commit 5230f6c60c
32 changed files with 5354 additions and 719 deletions

View file

@ -2490,6 +2490,9 @@
<constant name="SUPPORTS_BUFFER_DEVICE_ADDRESS" value="6" enum="Features">
Support for the buffer device address extension.
</constant>
<constant name="SUPPORTS_IMAGE_ATOMIC_32_BIT" value="7" enum="Features">
Support for 32-bit image atomic operations.
</constant>
<constant name="LIMIT_MAX_BOUND_UNIFORM_SETS" value="0" enum="Limit">
Maximum number of uniform sets that can be bound at a given time.
</constant>
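A minimal sketch of gating resource creation on this new flag at runtime; the fallback mirrors the volumetric-fog change later in this commit, and make_counter_resource is a hypothetical helper, not engine API:

RID make_counter_resource(const Vector3i &p_size) {
	RenderingDevice *rd = RD::get_singleton();
	if (rd->has_feature(RD::SUPPORTS_IMAGE_ATOMIC_32_BIT)) {
		// Native 32-bit image atomics: use an R32_UINT storage texture.
		RD::TextureFormat tf;
		tf.format = RD::DATA_FORMAT_R32_UINT;
		tf.texture_type = RD::TEXTURE_TYPE_3D;
		tf.width = p_size.x;
		tf.height = p_size.y;
		tf.depth = p_size.z;
		tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_STORAGE_ATOMIC_BIT;
		return rd->texture_create(tf, RD::TextureView());
	}
	// No image atomics: fall back to a zero-initialized storage buffer.
	Vector<uint8_t> zeros;
	zeros.resize_initialized(p_size.x * p_size.y * p_size.z * 4);
	return rd->storage_buffer_create(zeros.size(), zeros);
}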

View file

@ -0,0 +1,56 @@
/**************************************************************************/
/* foundation_helpers.h */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#pragma once
#import <Foundation/NSString.h>
class String;
template <typename T>
class CharStringT;
using CharString = CharStringT<char>;
namespace conv {
/**
* Converts a Godot String to an NSString without allocating an intermediate UTF-8 buffer.
*/
NSString *to_nsstring(const String &p_str);
/**
* Converts a Godot CharString to an NSString without allocating an intermediate UTF-8 buffer.
*/
NSString *to_nsstring(const CharString &p_str);
/**
* Converts an NSString to a Godot String without allocating intermediate buffers.
*/
String to_string(NSString *p_str);
} //namespace conv
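A quick round-trip sketch of these helpers (hypothetical call site in an Objective-C++ translation unit under ARC):

String engine_str = U"Hello, Godot";
NSString *ns = conv::to_nsstring(engine_str); // UTF-32LE bytes handed to NSString directly.
String back = conv::to_string(ns); // Uses the CFString fast paths in the .mm file when available.
DEV_ASSERT(back == engine_str);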

View file

@ -0,0 +1,85 @@
/**************************************************************************/
/* foundation_helpers.mm */
/**************************************************************************/
/* This file is part of: */
/* GODOT ENGINE */
/* https://godotengine.org */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur. */
/* */
/* Permission is hereby granted, free of charge, to any person obtaining */
/* a copy of this software and associated documentation files (the */
/* "Software"), to deal in the Software without restriction, including */
/* without limitation the rights to use, copy, modify, merge, publish, */
/* distribute, sublicense, and/or sell copies of the Software, and to */
/* permit persons to whom the Software is furnished to do so, subject to */
/* the following conditions: */
/* */
/* The above copyright notice and this permission notice shall be */
/* included in all copies or substantial portions of the Software. */
/* */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/**************************************************************************/
#import "foundation_helpers.h"
#import "core/string/ustring.h"
#import <CoreFoundation/CFString.h>
namespace conv {
NSString *to_nsstring(const String &p_str) {
return [[NSString alloc] initWithBytes:(const void *)p_str.ptr()
length:p_str.length() * sizeof(char32_t)
encoding:NSUTF32LittleEndianStringEncoding];
}
NSString *to_nsstring(const CharString &p_str) {
return [[NSString alloc] initWithBytes:(const void *)p_str.ptr()
length:p_str.length()
encoding:NSUTF8StringEncoding];
}
String to_string(NSString *p_str) {
CFStringRef str = (__bridge CFStringRef)p_str;
CFStringEncoding fastest = CFStringGetFastestEncoding(str);
// Sometimes CFString will return a pointer to its encoded data,
// letting us create the string without allocating intermediate buffers.
const char *p = CFStringGetCStringPtr(str, fastest);
if (p) {
switch (fastest) {
case kCFStringEncodingASCII:
return String::ascii(Span(p, CFStringGetLength(str)));
case kCFStringEncodingUTF8:
return String::utf8(p);
case kCFStringEncodingUTF32LE:
return String::utf32(Span((char32_t *)p, CFStringGetLength(str)));
default:
break;
}
}
CFRange range = CFRangeMake(0, CFStringGetLength(str));
CFIndex byte_len = 0;
// Try to losslessly convert the string directly into a String's buffer to avoid intermediate allocations.
CFIndex n = CFStringGetBytes(str, range, kCFStringEncodingUTF32LE, 0, NO, nil, 0, &byte_len);
if (n == range.length) {
String res;
CFIndex char_len = byte_len / sizeof(char32_t);
res.resize_uninitialized(char_len + 1);
CFStringGetBytes(str, range, kCFStringEncodingUTF32LE, 0, NO, (UInt8 *)res.ptrw(), byte_len, nil);
// n counts UTF-16 units, which can exceed the char32_t count for non-BMP text,
// so null-terminate at the converted length rather than at n.
res[char_len] = 0;
return res;
}
return String::utf8(p_str.UTF8String);
}
} //namespace conv

View file

@ -5586,6 +5586,8 @@ bool RenderingDeviceDriverD3D12::has_feature(Features p_feature) {
return true;
case SUPPORTS_BUFFER_DEVICE_ADDRESS:
return true;
case SUPPORTS_IMAGE_ATOMIC_32_BIT:
return true;
default:
return false;
}

View file

@ -12,7 +12,6 @@ thirdparty_obj = []
thirdparty_dir = "#thirdparty/spirv-cross/"
thirdparty_sources = [
"spirv_cfg.cpp",
"spirv_cross_util.cpp",
"spirv_cross.cpp",
"spirv_parser.cpp",
"spirv_msl.cpp",

View file

@ -94,6 +94,8 @@ struct API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MetalFeatures {
bool metal_fx_spatial = false; /**< If true, Metal FX spatial functions are supported. */
bool metal_fx_temporal = false; /**< If true, Metal FX temporal functions are supported. */
bool supports_gpu_address = false; /**< If true, referencing a GPU address in a shader is supported. */
bool supports_image_atomic_32_bit = false; /**< If true, 32-bit atomic operations on images are supported. */
bool supports_image_atomic_64_bit = false; /**< If true, 64-bit atomic operations on images are supported. */
};
struct MetalLimits {

View file

@ -121,6 +121,12 @@ void MetalDeviceProperties::init_features(id<MTLDevice> p_device) {
features.simdPermute = [p_device supportsFamily:MTLGPUFamilyApple6];
features.simdReduction = [p_device supportsFamily:MTLGPUFamilyApple7];
features.argument_buffers_tier = p_device.argumentBuffersSupport;
features.supports_image_atomic_32_bit = [p_device supportsFamily:MTLGPUFamilyApple6];
features.supports_image_atomic_64_bit = [p_device supportsFamily:MTLGPUFamilyApple8];
if (OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_IMAGE_ATOMICS") == "1") {
features.supports_image_atomic_32_bit = false;
features.supports_image_atomic_64_bit = false;
}
if (@available(macOS 13.0, iOS 16.0, tvOS 16.0, *)) {
features.needs_arg_encoders = !([p_device supportsFamily:MTLGPUFamilyMetal3] && features.argument_buffers_tier == MTLArgumentBuffersTier2);

View file

@ -309,9 +309,23 @@ public:
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDCommandBuffer {
private:
#pragma mark - Common State
// From RenderingDevice
static constexpr uint32_t MAX_PUSH_CONSTANT_SIZE = 128;
RenderingDeviceDriverMetal *device_driver = nullptr;
id<MTLCommandQueue> queue = nil;
id<MTLCommandBuffer> commandBuffer = nil;
bool state_begin = false;
_FORCE_INLINE_ id<MTLCommandBuffer> command_buffer() {
DEV_ASSERT(state_begin);
if (commandBuffer == nil) {
commandBuffer = queue.commandBuffer;
}
return commandBuffer;
}
void _end_compute_dispatch();
void _end_blit();
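With begin() reduced to flipping state_begin, the accessor above acquires the MTLCommandBuffer only when work is actually encoded. A sketch of the resulting behavior for an empty command buffer (messaging nil is an Objective-C no-op, and the fence path later in this commit signals immediately when get_command_buffer() returns nil):

cb->begin(); // state_begin = true; commandBuffer stays nil.
// ... nothing encoded: no draws, dispatches, or blits ...
cb->commit(); // [nil commit] is a no-op, so no empty buffer reaches the queue.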
@ -326,6 +340,11 @@ private:
void _end_render_pass();
void _render_clear_render_area();
#pragma mark - Compute
void _compute_set_dirty_state();
void _compute_bind_uniform_sets();
public:
MDCommandBufferStateType type = MDCommandBufferStateType::None;
@ -349,18 +368,18 @@ public:
LocalVector<NSUInteger> vertex_offsets;
ResourceUsageMap resource_usage;
// clang-format off
enum DirtyFlag: uint8_t {
DIRTY_NONE = 0b0000'0000,
DIRTY_PIPELINE = 0b0000'0001, //! pipeline state
DIRTY_UNIFORMS = 0b0000'0010, //! uniform sets
DIRTY_DEPTH = 0b0000'0100, //! depth / stencil state
DIRTY_VERTEX = 0b0000'1000, //! vertex buffers
DIRTY_VIEWPORT = 0b0001'0000, //! viewport rectangles
DIRTY_SCISSOR = 0b0010'0000, //! scissor rectangles
DIRTY_BLEND = 0b0100'0000, //! blend state
DIRTY_RASTER = 0b1000'0000, //! encoder state like cull mode
DIRTY_ALL = 0xff,
enum DirtyFlag: uint16_t {
DIRTY_NONE = 0,
DIRTY_PIPELINE = 1 << 0, //! pipeline state
DIRTY_UNIFORMS = 1 << 1, //! uniform sets
DIRTY_PUSH = 1 << 2, //! push constants
DIRTY_DEPTH = 1 << 3, //! depth / stencil state
DIRTY_VERTEX = 1 << 4, //! vertex buffers
DIRTY_VIEWPORT = 1 << 5, //! viewport rectangles
DIRTY_SCISSOR = 1 << 6, //! scissor rectangles
DIRTY_BLEND = 1 << 7, //! blend state
DIRTY_RASTER = 1 << 8, //! encoder state like cull mode
DIRTY_ALL = (1 << 9) - 1,
};
// clang-format on
BitField<DirtyFlag> dirty = DIRTY_NONE;
@ -368,6 +387,9 @@ public:
LocalVector<MDUniformSet *> uniform_sets;
// Bit mask of the uniform sets that are dirty, to prevent redundant binding.
uint64_t uniform_set_mask = 0;
uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
uint32_t push_constant_data_len = 0;
uint32_t push_constant_bindings[2] = { 0 };
_FORCE_INLINE_ void reset();
void end_encoding();
@ -422,6 +444,13 @@ public:
dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
}
_FORCE_INLINE_ void mark_push_constants_dirty() {
if (push_constant_data_len == 0) {
return;
}
dirty.set_flag(DirtyFlag::DIRTY_PUSH);
}
_FORCE_INLINE_ void mark_blend_dirty() {
if (!blend_constants.has_value()) {
return;
@ -464,16 +493,46 @@ public:
MDComputePipeline *pipeline = nullptr;
id<MTLComputeCommandEncoder> encoder = nil;
ResourceUsageMap resource_usage;
_FORCE_INLINE_ void reset() {
pipeline = nil;
encoder = nil;
// Keep the keys, as they are likely to be used again.
for (KeyValue<StageResourceUsage, LocalVector<__unsafe_unretained id<MTLResource>>> &kv : resource_usage) {
kv.value.clear();
// clang-format off
enum DirtyFlag: uint16_t {
DIRTY_NONE = 0,
DIRTY_PIPELINE = 1 << 0, //! pipeline state
DIRTY_UNIFORMS = 1 << 1, //! uniform sets
DIRTY_PUSH = 1 << 2, //! push constants
DIRTY_ALL = (1 << 3) - 1,
};
// clang-format on
BitField<DirtyFlag> dirty = DIRTY_NONE;
LocalVector<MDUniformSet *> uniform_sets;
// Bit mask of the uniform sets that are dirty, to prevent redundant binding.
uint64_t uniform_set_mask = 0;
uint8_t push_constant_data[MAX_PUSH_CONSTANT_SIZE];
uint32_t push_constant_data_len = 0;
uint32_t push_constant_bindings[1] = { 0 };
_FORCE_INLINE_ void reset();
void end_encoding();
_FORCE_INLINE_ void mark_uniforms_dirty(void) {
if (uniform_sets.is_empty()) {
return;
}
for (uint32_t i = 0; i < uniform_sets.size(); i++) {
if (uniform_sets[i] != nullptr) {
uniform_set_mask |= 1ULL << i;
}
}
dirty.set_flag(DirtyFlag::DIRTY_UNIFORMS);
}
_FORCE_INLINE_ void mark_push_constants_dirty() {
if (push_constant_data_len == 0) {
return;
}
dirty.set_flag(DirtyFlag::DIRTY_PUSH);
}
void end_encoding();
} compute;
// State specific to a blit pass.
@ -496,6 +555,7 @@ public:
void encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDescriptor *p_desc, NSString *p_label);
void bind_pipeline(RDD::PipelineID p_pipeline);
void encode_push_constant_data(RDD::ShaderID p_shader, VectorView<uint32_t> p_data);
#pragma mark - Render Commands
@ -661,8 +721,6 @@ public:
Vector<UniformSet> sets;
bool uses_argument_buffers = true;
virtual void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) = 0;
MDShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers) :
name(p_name), sets(p_sets), uses_argument_buffers(p_uses_argument_buffers) {}
virtual ~MDShader() = default;
@ -671,15 +729,13 @@ public:
class API_AVAILABLE(macos(11.0), ios(14.0), tvos(14.0)) MDComputeShader final : public MDShader {
public:
struct {
uint32_t binding = -1;
int32_t binding = -1;
uint32_t size = 0;
} push_constants;
MTLSize local = {};
MDLibrary *kernel;
void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;
MDComputeShader(CharString p_name, Vector<UniformSet> p_sets, bool p_uses_argument_buffers, MDLibrary *p_kernel);
};
@ -700,8 +756,6 @@ public:
MDLibrary *vert;
MDLibrary *frag;
void encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) final;
MDRenderShader(CharString p_name,
Vector<UniformSet> p_sets,
bool p_needs_view_mask_buffer,

View file

@ -62,8 +62,8 @@
#undef MAX
void MDCommandBuffer::begin() {
DEV_ASSERT(commandBuffer == nil);
commandBuffer = queue.commandBuffer;
DEV_ASSERT(commandBuffer == nil && !state_begin);
state_begin = true;
}
void MDCommandBuffer::end() {
@ -83,6 +83,7 @@ void MDCommandBuffer::commit() {
end();
[commandBuffer commit];
commandBuffer = nil;
state_begin = false;
}
void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) {
@ -136,7 +137,7 @@ void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) {
render.desc.colorAttachments[0].resolveTexture = res_tex;
}
#endif
render.encoder = [commandBuffer renderCommandEncoderWithDescriptor:render.desc];
render.encoder = [command_buffer() renderCommandEncoderWithDescriptor:render.desc];
}
if (render.pipeline != rp) {
@ -160,9 +161,44 @@ void MDCommandBuffer::bind_pipeline(RDD::PipelineID p_pipeline) {
DEV_ASSERT(type == MDCommandBufferStateType::None);
type = MDCommandBufferStateType::Compute;
compute.pipeline = (MDComputePipeline *)p;
compute.encoder = commandBuffer.computeCommandEncoder;
[compute.encoder setComputePipelineState:compute.pipeline->state];
if (compute.pipeline != p) {
compute.dirty.set_flag(ComputeState::DIRTY_PIPELINE);
compute.mark_uniforms_dirty();
compute.pipeline = (MDComputePipeline *)p;
}
}
}
void MDCommandBuffer::encode_push_constant_data(RDD::ShaderID p_shader, VectorView<uint32_t> p_data) {
switch (type) {
case MDCommandBufferStateType::Render: {
MDRenderShader *shader = (MDRenderShader *)(p_shader.id);
if (shader->push_constants.vert.binding == -1 && shader->push_constants.frag.binding == -1) {
return;
}
render.push_constant_bindings[0] = shader->push_constants.vert.binding;
render.push_constant_bindings[1] = shader->push_constants.frag.binding;
void const *ptr = p_data.ptr();
render.push_constant_data_len = p_data.size() * sizeof(uint32_t);
DEV_ASSERT(render.push_constant_data_len <= sizeof(RenderState::push_constant_data));
memcpy(render.push_constant_data, ptr, render.push_constant_data_len);
render.mark_push_constants_dirty();
} break;
case MDCommandBufferStateType::Compute: {
MDComputeShader *shader = (MDComputeShader *)(p_shader.id);
if (shader->push_constants.binding == -1) {
return;
}
compute.push_constant_bindings[0] = shader->push_constants.binding;
void const *ptr = p_data.ptr();
compute.push_constant_data_len = p_data.size() * sizeof(uint32_t);
DEV_ASSERT(compute.push_constant_data_len <= sizeof(ComputeState::push_constant_data));
memcpy(compute.push_constant_data, ptr, compute.push_constant_data_len);
compute.mark_push_constants_dirty();
} break;
case MDCommandBufferStateType::Blit:
case MDCommandBufferStateType::None:
return;
}
}
@ -181,7 +217,7 @@ id<MTLBlitCommandEncoder> MDCommandBuffer::blit_command_encoder() {
}
type = MDCommandBufferStateType::Blit;
blit.encoder = commandBuffer.blitCommandEncoder;
blit.encoder = command_buffer().blitCommandEncoder;
return blit.encoder;
}
@ -200,7 +236,7 @@ void MDCommandBuffer::encodeRenderCommandEncoderWithDescriptor(MTLRenderPassDesc
break;
}
id<MTLRenderCommandEncoder> enc = [commandBuffer renderCommandEncoderWithDescriptor:p_desc];
id<MTLRenderCommandEncoder> enc = [command_buffer() renderCommandEncoderWithDescriptor:p_desc];
if (p_label != nil) {
[enc pushDebugGroup:p_label];
[enc popDebugGroup];
@ -344,6 +380,19 @@ void MDCommandBuffer::render_clear_attachments(VectorView<RDD::AttachmentClear>
void MDCommandBuffer::_render_set_dirty_state() {
_render_bind_uniform_sets();
if (render.dirty.has_flag(RenderState::DIRTY_PUSH)) {
if (render.push_constant_bindings[0] != (uint32_t)-1) {
[render.encoder setVertexBytes:render.push_constant_data
length:render.push_constant_data_len
atIndex:render.push_constant_bindings[0]];
}
if (render.push_constant_bindings[1] != (uint32_t)-1) {
[render.encoder setFragmentBytes:render.push_constant_data
length:render.push_constant_data_len
atIndex:render.push_constant_bindings[1]];
}
}
MDSubpass const &subpass = render.get_subpass();
if (subpass.view_count > 1) {
uint32_t view_range[2] = { 0, subpass.view_count };
@ -552,7 +601,7 @@ uint32_t MDCommandBuffer::_populate_vertices(simd::float4 *p_vertices, uint32_t
}
void MDCommandBuffer::render_begin_pass(RDD::RenderPassID p_render_pass, RDD::FramebufferID p_frameBuffer, RDD::CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RDD::RenderPassClearValue> p_clear_values) {
DEV_ASSERT(commandBuffer != nil);
DEV_ASSERT(command_buffer() != nil);
end();
MDRenderPass *pass = (MDRenderPass *)(p_render_pass.id);
@ -639,7 +688,7 @@ void MDCommandBuffer::_render_clear_render_area() {
}
void MDCommandBuffer::render_next_subpass() {
DEV_ASSERT(commandBuffer != nil);
DEV_ASSERT(command_buffer() != nil);
if (render.current_subpass == UINT32_MAX) {
render.current_subpass = 0;
@ -726,7 +775,7 @@ void MDCommandBuffer::render_next_subpass() {
// the defaultRasterSampleCount from the pipeline's sample count.
render.desc = desc;
} else {
render.encoder = [commandBuffer renderCommandEncoderWithDescriptor:desc];
render.encoder = [command_buffer() renderCommandEncoderWithDescriptor:desc];
if (!render.is_rendering_entire_area) {
_render_clear_render_area();
@ -895,6 +944,7 @@ void MDCommandBuffer::RenderState::reset() {
dirty = DIRTY_NONE;
uniform_sets.clear();
uniform_set_mask = 0;
push_constant_data_len = 0;
clear_values.clear();
viewports.clear();
scissors.clear();
@ -960,29 +1010,108 @@ void MDCommandBuffer::ComputeState::end_encoding() {
#pragma mark - Compute
void MDCommandBuffer::_compute_set_dirty_state() {
if (compute.dirty.has_flag(ComputeState::DIRTY_PIPELINE)) {
compute.encoder = [command_buffer() computeCommandEncoderWithDispatchType:MTLDispatchTypeConcurrent];
[compute.encoder setComputePipelineState:compute.pipeline->state];
}
_compute_bind_uniform_sets();
if (compute.dirty.has_flag(ComputeState::DIRTY_PUSH)) {
if (compute.push_constant_bindings[0] != (uint32_t)-1) {
[compute.encoder setBytes:compute.push_constant_data
length:compute.push_constant_data_len
atIndex:compute.push_constant_bindings[0]];
}
}
compute.dirty.clear();
}
void MDCommandBuffer::_compute_bind_uniform_sets() {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);
if (!compute.dirty.has_flag(ComputeState::DIRTY_UNIFORMS)) {
return;
}
compute.dirty.clear_flag(ComputeState::DIRTY_UNIFORMS);
uint64_t set_uniforms = compute.uniform_set_mask;
compute.uniform_set_mask = 0;
MDComputeShader *shader = compute.pipeline->shader;
while (set_uniforms != 0) {
// Find the index of the next set bit.
uint32_t index = (uint32_t)__builtin_ctzll(set_uniforms);
// Clear the set bit.
set_uniforms &= (set_uniforms - 1);
MDUniformSet *set = compute.uniform_sets[index];
if (set == nullptr || index >= (uint32_t)shader->sets.size()) {
continue;
}
set->bind_uniforms(shader, compute, index);
}
}
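The loop above visits only the dirty uniform sets by pairing count-trailing-zeros with the classic clear-lowest-set-bit step; a standalone sketch of the same iteration pattern:

#include <cstdint>
#include <cstdio>

int main() {
	uint64_t mask = 0b101001; // Sets 0, 3, and 5 are dirty.
	while (mask != 0) {
		uint32_t index = (uint32_t)__builtin_ctzll(mask); // Index of the lowest set bit.
		mask &= (mask - 1); // Clear that bit.
		printf("bind set %u\n", index); // Prints 0, then 3, then 5.
	}
	return 0;
}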
void MDCommandBuffer::ComputeState::reset() {
pipeline = nil;
encoder = nil;
dirty = DIRTY_NONE;
uniform_sets.clear();
uniform_set_mask = 0;
push_constant_data_len = 0;
// Keep the keys, as they are likely to be used again.
for (KeyValue<StageResourceUsage, LocalVector<__unsafe_unretained id<MTLResource>>> &kv : resource_usage) {
kv.value.clear();
}
}
void MDCommandBuffer::compute_bind_uniform_set(RDD::UniformSetID p_uniform_set, RDD::ShaderID p_shader, uint32_t p_set_index) {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);
MDShader *shader = (MDShader *)(p_shader.id);
MDUniformSet *set = (MDUniformSet *)(p_uniform_set.id);
set->bind_uniforms(shader, compute, p_set_index);
if (compute.uniform_sets.size() <= p_set_index) {
uint32_t s = compute.uniform_sets.size();
compute.uniform_sets.resize(p_set_index + 1);
// Set intermediate values to null.
std::fill(&compute.uniform_sets[s], &compute.uniform_sets[p_set_index] + 1, nullptr);
}
if (compute.uniform_sets[p_set_index] != set) {
compute.dirty.set_flag(ComputeState::DIRTY_UNIFORMS);
compute.uniform_set_mask |= 1ULL << p_set_index;
compute.uniform_sets[p_set_index] = set;
}
}
void MDCommandBuffer::compute_bind_uniform_sets(VectorView<RDD::UniformSetID> p_uniform_sets, RDD::ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);
MDShader *shader = (MDShader *)(p_shader.id);
// TODO(sgc): Bind multiple buffers using [encoder setBuffers:offsets:withRange:]
for (size_t i = 0u; i < p_set_count; ++i) {
for (size_t i = 0; i < p_set_count; ++i) {
MDUniformSet *set = (MDUniformSet *)(p_uniform_sets[i].id);
set->bind_uniforms(shader, compute, p_first_set_index + i);
uint32_t index = p_first_set_index + i;
if (compute.uniform_sets.size() <= index) {
uint32_t s = compute.uniform_sets.size();
compute.uniform_sets.resize(index + 1);
// Set intermediate values to null.
std::fill(&compute.uniform_sets[s], &compute.uniform_sets[index] + 1, nullptr);
}
if (compute.uniform_sets[index] != set) {
compute.dirty.set_flag(ComputeState::DIRTY_UNIFORMS);
compute.uniform_set_mask |= 1ULL << index;
compute.uniform_sets[index] = set;
}
}
}
void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);
_compute_set_dirty_state();
MTLRegion region = MTLRegionMake3D(0, 0, 0, p_x_groups, p_y_groups, p_z_groups);
id<MTLComputeCommandEncoder> enc = compute.encoder;
@ -992,6 +1121,8 @@ void MDCommandBuffer::compute_dispatch(uint32_t p_x_groups, uint32_t p_y_groups,
void MDCommandBuffer::compute_dispatch_indirect(RDD::BufferID p_indirect_buffer, uint64_t p_offset) {
DEV_ASSERT(type == MDCommandBufferStateType::Compute);
_compute_set_dirty_state();
id<MTLBuffer> indirectBuffer = rid::get(p_indirect_buffer);
id<MTLComputeCommandEncoder> enc = compute.encoder;
@ -1021,20 +1152,6 @@ MDComputeShader::MDComputeShader(CharString p_name,
MDShader(p_name, p_sets, p_uses_argument_buffers), kernel(p_kernel) {
}
void MDComputeShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) {
DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Compute);
if (push_constants.binding == (uint32_t)-1) {
return;
}
id<MTLComputeCommandEncoder> enc = p_cb->compute.encoder;
void const *ptr = p_data.ptr();
size_t length = p_data.size() * sizeof(uint32_t);
[enc setBytes:ptr length:length atIndex:push_constants.binding];
}
MDRenderShader::MDRenderShader(CharString p_name,
Vector<UniformSet> p_sets,
bool p_needs_view_mask_buffer,
@ -1046,22 +1163,6 @@ MDRenderShader::MDRenderShader(CharString p_name,
frag(p_frag) {
}
void MDRenderShader::encode_push_constant_data(VectorView<uint32_t> p_data, MDCommandBuffer *p_cb) {
DEV_ASSERT(p_cb->type == MDCommandBufferStateType::Render);
id<MTLRenderCommandEncoder> __unsafe_unretained enc = p_cb->render.encoder;
void const *ptr = p_data.ptr();
size_t length = p_data.size() * sizeof(uint32_t);
if (push_constants.vert.binding > -1) {
[enc setVertexBytes:ptr length:length atIndex:push_constants.vert.binding];
}
if (push_constants.frag.binding > -1) {
[enc setFragmentBytes:ptr length:length atIndex:push_constants.frag.binding];
}
}
void MDUniformSet::bind_uniforms_argument_buffers(MDShader *p_shader, MDCommandBuffer::RenderState &p_state, uint32_t p_set_index) {
DEV_ASSERT(p_shader->uses_argument_buffers);
DEV_ASSERT(p_state.encoder != nil);

View file

@ -58,6 +58,7 @@
#include "core/io/marshalls.h"
#include "core/string/ustring.h"
#include "core/templates/hash_map.h"
#include "drivers/apple/foundation_helpers.h"
#import <Metal/MTLTexture.h>
#import <Metal/Metal.h>
@ -317,12 +318,6 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p
desc.usage |= MTLTextureUsageShaderWrite;
}
if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) {
if (format_caps & kMTLFmtCapsAtomic) {
desc.usage |= MTLTextureUsageShaderAtomic;
}
}
bool can_be_attachment = flags::any(format_caps, (kMTLFmtCapsColorAtt | kMTLFmtCapsDSAtt));
if (flags::any(p_format.usage_bits, TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT) &&
@ -334,6 +329,18 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p
desc.usage |= MTLTextureUsageShaderRead;
}
if (p_format.usage_bits & TEXTURE_USAGE_STORAGE_ATOMIC_BIT) {
if (@available(macOS 14.0, iOS 17.0, tvOS 17.0, *)) {
if (format_caps & kMTLFmtCapsAtomic) {
desc.usage |= MTLTextureUsageShaderAtomic;
} else {
ERR_FAIL_V_MSG(RDD::TextureID(), "Atomic operations on this texture format are not supported.");
}
} else {
ERR_FAIL_V_MSG(RDD::TextureID(), "Atomic texture operations not supported on this OS version.");
}
}
if (p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) {
ERR_FAIL_V_MSG(RDD::TextureID(), "unsupported: TEXTURE_USAGE_VRS_ATTACHMENT_BIT");
}
@ -363,7 +370,29 @@ RDD::TextureID RenderingDeviceDriverMetal::texture_create(const TextureFormat &p
// Check if it is a linear format for atomic operations and therefore needs a buffer,
// as generally Metal does not support atomic operations on textures.
bool needs_buffer = is_linear || (p_format.array_layers == 1 && p_format.mipmaps == 1 && p_format.texture_type == TEXTURE_TYPE_2D && flags::any(p_format.usage_bits, TEXTURE_USAGE_STORAGE_BIT) && (p_format.format == DATA_FORMAT_R32_UINT || p_format.format == DATA_FORMAT_R32_SINT || p_format.format == DATA_FORMAT_R32G32_UINT || p_format.format == DATA_FORMAT_R32G32_SINT));
bool needs_buffer = is_linear;
// Check for atomic requirements.
if (flags::any(p_format.usage_bits, TEXTURE_USAGE_STORAGE_BIT) && p_format.array_layers == 1 && p_format.mipmaps == 1 && p_format.texture_type == TEXTURE_TYPE_2D) {
switch (p_format.format) {
case RenderingDeviceCommons::DATA_FORMAT_R32_SINT:
case RenderingDeviceCommons::DATA_FORMAT_R32_UINT: {
if (!device_properties->features.supports_image_atomic_32_bit) {
// We can emulate 32-bit atomic operations on textures.
needs_buffer = true;
}
} break;
case RenderingDeviceCommons::DATA_FORMAT_R32G32_SINT:
case RenderingDeviceCommons::DATA_FORMAT_R32G32_UINT: {
if (!device_properties->features.supports_image_atomic_64_bit) {
// No emulation for 64-bit atomics.
ERR_FAIL_V_MSG(TextureID(), "64-bit atomic operations are not supported.");
}
} break;
default:
break;
}
}
id<MTLTexture> obj = nil;
if (needs_buffer) {
@ -900,9 +929,15 @@ Error RenderingDeviceDriverMetal::command_queue_execute_and_present(CommandQueue
MDCommandBuffer *cmd_buffer = (MDCommandBuffer *)(p_cmd_buffers[size - 1].id);
Fence *fence = (Fence *)(p_cmd_fence.id);
if (fence != nullptr) {
[cmd_buffer->get_command_buffer() addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
dispatch_semaphore_signal(fence->semaphore);
}];
id<MTLCommandBuffer> cb = cmd_buffer->get_command_buffer();
if (cb == nil) {
// If there is nothing to do, signal the fence immediately.
dispatch_semaphore_signal(fence->semaphore);
} else {
[cb addCompletedHandler:^(id<MTLCommandBuffer> buffer) {
dispatch_semaphore_signal(fence->semaphore);
}];
}
}
for (uint32_t i = 0; i < p_swap_chains.size(); i++) {
@ -1730,8 +1765,7 @@ void RenderingDeviceDriverMetal::pipeline_free(PipelineID p_pipeline_id) {
void RenderingDeviceDriverMetal::command_bind_push_constants(CommandBufferID p_cmd_buffer, ShaderID p_shader, uint32_t p_dst_first_index, VectorView<uint32_t> p_data) {
MDCommandBuffer *cb = (MDCommandBuffer *)(p_cmd_buffer.id);
MDShader *shader = (MDShader *)(p_shader.id);
shader->encode_push_constant_data(p_data, cb);
cb->encode_push_constant_data(p_shader, p_data);
}
// ----- CACHE -----
@ -2417,6 +2451,7 @@ RDD::PipelineID RenderingDeviceDriverMetal::compute_pipeline_create(ShaderID p_s
MTLComputePipelineDescriptor *desc = [MTLComputePipelineDescriptor new];
desc.computeFunction = function;
desc.label = conv::to_nsstring(shader->name);
if (archive) {
desc.binaryArchives = @[ archive ];
}
@ -2735,6 +2770,8 @@ bool RenderingDeviceDriverMetal::has_feature(Features p_feature) {
return device_properties->features.metal_fx_spatial;
case SUPPORTS_METALFX_TEMPORAL:
return device_properties->features.metal_fx_temporal;
case SUPPORTS_IMAGE_ATOMIC_32_BIT:
return device_properties->features.supports_image_atomic_32_bit;
default:
return false;
}

View file

@ -199,6 +199,8 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<RenderingD
set_indexes[set_indexes_size - 1] = offset;
}
CompilerMSL::Options msl_options{};
// MAJOR * 10000 + MINOR * 100
uint32_t msl_version = CompilerMSL::Options::make_msl_version(device_profile->features.mslVersionMajor, device_profile->features.mslVersionMinor);
msl_options.set_msl_version(device_profile->features.mslVersionMajor, device_profile->features.mslVersionMinor);
mtl_reflection_data.msl_version = msl_options.msl_version;
msl_options.platform = device_profile->platform == MetalDeviceProfile::Platform::macOS ? CompilerMSL::Options::macOS : CompilerMSL::Options::iOS;
@ -209,7 +211,7 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<RenderingD
}
bool disable_argument_buffers = false;
if (String v = OS::get_singleton()->get_environment(U"GODOT_DISABLE_ARGUMENT_BUFFERS"); v == U"1") {
if (String v = OS::get_singleton()->get_environment("GODOT_MTL_DISABLE_ARGUMENT_BUFFERS"); v == "1") {
disable_argument_buffers = true;
}
@ -236,6 +238,10 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<RenderingD
msl_options.multiview_layered_rendering = true;
msl_options.view_mask_buffer_index = VIEW_MASK_BUFFER_INDEX;
}
if (msl_version >= CompilerMSL::Options::make_msl_version(3, 2)) {
// All 3.2+ versions support device coherence, so we can disable texture fences.
msl_options.readwrite_texture_fences = false;
}
CompilerGLSL::Options options{};
options.vertex.flip_vert_y = true;
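For reference, SPIRV-Cross packs MSL versions as the comment above notes (MAJOR * 10000 + MINOR * 100), so the readwrite_texture_fences check compares against make_msl_version(3, 2) == 30200; a one-line sanity check under that assumption:

static_assert(3 * 10000 + 2 * 100 == 30200, "MSL 3.2 packs to 30200");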
@ -417,6 +423,10 @@ bool RenderingShaderContainerMetal::_set_code_from_spirv(const Vector<RenderingD
// VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER
primary.texture_type = MTLTextureTypeTextureBuffer;
} break;
case spv::DimTileImageDataEXT: {
// Godot does not use this extension.
// See: https://registry.khronos.org/vulkan/specs/latest/man/html/VK_EXT_shader_tile_image.html
} break;
case spv::DimMax: {
// Add all enumerations to silence the compiler warning
// and generate future warnings, should a new one be added.

View file

@ -5894,6 +5894,13 @@ bool RenderingDeviceDriverVulkan::has_feature(Features p_feature) {
return true;
case SUPPORTS_BUFFER_DEVICE_ADDRESS:
return buffer_device_address_support;
case SUPPORTS_IMAGE_ATOMIC_32_BIT:
#if (defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED))
// MoltenVK has previously had issues with 32-bit atomics on images.
return false;
#else
return true;
#endif
default:
return false;
}

View file

@ -417,6 +417,7 @@ void Fog::VolumetricFog::init(const Vector3i &fog_size, RID p_sky_shader) {
width = fog_size.x;
height = fog_size.y;
depth = fog_size.z;
atomic_type = RD::get_singleton()->has_feature(RD::SUPPORTS_IMAGE_ATOMIC_32_BIT) ? RD::UNIFORM_TYPE_IMAGE : RD::UNIFORM_TYPE_STORAGE_BUFFER;
RD::TextureFormat tf;
tf.format = RD::DATA_FORMAT_R16G16B16A16_SFLOAT;
@ -440,29 +441,29 @@ void Fog::VolumetricFog::init(const Vector3i &fog_size, RID p_sky_shader) {
fog_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
RD::get_singleton()->set_resource_name(fog_map, "Fog map");
#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
Vector<uint8_t> dm;
dm.resize_initialized(fog_size.x * fog_size.y * fog_size.z * 4);
density_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm);
RD::get_singleton()->set_resource_name(density_map, "Fog density map");
light_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm);
RD::get_singleton()->set_resource_name(light_map, "Fog light map");
emissive_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm);
RD::get_singleton()->set_resource_name(emissive_map, "Fog emissive map");
#else
tf.format = RD::DATA_FORMAT_R32_UINT;
tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT;
density_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
RD::get_singleton()->set_resource_name(density_map, "Fog density map");
RD::get_singleton()->texture_clear(density_map, Color(0, 0, 0, 0), 0, 1, 0, 1);
light_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
RD::get_singleton()->set_resource_name(light_map, "Fog light map");
RD::get_singleton()->texture_clear(light_map, Color(0, 0, 0, 0), 0, 1, 0, 1);
emissive_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
RD::get_singleton()->set_resource_name(emissive_map, "Fog emissive map");
RD::get_singleton()->texture_clear(emissive_map, Color(0, 0, 0, 0), 0, 1, 0, 1);
#endif
if (atomic_type == RD::UNIFORM_TYPE_STORAGE_BUFFER) {
Vector<uint8_t> dm;
dm.resize_initialized(fog_size.x * fog_size.y * fog_size.z * 4);
density_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm);
RD::get_singleton()->set_resource_name(density_map, "Fog density map");
light_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm);
RD::get_singleton()->set_resource_name(light_map, "Fog light map");
emissive_map = RD::get_singleton()->storage_buffer_create(dm.size(), dm);
RD::get_singleton()->set_resource_name(emissive_map, "Fog emissive map");
} else {
tf.format = RD::DATA_FORMAT_R32_UINT;
tf.usage_bits = RD::TEXTURE_USAGE_STORAGE_BIT | RD::TEXTURE_USAGE_CAN_COPY_TO_BIT | RD::TEXTURE_USAGE_STORAGE_ATOMIC_BIT;
density_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
RD::get_singleton()->set_resource_name(density_map, "Fog density map");
RD::get_singleton()->texture_clear(density_map, Color(0, 0, 0, 0), 0, 1, 0, 1);
light_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
RD::get_singleton()->set_resource_name(light_map, "Fog light map");
RD::get_singleton()->texture_clear(light_map, Color(0, 0, 0, 0), 0, 1, 0, 1);
emissive_map = RD::get_singleton()->texture_create(tf, RD::TextureView());
RD::get_singleton()->set_resource_name(emissive_map, "Fog emissive map");
RD::get_singleton()->texture_clear(emissive_map, Color(0, 0, 0, 0), 0, 1, 0, 1);
}
Vector<RD::Uniform> uniforms;
{
@ -579,11 +580,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P
{
RD::Uniform u;
#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
#else
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
#endif
u.uniform_type = fog->atomic_type;
u.binding = 1;
u.append_id(fog->emissive_map);
uniforms.push_back(u);
@ -599,11 +596,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P
{
RD::Uniform u;
#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
#else
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
#endif
u.uniform_type = fog->atomic_type;
u.binding = 3;
u.append_id(fog->density_map);
uniforms.push_back(u);
@ -611,11 +604,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P
{
RD::Uniform u;
#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
#else
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
#endif
u.uniform_type = fog->atomic_type;
u.binding = 4;
u.append_id(fog->light_map);
uniforms.push_back(u);
@ -918,22 +907,14 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P
}
{
RD::Uniform u;
#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
#else
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
#endif
u.uniform_type = fog->atomic_type;
u.binding = 16;
u.append_id(fog->density_map);
uniforms.push_back(u);
}
{
RD::Uniform u;
#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
#else
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
#endif
u.uniform_type = fog->atomic_type;
u.binding = 17;
u.append_id(fog->light_map);
uniforms.push_back(u);
@ -941,11 +922,7 @@ void Fog::volumetric_fog_update(const VolumetricFogSettings &p_settings, const P
{
RD::Uniform u;
#if defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED)
u.uniform_type = RD::UNIFORM_TYPE_STORAGE_BUFFER;
#else
u.uniform_type = RD::UNIFORM_TYPE_IMAGE;
#endif
u.uniform_type = fog->atomic_type;
u.binding = 18;
u.append_id(fog->emissive_map);
uniforms.push_back(u);

View file

@ -316,6 +316,9 @@ public:
int last_shadow_filter = -1;
// If the device doesn't support image atomics, use storage buffers instead.
RD::UniformType atomic_type = RD::UNIFORM_TYPE_IMAGE;
virtual void configure(RenderSceneBuffersRD *p_render_buffers) override {}
virtual void free_data() override {}

View file

@ -234,11 +234,13 @@ void ShaderRD::_build_variant_code(StringBuilder &builder, uint32_t p_variant, c
builder.append(String("#define ") + String(E.key) + "_CODE_USED\n");
}
#if (defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED))
if (RD::get_singleton()->get_device_capabilities().device_family == RDD::DEVICE_VULKAN) {
RenderingDevice *rd = RD::get_singleton();
if (rd->get_device_capabilities().device_family == RDD::DEVICE_VULKAN) {
builder.append("#define MOLTENVK_USED\n");
}
// Image atomics are supported since Metal 3.1, but MoltenVK and SPIRV-Cross do not support them yet.
builder.append("#define NO_IMAGE_ATOMICS\n");
if (!rd->has_feature(RD::SUPPORTS_IMAGE_ATOMIC_32_BIT)) {
builder.append("#define NO_IMAGE_ATOMICS\n");
}
#endif
builder.append(String("#define RENDER_DRIVER_") + OS::get_singleton()->get_current_rendering_driver_name().to_upper() + "\n");

View file

@ -2,6 +2,8 @@
#version 450
#pragma use_vulkan_memory_model
#VERSION_DEFINES
layout(local_size_x = 4, local_size_y = 4, local_size_z = 4) in;

View file

@ -2,6 +2,8 @@
#version 450
#pragma use_vulkan_memory_model
#VERSION_DEFINES
#ifdef MODE_DENSITY

View file

@ -7979,6 +7979,7 @@ void RenderingDevice::_bind_methods() {
BIND_ENUM_CONSTANT(SUPPORTS_METALFX_SPATIAL);
BIND_ENUM_CONSTANT(SUPPORTS_METALFX_TEMPORAL);
BIND_ENUM_CONSTANT(SUPPORTS_BUFFER_DEVICE_ADDRESS);
BIND_ENUM_CONSTANT(SUPPORTS_IMAGE_ATOMIC_32_BIT);
BIND_ENUM_CONSTANT(LIMIT_MAX_BOUND_UNIFORM_SETS);
BIND_ENUM_CONSTANT(LIMIT_MAX_FRAMEBUFFER_COLOR_ATTACHMENTS);

View file

@ -952,6 +952,7 @@ public:
// If not supported, a fragment shader with only side effects (i.e., writes to buffers, but doesn't output to attachments), may be optimized down to no-op by the GPU driver.
SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS,
SUPPORTS_BUFFER_DEVICE_ADDRESS,
SUPPORTS_IMAGE_ATOMIC_32_BIT,
};
enum SubgroupOperations {

View file

@ -978,7 +978,7 @@ Its version and license is described in this file under `hidapi`.
## spirv-cross
- Upstream: https://github.com/KhronosGroup/SPIRV-Cross
- Version: git (6173e24b31f09a0c3217103a130e74c4ddec14a6, 2024)
- Version: git (d7440cbc6c50332600fdf21c45e6a5df0b07e54c, 2025)
- License: Apache 2.0
Files extracted from upstream source:

File diff suppressed because it is too large.

View file

@ -580,7 +580,10 @@ struct SPIRType : IVariant
Interpolant,
Char,
// MSL-specific type used by the 'object' shader (the MSL analog of GLSL's 'task' shader).
MeshGridProperties
MeshGridProperties,
BFloat16,
FloatE4M3,
FloatE5M2
};
// Scalar/vector/matrix support.
@ -605,6 +608,14 @@ struct SPIRType : IVariant
bool pointer = false;
bool forward_pointer = false;
struct
{
uint32_t use_id = 0;
uint32_t rows_id = 0;
uint32_t columns_id = 0;
uint32_t scope_id = 0;
} cooperative;
spv::StorageClass storage = spv::StorageClassGeneric;
SmallVector<TypeID> member_types;
@ -686,6 +697,7 @@ struct SPIREntryPoint
FunctionID self = 0;
std::string name;
std::string orig_name;
std::unordered_map<uint32_t, uint32_t> fp_fast_math_defaults;
SmallVector<VariableID> interface_variables;
Bitset flags;
@ -1026,6 +1038,9 @@ struct SPIRFunction : IVariant
// consider arrays value types.
SmallVector<ID> constant_arrays_needed_on_stack;
// Does this function (or any function called by it), emit geometry?
bool emits_geometry = false;
bool active = false;
bool flush_undeclared = true;
bool do_combined_parameters = true;
@ -1226,6 +1241,26 @@ struct SPIRConstant : IVariant
return u.f32;
}
static inline float fe4m3_to_f32(uint8_t v)
{
if ((v & 0x7f) == 0x7f)
{
union
{
float f32;
uint32_t u32;
} u;
u.u32 = (v & 0x80) ? 0xffffffffu : 0x7fffffffu;
return u.f32;
}
else
{
// Reuse the FP16 to FP32 code. Cute bit-hackery.
return f16_to_f32((int16_t(int8_t(v)) << 7) & (0xffff ^ 0x4000)) * 256.0f;
}
}
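The bit trick above rebases E4M3 (bias 7) onto the FP16 decoder (bias 15): sign-extending and shifting left by 7 drops sign, exponent, and mantissa into the half-precision layout, the mask clears the sign copy smeared into the exponent field, and the final * 256.0f covers the 2^8 bias difference. A plain reference decoder for cross-checking, assuming OCP E4M3 semantics (not part of SPIRV-Cross):

#include <cmath>
#include <cstdint>
#include <cstdio>

// Decodes one FP8 E4M3 value: 1 sign bit, 4 exponent bits (bias 7), 3 mantissa bits.
static float fe4m3_reference(uint8_t v)
{
	if ((v & 0x7f) == 0x7f)
		return NAN; // E4M3 has no infinities; 0x7f and 0xff encode NaN.
	int s = (v >> 7) & 1;
	int e = (v >> 3) & 0xf;
	int m = v & 0x7;
	float mag = (e == 0) ? std::ldexp(m / 8.0f, -6) // Subnormal: 0.mmm * 2^-6.
						 : std::ldexp(1.0f + m / 8.0f, e - 7); // Normal: 1.mmm * 2^(e-7).
	return s ? -mag : mag;
}

int main()
{
	printf("%f\n", fe4m3_reference(0x48)); // 0 1001 000 -> 1.0 * 2^2 = 4.000000
	return 0;
}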
inline uint32_t specialization_constant_id(uint32_t col, uint32_t row) const
{
return m.c[col].id[row];
@ -1266,6 +1301,24 @@ struct SPIRConstant : IVariant
return f16_to_f32(scalar_u16(col, row));
}
inline float scalar_bf16(uint32_t col = 0, uint32_t row = 0) const
{
uint32_t v = scalar_u16(col, row) << 16;
float fp32;
memcpy(&fp32, &v, sizeof(float));
return fp32;
}
inline float scalar_floate4m3(uint32_t col = 0, uint32_t row = 0) const
{
return fe4m3_to_f32(scalar_u8(col, row));
}
inline float scalar_bf8(uint32_t col = 0, uint32_t row = 0) const
{
return f16_to_f32(scalar_u8(col, row) << 8);
}
inline float scalar_f32(uint32_t col = 0, uint32_t row = 0) const
{
return m.c[col].r[row].f32;
@ -1336,9 +1389,10 @@ struct SPIRConstant : IVariant
SPIRConstant() = default;
SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized)
SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized, bool replicated_ = false)
: constant_type(constant_type_)
, specialization(specialized)
, replicated(replicated_)
{
subconstants.reserve(num_elements);
for (uint32_t i = 0; i < num_elements; i++)
@ -1410,9 +1464,16 @@ struct SPIRConstant : IVariant
// If true, this is a LUT, and should always be declared in the outer scope.
bool is_used_as_lut = false;
// If this is a null constant of array type with specialized length.
// May require special handling in initializer
bool is_null_array_specialized_length = false;
// For composites which are constant arrays, etc.
SmallVector<ConstantID> subconstants;
// Whether the subconstants are intended to be replicated (e.g. OpConstantCompositeReplicateEXT)
bool replicated = false;
// Non-Vulkan GLSL, HLSL and sometimes MSL emits defines for each specialization constant,
// and uses them to initialize the constant. This allows the user
// to still be able to specialize the value by supplying corresponding
@ -1708,6 +1769,7 @@ struct Meta
uint32_t spec_id = 0;
uint32_t index = 0;
spv::FPRoundingMode fp_rounding_mode = spv::FPRoundingModeMax;
spv::FPFastMathModeMask fp_fast_math_mode = spv::FPFastMathModeMaskNone;
bool builtin = false;
bool qualified_alias_explicit_override = false;

View file

@ -82,7 +82,7 @@ bool Compiler::variable_storage_is_aliased(const SPIRVariable &v)
ir.meta[type.self].decoration.decoration_flags.get(DecorationBufferBlock);
bool image = type.basetype == SPIRType::Image;
bool counter = type.basetype == SPIRType::AtomicCounter;
bool buffer_reference = type.storage == StorageClassPhysicalStorageBufferEXT;
bool buffer_reference = type.storage == StorageClassPhysicalStorageBuffer;
bool is_restrict;
if (ssbo)
@ -171,6 +171,7 @@ bool Compiler::block_is_control_dependent(const SPIRBlock &block)
case OpGroupNonUniformLogicalXor:
case OpGroupNonUniformQuadBroadcast:
case OpGroupNonUniformQuadSwap:
case OpGroupNonUniformRotateKHR:
// Control barriers
case OpControlBarrier:
@ -210,6 +211,7 @@ bool Compiler::block_is_pure(const SPIRBlock &block)
case OpCopyMemory:
case OpStore:
case OpCooperativeMatrixStoreKHR:
{
auto &type = expression_type(ops[0]);
if (type.storage != StorageClassFunction)
@ -370,6 +372,7 @@ void Compiler::register_global_read_dependencies(const SPIRBlock &block, uint32_
}
case OpLoad:
case OpCooperativeMatrixLoadKHR:
case OpImageRead:
{
// If we're in a storage class which does not get invalidated, adding dependencies here is no big deal.
@ -481,7 +484,7 @@ void Compiler::register_write(uint32_t chain)
}
}
if (type.storage == StorageClassPhysicalStorageBufferEXT || variable_storage_is_aliased(*var))
if (type.storage == StorageClassPhysicalStorageBuffer || variable_storage_is_aliased(*var))
flush_all_aliased_variables();
else if (var)
flush_dependees(*var);
@ -587,6 +590,7 @@ const SPIRType &Compiler::expression_type(uint32_t id) const
bool Compiler::expression_is_lvalue(uint32_t id) const
{
auto &type = expression_type(id);
switch (type.basetype)
{
case SPIRType::SampledImage:
@ -818,6 +822,7 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t
case OpAtomicStore:
case OpStore:
case OpCooperativeMatrixStoreKHR:
// Invalid SPIR-V.
if (length < 1)
return false;
@ -910,6 +915,7 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t
case OpInBoundsAccessChain:
case OpPtrAccessChain:
case OpLoad:
case OpCooperativeMatrixLoadKHR:
case OpCopyObject:
case OpImageTexelPointer:
case OpAtomicLoad:
@ -2364,6 +2370,10 @@ void Compiler::set_execution_mode(ExecutionMode mode, uint32_t arg0, uint32_t ar
execution.output_primitives = arg0;
break;
case ExecutionModeFPFastMathDefault:
execution.fp_fast_math_defaults[arg0] = arg1;
break;
default:
break;
}
@ -3461,6 +3471,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
switch (op)
{
case OpStore:
case OpCooperativeMatrixStoreKHR:
{
if (length < 2)
return false;
@ -3581,6 +3592,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3
}
case OpLoad:
case OpCooperativeMatrixLoadKHR:
{
if (length < 3)
return false;
@ -3800,6 +3812,7 @@ bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t
switch (op)
{
case OpStore:
case OpCooperativeMatrixStoreKHR:
if (length < 2)
return false;
if (args[0] == variable_id)
@ -3810,6 +3823,7 @@ bool Compiler::StaticExpressionAccessHandler::handle(spv::Op op, const uint32_t
break;
case OpLoad:
case OpCooperativeMatrixLoadKHR:
if (length < 3)
return false;
if (args[2] == variable_id && static_expression == 0) // Tried to read from variable before it was initialized.
@ -4285,6 +4299,7 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint
switch (op.op)
{
case OpStore:
case OpCooperativeMatrixStoreKHR:
case OpCopyMemory:
if (ops[0] == var)
return false;
@ -4323,6 +4338,7 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint
case OpCopyObject:
case OpLoad:
case OpCooperativeMatrixLoadKHR:
if (ops[2] == var)
return true;
break;
@ -4350,6 +4366,39 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint
return true;
}
bool Compiler::GeometryEmitDisocveryHandler::handle(spv::Op opcode, const uint32_t *, uint32_t)
{
if (opcode == OpEmitVertex || opcode == OpEndPrimitive)
{
for (auto *func : function_stack)
func->emits_geometry = true;
}
return true;
}
bool Compiler::GeometryEmitDisocveryHandler::begin_function_scope(const uint32_t *stream, uint32_t)
{
auto &callee = compiler.get<SPIRFunction>(stream[2]);
function_stack.push_back(&callee);
return true;
}
bool Compiler::GeometryEmitDisocveryHandler::end_function_scope([[maybe_unused]] const uint32_t *stream, uint32_t)
{
assert(function_stack.back() == &compiler.get<SPIRFunction>(stream[2]));
function_stack.pop_back();
return true;
}
void Compiler::discover_geometry_emitters()
{
GeometryEmitDisocveryHandler handler(*this);
traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
}
Bitset Compiler::get_buffer_block_flags(VariableID id) const
{
return ir.get_buffer_block_flags(get<SPIRVariable>(id));
@ -4462,6 +4511,7 @@ bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args
switch (opcode)
{
case OpStore:
case OpCooperativeMatrixStoreKHR:
if (length < 1)
return false;
@ -4478,6 +4528,7 @@ bool Compiler::ActiveBuiltinHandler::handle(spv::Op opcode, const uint32_t *args
case OpCopyObject:
case OpLoad:
case OpCooperativeMatrixLoadKHR:
if (length < 3)
return false;
@ -4910,13 +4961,16 @@ void Compiler::make_constant_null(uint32_t id, uint32_t type)
uint32_t parent_id = ir.increase_bound_by(1);
make_constant_null(parent_id, constant_type.parent_type);
if (!constant_type.array_size_literal.back())
SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal.");
SmallVector<uint32_t> elements(constant_type.array.back());
for (uint32_t i = 0; i < constant_type.array.back(); i++)
// The array size of OpConstantNull can be either literal or specialization constant.
// In the latter case, we cannot take the value as-is, as it can be changed to anything.
// Rather, we assume it to be *one* for the sake of initializer.
bool is_literal_array_size = constant_type.array_size_literal.back();
uint32_t count = is_literal_array_size ? constant_type.array.back() : 1;
SmallVector<uint32_t> elements(count);
for (uint32_t i = 0; i < count; i++)
elements[i] = parent_id;
set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false);
auto &constant = set<SPIRConstant>(id, type, elements.data(), uint32_t(elements.size()), false);
constant.is_null_array_specialized_length = !is_literal_array_size;
}
else if (!constant_type.member_types.empty())
{
@ -5177,7 +5231,7 @@ bool Compiler::PhysicalStorageBufferPointerHandler::type_is_bda_block_entry(uint
uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_minimum_scalar_alignment(const SPIRType &type) const
{
if (type.storage == spv::StorageClassPhysicalStorageBufferEXT)
if (type.storage == spv::StorageClassPhysicalStorageBuffer)
return 8;
else if (type.basetype == SPIRType::Struct)
{
@ -5252,6 +5306,13 @@ bool Compiler::PhysicalStorageBufferPointerHandler::handle(Op op, const uint32_t
break;
}
case OpCooperativeMatrixLoadKHR:
case OpCooperativeMatrixStoreKHR:
{
// TODO: Can we meaningfully deal with this?
break;
}
default:
break;
}
@ -5274,6 +5335,10 @@ uint32_t Compiler::PhysicalStorageBufferPointerHandler::get_base_non_block_type_
void Compiler::PhysicalStorageBufferPointerHandler::analyze_non_block_types_from_block(const SPIRType &type)
{
if (analyzed_type_ids.count(type.self))
return;
analyzed_type_ids.insert(type.self);
for (auto &member : type.member_types)
{
auto &subtype = compiler.get<SPIRType>(member);
@ -5407,6 +5472,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
switch (opcode)
{
case OpLoad:
case OpCooperativeMatrixLoadKHR:
{
if (length < 3)
return false;
@ -5484,6 +5550,7 @@ bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_
case OpStore:
case OpImageWrite:
case OpAtomicStore:
case OpCooperativeMatrixStoreKHR:
{
if (length < 1)
return false;

View file

@ -1054,6 +1054,7 @@ protected:
std::unordered_set<uint32_t> non_block_types;
std::unordered_map<uint32_t, PhysicalBlockMeta> physical_block_type_meta;
std::unordered_map<uint32_t, PhysicalBlockMeta *> access_chain_to_physical_block;
std::unordered_set<uint32_t> analyzed_type_ids;
void mark_aligned_access(uint32_t id, const uint32_t *args, uint32_t length);
PhysicalBlockMeta *find_block_meta(uint32_t id) const;
@ -1072,6 +1073,22 @@ protected:
bool single_function);
bool may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var);
struct GeometryEmitDisocveryHandler : OpcodeHandler
{
explicit GeometryEmitDisocveryHandler(Compiler &compiler_)
: compiler(compiler_)
{
}
Compiler &compiler;
bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override;
bool begin_function_scope(const uint32_t *, uint32_t) override;
bool end_function_scope(const uint32_t *, uint32_t) override;
SmallVector<SPIRFunction *> function_stack;
};
void discover_geometry_emitters();
// Finds all resources that are written to from inside the critical section, if present.
// The critical section is delimited by OpBeginInvocationInterlockEXT and
// OpEndInvocationInterlockEXT instructions. In MSL and HLSL, any resources written

View file

@ -452,6 +452,10 @@ void ParsedIR::set_decoration(ID id, Decoration decoration, uint32_t argument)
dec.fp_rounding_mode = static_cast<FPRoundingMode>(argument);
break;
case DecorationFPFastMathMode:
dec.fp_fast_math_mode = static_cast<FPFastMathModeMask>(argument);
break;
default:
break;
}
@ -643,6 +647,8 @@ uint32_t ParsedIR::get_decoration(ID id, Decoration decoration) const
return dec.index;
case DecorationFPRoundingMode:
return dec.fp_rounding_mode;
case DecorationFPFastMathMode:
return dec.fp_fast_math_mode;
default:
return 1;
}
@ -730,6 +736,10 @@ void ParsedIR::unset_decoration(ID id, Decoration decoration)
dec.fp_rounding_mode = FPRoundingModeMax;
break;
case DecorationFPFastMathMode:
dec.fp_fast_math_mode = FPFastMathModeMaskNone;
break;
case DecorationHlslCounterBufferGOOGLE:
{
auto &counter = meta[id].hlsl_magic_counter_buffer;
@ -1050,16 +1060,21 @@ void ParsedIR::make_constant_null(uint32_t id, uint32_t type, bool add_to_typed_
uint32_t parent_id = increase_bound_by(1);
make_constant_null(parent_id, constant_type.parent_type, add_to_typed_id_set);
if (!constant_type.array_size_literal.back())
SPIRV_CROSS_THROW("Array size of OpConstantNull must be a literal.");
// The array size of OpConstantNull can be either literal or specialization constant.
// In the latter case, we cannot take the value as-is, as it can be changed to anything.
// Rather, we assume it to be *one* for the sake of initializer.
bool is_literal_array_size = constant_type.array_size_literal.back();
uint32_t count = is_literal_array_size ? constant_type.array.back() : 1;
SmallVector<uint32_t> elements(constant_type.array.back());
for (uint32_t i = 0; i < constant_type.array.back(); i++)
SmallVector<uint32_t> elements(count);
for (uint32_t i = 0; i < count; i++)
elements[i] = parent_id;
if (add_to_typed_id_set)
add_typed_id(TypeConstant, id);
variant_set<SPIRConstant>(ids[id], type, elements.data(), uint32_t(elements.size()), false).self = id;
auto& constant = variant_set<SPIRConstant>(ids[id], type, elements.data(), uint32_t(elements.size()), false);
constant.self = id;
constant.is_null_array_specialized_length = !is_literal_array_size;
}
else if (!constant_type.member_types.empty())
{

View file

@ -1,77 +0,0 @@
/*
* Copyright 2015-2021 Arm Limited
* SPDX-License-Identifier: Apache-2.0 OR MIT
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* At your option, you may choose to accept this material under either:
* 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
* 2. The MIT License, found at <http://opensource.org/licenses/MIT>.
*/
#include "spirv_cross_util.hpp"
#include "spirv_common.hpp"
using namespace spv;
using namespace SPIRV_CROSS_NAMESPACE;
namespace spirv_cross_util
{
void rename_interface_variable(Compiler &compiler, const SmallVector<Resource> &resources, uint32_t location,
const std::string &name)
{
for (auto &v : resources)
{
if (!compiler.has_decoration(v.id, spv::DecorationLocation))
continue;
auto loc = compiler.get_decoration(v.id, spv::DecorationLocation);
if (loc != location)
continue;
auto &type = compiler.get_type(v.base_type_id);
// This is more of a friendly variant. If we need to rename interface variables, we might have to rename
// structs as well and make sure all the names match up.
if (type.basetype == SPIRType::Struct)
{
compiler.set_name(v.base_type_id, join("SPIRV_Cross_Interface_Location", location));
for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
compiler.set_member_name(v.base_type_id, i, join("InterfaceMember", i));
}
compiler.set_name(v.id, name);
}
}
void inherit_combined_sampler_bindings(Compiler &compiler)
{
auto &samplers = compiler.get_combined_image_samplers();
for (auto &s : samplers)
{
if (compiler.has_decoration(s.image_id, spv::DecorationDescriptorSet))
{
uint32_t set = compiler.get_decoration(s.image_id, spv::DecorationDescriptorSet);
compiler.set_decoration(s.combined_id, spv::DecorationDescriptorSet, set);
}
if (compiler.has_decoration(s.image_id, spv::DecorationBinding))
{
uint32_t binding = compiler.get_decoration(s.image_id, spv::DecorationBinding);
compiler.set_decoration(s.combined_id, spv::DecorationBinding, binding);
}
}
}
} // namespace spirv_cross_util

View file

@@ -1,37 +0,0 @@
/*
* Copyright 2015-2021 Arm Limited
* SPDX-License-Identifier: Apache-2.0 OR MIT
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/*
* At your option, you may choose to accept this material under either:
* 1. The Apache License, Version 2.0, found at <http://www.apache.org/licenses/LICENSE-2.0>, or
* 2. The MIT License, found at <http://opensource.org/licenses/MIT>.
*/
#ifndef SPIRV_CROSS_UTIL_HPP
#define SPIRV_CROSS_UTIL_HPP
#include "spirv_cross.hpp"
namespace spirv_cross_util
{
void rename_interface_variable(SPIRV_CROSS_NAMESPACE::Compiler &compiler,
const SPIRV_CROSS_NAMESPACE::SmallVector<SPIRV_CROSS_NAMESPACE::Resource> &resources,
uint32_t location, const std::string &name);
void inherit_combined_sampler_bindings(SPIRV_CROSS_NAMESPACE::Compiler &compiler);
} // namespace spirv_cross_util
#endif

File diff suppressed because it is too large

View file

@@ -297,6 +297,9 @@ public:
float_formatter = formatter;
}
// Returns the macro name corresponding to a constant ID (see the sketch after this file's diff).
std::string constant_value_macro_name(uint32_t id) const;
protected:
struct ShaderSubgroupSupportHelper
{
@@ -450,6 +453,7 @@ protected:
virtual std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0);
virtual bool variable_decl_is_remapped_storage(const SPIRVariable &var, spv::StorageClass storage) const;
virtual std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id);
virtual void emit_workgroup_initialization(const SPIRVariable &var);
struct TextureFunctionBaseArguments
{
@@ -622,6 +626,8 @@ protected:
const char *uint16_t_literal_suffix = "us";
const char *nonuniform_qualifier = "nonuniformEXT";
const char *boolean_mix_function = "mix";
const char *printf_function = "debugPrintfEXT";
std::string constant_null_initializer = "";
SPIRType::BaseType boolean_in_struct_remapped_type = SPIRType::Boolean;
bool swizzle_is_function = false;
bool shared_is_implied = false;
@@ -629,6 +635,7 @@ protected:
bool explicit_struct_type = false;
bool use_initializer_list = false;
bool use_typed_initializer_list = false;
bool requires_matching_array_initializer = false;
bool can_declare_struct_inline = true;
bool can_declare_arrays_inline = true;
bool native_row_major_matrix = true;
@@ -679,7 +686,6 @@ protected:
const SmallVector<uint32_t> &indices);
void emit_block_chain(SPIRBlock &block);
void emit_hoisted_temporaries(SmallVector<std::pair<TypeID, ID>> &temporaries);
std::string constant_value_macro_name(uint32_t id);
int get_constant_mapping_to_workgroup_component(const SPIRConstant &constant) const;
void emit_constant(const SPIRConstant &constant);
void emit_specialization_constant_op(const SPIRConstantOp &constant);
@@ -695,6 +701,7 @@ protected:
void emit_variable_temporary_copies(const SPIRVariable &var);
bool should_dereference(uint32_t id);
bool should_dereference_caller_param(uint32_t id);
bool should_forward(uint32_t id) const;
bool should_suppress_usage_tracking(uint32_t id) const;
void emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, uint32_t right, uint32_t lerp);
@@ -762,7 +769,7 @@ protected:
spv::StorageClass get_expression_effective_storage_class(uint32_t ptr);
virtual bool access_chain_needs_stage_io_builtin_translation(uint32_t base);
virtual void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type);
virtual bool check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type);
virtual bool prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type,
spv::StorageClass storage, bool &is_packed);
@@ -792,8 +799,9 @@ protected:
std::string declare_temporary(uint32_t type, uint32_t id);
void emit_uninitialized_temporary(uint32_t type, uint32_t id);
SPIRExpression &emit_uninitialized_temporary_expression(uint32_t type, uint32_t id);
void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<std::string> &arglist);
virtual void append_global_func_args(const SPIRFunction &func, uint32_t index, SmallVector<std::string> &arglist);
std::string to_non_uniform_aware_expression(uint32_t id);
std::string to_atomic_ptr_expression(uint32_t id);
std::string to_expression(uint32_t id, bool register_expression_read = true);
std::string to_composite_constructor_expression(const SPIRType &parent_type, uint32_t id, bool block_like_type);
std::string to_rerolled_array_expression(const SPIRType &parent_type, const std::string &expr, const SPIRType &type);
@@ -1009,6 +1017,8 @@ protected:
const Instruction *get_next_instruction_in_block(const Instruction &instr);
static uint32_t mask_relevant_memory_semantics(uint32_t semantics);
std::string convert_floate4m3_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
std::string convert_floate5m2_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
std::string convert_half_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
std::string convert_float_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
std::string convert_double_to_string(const SPIRConstant &value, uint32_t col, uint32_t row);
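constant_value_macro_name, promoted to the public interface above, maps a specialization constant to the macro the GLSL backend emits for it. A hedged sketch of that naming scheme (the backend emits SPIRV_CROSS_CONSTANT_ID_<n>-style macros; the helper below is illustrative, not the library API):

#include <cstdint>
#include <string>

// Illustrative only: the macro name a spec constant resolves to when it is
// emitted as a #define rather than a function constant.
static std::string constant_macro_name_sketch(uint32_t constant_id) {
	return "SPIRV_CROSS_CONSTANT_ID_" + std::to_string(constant_id);
}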

File diff suppressed because it is too large

View file

@@ -324,6 +324,8 @@ public:
// of the shader with the additional fixed sample mask.
uint32_t additional_fixed_sample_mask = 0xffffffff;
bool enable_point_size_builtin = true;
bool enable_point_size_default = false;
float default_point_size = 1.0f;
bool enable_frag_depth_builtin = true;
bool enable_frag_stencil_ref_builtin = true;
bool disable_rasterization = false;
@@ -536,6 +538,14 @@ public:
// if the fragment does not modify the depth value.
bool input_attachment_is_ds_attachment = false;
// If BuiltInPosition is not written, automatically disable rasterization.
// The result can be queried with get_is_rasterization_disabled.
bool auto_disable_rasterization = false;
// Use Fast Math pragmas in MSL code, based on SPIR-V float controls and FP ExecutionModes.
// Requires MSL 3.2 or above, and has no effect with earlier MSL versions.
bool use_fast_math_pragmas = false;
bool is_ios() const
{
return platform == iOS;
@@ -756,6 +766,19 @@ public:
void set_combined_sampler_suffix(const char *suffix);
const char *get_combined_sampler_suffix() const;
// Information about specialization constants that are translated into MSL macros
// instead of using function constants.
// These must only be called after a successful call to CompilerMSL::compile().
bool specialization_constant_is_macro(uint32_t constant_id) const;
// Returns a mask of SPIR-V FP Fast Math Mode flags that represents the set of flags applicable
// across all floating-point types. Each FPFastMathDefault execution mode operation identifies the flags
// for one floating-point type, and the value returned here is a bitwise-AND combination across all types.
// If incl_ops is enabled, the FPFastMathMode of any SPIR-V operations is also included in the bitwise-AND
// to determine the minimal fast-math flags that apply to all default execution modes and all operations.
// The returned value is also affected by the SignedZeroInfNanPreserve and ContractionOff execution modes.
// (A minimal sketch of this AND-reduction follows this file's diff.)
uint32_t get_fp_fast_math_flags(bool incl_ops);
protected:
// An enum of SPIR-V functions that are implemented in additional
// source code that is added to the shader if necessary.
@@ -763,6 +786,7 @@
{
SPVFuncImplNone,
SPVFuncImplMod,
SPVFuncImplSMod,
SPVFuncImplRadians,
SPVFuncImplDegrees,
SPVFuncImplFindILsb,
@@ -784,12 +808,11 @@ protected:
SPVFuncImplInverse4x4,
SPVFuncImplInverse3x3,
SPVFuncImplInverse2x2,
// It is very important that this come before *Swizzle and ChromaReconstruct*, to ensure it's
// emitted before them.
SPVFuncImplForwardArgs,
// Likewise, this must come before *Swizzle.
// It is very important that this come before *Swizzle, to ensure it's emitted before them.
SPVFuncImplGetSwizzle,
SPVFuncImplTextureSwizzle,
SPVFuncImplGatherReturn,
SPVFuncImplGatherCompareReturn,
SPVFuncImplGatherSwizzle,
SPVFuncImplGatherCompareSwizzle,
SPVFuncImplGatherConstOffsets,
@@ -806,6 +829,30 @@ protected:
SPVFuncImplSubgroupShuffleXor,
SPVFuncImplSubgroupShuffleUp,
SPVFuncImplSubgroupShuffleDown,
SPVFuncImplSubgroupRotate,
SPVFuncImplSubgroupClusteredAdd,
SPVFuncImplSubgroupClusteredFAdd = SPVFuncImplSubgroupClusteredAdd,
SPVFuncImplSubgroupClusteredIAdd = SPVFuncImplSubgroupClusteredAdd,
SPVFuncImplSubgroupClusteredMul,
SPVFuncImplSubgroupClusteredFMul = SPVFuncImplSubgroupClusteredMul,
SPVFuncImplSubgroupClusteredIMul = SPVFuncImplSubgroupClusteredMul,
SPVFuncImplSubgroupClusteredMin,
SPVFuncImplSubgroupClusteredFMin = SPVFuncImplSubgroupClusteredMin,
SPVFuncImplSubgroupClusteredSMin = SPVFuncImplSubgroupClusteredMin,
SPVFuncImplSubgroupClusteredUMin = SPVFuncImplSubgroupClusteredMin,
SPVFuncImplSubgroupClusteredMax,
SPVFuncImplSubgroupClusteredFMax = SPVFuncImplSubgroupClusteredMax,
SPVFuncImplSubgroupClusteredSMax = SPVFuncImplSubgroupClusteredMax,
SPVFuncImplSubgroupClusteredUMax = SPVFuncImplSubgroupClusteredMax,
SPVFuncImplSubgroupClusteredAnd,
SPVFuncImplSubgroupClusteredBitwiseAnd = SPVFuncImplSubgroupClusteredAnd,
SPVFuncImplSubgroupClusteredLogicalAnd = SPVFuncImplSubgroupClusteredAnd,
SPVFuncImplSubgroupClusteredOr,
SPVFuncImplSubgroupClusteredBitwiseOr = SPVFuncImplSubgroupClusteredOr,
SPVFuncImplSubgroupClusteredLogicalOr = SPVFuncImplSubgroupClusteredOr,
SPVFuncImplSubgroupClusteredXor,
SPVFuncImplSubgroupClusteredBitwiseXor = SPVFuncImplSubgroupClusteredXor,
SPVFuncImplSubgroupClusteredLogicalXor = SPVFuncImplSubgroupClusteredXor,
SPVFuncImplQuadBroadcast,
SPVFuncImplQuadSwap,
SPVFuncImplReflectScalar,
@@ -841,6 +888,7 @@ protected:
SPVFuncImplTextureCast,
SPVFuncImplMulExtended,
SPVFuncImplSetMeshOutputsEXT,
SPVFuncImplAssume,
};
// If the underlying resource has been used for comparison, then duplicate loads of that resource must be as well.
@@ -858,6 +906,11 @@ protected:
void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override;
void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override;
void emit_subgroup_op(const Instruction &i) override;
void emit_subgroup_cluster_op(uint32_t result_type, uint32_t result_id, uint32_t cluster_size, uint32_t op0,
const char *op);
void emit_subgroup_cluster_op_cast(uint32_t result_type, uint32_t result_id, uint32_t cluster_size, uint32_t op0,
const char *op, SPIRType::BaseType input_type,
SPIRType::BaseType expected_result_type);
std::string to_texture_op(const Instruction &i, bool sparse, bool *forward,
SmallVector<uint32_t> &inherited_expressions) override;
void emit_fixup() override;
@@ -872,6 +925,7 @@ protected:
void emit_mesh_entry_point();
void emit_mesh_outputs();
void emit_mesh_tasks(SPIRBlock &block) override;
void emit_workgroup_initialization(const SPIRVariable &var) override;
// Allow Metal to use the array<T> template to make arrays a value type
std::string type_to_array_glsl(const SPIRType &type, uint32_t variable_id) override;
@@ -985,6 +1039,7 @@ protected:
void add_tess_level_input_to_interface_block(const std::string &ib_var_ref, SPIRType &ib_type, SPIRVariable &var);
void add_tess_level_input(const std::string &base_ref, const std::string &mbr_name, SPIRVariable &var);
void ensure_struct_members_valid_vecsizes(SPIRType &struct_type, uint32_t &location);
void fix_up_interface_member_indices(spv::StorageClass storage, uint32_t ib_type_id);
void mark_location_as_used_by_shader(uint32_t location, const SPIRType &type,
@@ -1069,7 +1124,8 @@ protected:
bool validate_member_packing_rules_msl(const SPIRType &type, uint32_t index) const;
std::string get_argument_address_space(const SPIRVariable &argument);
std::string get_type_address_space(const SPIRType &type, uint32_t id, bool argument = false);
static bool decoration_flags_signal_volatile(const Bitset &flags);
bool decoration_flags_signal_volatile(const Bitset &flags) const;
bool decoration_flags_signal_coherent(const Bitset &flags) const;
const char *to_restrict(uint32_t id, bool space);
SPIRType &get_stage_in_struct_type();
SPIRType &get_stage_out_struct_type();
@@ -1082,7 +1138,7 @@ protected:
uint32_t mem_order_1, uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0,
bool op1_is_pointer = false, bool op1_is_literal = false, uint32_t op2 = 0);
const char *get_memory_order(uint32_t spv_mem_sem);
void add_pragma_line(const std::string &line);
void add_pragma_line(const std::string &line, bool recompile_on_unique);
void add_typedef_line(const std::string &line);
void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem);
bool emit_array_copy(const char *expr, uint32_t lhs_id, uint32_t rhs_id,
@@ -1133,12 +1189,13 @@ protected:
void emit_store_statement(uint32_t lhs_expression, uint32_t rhs_expression) override;
void analyze_sampled_image_usage();
void analyze_workgroup_variables();
bool access_chain_needs_stage_io_builtin_translation(uint32_t base) override;
bool prepare_access_chain_for_scalar_access(std::string &expr, const SPIRType &type, spv::StorageClass storage,
bool &is_packed) override;
void fix_up_interpolant_access_chain(const uint32_t *ops, uint32_t length);
void check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) override;
bool check_physical_type_cast(std::string &expr, const SPIRType *type, uint32_t physical_type) override;
bool emit_tessellation_access_chain(const uint32_t *ops, uint32_t length);
bool emit_tessellation_io_load(uint32_t result_type, uint32_t id, uint32_t ptr);
@@ -1164,9 +1221,10 @@ protected:
std::unordered_map<uint32_t, uint32_t> fragment_output_components;
std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_input_location;
std::unordered_map<uint32_t, uint32_t> builtin_to_automatic_output_location;
std::set<std::string> pragma_lines;
std::set<std::string> typedef_lines;
std::vector<std::string> pragma_lines;
std::vector<std::string> typedef_lines;
SmallVector<uint32_t> vars_needing_early_declaration;
std::unordered_set<uint32_t> constant_macro_ids;
std::unordered_map<StageSetBinding, std::pair<MSLResourceBinding, bool>, InternalHasher> resource_bindings;
std::unordered_map<StageSetBinding, uint32_t, InternalHasher> resource_arg_buff_idx_to_binding_number;
@@ -1210,11 +1268,14 @@ protected:
bool needs_swizzle_buffer_def = false;
bool used_swizzle_buffer = false;
bool added_builtin_tess_level = false;
bool needs_local_invocation_index = false;
bool needs_subgroup_invocation_id = false;
bool needs_subgroup_size = false;
bool needs_sample_id = false;
bool needs_helper_invocation = false;
bool needs_workgroup_zero_init = false;
bool writes_to_depth = false;
bool writes_to_point_size = false;
std::string qual_pos_var_name;
std::string stage_in_var_name = "in";
std::string stage_out_var_name = "out";
@@ -1276,6 +1337,7 @@ protected:
bool suppress_missing_prototypes = false;
bool suppress_incompatible_pointer_types_discard_qualifiers = false;
bool suppress_sometimes_unitialized = false;
void add_spv_func_and_recompile(SPVFuncImpl spv_func);
@@ -1308,7 +1370,7 @@
}
bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override;
CompilerMSL::SPVFuncImpl get_spv_func_impl(spv::Op opcode, const uint32_t *args);
CompilerMSL::SPVFuncImpl get_spv_func_impl(spv::Op opcode, const uint32_t *args, uint32_t length);
void check_resource_write(uint32_t var_id);
CompilerMSL &compiler;
@@ -1319,6 +1381,7 @@ protected:
bool uses_image_write = false;
bool uses_buffer_write = false;
bool uses_discard = false;
bool needs_local_invocation_index = false;
bool needs_subgroup_invocation_id = false;
bool needs_subgroup_size = false;
bool needs_sample_id = false;
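get_fp_fast_math_flags, documented in the hunk above, reduces the per-type FPFastMathDefault masks with a bitwise AND so that only flags valid for every floating-point type survive; with incl_ops, each operation's FPFastMathMode narrows the result further. A minimal sketch of that reduction (standalone and illustrative; the function name is not the library API):

#include <cstdint>
#include <initializer_list>

// AND-reduce fast-math masks: start all-permissive, then let every per-type
// (and optionally per-operation) mask strip flags it does not guarantee.
static uint32_t combine_fast_math_masks(std::initializer_list<uint32_t> masks) {
	uint32_t combined = ~0u;
	for (uint32_t mask : masks)
		combined &= mask;
	return combined;
}

// Usage: combine_fast_math_masks({mask_fp16, mask_fp32, mask_fp64}) yields
// the flags every default execution mode agrees on.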

View file

@@ -305,6 +305,7 @@ void Parser::parse(const Instruction &instruction)
}
case OpExtInst:
case OpExtInstWithForwardRefsKHR:
{
// The SPIR-V debug information extended instructions might come at global scope.
if (current_block)
@@ -380,13 +381,21 @@ void Parser::parse(const Instruction &instruction)
auto mode = static_cast<ExecutionMode>(ops[1]);
execution.flags.set(mode);
if (mode == ExecutionModeLocalSizeId)
switch (mode)
{
case ExecutionModeLocalSizeId:
execution.workgroup_size.id_x = ops[2];
execution.workgroup_size.id_y = ops[3];
execution.workgroup_size.id_z = ops[4];
}
break;
case ExecutionModeFPFastMathDefault:
execution.fp_fast_math_defaults[ops[2]] = ops[3];
break;
default:
break;
}
break;
}
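The execution-mode hunk above stores the LocalSizeId operands as constant IDs rather than literal sizes, so the actual workgroup dimensions must be resolved through the IR later. A hedged sketch of that indirection (the constant table here is a stand-in for the parsed IR, not parser API):

#include <cstdint>
#include <unordered_map>

// LocalSizeId records IDs of (possibly specialization) constants; resolving a
// dimension means looking the ID up in the constant table built by the parser.
static uint32_t resolve_workgroup_dim(const std::unordered_map<uint32_t, uint32_t> &constants, uint32_t id) {
	auto it = constants.find(id);
	return it != constants.end() ? it->second : 1u; // unresolved IDs default to 1 here for illustration
}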
@@ -536,12 +545,37 @@ void Parser::parse(const Instruction &instruction)
uint32_t id = ops[0];
uint32_t width = ops[1];
auto &type = set<SPIRType>(id, op);
if (width != 16 && width != 8 && length > 2)
SPIRV_CROSS_THROW("Unrecognized FP encoding mode for OpTypeFloat.");
if (width == 64)
type.basetype = SPIRType::Double;
else if (width == 32)
type.basetype = SPIRType::Float;
else if (width == 16)
type.basetype = SPIRType::Half;
{
if (length > 2)
{
if (ops[2] == spv::FPEncodingBFloat16KHR)
type.basetype = SPIRType::BFloat16;
else
SPIRV_CROSS_THROW("Unrecognized encoding for OpTypeFloat 16.");
}
else
type.basetype = SPIRType::Half;
}
else if (width == 8)
{
if (length < 3)
SPIRV_CROSS_THROW("Missing encoding for OpTypeFloat 8.");
else if (ops[2] == spv::FPEncodingFloat8E4M3EXT)
type.basetype = SPIRType::FloatE4M3;
else if (ops[2] == spv::FPEncodingFloat8E5M2EXT)
type.basetype = SPIRType::FloatE5M2;
else
SPIRV_CROSS_THROW("Invalid encoding for OpTypeFloat 8.");
}
else
SPIRV_CROSS_THROW("Unrecognized bit-width of floating point type.");
type.width = width;
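The OpTypeFloat hunk above dispatches on the bit width and, for 8-bit and 16-bit types, on the optional encoding operand. A compact standalone sketch of that mapping (the helper and the encoding constants are illustrative stand-ins for the spv::FPEncoding* enumerators, and std::runtime_error stands in for SPIRV_CROSS_THROW):

#include <cstdint>
#include <stdexcept>

enum class FloatKind { Double, Float, Half, BFloat16, E4M3, E5M2 };

static FloatKind classify_float(uint32_t width, bool has_encoding, uint32_t encoding) {
	const uint32_t kBFloat16 = 0, kE4M3 = 1, kE5M2 = 2; // hypothetical stand-in values
	switch (width) {
	case 64: return FloatKind::Double;
	case 32: return FloatKind::Float;
	case 16:
		if (!has_encoding)
			return FloatKind::Half;
		if (encoding == kBFloat16)
			return FloatKind::BFloat16;
		throw std::runtime_error("Unrecognized encoding for OpTypeFloat 16.");
	case 8:
		// 8-bit floats always need an explicit encoding operand.
		if (!has_encoding)
			throw std::runtime_error("Missing encoding for OpTypeFloat 8.");
		if (encoding == kE4M3)
			return FloatKind::E4M3;
		if (encoding == kE5M2)
			return FloatKind::E5M2;
		throw std::runtime_error("Invalid encoding for OpTypeFloat 8.");
	default:
		throw std::runtime_error("Unrecognized bit-width of floating point type.");
	}
}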
@@ -592,6 +626,22 @@ void Parser::parse(const Instruction &instruction)
break;
}
case OpTypeCooperativeMatrixKHR:
{
uint32_t id = ops[0];
auto &base = get<SPIRType>(ops[1]);
auto &matrixbase = set<SPIRType>(id, base);
matrixbase.op = op;
matrixbase.cooperative.scope_id = ops[2];
matrixbase.cooperative.rows_id = ops[3];
matrixbase.cooperative.columns_id = ops[4];
matrixbase.cooperative.use_id = ops[5];
matrixbase.self = id;
matrixbase.parent_type = ops[1];
break;
}
case OpTypeArray:
{
uint32_t id = ops[0];
@@ -835,17 +885,27 @@ void Parser::parse(const Instruction &instruction)
break;
}
// Constants
case OpSpecConstant:
case OpConstant:
case OpConstantCompositeReplicateEXT:
case OpSpecConstantCompositeReplicateEXT:
{
uint32_t id = ops[1];
auto &type = get<SPIRType>(ops[0]);
if (type.width > 32)
set<SPIRConstant>(id, ops[0], ops[2] | (uint64_t(ops[3]) << 32), op == OpSpecConstant);
if (op == OpConstantCompositeReplicateEXT || op == OpSpecConstantCompositeReplicateEXT)
{
auto subconstant = uint32_t(ops[2]);
set<SPIRConstant>(id, ops[0], &subconstant, 1, op == OpSpecConstantCompositeReplicateEXT, true);
}
else
set<SPIRConstant>(id, ops[0], ops[2], op == OpSpecConstant);
{
if (type.width > 32)
set<SPIRConstant>(id, ops[0], ops[2] | (uint64_t(ops[3]) << 32), op == OpSpecConstant);
else
set<SPIRConstant>(id, ops[0], ops[2], op == OpSpecConstant);
}
break;
}
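The constants hunk above assembles literals wider than 32 bits from two instruction words, low word first, via ops[2] | (uint64_t(ops[3]) << 32). A worked standalone example of that packing (the word values are made up for illustration):

#include <cstdint>
#include <cstdio>

// SPIR-V stores >32-bit literals low-order word first; widen the high word,
// shift it into place, and OR the halves together.
static uint64_t pack_wide_literal(uint32_t low_word, uint32_t high_word) {
	return uint64_t(low_word) | (uint64_t(high_word) << 32);
}

int main() {
	// The double 1.0 has bit pattern 0x3FF0000000000000: low word 0x00000000,
	// high word 0x3FF00000.
	uint64_t bits = pack_wide_literal(0x00000000u, 0x3FF00000u);
	std::printf("0x%016llX\n", static_cast<unsigned long long>(bits)); // prints 0x3FF0000000000000
	return 0;
}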