mirror of
https://github.com/godotengine/godot.git
synced 2025-10-19 16:03:29 +00:00
Update meshoptimizer to v0.25
Also expose new flags as SurfaceTool enums for future use
This commit is contained in:
parent
21fbf033f7
commit
90ff46c292
8 changed files with 1162 additions and 143 deletions
|
@ -89,6 +89,10 @@ public:
|
|||
SIMPLIFY_ERROR_ABSOLUTE = 1 << 2, // From meshopt_SimplifyErrorAbsolute
|
||||
/* Remove disconnected parts of the mesh during simplification incrementally, regardless of the topological restrictions inside components. */
|
||||
SIMPLIFY_PRUNE = 1 << 3, // From meshopt_SimplifyPrune
|
||||
/* Produce more regular triangle sizes and shapes during simplification, at some cost to geometric quality. */
|
||||
SIMPLIFY_REGULARIZE = 1 << 4, // From meshopt_SimplifyRegularize
|
||||
/* Allow collapses across attribute discontinuities, except for vertices that are tagged with 0x02 in vertex_lock. */
|
||||
SIMPLIFY_PERMISSIVE = 1 << 5, // From meshopt_SimplifyPermissive
|
||||
};
|
||||
|
||||
typedef void (*OptimizeVertexCacheFunc)(unsigned int *destination, const unsigned int *indices, size_t index_count, size_t vertex_count);
|
||||
|
|
2
thirdparty/README.md
vendored
2
thirdparty/README.md
vendored
|
@ -679,7 +679,7 @@ Patches:
|
|||
## meshoptimizer
|
||||
|
||||
- Upstream: https://github.com/zeux/meshoptimizer
|
||||
- Version: 0.24 (7b2d4f4c817aea55d74dcd65d9763ac2ca608026, 2025)
|
||||
- Version: 0.25 (6daea4695c48338363b08022d2fb15deaef6ac09, 2025)
|
||||
- License: MIT
|
||||
|
||||
Files extracted from upstream repository:
|
||||
|
|
13
thirdparty/meshoptimizer/allocator.cpp
vendored
13
thirdparty/meshoptimizer/allocator.cpp
vendored
|
@ -1,8 +1,17 @@
|
|||
// This file is part of meshoptimizer library; see meshoptimizer.h for version/license details
|
||||
#include "meshoptimizer.h"
|
||||
|
||||
#ifdef MESHOPTIMIZER_ALLOC_EXPORT
|
||||
meshopt_Allocator::Storage& meshopt_Allocator::storage()
|
||||
{
|
||||
static Storage s = {::operator new, ::operator delete };
|
||||
return s;
|
||||
}
|
||||
#endif
|
||||
|
||||
void meshopt_setAllocator(void* (MESHOPTIMIZER_ALLOC_CALLCONV* allocate)(size_t), void (MESHOPTIMIZER_ALLOC_CALLCONV* deallocate)(void*))
|
||||
{
|
||||
meshopt_Allocator::Storage::allocate = allocate;
|
||||
meshopt_Allocator::Storage::deallocate = deallocate;
|
||||
meshopt_Allocator::Storage& s = meshopt_Allocator::storage();
|
||||
s.allocate = allocate;
|
||||
s.deallocate = deallocate;
|
||||
}
|
||||
|
|
25
thirdparty/meshoptimizer/indexgenerator.cpp
vendored
25
thirdparty/meshoptimizer/indexgenerator.cpp
vendored
|
@ -439,6 +439,31 @@ void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const uns
|
|||
generateShadowBuffer(destination, indices, index_count, vertex_count, hasher, allocator);
|
||||
}
|
||||
|
||||
void meshopt_generatePositionRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
assert(vertex_positions_stride >= 12 && vertex_positions_stride <= 256);
|
||||
assert(vertex_positions_stride % sizeof(float) == 0);
|
||||
|
||||
meshopt_Allocator allocator;
|
||||
VertexCustomHasher hasher = {vertex_positions, vertex_positions_stride / sizeof(float), NULL, NULL};
|
||||
|
||||
size_t table_size = hashBuckets(vertex_count);
|
||||
unsigned int* table = allocator.allocate<unsigned int>(table_size);
|
||||
memset(table, -1, table_size * sizeof(unsigned int));
|
||||
|
||||
for (size_t i = 0; i < vertex_count; ++i)
|
||||
{
|
||||
unsigned int* entry = hashLookup(table, table_size, hasher, unsigned(i), ~0u);
|
||||
|
||||
if (*entry == ~0u)
|
||||
*entry = unsigned(i);
|
||||
|
||||
destination[i] = *entry;
|
||||
}
|
||||
}
|
||||
|
||||
void meshopt_generateAdjacencyIndexBuffer(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
|
170
thirdparty/meshoptimizer/meshoptimizer.h
vendored
170
thirdparty/meshoptimizer/meshoptimizer.h
vendored
|
@ -1,5 +1,5 @@
|
|||
/**
|
||||
* meshoptimizer - version 0.24
|
||||
* meshoptimizer - version 0.25
|
||||
*
|
||||
* Copyright (C) 2016-2025, by Arseny Kapoulkine (arseny.kapoulkine@gmail.com)
|
||||
* Report bugs and download new versions at https://github.com/zeux/meshoptimizer
|
||||
|
@ -12,7 +12,7 @@
|
|||
#include <stddef.h>
|
||||
|
||||
/* Version macro; major * 1000 + minor * 10 + patch */
|
||||
#define MESHOPTIMIZER_VERSION 240 /* 0.24 */
|
||||
#define MESHOPTIMIZER_VERSION 250 /* 0.25 */
|
||||
|
||||
/* If no API is defined, assume default */
|
||||
#ifndef MESHOPTIMIZER_API
|
||||
|
@ -75,7 +75,7 @@ MESHOPTIMIZER_API size_t meshopt_generateVertexRemap(unsigned int* destination,
|
|||
MESHOPTIMIZER_API size_t meshopt_generateVertexRemapMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count);
|
||||
|
||||
/**
|
||||
* Experimental: Generates a vertex remap table from the vertex buffer and an optional index buffer and returns number of unique vertices
|
||||
* Generates a vertex remap table from the vertex buffer and an optional index buffer and returns number of unique vertices
|
||||
* As a result, all vertices that are equivalent map to the same (new) location, with no gaps in the resulting sequence.
|
||||
* Equivalence is checked in two steps: vertex positions are compared for equality, and then the user-specified equality function is called (if provided).
|
||||
* Resulting remap table maps old vertices to new vertices and can be used in meshopt_remapVertexBuffer/meshopt_remapIndexBuffer.
|
||||
|
@ -85,7 +85,7 @@ MESHOPTIMIZER_API size_t meshopt_generateVertexRemapMulti(unsigned int* destinat
|
|||
* vertex_positions should have float3 position in the first 12 bytes of each vertex
|
||||
* callback can be NULL if no additional equality check is needed; otherwise, it should return 1 if vertices with specified indices are equivalent and 0 if they are not
|
||||
*/
|
||||
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_generateVertexRemapCustom(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, int (*callback)(void*, unsigned int, unsigned int), void* context);
|
||||
MESHOPTIMIZER_API size_t meshopt_generateVertexRemapCustom(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, int (*callback)(void*, unsigned int, unsigned int), void* context);
|
||||
|
||||
/**
|
||||
* Generates vertex buffer from the source vertex buffer and remap table generated by meshopt_generateVertexRemap
|
||||
|
@ -124,6 +124,16 @@ MESHOPTIMIZER_API void meshopt_generateShadowIndexBuffer(unsigned int* destinati
|
|||
*/
|
||||
MESHOPTIMIZER_API void meshopt_generateShadowIndexBufferMulti(unsigned int* destination, const unsigned int* indices, size_t index_count, size_t vertex_count, const struct meshopt_Stream* streams, size_t stream_count);
|
||||
|
||||
/**
|
||||
* Experimental: Generates a remap table that maps all vertices with the same position to the same (existing) index.
|
||||
* Similarly to meshopt_generateShadowIndexBuffer, this can be helpful to pre-process meshes for position-only rendering.
|
||||
* This can also be used to implement algorithms that require positional-only connectivity, such as hierarchical simplification.
|
||||
*
|
||||
* destination must contain enough space for the resulting remap table (vertex_count elements)
|
||||
* vertex_positions should have float3 position in the first 12 bytes of each vertex
|
||||
*/
|
||||
MESHOPTIMIZER_EXPERIMENTAL void meshopt_generatePositionRemap(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
|
||||
|
||||
/**
|
||||
* Generate index buffer that can be used as a geometry shader input with triangle adjacency topology
|
||||
* Each triangle is converted into a 6-vertex patch with the following layout:
|
||||
|
@ -155,7 +165,7 @@ MESHOPTIMIZER_API void meshopt_generateTessellationIndexBuffer(unsigned int* des
|
|||
|
||||
/**
|
||||
* Generate index buffer that can be used for visibility buffer rendering and returns the size of the reorder table
|
||||
* Each triangle's provoking vertex index is equal to primitive id; this allows passing it to the fragment shader using nointerpolate attribute.
|
||||
* Each triangle's provoking vertex index is equal to primitive id; this allows passing it to the fragment shader using flat/nointerpolation attribute.
|
||||
* This is important for performance on hardware where primitive id can't be accessed efficiently in fragment shader.
|
||||
* The reorder table stores the original vertex id for each vertex in the new index buffer, and should be used in the vertex shader to load vertex data.
|
||||
* The provoking vertex is assumed to be the first vertex in the triangle; if this is not the case (OpenGL), rotate each triangle (abc -> bca) before rendering.
|
||||
|
@ -298,7 +308,7 @@ MESHOPTIMIZER_API size_t meshopt_encodeVertexBuffer(unsigned char* buffer, size_
|
|||
MESHOPTIMIZER_API size_t meshopt_encodeVertexBufferBound(size_t vertex_count, size_t vertex_size);
|
||||
|
||||
/**
|
||||
* Experimental: Vertex buffer encoder
|
||||
* Vertex buffer encoder
|
||||
* Encodes vertex data just like meshopt_encodeVertexBuffer, but allows to override compression level.
|
||||
* For compression level to take effect, the vertex encoding version must be set to 1.
|
||||
* The default compression level implied by meshopt_encodeVertexBuffer is 2.
|
||||
|
@ -306,7 +316,7 @@ MESHOPTIMIZER_API size_t meshopt_encodeVertexBufferBound(size_t vertex_count, si
|
|||
* level should be in the range [0, 3] with 0 being the fastest and 3 being the slowest and producing the best compression ratio.
|
||||
* version should be -1 to use the default version (specified via meshopt_encodeVertexVersion), or 0/1 to override the version; per above, level won't take effect if version is 0.
|
||||
*/
|
||||
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_encodeVertexBufferLevel(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size, int level, int version);
|
||||
MESHOPTIMIZER_API size_t meshopt_encodeVertexBufferLevel(unsigned char* buffer, size_t buffer_size, const void* vertices, size_t vertex_count, size_t vertex_size, int level, int version);
|
||||
|
||||
/**
|
||||
* Set vertex encoder format version
|
||||
|
@ -343,10 +353,14 @@ MESHOPTIMIZER_API int meshopt_decodeVertexVersion(const unsigned char* buffer, s
|
|||
*
|
||||
* meshopt_decodeFilterExp decodes exponential encoding of floating-point data with 8-bit exponent and 24-bit integer mantissa as 2^E*M.
|
||||
* Each 32-bit component is decoded in isolation; stride must be divisible by 4.
|
||||
*
|
||||
* Experimental: meshopt_decodeFilterColor decodes YCoCg (+A) color encoding where RGB is converted to YCoCg space with variable bit quantization.
|
||||
* Each component is stored as an 8-bit or 16-bit normalized integer; stride must be equal to 4 or 8.
|
||||
*/
|
||||
MESHOPTIMIZER_API void meshopt_decodeFilterOct(void* buffer, size_t count, size_t stride);
|
||||
MESHOPTIMIZER_API void meshopt_decodeFilterQuat(void* buffer, size_t count, size_t stride);
|
||||
MESHOPTIMIZER_API void meshopt_decodeFilterExp(void* buffer, size_t count, size_t stride);
|
||||
MESHOPTIMIZER_EXPERIMENTAL void meshopt_decodeFilterColor(void* buffer, size_t count, size_t stride);
|
||||
|
||||
/**
|
||||
* Vertex buffer filter encoders
|
||||
|
@ -363,6 +377,10 @@ MESHOPTIMIZER_API void meshopt_decodeFilterExp(void* buffer, size_t count, size_
|
|||
* meshopt_encodeFilterExp encodes arbitrary (finite) floating-point data with 8-bit exponent and K-bit integer mantissa (1 <= K <= 24).
|
||||
* Exponent can be shared between all components of a given vector as defined by stride or all values of a given component; stride must be divisible by 4.
|
||||
* Input data must contain stride/4 floats for every vector (count*stride/4 total).
|
||||
*
|
||||
* Experimental: meshopt_encodeFilterColor encodes RGBA color data by converting RGB to YCoCg color space with variable bit quantization.
|
||||
* Each component is stored as an 8-bit or 16-bit integer; stride must be equal to 4 or 8.
|
||||
* Input data must contain 4 floats for every color (count*4 total).
|
||||
*/
|
||||
enum meshopt_EncodeExpMode
|
||||
{
|
||||
|
@ -379,6 +397,7 @@ enum meshopt_EncodeExpMode
|
|||
MESHOPTIMIZER_API void meshopt_encodeFilterOct(void* destination, size_t count, size_t stride, int bits, const float* data);
|
||||
MESHOPTIMIZER_API void meshopt_encodeFilterQuat(void* destination, size_t count, size_t stride, int bits, const float* data);
|
||||
MESHOPTIMIZER_API void meshopt_encodeFilterExp(void* destination, size_t count, size_t stride, int bits, const float* data, enum meshopt_EncodeExpMode mode);
|
||||
MESHOPTIMIZER_EXPERIMENTAL void meshopt_encodeFilterColor(void* destination, size_t count, size_t stride, int bits, const float* data);
|
||||
|
||||
/**
|
||||
* Simplification options
|
||||
|
@ -391,18 +410,34 @@ enum
|
|||
meshopt_SimplifySparse = 1 << 1,
|
||||
/* Treat error limit and resulting error as absolute instead of relative to mesh extents. */
|
||||
meshopt_SimplifyErrorAbsolute = 1 << 2,
|
||||
/* Experimental: remove disconnected parts of the mesh during simplification incrementally, regardless of the topological restrictions inside components. */
|
||||
/* Remove disconnected parts of the mesh during simplification incrementally, regardless of the topological restrictions inside components. */
|
||||
meshopt_SimplifyPrune = 1 << 3,
|
||||
/* Experimental: Produce more regular triangle sizes and shapes during simplification, at some cost to geometric quality. */
|
||||
meshopt_SimplifyRegularize = 1 << 4,
|
||||
/* Experimental: Allow collapses across attribute discontinuities, except for vertices that are tagged with meshopt_SimplifyVertex_Protect in vertex_lock. */
|
||||
meshopt_SimplifyPermissive = 1 << 5,
|
||||
};
|
||||
|
||||
/**
|
||||
* Experimental: Simplification vertex flags/locks, for use in `vertex_lock` arrays in simplification APIs
|
||||
*/
|
||||
enum
|
||||
{
|
||||
/* Do not move this vertex. */
|
||||
meshopt_SimplifyVertex_Lock = 1 << 0,
|
||||
/* Protect attribute discontinuity at this vertex; must be used together with meshopt_SimplifyPermissive option. */
|
||||
meshopt_SimplifyVertex_Protect = 1 << 1,
|
||||
};
|
||||
|
||||
/**
|
||||
* Mesh simplifier
|
||||
* Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible
|
||||
* The algorithm tries to preserve mesh topology and can stop short of the target goal based on topology constraints or target error.
|
||||
* If not all attributes from the input mesh are required, it's recommended to reindex the mesh without them prior to simplification.
|
||||
* If not all attributes from the input mesh are needed, it's recommended to reindex the mesh without them prior to simplification.
|
||||
* Returns the number of indices after simplification, with destination containing new index data
|
||||
*
|
||||
* The resulting index buffer references vertices from the original vertex buffer.
|
||||
* If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
|
||||
* If the original vertex data isn't needed, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
|
||||
*
|
||||
* destination must contain enough space for the target index buffer, worst case is index_count elements (*not* target_index_count)!
|
||||
* vertex_positions should have float3 position in the first 12 bytes of each vertex
|
||||
|
@ -414,50 +449,86 @@ MESHOPTIMIZER_API size_t meshopt_simplify(unsigned int* destination, const unsig
|
|||
|
||||
/**
|
||||
* Mesh simplifier with attribute metric
|
||||
* The algorithm enhances meshopt_simplify by incorporating attribute values into the error metric used to prioritize simplification order; see meshopt_simplify documentation for details.
|
||||
* Note that the number of attributes affects memory requirements and running time; this algorithm requires ~1.5x more memory and time compared to meshopt_simplify when using 4 scalar attributes.
|
||||
* Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible.
|
||||
* Similar to meshopt_simplify, but incorporates attribute values into the error metric used to prioritize simplification order.
|
||||
* The algorithm tries to preserve mesh topology and can stop short of the target goal based on topology constraints or target error.
|
||||
* If not all attributes from the input mesh are needed, it's recommended to reindex the mesh without them prior to simplification.
|
||||
* Returns the number of indices after simplification, with destination containing new index data
|
||||
*
|
||||
* The resulting index buffer references vertices from the original vertex buffer.
|
||||
* If the original vertex data isn't needed, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
|
||||
* Note that the number of attributes with non-zero weights affects memory requirements and running time.
|
||||
*
|
||||
* destination must contain enough space for the target index buffer, worst case is index_count elements (*not* target_index_count)!
|
||||
* vertex_positions should have float3 position in the first 12 bytes of each vertex
|
||||
* vertex_attributes should have attribute_count floats for each vertex
|
||||
* attribute_weights should have attribute_count floats in total; the weights determine relative priority of attributes between each other and wrt position
|
||||
* attribute_count must be <= 32
|
||||
* vertex_lock can be NULL; when it's not NULL, it should have a value for each vertex; 1 denotes vertices that can't be moved
|
||||
* target_error represents the error relative to mesh extents that can be tolerated, e.g. 0.01 = 1% deformation; value range [0..1]
|
||||
* options must be a bitmask composed of meshopt_SimplifyX options; 0 is a safe default
|
||||
* result_error can be NULL; when it's not NULL, it will contain the resulting (relative) error after simplification
|
||||
*/
|
||||
MESHOPTIMIZER_API size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* result_error);
|
||||
|
||||
/**
|
||||
* Experimental: Mesh simplifier with position/attribute update
|
||||
* Reduces the number of triangles in the mesh, attempting to preserve mesh appearance as much as possible.
|
||||
* Similar to meshopt_simplifyWithAttributes, but destructively updates positions and attribute values for optimal appearance.
|
||||
* The algorithm tries to preserve mesh topology and can stop short of the target goal based on topology constraints or target error.
|
||||
* If not all attributes from the input mesh are needed, it's recommended to reindex the mesh without them prior to simplification.
|
||||
* Returns the number of indices after simplification, indices are destructively updated with new index data
|
||||
*
|
||||
* The updated index buffer references vertices from the original vertex buffer, however the vertex positions and attributes are updated in-place.
|
||||
* Creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended; if the original vertex data is needed, it should be copied before simplification.
|
||||
* Note that the number of attributes with non-zero weights affects memory requirements and running time. Attributes with zero weights are not updated.
|
||||
*
|
||||
* vertex_positions should have float3 position in the first 12 bytes of each vertex
|
||||
* vertex_attributes should have attribute_count floats for each vertex
|
||||
* attribute_weights should have attribute_count floats in total; the weights determine relative priority of attributes between each other and wrt position
|
||||
* attribute_count must be <= 32
|
||||
* vertex_lock can be NULL; when it's not NULL, it should have a value for each vertex; 1 denotes vertices that can't be moved
|
||||
* target_error represents the error relative to mesh extents that can be tolerated, e.g. 0.01 = 1% deformation; value range [0..1]
|
||||
* options must be a bitmask composed of meshopt_SimplifyX options; 0 is a safe default
|
||||
* result_error can be NULL; when it's not NULL, it will contain the resulting (relative) error after simplification
|
||||
*/
|
||||
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyWithUpdate(unsigned int* indices, size_t index_count, float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* result_error);
|
||||
|
||||
/**
|
||||
* Experimental: Mesh simplifier (sloppy)
|
||||
* Reduces the number of triangles in the mesh, sacrificing mesh appearance for simplification performance
|
||||
* The algorithm doesn't preserve mesh topology but can stop short of the target goal based on target error.
|
||||
* Returns the number of indices after simplification, with destination containing new index data
|
||||
* The resulting index buffer references vertices from the original vertex buffer.
|
||||
* If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
|
||||
* If the original vertex data isn't needed, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
|
||||
*
|
||||
* destination must contain enough space for the target index buffer, worst case is index_count elements (*not* target_index_count)!
|
||||
* vertex_positions should have float3 position in the first 12 bytes of each vertex
|
||||
* vertex_lock can be NULL; when it's not NULL, it should have a value for each vertex; vertices that can't be moved should set 1 consistently for all indices with the same position
|
||||
* target_error represents the error relative to mesh extents that can be tolerated, e.g. 0.01 = 1% deformation; value range [0..1]
|
||||
* result_error can be NULL; when it's not NULL, it will contain the resulting (relative) error after simplification
|
||||
*/
|
||||
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error);
|
||||
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const unsigned char* vertex_lock, size_t target_index_count, float target_error, float* result_error);
|
||||
|
||||
/**
|
||||
* Experimental: Mesh simplifier (pruner)
|
||||
* Mesh simplifier (pruner)
|
||||
* Reduces the number of triangles in the mesh by removing small isolated parts of the mesh
|
||||
* Returns the number of indices after simplification, with destination containing new index data
|
||||
* The resulting index buffer references vertices from the original vertex buffer.
|
||||
* If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
|
||||
* If the original vertex data isn't needed, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
|
||||
*
|
||||
* destination must contain enough space for the target index buffer, worst case is index_count elements
|
||||
* vertex_positions should have float3 position in the first 12 bytes of each vertex
|
||||
* target_error represents the error relative to mesh extents that can be tolerated, e.g. 0.01 = 1% deformation; value range [0..1]
|
||||
*/
|
||||
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_simplifyPrune(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float target_error);
|
||||
MESHOPTIMIZER_API size_t meshopt_simplifyPrune(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float target_error);
|
||||
|
||||
/**
|
||||
* Point cloud simplifier
|
||||
* Reduces the number of points in the cloud to reach the given target
|
||||
* Returns the number of points after simplification, with destination containing new index data
|
||||
* The resulting index buffer references vertices from the original vertex buffer.
|
||||
* If the original vertex data isn't required, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
|
||||
* If the original vertex data isn't needed, creating a compact vertex buffer using meshopt_optimizeVertexFetch is recommended.
|
||||
*
|
||||
* destination must contain enough space for the target index buffer (target_vertex_count elements)
|
||||
* vertex_positions should have float3 position in the first 12 bytes of each vertex
|
||||
|
@ -548,12 +619,12 @@ struct meshopt_CoverageStatistics
|
|||
};
|
||||
|
||||
/**
|
||||
* Experimental: Coverage analyzer
|
||||
* Coverage analyzer
|
||||
* Returns coverage statistics (ratio of viewport pixels covered from each axis) using a software rasterizer
|
||||
*
|
||||
* vertex_positions should have float3 position in the first 12 bytes of each vertex
|
||||
*/
|
||||
MESHOPTIMIZER_EXPERIMENTAL struct meshopt_CoverageStatistics meshopt_analyzeCoverage(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
|
||||
MESHOPTIMIZER_API struct meshopt_CoverageStatistics meshopt_analyzeCoverage(const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
|
||||
|
||||
/**
|
||||
* Meshlet is a small mesh cluster (subset) that consists of:
|
||||
|
@ -674,26 +745,26 @@ MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeClusterBounds(const unsig
|
|||
MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeMeshletBounds(const unsigned int* meshlet_vertices, const unsigned char* meshlet_triangles, size_t triangle_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
|
||||
|
||||
/**
|
||||
* Experimental: Sphere bounds generator
|
||||
* Sphere bounds generator
|
||||
* Creates bounding sphere around a set of points or a set of spheres; returns the center and radius of the sphere, with other fields of the result set to 0.
|
||||
*
|
||||
* positions should have float3 position in the first 12 bytes of each element
|
||||
* radii can be NULL; when it's not NULL, it should have a non-negative float radius in the first 4 bytes of each element
|
||||
*/
|
||||
MESHOPTIMIZER_EXPERIMENTAL struct meshopt_Bounds meshopt_computeSphereBounds(const float* positions, size_t count, size_t positions_stride, const float* radii, size_t radii_stride);
|
||||
MESHOPTIMIZER_API struct meshopt_Bounds meshopt_computeSphereBounds(const float* positions, size_t count, size_t positions_stride, const float* radii, size_t radii_stride);
|
||||
|
||||
/**
|
||||
* Experimental: Cluster partitioner
|
||||
* Cluster partitioner
|
||||
* Partitions clusters into groups of similar size, prioritizing grouping clusters that share vertices or are close to each other.
|
||||
*
|
||||
* destination must contain enough space for the resulting partiotion data (cluster_count elements)
|
||||
* destination must contain enough space for the resulting partition data (cluster_count elements)
|
||||
* destination[i] will contain the partition id for cluster i, with the total number of partitions returned by the function
|
||||
* cluster_indices should have the vertex indices referenced by each cluster, stored sequentially
|
||||
* cluster_index_counts should have the number of indices in each cluster; sum of all cluster_index_counts must be equal to total_index_count
|
||||
* vertex_positions should have float3 position in the first 12 bytes of each vertex (or can be NULL if not used)
|
||||
* target_partition_size is a target size for each partition, in clusters; the resulting partitions may be smaller or larger
|
||||
*/
|
||||
MESHOPTIMIZER_EXPERIMENTAL size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_partition_size);
|
||||
MESHOPTIMIZER_API size_t meshopt_partitionClusters(unsigned int* destination, const unsigned int* cluster_indices, size_t total_index_count, const unsigned int* cluster_index_counts, size_t cluster_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_partition_size);
|
||||
|
||||
/**
|
||||
* Spatial sorter
|
||||
|
@ -715,14 +786,14 @@ MESHOPTIMIZER_API void meshopt_spatialSortRemap(unsigned int* destination, const
|
|||
MESHOPTIMIZER_API void meshopt_spatialSortTriangles(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride);
|
||||
|
||||
/**
|
||||
* Experimental: Spatial clusterizer
|
||||
* Spatial clusterizer
|
||||
* Reorders points into clusters optimized for spatial locality, and generates a new index buffer.
|
||||
* Ensures the output can be split into cluster_size chunks where each chunk has good positional locality. Only the last chunk will be smaller than cluster_size.
|
||||
*
|
||||
* destination must contain enough space for the resulting index buffer (vertex_count elements)
|
||||
* vertex_positions should have float3 position in the first 12 bytes of each vertex
|
||||
*/
|
||||
MESHOPTIMIZER_EXPERIMENTAL void meshopt_spatialClusterPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t cluster_size);
|
||||
MESHOPTIMIZER_API void meshopt_spatialClusterPoints(unsigned int* destination, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t cluster_size);
|
||||
|
||||
/**
|
||||
* Quantize a float into half-precision (as defined by IEEE-754 fp16) floating point value
|
||||
|
@ -829,6 +900,8 @@ inline size_t meshopt_simplify(T* destination, const T* indices, size_t index_co
|
|||
template <typename T>
|
||||
inline size_t meshopt_simplifyWithAttributes(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = NULL);
|
||||
template <typename T>
|
||||
inline size_t meshopt_simplifyWithUpdate(T* indices, size_t index_count, float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options = 0, float* result_error = NULL);
|
||||
template <typename T>
|
||||
inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error = NULL);
|
||||
template <typename T>
|
||||
inline size_t meshopt_simplifyPrune(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float target_error);
|
||||
|
@ -890,14 +963,21 @@ inline int meshopt_quantizeSnorm(float v, int N)
|
|||
class meshopt_Allocator
|
||||
{
|
||||
public:
|
||||
template <typename T>
|
||||
struct StorageT
|
||||
struct Storage
|
||||
{
|
||||
static void* (MESHOPTIMIZER_ALLOC_CALLCONV* allocate)(size_t);
|
||||
static void (MESHOPTIMIZER_ALLOC_CALLCONV* deallocate)(void*);
|
||||
void* (MESHOPTIMIZER_ALLOC_CALLCONV* allocate)(size_t);
|
||||
void (MESHOPTIMIZER_ALLOC_CALLCONV* deallocate)(void*);
|
||||
};
|
||||
|
||||
typedef StorageT<void> Storage;
|
||||
#ifdef MESHOPTIMIZER_ALLOC_EXPORT
|
||||
MESHOPTIMIZER_API static Storage& storage();
|
||||
#else
|
||||
static Storage& storage()
|
||||
{
|
||||
static Storage s = {::operator new, ::operator delete };
|
||||
return s;
|
||||
}
|
||||
#endif
|
||||
|
||||
meshopt_Allocator()
|
||||
: blocks()
|
||||
|
@ -908,14 +988,14 @@ public:
|
|||
~meshopt_Allocator()
|
||||
{
|
||||
for (size_t i = count; i > 0; --i)
|
||||
Storage::deallocate(blocks[i - 1]);
|
||||
storage().deallocate(blocks[i - 1]);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
T* allocate(size_t size)
|
||||
{
|
||||
assert(count < sizeof(blocks) / sizeof(blocks[0]));
|
||||
T* result = static_cast<T*>(Storage::allocate(size > size_t(-1) / sizeof(T) ? size_t(-1) : size * sizeof(T)));
|
||||
T* result = static_cast<T*>(storage().allocate(size > size_t(-1) / sizeof(T) ? size_t(-1) : size * sizeof(T)));
|
||||
blocks[count++] = result;
|
||||
return result;
|
||||
}
|
||||
|
@ -923,7 +1003,7 @@ public:
|
|||
void deallocate(void* ptr)
|
||||
{
|
||||
assert(count > 0 && blocks[count - 1] == ptr);
|
||||
Storage::deallocate(ptr);
|
||||
storage().deallocate(ptr);
|
||||
count--;
|
||||
}
|
||||
|
||||
|
@ -931,12 +1011,6 @@ private:
|
|||
void* blocks[24];
|
||||
size_t count;
|
||||
};
|
||||
|
||||
// This makes sure that allocate/deallocate are lazily generated in translation units that need them and are deduplicated by the linker
|
||||
template <typename T>
|
||||
void* (MESHOPTIMIZER_ALLOC_CALLCONV* meshopt_Allocator::StorageT<T>::allocate)(size_t) = operator new;
|
||||
template <typename T>
|
||||
void (MESHOPTIMIZER_ALLOC_CALLCONV* meshopt_Allocator::StorageT<T>::deallocate)(void*) = operator delete;
|
||||
#endif
|
||||
|
||||
/* Inline implementation for C++ templated wrappers */
|
||||
|
@ -958,7 +1032,7 @@ struct meshopt_IndexAdapter<T, false>
|
|||
{
|
||||
size_t size = count > size_t(-1) / sizeof(unsigned int) ? size_t(-1) : count * sizeof(unsigned int);
|
||||
|
||||
data = static_cast<unsigned int*>(meshopt_Allocator::Storage::allocate(size));
|
||||
data = static_cast<unsigned int*>(meshopt_Allocator::storage().allocate(size));
|
||||
|
||||
if (input)
|
||||
{
|
||||
|
@ -975,7 +1049,7 @@ struct meshopt_IndexAdapter<T, false>
|
|||
result[i] = T(data[i]);
|
||||
}
|
||||
|
||||
meshopt_Allocator::Storage::deallocate(data);
|
||||
meshopt_Allocator::storage().deallocate(data);
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1197,13 +1271,21 @@ inline size_t meshopt_simplifyWithAttributes(T* destination, const T* indices, s
|
|||
return meshopt_simplifyWithAttributes(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, vertex_attributes, vertex_attributes_stride, attribute_weights, attribute_count, vertex_lock, target_index_count, target_error, options, result_error);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline size_t meshopt_simplifyWithUpdate(T* indices, size_t index_count, float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, float* vertex_attributes, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* result_error)
|
||||
{
|
||||
meshopt_IndexAdapter<T> inout(indices, indices, index_count);
|
||||
|
||||
return meshopt_simplifyWithUpdate(inout.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, vertex_attributes, vertex_attributes_stride, attribute_weights, attribute_count, vertex_lock, target_index_count, target_error, options, result_error);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
inline size_t meshopt_simplifySloppy(T* destination, const T* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* result_error)
|
||||
{
|
||||
meshopt_IndexAdapter<T> in(NULL, indices, index_count);
|
||||
meshopt_IndexAdapter<T> out(destination, NULL, index_count);
|
||||
|
||||
return meshopt_simplifySloppy(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, target_index_count, target_error, result_error);
|
||||
return meshopt_simplifySloppy(out.data, in.data, index_count, vertex_positions, vertex_count, vertex_positions_stride, NULL, target_index_count, target_error, result_error);
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
|
|
14
thirdparty/meshoptimizer/overdrawoptimizer.cpp
vendored
14
thirdparty/meshoptimizer/overdrawoptimizer.cpp
vendored
|
@ -10,24 +10,24 @@
|
|||
namespace meshopt
|
||||
{
|
||||
|
||||
static void calculateSortData(float* sort_data, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_positions_stride, const unsigned int* clusters, size_t cluster_count)
|
||||
static void calculateSortData(float* sort_data, const unsigned int* indices, size_t index_count, const float* vertex_positions, size_t vertex_count, size_t vertex_positions_stride, const unsigned int* clusters, size_t cluster_count)
|
||||
{
|
||||
size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
|
||||
|
||||
float mesh_centroid[3] = {};
|
||||
|
||||
for (size_t i = 0; i < index_count; ++i)
|
||||
for (size_t i = 0; i < vertex_count; ++i)
|
||||
{
|
||||
const float* p = vertex_positions + vertex_stride_float * indices[i];
|
||||
const float* p = vertex_positions + vertex_stride_float * i;
|
||||
|
||||
mesh_centroid[0] += p[0];
|
||||
mesh_centroid[1] += p[1];
|
||||
mesh_centroid[2] += p[2];
|
||||
}
|
||||
|
||||
mesh_centroid[0] /= index_count;
|
||||
mesh_centroid[1] /= index_count;
|
||||
mesh_centroid[2] /= index_count;
|
||||
mesh_centroid[0] /= float(vertex_count);
|
||||
mesh_centroid[1] /= float(vertex_count);
|
||||
mesh_centroid[2] /= float(vertex_count);
|
||||
|
||||
for (size_t cluster = 0; cluster < cluster_count; ++cluster)
|
||||
{
|
||||
|
@ -306,7 +306,7 @@ void meshopt_optimizeOverdraw(unsigned int* destination, const unsigned int* ind
|
|||
|
||||
// fill sort data
|
||||
float* sort_data = allocator.allocate<float>(cluster_count);
|
||||
calculateSortData(sort_data, indices, index_count, vertex_positions, vertex_positions_stride, clusters, cluster_count);
|
||||
calculateSortData(sort_data, indices, index_count, vertex_positions, vertex_count, vertex_positions_stride, clusters, cluster_count);
|
||||
|
||||
// sort clusters using sort data
|
||||
unsigned short* sort_keys = allocator.allocate<unsigned short>(cluster_count);
|
||||
|
|
645
thirdparty/meshoptimizer/simplifier.cpp
vendored
645
thirdparty/meshoptimizer/simplifier.cpp
vendored
|
@ -27,6 +27,7 @@
|
|||
// Matthias Teschner, Bruno Heidelberger, Matthias Mueller, Danat Pomeranets, Markus Gross. Optimized Spatial Hashing for Collision Detection of Deformable Objects. 2003
|
||||
// Peter Van Sandt, Yannis Chronis, Jignesh M. Patel. Efficiently Searching In-Memory Sorted Arrays: Revenge of the Interpolation Search? 2019
|
||||
// Hugues Hoppe. New Quadric Metric for Simplifying Meshes with Appearance Attributes. 1999
|
||||
// Hugues Hoppe, Steve Marschner. Efficient Minimization of New Quadric Metric for Simplifying Meshes with Appearance Attributes. 2000
|
||||
namespace meshopt
|
||||
{
|
||||
|
||||
|
@ -316,11 +317,13 @@ const unsigned char kCanCollapse[Kind_Count][Kind_Count] = {
|
|||
// if a vertex is manifold or seam, adjoining edges are guaranteed to have an opposite edge
|
||||
// note that for seam edges, the opposite edge isn't present in the attribute-based topology
|
||||
// but is present if you consider a position-only mesh variant
|
||||
// while many complex collapses have the opposite edge, since complex vertices collapse to the
|
||||
// same wedge, keeping opposite edges separate improves the quality by considering both targets
|
||||
const unsigned char kHasOpposite[Kind_Count][Kind_Count] = {
|
||||
{1, 1, 1, 0, 1},
|
||||
{1, 1, 1, 1, 1},
|
||||
{1, 0, 1, 0, 0},
|
||||
{1, 1, 1, 0, 1},
|
||||
{0, 0, 0, 0, 0},
|
||||
{1, 0, 0, 0, 0},
|
||||
{1, 0, 1, 0, 0},
|
||||
};
|
||||
|
||||
|
@ -336,6 +339,25 @@ static bool hasEdge(const EdgeAdjacency& adjacency, unsigned int a, unsigned int
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool hasEdge(const EdgeAdjacency& adjacency, unsigned int a, unsigned int b, const unsigned int* remap, const unsigned int* wedge)
|
||||
{
|
||||
unsigned int v = a;
|
||||
|
||||
do
|
||||
{
|
||||
unsigned int count = adjacency.offsets[v + 1] - adjacency.offsets[v];
|
||||
const EdgeAdjacency::Edge* edges = adjacency.data + adjacency.offsets[v];
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
if (remap[edges[i].next] == remap[b])
|
||||
return true;
|
||||
|
||||
v = wedge[v];
|
||||
} while (v != a);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned int* loopback, size_t vertex_count, const EdgeAdjacency& adjacency, const unsigned int* remap, const unsigned int* wedge, const unsigned char* vertex_lock, const unsigned int* sparse_remap, unsigned int options)
|
||||
{
|
||||
memset(loop, -1, vertex_count * sizeof(unsigned int));
|
||||
|
@ -394,6 +416,13 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned
|
|||
{
|
||||
result[i] = Kind_Manifold;
|
||||
}
|
||||
else if (openi != ~0u && openo != ~0u && remap[openi] == remap[openo] && openi != i)
|
||||
{
|
||||
// classify half-seams as seams (the branch below would mis-classify them as borders)
|
||||
// half-seam is a single vertex that connects to both vertices of a potential seam
|
||||
// treating these as seams allows collapsing the "full" seam vertex onto them
|
||||
result[i] = Kind_Seam;
|
||||
}
|
||||
else if (openi != i && openo != i)
|
||||
{
|
||||
result[i] = Kind_Border;
|
||||
|
@ -446,15 +475,50 @@ static void classifyVertices(unsigned char* result, unsigned int* loop, unsigned
|
|||
}
|
||||
}
|
||||
|
||||
if (options & meshopt_SimplifyPermissive)
|
||||
for (size_t i = 0; i < vertex_count; ++i)
|
||||
if (result[i] == Kind_Seam || result[i] == Kind_Locked)
|
||||
{
|
||||
if (remap[i] != i)
|
||||
{
|
||||
// only process primary vertices; wedges will be updated to match the primary vertex
|
||||
result[i] = result[remap[i]];
|
||||
continue;
|
||||
}
|
||||
|
||||
bool protect = false;
|
||||
|
||||
// vertex_lock may protect any wedge, not just the primary vertex, so we switch to complex only if no wedges are protected
|
||||
unsigned int v = unsigned(i);
|
||||
do
|
||||
{
|
||||
unsigned int rv = sparse_remap ? sparse_remap[v] : v;
|
||||
protect |= vertex_lock && (vertex_lock[rv] & meshopt_SimplifyVertex_Protect) != 0;
|
||||
v = wedge[v];
|
||||
} while (v != i);
|
||||
|
||||
// protect if any adjoining edge doesn't have an opposite edge (indicating vertex is on the border)
|
||||
do
|
||||
{
|
||||
const EdgeAdjacency::Edge* edges = &adjacency.data[adjacency.offsets[v]];
|
||||
size_t count = adjacency.offsets[v + 1] - adjacency.offsets[v];
|
||||
|
||||
for (size_t j = 0; j < count; ++j)
|
||||
protect |= !hasEdge(adjacency, edges[j].next, v, remap, wedge);
|
||||
v = wedge[v];
|
||||
} while (v != i);
|
||||
|
||||
result[i] = protect ? result[i] : int(Kind_Complex);
|
||||
}
|
||||
|
||||
if (vertex_lock)
|
||||
{
|
||||
// vertex_lock may lock any wedge, not just the primary vertex, so we need to lock the primary vertex and relock any wedges
|
||||
for (size_t i = 0; i < vertex_count; ++i)
|
||||
{
|
||||
unsigned int ri = sparse_remap ? sparse_remap[i] : unsigned(i);
|
||||
assert(vertex_lock[ri] <= 1); // values other than 0/1 are reserved for future use
|
||||
|
||||
if (vertex_lock[ri])
|
||||
if (vertex_lock[ri] & meshopt_SimplifyVertex_Lock)
|
||||
result[remap[i]] = Kind_Locked;
|
||||
}
|
||||
|
||||
|
@ -479,7 +543,7 @@ struct Vector3
|
|||
float x, y, z;
|
||||
};
|
||||
|
||||
static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const unsigned int* sparse_remap = NULL)
|
||||
static float rescalePositions(Vector3* result, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const unsigned int* sparse_remap = NULL, float* out_offset = NULL)
|
||||
{
|
||||
size_t vertex_stride_float = vertex_positions_stride / sizeof(float);
|
||||
|
||||
|
@ -525,6 +589,13 @@ static float rescalePositions(Vector3* result, const float* vertex_positions_dat
|
|||
}
|
||||
}
|
||||
|
||||
if (out_offset)
|
||||
{
|
||||
out_offset[0] = minv[0];
|
||||
out_offset[1] = minv[1];
|
||||
out_offset[2] = minv[2];
|
||||
}
|
||||
|
||||
return extent;
|
||||
}
|
||||
|
||||
|
@ -546,11 +617,45 @@ static void rescaleAttributes(float* result, const float* vertex_attributes_data
|
|||
}
|
||||
}
|
||||
|
||||
static void finalizeVertices(float* vertex_positions_data, size_t vertex_positions_stride, float* vertex_attributes_data, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, size_t vertex_count, const Vector3* vertex_positions, const float* vertex_attributes, const unsigned int* sparse_remap, const unsigned int* attribute_remap, float vertex_scale, const float* vertex_offset, const unsigned char* vertex_update)
|
||||
{
|
||||
size_t vertex_positions_stride_float = vertex_positions_stride / sizeof(float);
|
||||
size_t vertex_attributes_stride_float = vertex_attributes_stride / sizeof(float);
|
||||
|
||||
for (size_t i = 0; i < vertex_count; ++i)
|
||||
{
|
||||
if (!vertex_update[i])
|
||||
continue;
|
||||
|
||||
unsigned int ri = sparse_remap ? sparse_remap[i] : unsigned(i);
|
||||
|
||||
const Vector3& p = vertex_positions[i];
|
||||
float* v = vertex_positions_data + ri * vertex_positions_stride_float;
|
||||
|
||||
v[0] = p.x * vertex_scale + vertex_offset[0];
|
||||
v[1] = p.y * vertex_scale + vertex_offset[1];
|
||||
v[2] = p.z * vertex_scale + vertex_offset[2];
|
||||
|
||||
if (attribute_count)
|
||||
{
|
||||
const float* sa = vertex_attributes + i * attribute_count;
|
||||
float* va = vertex_attributes_data + ri * vertex_attributes_stride_float;
|
||||
|
||||
for (size_t k = 0; k < attribute_count; ++k)
|
||||
{
|
||||
unsigned int rk = attribute_remap[k];
|
||||
|
||||
va[rk] = sa[k] / attribute_weights[rk];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static const size_t kMaxAttributes = 32;
|
||||
|
||||
struct Quadric
|
||||
{
|
||||
// a00*x^2 + a11*y^2 + a22*z^2 + 2*(a10*xy + a20*xz + a21*yz) + b0*x + b1*y + b2*z + c
|
||||
// a00*x^2 + a11*y^2 + a22*z^2 + 2*a10*xy + 2*a20*xz + 2*a21*yz + 2*b0*x + 2*b1*y + 2*b2*z + c
|
||||
float a00, a11, a22;
|
||||
float a10, a20, a21;
|
||||
float b0, b1, b2, c;
|
||||
|
@ -612,6 +717,14 @@ static void quadricAdd(Quadric& Q, const Quadric& R)
|
|||
Q.w += R.w;
|
||||
}
|
||||
|
||||
static void quadricAdd(QuadricGrad& G, const QuadricGrad& R)
|
||||
{
|
||||
G.gx += R.gx;
|
||||
G.gy += R.gy;
|
||||
G.gz += R.gz;
|
||||
G.gw += R.gw;
|
||||
}
|
||||
|
||||
static void quadricAdd(QuadricGrad* G, const QuadricGrad* R, size_t attribute_count)
|
||||
{
|
||||
for (size_t k = 0; k < attribute_count; ++k)
|
||||
|
@ -694,6 +807,17 @@ static void quadricFromPlane(Quadric& Q, float a, float b, float c, float d, flo
|
|||
Q.w = w;
|
||||
}
|
||||
|
||||
static void quadricFromPoint(Quadric& Q, float x, float y, float z, float w)
|
||||
{
|
||||
Q.a00 = Q.a11 = Q.a22 = w;
|
||||
Q.a10 = Q.a20 = Q.a21 = 0;
|
||||
Q.b0 = -x * w;
|
||||
Q.b1 = -y * w;
|
||||
Q.b2 = -z * w;
|
||||
Q.c = (x * x + y * y + z * z) * w;
|
||||
Q.w = w;
|
||||
}
|
||||
|
||||
static void quadricFromTriangle(Quadric& Q, const Vector3& p0, const Vector3& p1, const Vector3& p2, float weight)
|
||||
{
|
||||
Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z};
|
||||
|
@ -814,7 +938,112 @@ static void quadricFromAttributes(Quadric& Q, QuadricGrad* G, const Vector3& p0,
|
|||
}
|
||||
}
|
||||
|
||||
static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap)
|
||||
static void quadricVolumeGradient(QuadricGrad& G, const Vector3& p0, const Vector3& p1, const Vector3& p2)
|
||||
{
|
||||
Vector3 p10 = {p1.x - p0.x, p1.y - p0.y, p1.z - p0.z};
|
||||
Vector3 p20 = {p2.x - p0.x, p2.y - p0.y, p2.z - p0.z};
|
||||
|
||||
// normal = cross(p1 - p0, p2 - p0)
|
||||
Vector3 normal = {p10.y * p20.z - p10.z * p20.y, p10.z * p20.x - p10.x * p20.z, p10.x * p20.y - p10.y * p20.x};
|
||||
float area = normalize(normal) * 0.5f;
|
||||
|
||||
G.gx = normal.x * area;
|
||||
G.gy = normal.y * area;
|
||||
G.gz = normal.z * area;
|
||||
G.gw = (-p0.x * normal.x - p0.y * normal.y - p0.z * normal.z) * area;
|
||||
}
|
||||
|
||||
static bool quadricSolve(Vector3& p, const Quadric& Q, const QuadricGrad& GV)
|
||||
{
|
||||
// solve A*p = -b where A is the quadric matrix and b is the linear term
|
||||
float a00 = Q.a00, a11 = Q.a11, a22 = Q.a22;
|
||||
float a10 = Q.a10, a20 = Q.a20, a21 = Q.a21;
|
||||
float x0 = -Q.b0, x1 = -Q.b1, x2 = -Q.b2;
|
||||
|
||||
float eps = 1e-6f * Q.w;
|
||||
|
||||
// LDL decomposition: A = LDL^T
|
||||
float d0 = a00;
|
||||
float l10 = a10 / d0;
|
||||
float l20 = a20 / d0;
|
||||
|
||||
float d1 = a11 - a10 * l10;
|
||||
float dl21 = a21 - a20 * l10;
|
||||
float l21 = dl21 / d1;
|
||||
|
||||
float d2 = a22 - a20 * l20 - dl21 * l21;
|
||||
|
||||
// solve L*y = x
|
||||
float y0 = x0;
|
||||
float y1 = x1 - l10 * y0;
|
||||
float y2 = x2 - l20 * y0 - l21 * y1;
|
||||
|
||||
// solve D*z = y
|
||||
float z0 = y0 / d0;
|
||||
float z1 = y1 / d1;
|
||||
float z2 = y2 / d2;
|
||||
|
||||
// augment system with linear constraint GV using Lagrange multiplier
|
||||
float a30 = GV.gx, a31 = GV.gy, a32 = GV.gz;
|
||||
float x3 = -GV.gw;
|
||||
|
||||
float l30 = a30 / d0;
|
||||
float dl31 = a31 - a30 * l10;
|
||||
float l31 = dl31 / d1;
|
||||
float dl32 = a32 - a30 * l20 - dl31 * l21;
|
||||
float l32 = dl32 / d2;
|
||||
float d3 = 0.f - a30 * l30 - dl31 * l31 - dl32 * l32;
|
||||
|
||||
float y3 = x3 - l30 * y0 - l31 * y1 - l32 * y2;
|
||||
float z3 = fabsf(d3) > eps ? y3 / d3 : 0.f; // if d3 is zero, we can ignore the constraint
|
||||
|
||||
// substitute L^T*p = z
|
||||
float lambda = z3;
|
||||
float pz = z2 - l32 * lambda;
|
||||
float py = z1 - l21 * pz - l31 * lambda;
|
||||
float px = z0 - l10 * py - l20 * pz - l30 * lambda;
|
||||
|
||||
p.x = px;
|
||||
p.y = py;
|
||||
p.z = pz;
|
||||
|
||||
return fabsf(d0) > eps && fabsf(d1) > eps && fabsf(d2) > eps;
|
||||
}
|
||||
|
||||
static void quadricReduceAttributes(Quadric& Q, const Quadric& A, const QuadricGrad* G, size_t attribute_count)
|
||||
{
|
||||
// update vertex quadric with attribute quadric; multiply by vertex weight to minimize normalized error
|
||||
Q.a00 += A.a00 * Q.w;
|
||||
Q.a11 += A.a11 * Q.w;
|
||||
Q.a22 += A.a22 * Q.w;
|
||||
Q.a10 += A.a10 * Q.w;
|
||||
Q.a20 += A.a20 * Q.w;
|
||||
Q.a21 += A.a21 * Q.w;
|
||||
Q.b0 += A.b0 * Q.w;
|
||||
Q.b1 += A.b1 * Q.w;
|
||||
Q.b2 += A.b2 * Q.w;
|
||||
|
||||
float iaw = A.w == 0 ? 0.f : Q.w / A.w;
|
||||
|
||||
// update linear system based on attribute gradients (BB^T/a)
|
||||
for (size_t k = 0; k < attribute_count; ++k)
|
||||
{
|
||||
const QuadricGrad& g = G[k];
|
||||
|
||||
Q.a00 -= (g.gx * g.gx) * iaw;
|
||||
Q.a11 -= (g.gy * g.gy) * iaw;
|
||||
Q.a22 -= (g.gz * g.gz) * iaw;
|
||||
Q.a10 -= (g.gx * g.gy) * iaw;
|
||||
Q.a20 -= (g.gx * g.gz) * iaw;
|
||||
Q.a21 -= (g.gy * g.gz) * iaw;
|
||||
|
||||
Q.b0 -= (g.gx * g.gw) * iaw;
|
||||
Q.b1 -= (g.gy * g.gw) * iaw;
|
||||
Q.b2 -= (g.gz * g.gw) * iaw;
|
||||
}
|
||||
}
|
||||
|
||||
static void fillFaceQuadrics(Quadric* vertex_quadrics, QuadricGrad* volume_gradients, const unsigned int* indices, size_t index_count, const Vector3* vertex_positions, const unsigned int* remap)
|
||||
{
|
||||
for (size_t i = 0; i < index_count; i += 3)
|
||||
{
|
||||
|
@ -828,6 +1057,36 @@ static void fillFaceQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
|
|||
quadricAdd(vertex_quadrics[remap[i0]], Q);
|
||||
quadricAdd(vertex_quadrics[remap[i1]], Q);
|
||||
quadricAdd(vertex_quadrics[remap[i2]], Q);
|
||||
|
||||
if (volume_gradients)
|
||||
{
|
||||
QuadricGrad GV;
|
||||
quadricVolumeGradient(GV, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2]);
|
||||
|
||||
quadricAdd(volume_gradients[remap[i0]], GV);
|
||||
quadricAdd(volume_gradients[remap[i1]], GV);
|
||||
quadricAdd(volume_gradients[remap[i2]], GV);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static void fillVertexQuadrics(Quadric* vertex_quadrics, const Vector3* vertex_positions, size_t vertex_count, const unsigned int* remap, unsigned int options)
|
||||
{
|
||||
// by default, we use a very small weight to improve triangulation and numerical stability without affecting the shape or error
|
||||
float factor = (options & meshopt_SimplifyRegularize) ? 1e-1f : 1e-7f;
|
||||
|
||||
for (size_t i = 0; i < vertex_count; ++i)
|
||||
{
|
||||
if (remap[i] != i)
|
||||
continue;
|
||||
|
||||
const Vector3& p = vertex_positions[i];
|
||||
float w = vertex_quadrics[i].w * factor;
|
||||
|
||||
Quadric Q;
|
||||
quadricFromPoint(Q, p.x, p.y, p.z, w);
|
||||
|
||||
quadricAdd(vertex_quadrics[i], Q);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -857,15 +1116,11 @@ static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
|
|||
if ((k1 == Kind_Border || k1 == Kind_Seam) && loopback[i1] != i0)
|
||||
continue;
|
||||
|
||||
// seam edges should occur twice (i0->i1 and i1->i0) - skip redundant edges
|
||||
if (kHasOpposite[k0][k1] && remap[i1] > remap[i0])
|
||||
continue;
|
||||
|
||||
unsigned int i2 = indices[i + next[e + 1]];
|
||||
|
||||
// we try hard to maintain border edge geometry; seam edges can move more freely
|
||||
// due to topological restrictions on collapses, seam quadrics slightly improves collapse structure but aren't critical
|
||||
const float kEdgeWeightSeam = 1.f;
|
||||
const float kEdgeWeightSeam = 0.5f; // applied twice due to opposite edges
|
||||
const float kEdgeWeightBorder = 10.f;
|
||||
|
||||
float edgeWeight = (k0 == Kind_Border || k1 == Kind_Border) ? kEdgeWeightBorder : kEdgeWeightSeam;
|
||||
|
@ -873,6 +1128,13 @@ static void fillEdgeQuadrics(Quadric* vertex_quadrics, const unsigned int* indic
|
|||
Quadric Q;
|
||||
quadricFromTriangleEdge(Q, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], edgeWeight);
|
||||
|
||||
Quadric QT;
|
||||
quadricFromTriangle(QT, vertex_positions[i0], vertex_positions[i1], vertex_positions[i2], edgeWeight);
|
||||
|
||||
// mix edge quadric with triangle quadric to stabilize collapses in both directions; both quadrics inherit edge weight so that their error is added
|
||||
QT.w = 0;
|
||||
quadricAdd(Q, QT);
|
||||
|
||||
quadricAdd(vertex_quadrics[remap[i0]], Q);
|
||||
quadricAdd(vertex_quadrics[remap[i1]], Q);
|
||||
}
|
||||
|
@ -954,6 +1216,50 @@ static bool hasTriangleFlips(const EdgeAdjacency& adjacency, const Vector3* vert
|
|||
return false;
|
||||
}
|
||||
|
||||
static bool hasTriangleFlips(const EdgeAdjacency& adjacency, const Vector3* vertex_positions, unsigned int i0, const Vector3& v1)
|
||||
{
|
||||
const Vector3& v0 = vertex_positions[i0];
|
||||
|
||||
const EdgeAdjacency::Edge* edges = &adjacency.data[adjacency.offsets[i0]];
|
||||
size_t count = adjacency.offsets[i0 + 1] - adjacency.offsets[i0];
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
unsigned int a = edges[i].next, b = edges[i].prev;
|
||||
|
||||
if (hasTriangleFlip(vertex_positions[a], vertex_positions[b], v0, v1))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static float getNeighborhoodRadius(const EdgeAdjacency& adjacency, const Vector3* vertex_positions, unsigned int i0)
|
||||
{
|
||||
const Vector3& v0 = vertex_positions[i0];
|
||||
|
||||
const EdgeAdjacency::Edge* edges = &adjacency.data[adjacency.offsets[i0]];
|
||||
size_t count = adjacency.offsets[i0 + 1] - adjacency.offsets[i0];
|
||||
|
||||
float result = 0.f;
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
unsigned int a = edges[i].next, b = edges[i].prev;
|
||||
|
||||
const Vector3& va = vertex_positions[a];
|
||||
const Vector3& vb = vertex_positions[b];
|
||||
|
||||
float da = (va.x - v0.x) * (va.x - v0.x) + (va.y - v0.y) * (va.y - v0.y) + (va.z - v0.z) * (va.z - v0.z);
|
||||
float db = (vb.x - v0.x) * (vb.x - v0.x) + (vb.y - v0.y) * (vb.y - v0.y) + (vb.z - v0.z) * (vb.z - v0.z);
|
||||
|
||||
result = result < da ? da : result;
|
||||
result = result < db ? db : result;
|
||||
}
|
||||
|
||||
return sqrtf(result);
|
||||
}
|
||||
|
||||
static size_t boundEdgeCollapses(const EdgeAdjacency& adjacency, size_t vertex_count, size_t index_count, unsigned char* vertex_kind)
|
||||
{
|
||||
size_t dual_count = 0;
|
||||
|
@ -1008,19 +1314,11 @@ static size_t pickEdgeCollapses(Collapse* collapses, size_t collapse_capacity, c
|
|||
|
||||
// two vertices are on a border or a seam, but there's no direct edge between them
|
||||
// this indicates that they belong to two different edge loops and we should not collapse this edge
|
||||
// loop[] tracks half edges so we only need to check i0->i1
|
||||
if (k0 == k1 && (k0 == Kind_Border || k0 == Kind_Seam) && loop[i0] != i1)
|
||||
continue;
|
||||
|
||||
if (k0 == Kind_Locked || k1 == Kind_Locked)
|
||||
{
|
||||
// the same check as above, but for border/seam -> locked collapses
|
||||
// loop[] and loopback[] track half edges so we only need to check one of them
|
||||
if ((k0 == Kind_Border || k0 == Kind_Seam) && loop[i0] != i1)
|
||||
if ((k0 == Kind_Border || k0 == Kind_Seam) && k1 != Kind_Manifold && loop[i0] != i1)
|
||||
continue;
|
||||
if ((k1 == Kind_Border || k1 == Kind_Seam) && loopback[i1] != i0)
|
||||
if ((k1 == Kind_Border || k1 == Kind_Seam) && k0 != Kind_Manifold && loopback[i1] != i0)
|
||||
continue;
|
||||
}
|
||||
|
||||
// edge can be collapsed in either direction - we will pick the one with minimum error
|
||||
// note: we evaluate error later during collapse ranking, here we just tag the edge as bidirectional
|
||||
|
@ -1052,14 +1350,10 @@ static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const
|
|||
|
||||
unsigned int i0 = c.v0;
|
||||
unsigned int i1 = c.v1;
|
||||
|
||||
// most edges are bidirectional which means we need to evaluate errors for two collapses
|
||||
// to keep this code branchless we just use the same edge for unidirectional edges
|
||||
unsigned int j0 = c.bidi ? i1 : i0;
|
||||
unsigned int j1 = c.bidi ? i0 : i1;
|
||||
bool bidi = c.bidi;
|
||||
|
||||
float ei = quadricError(vertex_quadrics[remap[i0]], vertex_positions[i1]);
|
||||
float ej = c.bidi ? quadricError(vertex_quadrics[remap[j0]], vertex_positions[j1]) : FLT_MAX;
|
||||
float ej = bidi ? quadricError(vertex_quadrics[remap[i1]], vertex_positions[i0]) : FLT_MAX;
|
||||
|
||||
#if TRACE >= 3
|
||||
float di = ei, dj = ej;
|
||||
|
@ -1068,39 +1362,53 @@ static void rankEdgeCollapses(Collapse* collapses, size_t collapse_count, const
|
|||
if (attribute_count)
|
||||
{
|
||||
ei += quadricError(attribute_quadrics[i0], &attribute_gradients[i0 * attribute_count], attribute_count, vertex_positions[i1], &vertex_attributes[i1 * attribute_count]);
|
||||
ej += c.bidi ? quadricError(attribute_quadrics[j0], &attribute_gradients[j0 * attribute_count], attribute_count, vertex_positions[j1], &vertex_attributes[j1 * attribute_count]) : 0;
|
||||
ej += bidi ? quadricError(attribute_quadrics[i1], &attribute_gradients[i1 * attribute_count], attribute_count, vertex_positions[i0], &vertex_attributes[i0 * attribute_count]) : 0;
|
||||
|
||||
// note: seam edges need to aggregate attribute errors between primary and secondary edges, as attribute quadrics are separate
|
||||
// seam edges need to aggregate attribute errors between primary and secondary edges, as attribute quadrics are separate
|
||||
if (vertex_kind[i0] == Kind_Seam)
|
||||
{
|
||||
// for seam collapses we need to find the seam pair; this is a bit tricky since we need to rely on edge loops as target vertex may be locked (and thus have more than two wedges)
|
||||
unsigned int s0 = wedge[i0];
|
||||
unsigned int s1 = loop[i0] == i1 ? loopback[s0] : loop[s0];
|
||||
|
||||
assert(s0 != i0 && wedge[s0] == i0);
|
||||
assert(wedge[s0] == i0); // s0 may be equal to i0 for half-seams
|
||||
assert(s1 != ~0u && remap[s1] == remap[i1]);
|
||||
|
||||
// note: this should never happen due to the assertion above, but when disabled if we ever hit this case we'll get a memory safety issue; for now play it safe
|
||||
s1 = (s1 != ~0u) ? s1 : wedge[i1];
|
||||
|
||||
ei += quadricError(attribute_quadrics[s0], &attribute_gradients[s0 * attribute_count], attribute_count, vertex_positions[s1], &vertex_attributes[s1 * attribute_count]);
|
||||
ej += c.bidi ? quadricError(attribute_quadrics[s1], &attribute_gradients[s1 * attribute_count], attribute_count, vertex_positions[s0], &vertex_attributes[s0 * attribute_count]) : 0;
|
||||
ej += bidi ? quadricError(attribute_quadrics[s1], &attribute_gradients[s1 * attribute_count], attribute_count, vertex_positions[s0], &vertex_attributes[s0 * attribute_count]) : 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
// complex edges can have multiple wedges, so we need to aggregate errors for all wedges
|
||||
// this is different from seams (where we aggregate pairwise) because all wedges collapse onto the same target
|
||||
if (vertex_kind[i0] == Kind_Complex)
|
||||
for (unsigned int v = wedge[i0]; v != i0; v = wedge[v])
|
||||
ei += quadricError(attribute_quadrics[v], &attribute_gradients[v * attribute_count], attribute_count, vertex_positions[i1], &vertex_attributes[i1 * attribute_count]);
|
||||
|
||||
if (vertex_kind[i1] == Kind_Complex && bidi)
|
||||
for (unsigned int v = wedge[i1]; v != i1; v = wedge[v])
|
||||
ej += quadricError(attribute_quadrics[v], &attribute_gradients[v * attribute_count], attribute_count, vertex_positions[i0], &vertex_attributes[i0 * attribute_count]);
|
||||
}
|
||||
}
|
||||
|
||||
// pick edge direction with minimal error
|
||||
c.v0 = ei <= ej ? i0 : j0;
|
||||
c.v1 = ei <= ej ? i1 : j1;
|
||||
c.error = ei <= ej ? ei : ej;
|
||||
// pick edge direction with minimal error (branchless)
|
||||
bool rev = bidi & (ej < ei);
|
||||
|
||||
c.v0 = rev ? i1 : i0;
|
||||
c.v1 = rev ? i0 : i1;
|
||||
c.error = ej < ei ? ej : ei;
|
||||
|
||||
#if TRACE >= 3
|
||||
if (i0 == j0) // c.bidi has been overwritten
|
||||
printf("edge eval %d -> %d: error %f (pos %f, attr %f)\n", c.v0, c.v1,
|
||||
sqrtf(c.error), sqrtf(ei <= ej ? di : dj), sqrtf(ei <= ej ? ei - di : ej - dj));
|
||||
if (bidi)
|
||||
printf("edge eval %d -> %d: error %f (pos %f, attr %f); reverse %f (pos %f, attr %f)\n",
|
||||
rev ? i1 : i0, rev ? i0 : i1,
|
||||
sqrtf(rev ? ej : ei), sqrtf(rev ? dj : di), sqrtf(rev ? ej - dj : ei - di),
|
||||
sqrtf(rev ? ei : ej), sqrtf(rev ? di : dj), sqrtf(rev ? ei - di : ej - dj));
|
||||
else
|
||||
printf("edge eval %d -> %d: error %f (pos %f, attr %f); reverse %f (pos %f, attr %f)\n", c.v0, c.v1,
|
||||
sqrtf(ei <= ej ? ei : ej), sqrtf(ei <= ej ? di : dj), sqrtf(ei <= ej ? ei - di : ej - dj),
|
||||
sqrtf(ei <= ej ? ej : ei), sqrtf(ei <= ej ? dj : di), sqrtf(ei <= ej ? ej - dj : ei - di));
|
||||
printf("edge eval %d -> %d: error %f (pos %f, attr %f)\n", i0, i1, sqrtf(c.error), sqrtf(di), sqrtf(ei - di));
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
@ -1243,7 +1551,7 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char*
|
|||
// for seam collapses we need to move the seam pair together; this is a bit tricky since we need to rely on edge loops as target vertex may be locked (and thus have more than two wedges)
|
||||
unsigned int s0 = wedge[i0];
|
||||
unsigned int s1 = loop[i0] == i1 ? loopback[s0] : loop[s0];
|
||||
assert(s0 != i0 && wedge[s0] == i0);
|
||||
assert(wedge[s0] == i0); // s0 may be equal to i0 for half-seams
|
||||
assert(s1 != ~0u && remap[s1] == r1);
|
||||
|
||||
// additional asserts to verify that the seam pair is consistent
|
||||
|
@ -1289,7 +1597,7 @@ static size_t performEdgeCollapses(unsigned int* collapse_remap, unsigned char*
|
|||
return edge_collapses;
|
||||
}
|
||||
|
||||
static void updateQuadrics(const unsigned int* collapse_remap, size_t vertex_count, Quadric* vertex_quadrics, Quadric* attribute_quadrics, QuadricGrad* attribute_gradients, size_t attribute_count, const Vector3* vertex_positions, const unsigned int* remap, float& vertex_error)
|
||||
static void updateQuadrics(const unsigned int* collapse_remap, size_t vertex_count, Quadric* vertex_quadrics, QuadricGrad* volume_gradients, Quadric* attribute_quadrics, QuadricGrad* attribute_gradients, size_t attribute_count, const Vector3* vertex_positions, const unsigned int* remap, float& vertex_error)
|
||||
{
|
||||
for (size_t i = 0; i < vertex_count; ++i)
|
||||
{
|
||||
|
@ -1304,8 +1612,13 @@ static void updateQuadrics(const unsigned int* collapse_remap, size_t vertex_cou
|
|||
|
||||
// ensure we only update vertex_quadrics once: primary vertex must be moved if any wedge is moved
|
||||
if (i0 == r0)
|
||||
{
|
||||
quadricAdd(vertex_quadrics[r1], vertex_quadrics[r0]);
|
||||
|
||||
if (volume_gradients)
|
||||
quadricAdd(volume_gradients[r1], volume_gradients[r0]);
|
||||
}
|
||||
|
||||
if (attribute_count)
|
||||
{
|
||||
quadricAdd(attribute_quadrics[i1], attribute_quadrics[i0]);
|
||||
|
@ -1321,7 +1634,116 @@ static void updateQuadrics(const unsigned int* collapse_remap, size_t vertex_cou
|
|||
}
|
||||
}
|
||||
|
||||
static size_t remapIndexBuffer(unsigned int* indices, size_t index_count, const unsigned int* collapse_remap)
|
||||
static void solveQuadrics(Vector3* vertex_positions, float* vertex_attributes, size_t vertex_count, const Quadric* vertex_quadrics, const QuadricGrad* volume_gradients, const Quadric* attribute_quadrics, const QuadricGrad* attribute_gradients, size_t attribute_count, const unsigned int* remap, const unsigned int* wedge, const EdgeAdjacency& adjacency, const unsigned char* vertex_kind, const unsigned char* vertex_update)
|
||||
{
|
||||
#if TRACE
|
||||
size_t stats[5] = {};
|
||||
#endif
|
||||
|
||||
for (size_t i = 0; i < vertex_count; ++i)
|
||||
{
|
||||
if (!vertex_update[i])
|
||||
continue;
|
||||
|
||||
// moving externally locked vertices is prohibited
|
||||
// moving vertices on an attribute discontinuity may result in extrapolating UV outside of the chart bounds
|
||||
// moving vertices on a border requires a stronger edge quadric to preserve the border geometry
|
||||
if (vertex_kind[i] == Kind_Locked || vertex_kind[i] == Kind_Seam || vertex_kind[i] == Kind_Border)
|
||||
continue;
|
||||
|
||||
if (remap[i] != i)
|
||||
{
|
||||
vertex_positions[i] = vertex_positions[remap[i]];
|
||||
continue;
|
||||
}
|
||||
|
||||
TRACESTATS(0);
|
||||
|
||||
const Vector3& vp = vertex_positions[i];
|
||||
|
||||
Quadric Q = vertex_quadrics[i];
|
||||
QuadricGrad GV = {};
|
||||
|
||||
// add a point quadric for regularization to stabilize the solution
|
||||
Quadric R;
|
||||
quadricFromPoint(R, vp.x, vp.y, vp.z, Q.w * 1e-4f);
|
||||
quadricAdd(Q, R);
|
||||
|
||||
if (attribute_count)
|
||||
{
|
||||
// optimal point simultaneously minimizes attribute quadrics for all wedges
|
||||
unsigned int v = unsigned(i);
|
||||
do
|
||||
{
|
||||
quadricReduceAttributes(Q, attribute_quadrics[v], &attribute_gradients[v * attribute_count], attribute_count);
|
||||
v = wedge[v];
|
||||
} while (v != i);
|
||||
|
||||
// minimizing attribute quadrics results in volume loss so we incorporate volume gradient as a constraint
|
||||
if (volume_gradients)
|
||||
GV = volume_gradients[i];
|
||||
}
|
||||
|
||||
Vector3 p;
|
||||
if (!quadricSolve(p, Q, GV))
|
||||
{
|
||||
TRACESTATS(2);
|
||||
continue;
|
||||
}
|
||||
|
||||
// reject updates that move the vertex too far from its neighborhood
|
||||
// this detects and fixes most cases when the quadric is not well-defined
|
||||
float nr = getNeighborhoodRadius(adjacency, vertex_positions, unsigned(i));
|
||||
float dp = (p.x - vp.x) * (p.x - vp.x) + (p.y - vp.y) * (p.y - vp.y) + (p.z - vp.z) * (p.z - vp.z);
|
||||
|
||||
if (dp > nr * nr)
|
||||
{
|
||||
TRACESTATS(3);
|
||||
continue;
|
||||
}
|
||||
|
||||
// reject updates that would flip a neighboring triangle, as we do for edge collapse
|
||||
if (hasTriangleFlips(adjacency, vertex_positions, unsigned(i), p))
|
||||
{
|
||||
TRACESTATS(4);
|
||||
continue;
|
||||
}
|
||||
|
||||
TRACESTATS(1);
|
||||
vertex_positions[i] = p;
|
||||
}
|
||||
|
||||
#if TRACE
|
||||
printf("updated %d/%d positions; failed solve %d bounds %d flip %d\n", int(stats[1]), int(stats[0]), int(stats[2]), int(stats[3]), int(stats[4]));
|
||||
#endif
|
||||
|
||||
if (attribute_count == 0)
|
||||
return;
|
||||
|
||||
for (size_t i = 0; i < vertex_count; ++i)
|
||||
{
|
||||
if (!vertex_update[i])
|
||||
continue;
|
||||
|
||||
// updating externally locked vertices is prohibited
|
||||
if (vertex_kind[i] == Kind_Locked)
|
||||
continue;
|
||||
|
||||
const Vector3& p = vertex_positions[remap[i]];
|
||||
const Quadric& A = attribute_quadrics[i];
|
||||
|
||||
float iw = A.w == 0 ? 0.f : 1.f / A.w;
|
||||
|
||||
for (size_t k = 0; k < attribute_count; ++k)
|
||||
{
|
||||
const QuadricGrad& G = attribute_gradients[i * attribute_count + k];
|
||||
|
||||
vertex_attributes[i * attribute_count + k] = (G.gx * p.x + G.gy * p.y + G.gz * p.z + G.gw) * iw;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static size_t remapIndexBuffer(unsigned int* indices, size_t index_count, const unsigned int* collapse_remap, const unsigned int* remap)
|
||||
{
|
||||
size_t write = 0;
|
||||
|
||||
|
@ -1336,7 +1758,14 @@ static size_t remapIndexBuffer(unsigned int* indices, size_t index_count, const
|
|||
assert(collapse_remap[v1] == v1);
|
||||
assert(collapse_remap[v2] == v2);
|
||||
|
||||
if (v0 != v1 && v0 != v2 && v1 != v2)
|
||||
// collapse zero area triangles even if they are not topologically degenerate
|
||||
// this is required to cleanup manifold->seam collapses when a vertex is collapsed onto a seam pair
|
||||
// as well as complex collapses and some other cases where cross wedge collapses are performed
|
||||
unsigned int r0 = remap[v0];
|
||||
unsigned int r1 = remap[v1];
|
||||
unsigned int r2 = remap[v2];
|
||||
|
||||
if (r0 != r1 && r0 != r2 && r1 != r2)
|
||||
{
|
||||
indices[write + 0] = v0;
|
||||
indices[write + 1] = v1;
|
||||
|
@ -1494,18 +1923,24 @@ static void measureComponents(float* component_errors, size_t component_count, c
|
|||
|
||||
static size_t pruneComponents(unsigned int* indices, size_t index_count, const unsigned int* components, const float* component_errors, size_t component_count, float error_cutoff, float& nexterror)
|
||||
{
|
||||
(void)component_count;
|
||||
|
||||
size_t write = 0;
|
||||
float min_error = FLT_MAX;
|
||||
|
||||
for (size_t i = 0; i < index_count; i += 3)
|
||||
{
|
||||
unsigned int c = components[indices[i]];
|
||||
assert(c == components[indices[i + 1]] && c == components[indices[i + 2]]);
|
||||
unsigned int v0 = indices[i + 0], v1 = indices[i + 1], v2 = indices[i + 2];
|
||||
unsigned int c = components[v0];
|
||||
assert(c == components[v1] && c == components[v2]);
|
||||
|
||||
if (component_errors[c] > error_cutoff)
|
||||
{
|
||||
indices[write + 0] = indices[i + 0];
|
||||
indices[write + 1] = indices[i + 1];
|
||||
indices[write + 2] = indices[i + 2];
|
||||
min_error = min_error > component_errors[c] ? component_errors[c] : min_error;
|
||||
|
||||
indices[write + 0] = v0;
|
||||
indices[write + 1] = v1;
|
||||
indices[write + 2] = v2;
|
||||
write += 3;
|
||||
}
|
||||
}
|
||||
|
@ -1515,15 +1950,11 @@ static size_t pruneComponents(unsigned int* indices, size_t index_count, const u
|
|||
for (size_t i = 0; i < component_count; ++i)
|
||||
pruned_components += (component_errors[i] >= nexterror && component_errors[i] <= error_cutoff);
|
||||
|
||||
printf("pruned %d triangles in %d components (goal %e)\n", int((index_count - write) / 3), int(pruned_components), sqrtf(error_cutoff));
|
||||
printf("pruned %d triangles in %d components (goal %e); next %e\n", int((index_count - write) / 3), int(pruned_components), sqrtf(error_cutoff), min_error < FLT_MAX ? sqrtf(min_error) : min_error * 2);
|
||||
#endif
|
||||
|
||||
// update next error with the smallest error of the remaining components for future pruning
|
||||
nexterror = FLT_MAX;
|
||||
for (size_t i = 0; i < component_count; ++i)
|
||||
if (component_errors[i] > error_cutoff)
|
||||
nexterror = nexterror > component_errors[i] ? component_errors[i] : nexterror;
|
||||
|
||||
// update next error with the smallest error of the remaining components
|
||||
nexterror = min_error;
|
||||
return write;
|
||||
}
|
||||
|
||||
|
@ -1588,7 +2019,7 @@ struct TriangleHasher
|
|||
}
|
||||
};
|
||||
|
||||
static void computeVertexIds(unsigned int* vertex_ids, const Vector3* vertex_positions, size_t vertex_count, int grid_size)
|
||||
static void computeVertexIds(unsigned int* vertex_ids, const Vector3* vertex_positions, const unsigned char* vertex_lock, size_t vertex_count, int grid_size)
|
||||
{
|
||||
assert(grid_size >= 1 && grid_size <= 1024);
|
||||
float cell_scale = float(grid_size - 1);
|
||||
|
@ -1601,6 +2032,9 @@ static void computeVertexIds(unsigned int* vertex_ids, const Vector3* vertex_pos
|
|||
int yi = int(v.y * cell_scale + 0.5f);
|
||||
int zi = int(v.z * cell_scale + 0.5f);
|
||||
|
||||
if (vertex_lock && (vertex_lock[i] & meshopt_SimplifyVertex_Lock))
|
||||
vertex_ids[i] = (1 << 30) | unsigned(i);
|
||||
else
|
||||
vertex_ids[i] = (xi << 20) | (yi << 10) | zi;
|
||||
}
|
||||
}
|
||||
|
@ -1835,9 +2269,10 @@ static float interpolate(float y, float x0, float y0, float x1, float y1, float
|
|||
|
||||
} // namespace meshopt
|
||||
|
||||
// Note: this is only exposed for debug visualization purposes; do *not* use
|
||||
// Note: this is only exposed for development purposes; do *not* use
|
||||
enum
|
||||
{
|
||||
meshopt_SimplifyInternalSolve = 1 << 29,
|
||||
meshopt_SimplifyInternalDebug = 1 << 30
|
||||
};
|
||||
|
||||
|
@ -1850,7 +2285,7 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic
|
|||
assert(vertex_positions_stride % sizeof(float) == 0);
|
||||
assert(target_index_count <= index_count);
|
||||
assert(target_error >= 0);
|
||||
assert((options & ~(meshopt_SimplifyLockBorder | meshopt_SimplifySparse | meshopt_SimplifyErrorAbsolute | meshopt_SimplifyPrune | meshopt_SimplifyInternalDebug)) == 0);
|
||||
assert((options & ~(meshopt_SimplifyLockBorder | meshopt_SimplifySparse | meshopt_SimplifyErrorAbsolute | meshopt_SimplifyPrune | meshopt_SimplifyRegularize | meshopt_SimplifyPermissive | meshopt_SimplifyInternalSolve | meshopt_SimplifyInternalDebug)) == 0);
|
||||
assert(vertex_attributes_stride >= attribute_count * sizeof(float) && vertex_attributes_stride <= 256);
|
||||
assert(vertex_attributes_stride % sizeof(float) == 0);
|
||||
assert(attribute_count <= kMaxAttributes);
|
||||
|
@ -1902,14 +2337,14 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic
|
|||
#endif
|
||||
|
||||
Vector3* vertex_positions = allocator.allocate<Vector3>(vertex_count);
|
||||
float vertex_scale = rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride, sparse_remap);
|
||||
float vertex_offset[3] = {};
|
||||
float vertex_scale = rescalePositions(vertex_positions, vertex_positions_data, vertex_count, vertex_positions_stride, sparse_remap, vertex_offset);
|
||||
|
||||
float* vertex_attributes = NULL;
|
||||
unsigned int attribute_remap[kMaxAttributes];
|
||||
|
||||
if (attribute_count)
|
||||
{
|
||||
unsigned int attribute_remap[kMaxAttributes];
|
||||
|
||||
// remap attributes to only include ones with weight > 0 to minimize memory/compute overhead for quadrics
|
||||
size_t attributes_used = 0;
|
||||
for (size_t i = 0; i < attribute_count; ++i)
|
||||
|
@ -1926,6 +2361,7 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic
|
|||
|
||||
Quadric* attribute_quadrics = NULL;
|
||||
QuadricGrad* attribute_gradients = NULL;
|
||||
QuadricGrad* volume_gradients = NULL;
|
||||
|
||||
if (attribute_count)
|
||||
{
|
||||
|
@ -1934,9 +2370,16 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic
|
|||
|
||||
attribute_gradients = allocator.allocate<QuadricGrad>(vertex_count * attribute_count);
|
||||
memset(attribute_gradients, 0, vertex_count * attribute_count * sizeof(QuadricGrad));
|
||||
|
||||
if (options & meshopt_SimplifyInternalSolve)
|
||||
{
|
||||
volume_gradients = allocator.allocate<QuadricGrad>(vertex_count);
|
||||
memset(volume_gradients, 0, vertex_count * sizeof(QuadricGrad));
|
||||
}
|
||||
}
|
||||
|
||||
fillFaceQuadrics(vertex_quadrics, result, index_count, vertex_positions, remap);
|
||||
fillFaceQuadrics(vertex_quadrics, volume_gradients, result, index_count, vertex_positions, remap);
|
||||
fillVertexQuadrics(vertex_quadrics, vertex_positions, vertex_count, remap, options);
|
||||
fillEdgeQuadrics(vertex_quadrics, result, index_count, vertex_positions, remap, vertex_kind, loop, loopback);
|
||||
|
||||
if (attribute_count)
|
||||
|
@ -2016,23 +2459,26 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic
|
|||
if (collapses == 0)
|
||||
break;
|
||||
|
||||
updateQuadrics(collapse_remap, vertex_count, vertex_quadrics, attribute_quadrics, attribute_gradients, attribute_count, vertex_positions, remap, vertex_error);
|
||||
updateQuadrics(collapse_remap, vertex_count, vertex_quadrics, volume_gradients, attribute_quadrics, attribute_gradients, attribute_count, vertex_positions, remap, vertex_error);
|
||||
|
||||
// updateQuadrics will update vertex error if we use attributes, but if we don't then result_error and vertex_error are equivalent
|
||||
vertex_error = attribute_count == 0 ? result_error : vertex_error;
|
||||
|
||||
// note: we update loops following edge collapses, but after this we might still have stale loop data
|
||||
// this can happen when a triangle with a loop edge gets collapsed along a non-loop edge
|
||||
// that works since a loop that points to a vertex that is no longer connected is not affecting collapse logic
|
||||
remapEdgeLoops(loop, vertex_count, collapse_remap);
|
||||
remapEdgeLoops(loopback, vertex_count, collapse_remap);
|
||||
|
||||
size_t new_count = remapIndexBuffer(result, result_count, collapse_remap);
|
||||
assert(new_count < result_count);
|
||||
|
||||
result_count = new_count;
|
||||
result_count = remapIndexBuffer(result, result_count, collapse_remap, remap);
|
||||
|
||||
if ((options & meshopt_SimplifyPrune) && result_count > target_index_count && component_nexterror <= vertex_error)
|
||||
result_count = pruneComponents(result, result_count, components, component_errors, component_count, vertex_error, component_nexterror);
|
||||
}
|
||||
|
||||
// at this point, component_nexterror might be stale: component it references may have been removed through a series of edge collapses
|
||||
bool component_nextstale = true;
|
||||
|
||||
// we're done with the regular simplification but we're still short of the target; try pruning more aggressively towards error_limit
|
||||
while ((options & meshopt_SimplifyPrune) && result_count > target_index_count && component_nexterror <= error_limit)
|
||||
{
|
||||
|
@ -2049,18 +2495,42 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic
|
|||
component_maxerror = component_errors[i];
|
||||
|
||||
size_t new_count = pruneComponents(result, result_count, components, component_errors, component_count, component_cutoff, component_nexterror);
|
||||
if (new_count == result_count)
|
||||
if (new_count == result_count && !component_nextstale)
|
||||
break;
|
||||
|
||||
component_nextstale = false; // pruneComponents guarantees next error is up to date
|
||||
result_count = new_count;
|
||||
result_error = result_error < component_maxerror ? component_maxerror : result_error;
|
||||
vertex_error = vertex_error < component_maxerror ? component_maxerror : vertex_error;
|
||||
}
|
||||
|
||||
#if TRACE
|
||||
printf("result: %d triangles, error: %e; total %d passes\n", int(result_count / 3), sqrtf(result_error), int(pass_count));
|
||||
printf("result: %d triangles, error: %e (pos %.3e); total %d passes\n", int(result_count / 3), sqrtf(result_error), sqrtf(vertex_error), int(pass_count));
|
||||
#endif
|
||||
|
||||
// if solve is requested, update input buffers destructively from internal data
|
||||
if (options & meshopt_SimplifyInternalSolve)
|
||||
{
|
||||
unsigned char* vertex_update = collapse_locked; // reuse as scratch space
|
||||
memset(vertex_update, 0, vertex_count);
|
||||
|
||||
// limit quadric solve to vertices that are still used in the result
|
||||
for (size_t i = 0; i < result_count; ++i)
|
||||
{
|
||||
unsigned int v = result[i];
|
||||
|
||||
// recomputing externally locked vertices may result in floating point drift
|
||||
vertex_update[v] = vertex_kind[v] != Kind_Locked;
|
||||
}
|
||||
|
||||
// edge adjacency may be stale as we haven't updated it after last series of edge collapses
|
||||
updateEdgeAdjacency(adjacency, result, result_count, vertex_count, remap);
|
||||
|
||||
solveQuadrics(vertex_positions, vertex_attributes, vertex_count, vertex_quadrics, volume_gradients, attribute_quadrics, attribute_gradients, attribute_count, remap, wedge, adjacency, vertex_kind, vertex_update);
|
||||
|
||||
finalizeVertices(const_cast<float*>(vertex_positions_data), vertex_positions_stride, const_cast<float*>(vertex_attributes_data), vertex_attributes_stride, attribute_weights, attribute_count, vertex_count, vertex_positions, vertex_attributes, sparse_remap, attribute_remap, vertex_scale, vertex_offset, vertex_update);
|
||||
}
|
||||
|
||||
// if debug visualization data is requested, fill it instead of index data; for simplicity, this doesn't work with sparsity
|
||||
if ((options & meshopt_SimplifyInternalDebug) && !sparse_remap)
|
||||
{
|
||||
|
@ -2090,15 +2560,24 @@ size_t meshopt_simplifyEdge(unsigned int* destination, const unsigned int* indic
|
|||
|
||||
size_t meshopt_simplify(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, unsigned int options, float* out_result_error)
|
||||
{
|
||||
assert((options & meshopt_SimplifyInternalSolve) == 0); // use meshopt_simplifyWithUpdate instead
|
||||
|
||||
return meshopt_simplifyEdge(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, NULL, 0, NULL, 0, NULL, target_index_count, target_error, options, out_result_error);
|
||||
}
|
||||
|
||||
size_t meshopt_simplifyWithAttributes(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const float* vertex_attributes_data, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* out_result_error)
|
||||
{
|
||||
assert((options & meshopt_SimplifyInternalSolve) == 0); // use meshopt_simplifyWithUpdate instead
|
||||
|
||||
return meshopt_simplifyEdge(destination, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, vertex_attributes_data, vertex_attributes_stride, attribute_weights, attribute_count, vertex_lock, target_index_count, target_error, options, out_result_error);
|
||||
}
|
||||
|
||||
size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, size_t target_index_count, float target_error, float* out_result_error)
|
||||
size_t meshopt_simplifyWithUpdate(unsigned int* indices, size_t index_count, float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, float* vertex_attributes_data, size_t vertex_attributes_stride, const float* attribute_weights, size_t attribute_count, const unsigned char* vertex_lock, size_t target_index_count, float target_error, unsigned int options, float* out_result_error)
|
||||
{
|
||||
return meshopt_simplifyEdge(indices, indices, index_count, vertex_positions_data, vertex_count, vertex_positions_stride, vertex_attributes_data, vertex_attributes_stride, attribute_weights, attribute_count, vertex_lock, target_index_count, target_error, options | meshopt_SimplifyInternalSolve, out_result_error);
|
||||
}
|
||||
|
||||
size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* indices, size_t index_count, const float* vertex_positions_data, size_t vertex_count, size_t vertex_positions_stride, const unsigned char* vertex_lock, size_t target_index_count, float target_error, float* out_result_error)
|
||||
{
|
||||
using namespace meshopt;
|
||||
|
||||
|
@ -2126,15 +2605,15 @@ size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* ind
|
|||
const int kInterpolationPasses = 5;
|
||||
|
||||
// invariant: # of triangles in min_grid <= target_count
|
||||
int min_grid = int(1.f / (target_error < 1e-3f ? 1e-3f : target_error));
|
||||
int min_grid = int(1.f / (target_error < 1e-3f ? 1e-3f : (target_error < 1.f ? target_error : 1.f)));
|
||||
int max_grid = 1025;
|
||||
size_t min_triangles = 0;
|
||||
size_t max_triangles = index_count / 3;
|
||||
|
||||
// when we're error-limited, we compute the triangle count for the min. size; this accelerates convergence and provides the correct answer when we can't use a larger grid
|
||||
if (min_grid > 1)
|
||||
if (min_grid > 1 || vertex_lock)
|
||||
{
|
||||
computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid);
|
||||
computeVertexIds(vertex_ids, vertex_positions, vertex_lock, vertex_count, min_grid);
|
||||
min_triangles = countTriangles(vertex_ids, indices, index_count);
|
||||
}
|
||||
|
||||
|
@ -2150,7 +2629,7 @@ size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* ind
|
|||
int grid_size = next_grid_size;
|
||||
grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid ? max_grid - 1 : grid_size);
|
||||
|
||||
computeVertexIds(vertex_ids, vertex_positions, vertex_count, grid_size);
|
||||
computeVertexIds(vertex_ids, vertex_positions, vertex_lock, vertex_count, grid_size);
|
||||
size_t triangles = countTriangles(vertex_ids, indices, index_count);
|
||||
|
||||
#if TRACE
|
||||
|
@ -2192,7 +2671,7 @@ size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* ind
|
|||
|
||||
unsigned int* vertex_cells = allocator.allocate<unsigned int>(vertex_count);
|
||||
|
||||
computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid);
|
||||
computeVertexIds(vertex_ids, vertex_positions, vertex_lock, vertex_count, min_grid);
|
||||
size_t cell_count = fillVertexCells(table, table_size, vertex_cells, vertex_ids, vertex_count);
|
||||
|
||||
// build a quadric for each target cell
|
||||
|
@ -2213,15 +2692,15 @@ size_t meshopt_simplifySloppy(unsigned int* destination, const unsigned int* ind
|
|||
for (size_t i = 0; i < cell_count; ++i)
|
||||
result_error = result_error < cell_errors[i] ? cell_errors[i] : result_error;
|
||||
|
||||
// collapse triangles!
|
||||
// note that we need to filter out triangles that we've already output because we very frequently generate redundant triangles between cells :(
|
||||
// vertex collapses often result in duplicate triangles; we need a table to filter them out
|
||||
size_t tritable_size = hashBuckets2(min_triangles);
|
||||
unsigned int* tritable = allocator.allocate<unsigned int>(tritable_size);
|
||||
|
||||
// note: this is the first and last write to destination, which allows aliasing destination with indices
|
||||
size_t write = filterTriangles(destination, tritable, tritable_size, indices, index_count, vertex_cells, cell_remap);
|
||||
|
||||
#if TRACE
|
||||
printf("result: %d cells, %d triangles (%d unfiltered), error %e\n", int(cell_count), int(write / 3), int(min_triangles), sqrtf(result_error));
|
||||
printf("result: grid size %d, %d cells, %d triangles (%d unfiltered), error %e\n", min_grid, int(cell_count), int(write / 3), int(min_triangles), sqrtf(result_error));
|
||||
#endif
|
||||
|
||||
if (out_result_error)
|
||||
|
@ -2316,7 +2795,7 @@ size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_pos
|
|||
int grid_size = next_grid_size;
|
||||
grid_size = (grid_size <= min_grid) ? min_grid + 1 : (grid_size >= max_grid ? max_grid - 1 : grid_size);
|
||||
|
||||
computeVertexIds(vertex_ids, vertex_positions, vertex_count, grid_size);
|
||||
computeVertexIds(vertex_ids, vertex_positions, NULL, vertex_count, grid_size);
|
||||
size_t vertices = countVertexCells(table, table_size, vertex_ids, vertex_count);
|
||||
|
||||
#if TRACE
|
||||
|
@ -2353,7 +2832,7 @@ size_t meshopt_simplifyPoints(unsigned int* destination, const float* vertex_pos
|
|||
// build vertex->cell association by mapping all vertices with the same quantized position to the same cell
|
||||
unsigned int* vertex_cells = allocator.allocate<unsigned int>(vertex_count);
|
||||
|
||||
computeVertexIds(vertex_ids, vertex_positions, vertex_count, min_grid);
|
||||
computeVertexIds(vertex_ids, vertex_positions, NULL, vertex_count, min_grid);
|
||||
size_t cell_count = fillVertexCells(table, table_size, vertex_cells, vertex_ids, vertex_count);
|
||||
|
||||
// accumulate points into a reservoir for each target cell
|
||||
|
|
426
thirdparty/meshoptimizer/vertexfilter.cpp
vendored
426
thirdparty/meshoptimizer/vertexfilter.cpp
vendored
|
@ -165,6 +165,47 @@ static void decodeFilterExp(unsigned int* data, size_t count)
|
|||
data[i] = u.ui;
|
||||
}
|
||||
}
|
||||
|
||||
template <typename ST, typename T>
|
||||
static void decodeFilterColor(T* data, size_t count)
|
||||
{
|
||||
const float max = float((1 << (sizeof(T) * 8)) - 1);
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
// recover scale from alpha high bit
|
||||
int as = data[i * 4 + 3];
|
||||
as |= as >> 1;
|
||||
as |= as >> 2;
|
||||
as |= as >> 4;
|
||||
as |= as >> 8; // noop for 8-bit
|
||||
|
||||
// convert to RGB in fixed point (co/cg are sign extended)
|
||||
int y = data[i * 4 + 0], co = ST(data[i * 4 + 1]), cg = ST(data[i * 4 + 2]);
|
||||
|
||||
int r = y + co - cg;
|
||||
int g = y + cg;
|
||||
int b = y - co - cg;
|
||||
|
||||
// expand alpha by one bit to match other components
|
||||
int a = data[i * 4 + 3];
|
||||
a = ((a << 1) & as) | (a & 1);
|
||||
|
||||
// compute scaling factor
|
||||
float ss = max / float(as);
|
||||
|
||||
// rounded float->int
|
||||
int rf = int(float(r) * ss + 0.5f);
|
||||
int gf = int(float(g) * ss + 0.5f);
|
||||
int bf = int(float(b) * ss + 0.5f);
|
||||
int af = int(float(a) * ss + 0.5f);
|
||||
|
||||
data[i * 4 + 0] = T(rf);
|
||||
data[i * 4 + 1] = T(gf);
|
||||
data[i * 4 + 2] = T(bf);
|
||||
data[i * 4 + 3] = T(af);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
|
||||
|
@ -386,6 +427,105 @@ static void decodeFilterExpSimd(unsigned int* data, size_t count)
|
|||
_mm_storeu_ps(reinterpret_cast<float*>(&data[i]), r);
|
||||
}
|
||||
}
|
||||
|
||||
static void decodeFilterColorSimd8(unsigned char* data, size_t count)
|
||||
{
|
||||
for (size_t i = 0; i < count; i += 4)
|
||||
{
|
||||
__m128i c4 = _mm_loadu_si128(reinterpret_cast<__m128i*>(&data[i * 4]));
|
||||
|
||||
// unpack y/co/cg/a (co/cg are sign extended with arithmetic shifts)
|
||||
__m128i yf = _mm_and_si128(c4, _mm_set1_epi32(0xff));
|
||||
__m128i cof = _mm_srai_epi32(_mm_slli_epi32(c4, 16), 24);
|
||||
__m128i cgf = _mm_srai_epi32(_mm_slli_epi32(c4, 8), 24);
|
||||
__m128i af = _mm_srli_epi32(c4, 24);
|
||||
|
||||
// recover scale from alpha high bit
|
||||
__m128i as = af;
|
||||
as = _mm_or_si128(as, _mm_srli_epi32(as, 1));
|
||||
as = _mm_or_si128(as, _mm_srli_epi32(as, 2));
|
||||
as = _mm_or_si128(as, _mm_srli_epi32(as, 4));
|
||||
|
||||
// expand alpha by one bit to match other components
|
||||
af = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(af, 1), as), _mm_and_si128(af, _mm_set1_epi32(1)));
|
||||
|
||||
// compute scaling factor
|
||||
__m128 ss = _mm_mul_ps(_mm_set1_ps(255.f), _mm_rcp_ps(_mm_cvtepi32_ps(as)));
|
||||
|
||||
// convert to RGB in fixed point
|
||||
__m128i rf = _mm_add_epi32(yf, _mm_sub_epi32(cof, cgf));
|
||||
__m128i gf = _mm_add_epi32(yf, cgf);
|
||||
__m128i bf = _mm_sub_epi32(yf, _mm_add_epi32(cof, cgf));
|
||||
|
||||
// rounded signed float->int
|
||||
__m128i rr = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(rf), ss));
|
||||
__m128i gr = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(gf), ss));
|
||||
__m128i br = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(bf), ss));
|
||||
__m128i ar = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(af), ss));
|
||||
|
||||
// repack rgba into final value
|
||||
__m128i res = rr;
|
||||
res = _mm_or_si128(res, _mm_slli_epi32(gr, 8));
|
||||
res = _mm_or_si128(res, _mm_slli_epi32(br, 16));
|
||||
res = _mm_or_si128(res, _mm_slli_epi32(ar, 24));
|
||||
|
||||
_mm_storeu_si128(reinterpret_cast<__m128i*>(&data[i * 4]), res);
|
||||
}
|
||||
}
|
||||
|
||||
// SSE path: decodes the color filter (YCoCg-A -> RGBA) in place for 16-bit components (stride 8),
// processing 4 colors per iteration. Each input color is [y, co, cg, a'] where a' carries the
// quantization scale in its high bit; each output color is [r, g, b, a] rescaled to 0..65535.
// NOTE(review): processes colors in groups of 4 — presumably the caller (dispatchSimd) guarantees
// the buffer is sized/padded accordingly; confirm against dispatchSimd.
static void decodeFilterColorSimd16(unsigned short* data, size_t count)
{
	for (size_t i = 0; i < count; i += 4)
	{
		__m128i c4_0 = _mm_loadu_si128(reinterpret_cast<__m128i*>(&data[(i + 0) * 4]));
		__m128i c4_1 = _mm_loadu_si128(reinterpret_cast<__m128i*>(&data[(i + 2) * 4]));

		// gather both y/co 16-bit pairs in each 32-bit lane
		__m128i c4_yco = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(c4_0), _mm_castsi128_ps(c4_1), _MM_SHUFFLE(2, 0, 2, 0)));
		__m128i c4_cga = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(c4_0), _mm_castsi128_ps(c4_1), _MM_SHUFFLE(3, 1, 3, 1)));

		// unpack y/co/cg/a components (co/cg are sign extended with arithmetic shifts)
		__m128i yf = _mm_and_si128(c4_yco, _mm_set1_epi32(0xffff));
		__m128i cof = _mm_srai_epi32(c4_yco, 16);
		__m128i cgf = _mm_srai_epi32(_mm_slli_epi32(c4_cga, 16), 16);
		__m128i af = _mm_srli_epi32(c4_cga, 16);

		// recover scale from alpha high bit
		// (the or-shift cascade fills every bit below the top set bit, producing a 2^k-1 mask)
		__m128i as = af;
		as = _mm_or_si128(as, _mm_srli_epi32(as, 1));
		as = _mm_or_si128(as, _mm_srli_epi32(as, 2));
		as = _mm_or_si128(as, _mm_srli_epi32(as, 4));
		as = _mm_or_si128(as, _mm_srli_epi32(as, 8));

		// expand alpha by one bit to match other components
		af = _mm_or_si128(_mm_and_si128(_mm_slli_epi32(af, 1), as), _mm_and_si128(af, _mm_set1_epi32(1)));

		// compute scaling factor
		__m128 ss = _mm_div_ps(_mm_set1_ps(65535.f), _mm_cvtepi32_ps(as));

		// convert to RGB in fixed point (inverse of the YCoCg-R transform used by the encoder)
		__m128i rf = _mm_add_epi32(yf, _mm_sub_epi32(cof, cgf));
		__m128i gf = _mm_add_epi32(yf, cgf);
		__m128i bf = _mm_sub_epi32(yf, _mm_add_epi32(cof, cgf));

		// rounded signed float->int
		__m128i rr = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(rf), ss));
		__m128i gr = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(gf), ss));
		__m128i br = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(bf), ss));
		__m128i ar = _mm_cvtps_epi32(_mm_mul_ps(_mm_cvtepi32_ps(af), ss));

		// mix r/b and g/a to make 16-bit unpack easier
		__m128i rbr = _mm_or_si128(_mm_and_si128(rr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(br, 16));
		__m128i gar = _mm_or_si128(_mm_and_si128(gr, _mm_set1_epi32(0xffff)), _mm_slli_epi32(ar, 16));

		// pack r/g/b/a using 16-bit unpacks
		__m128i res_0 = _mm_unpacklo_epi16(rbr, gar);
		__m128i res_1 = _mm_unpackhi_epi16(rbr, gar);

		_mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 0) * 4]), res_0);
		_mm_storeu_si128(reinterpret_cast<__m128i*>(&data[(i + 2) * 4]), res_1);
	}
}
|
||||
#endif
|
||||
|
||||
#if defined(SIMD_NEON) && !defined(__aarch64__) && !defined(_M_ARM64)
|
||||
|
@ -596,6 +736,111 @@ static void decodeFilterExpSimd(unsigned int* data, size_t count)
|
|||
vst1q_f32(reinterpret_cast<float*>(&data[i]), r);
|
||||
}
|
||||
}
|
||||
|
||||
// NEON path: decodes the color filter (YCoCg-A -> RGBA) in place for 8-bit components (stride 4),
// processing 4 colors per iteration. Each input color is [y, co, cg, a'] where a' carries the
// quantization scale in its high bit; each output color is [r, g, b, a] rescaled to 0..255.
static void decodeFilterColorSimd8(unsigned char* data, size_t count)
{
	for (size_t i = 0; i < count; i += 4)
	{
		int32x4_t c4 = vld1q_s32(reinterpret_cast<int32_t*>(&data[i * 4]));

		// unpack y/co/cg/a (co/cg are sign extended with arithmetic shifts)
		int32x4_t yf = vandq_s32(c4, vdupq_n_s32(0xff));
		int32x4_t cof = vshrq_n_s32(vshlq_n_s32(c4, 16), 24);
		int32x4_t cgf = vshrq_n_s32(vshlq_n_s32(c4, 8), 24);
		int32x4_t af = vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(c4), 24));

		// recover scale from alpha high bit
		// (the or-shift cascade fills every bit below the top set bit, producing a 2^k-1 mask)
		int32x4_t as = af;
		as = vorrq_s32(as, vshrq_n_s32(as, 1));
		as = vorrq_s32(as, vshrq_n_s32(as, 2));
		as = vorrq_s32(as, vshrq_n_s32(as, 4));

		// expand alpha by one bit to match other components
		af = vorrq_s32(vandq_s32(vshlq_n_s32(af, 1), as), vandq_s32(af, vdupq_n_s32(1)));

		// compute scaling factor
		// NOTE(review): vrecpeq_f32 is an approximate reciprocal estimate, unlike the full-precision
		// division used by the 16-bit variant — presumably adequate for 8-bit output; confirm upstream.
		float32x4_t ss = vmulq_f32(vdupq_n_f32(255.f), vrecpeq_f32(vcvtq_f32_s32(as)));

		// convert to RGB in fixed point (inverse of the YCoCg-R transform used by the encoder)
		int32x4_t rf = vaddq_s32(yf, vsubq_s32(cof, cgf));
		int32x4_t gf = vaddq_s32(yf, cgf);
		int32x4_t bf = vsubq_s32(yf, vaddq_s32(cof, cgf));

		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
		// note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
		const float32x4_t fsnap = vdupq_n_f32(3 << 22);

		int32x4_t rr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(vcvtq_f32_s32(rf), ss), fsnap));
		int32x4_t gr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(vcvtq_f32_s32(gf), ss), fsnap));
		int32x4_t br = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(vcvtq_f32_s32(bf), ss), fsnap));
		int32x4_t ar = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(vcvtq_f32_s32(af), ss), fsnap));

		// repack rgba into final value
		int32x4_t res = vandq_s32(rr, vdupq_n_s32(0xff));
		res = vorrq_s32(res, vshlq_n_s32(vandq_s32(gr, vdupq_n_s32(0xff)), 8));
		res = vorrq_s32(res, vshlq_n_s32(vandq_s32(br, vdupq_n_s32(0xff)), 16));
		res = vorrq_s32(res, vshlq_n_s32(ar, 24));

		vst1q_s32(reinterpret_cast<int32_t*>(&data[i * 4]), res);
	}
}
|
||||
|
||||
// NEON path: decodes the color filter (YCoCg-A -> RGBA) in place for 16-bit components (stride 8),
// processing 4 colors per iteration. Each input color is [y, co, cg, a'] where a' carries the
// quantization scale in its high bit; each output color is [r, g, b, a] rescaled to 0..65535.
static void decodeFilterColorSimd16(unsigned short* data, size_t count)
{
	for (size_t i = 0; i < count; i += 4)
	{
		int32x4_t c4_0 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4]));
		int32x4_t c4_1 = vld1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4]));

		// gather both y/co 16-bit pairs in each 32-bit lane
		int32x4_t c4_yco = vuzpq_s32(c4_0, c4_1).val[0];
		int32x4_t c4_cga = vuzpq_s32(c4_0, c4_1).val[1];

		// unpack y/co/cg/a components (co/cg are sign extended with arithmetic shifts)
		int32x4_t yf = vandq_s32(c4_yco, vdupq_n_s32(0xffff));
		int32x4_t cof = vshrq_n_s32(c4_yco, 16);
		int32x4_t cgf = vshrq_n_s32(vshlq_n_s32(c4_cga, 16), 16);
		int32x4_t af = vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(c4_cga), 16));

		// recover scale from alpha high bit
		// (the or-shift cascade fills every bit below the top set bit, producing a 2^k-1 mask)
		int32x4_t as = af;
		as = vorrq_s32(as, vshrq_n_s32(as, 1));
		as = vorrq_s32(as, vshrq_n_s32(as, 2));
		as = vorrq_s32(as, vshrq_n_s32(as, 4));
		as = vorrq_s32(as, vshrq_n_s32(as, 8));

		// expand alpha by one bit to match other components
		af = vorrq_s32(vandq_s32(vshlq_n_s32(af, 1), as), vandq_s32(af, vdupq_n_s32(1)));

		// compute scaling factor
		float32x4_t ss = vdivq_f32(vdupq_n_f32(65535.f), vcvtq_f32_s32(as));

		// convert to RGB in fixed point (inverse of the YCoCg-R transform used by the encoder)
		int32x4_t rf = vaddq_s32(yf, vsubq_s32(cof, cgf));
		int32x4_t gf = vaddq_s32(yf, cgf);
		int32x4_t bf = vsubq_s32(yf, vaddq_s32(cof, cgf));

		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
		// note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
		const float32x4_t fsnap = vdupq_n_f32(3 << 22);

		int32x4_t rr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(vcvtq_f32_s32(rf), ss), fsnap));
		int32x4_t gr = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(vcvtq_f32_s32(gf), ss), fsnap));
		int32x4_t br = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(vcvtq_f32_s32(bf), ss), fsnap));
		int32x4_t ar = vreinterpretq_s32_f32(vaddq_f32(vmulq_f32(vcvtq_f32_s32(af), ss), fsnap));

		// mix r/b and g/a to make 16-bit unpack easier
		int32x4_t rbr = vorrq_s32(vandq_s32(rr, vdupq_n_s32(0xffff)), vshlq_n_s32(br, 16));
		int32x4_t gar = vorrq_s32(vandq_s32(gr, vdupq_n_s32(0xffff)), vshlq_n_s32(ar, 16));

		// pack r/g/b/a using 16-bit unpacks
		int32x4_t res_0 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(rbr), vreinterpretq_s16_s32(gar)).val[0]);
		int32x4_t res_1 = vreinterpretq_s32_s16(vzipq_s16(vreinterpretq_s16_s32(rbr), vreinterpretq_s16_s32(gar)).val[1]);

		vst1q_s32(reinterpret_cast<int32_t*>(&data[(i + 0) * 4]), res_0);
		vst1q_s32(reinterpret_cast<int32_t*>(&data[(i + 2) * 4]), res_1);
	}
}
|
||||
#endif
|
||||
|
||||
#ifdef SIMD_WASM
|
||||
|
@ -651,7 +896,8 @@ static void decodeFilterOctSimd8(signed char* data, size_t count)
|
|||
static void decodeFilterOctSimd16(short* data, size_t count)
|
||||
{
|
||||
const v128_t sign = wasm_f32x4_splat(-0.f);
|
||||
const v128_t zmask = wasm_i32x4_splat(0x7fff);
|
||||
// TODO: volatile here works around LLVM mis-optimizing code; https://github.com/llvm/llvm-project/issues/149457
|
||||
volatile v128_t zmask = wasm_i32x4_splat(0x7fff);
|
||||
|
||||
for (size_t i = 0; i < count; i += 4)
|
||||
{
|
||||
|
@ -763,8 +1009,7 @@ static void decodeFilterQuatSimd(short* data, size_t count)
|
|||
v128_t res_1 = wasmx_unpackhi_v16x8(wyr, xzr);
|
||||
|
||||
// compute component index shifted left by 4 (and moved into i32x4 slot)
|
||||
// TODO: volatile here works around LLVM mis-optimizing code; https://github.com/emscripten-core/emscripten/issues/11449
|
||||
volatile v128_t cm = wasm_i32x4_shl(cf, 4);
|
||||
v128_t cm = wasm_i32x4_shl(cf, 4);
|
||||
|
||||
// rotate and store
|
||||
uint64_t* out = reinterpret_cast<uint64_t*>(&data[i * 4]);
|
||||
|
@ -795,6 +1040,117 @@ static void decodeFilterExpSimd(unsigned int* data, size_t count)
|
|||
wasm_v128_store(&data[i], r);
|
||||
}
|
||||
}
|
||||
|
||||
// WASM SIMD path: decodes the color filter (YCoCg-A -> RGBA) in place for 8-bit components
// (stride 4), processing 4 colors per iteration. Each input color is [y, co, cg, a'] where a'
// carries the quantization scale in its high bit; each output color is [r, g, b, a] in 0..255.
static void decodeFilterColorSimd8(unsigned char* data, size_t count)
{
	// TODO: volatile here works around LLVM mis-optimizing code; https://github.com/llvm/llvm-project/issues/149457
	volatile v128_t zero = wasm_i32x4_splat(0);

	for (size_t i = 0; i < count; i += 4)
	{
		v128_t c4 = wasm_v128_load(&data[i * 4]);

		// unpack y/co/cg/a (co/cg are sign extended with arithmetic shifts)
		v128_t yf = wasm_v128_and(c4, wasm_i32x4_splat(0xff));
		v128_t cof = wasm_i32x4_shr(wasm_i32x4_shl(c4, 16), 24);
		v128_t cgf = wasm_i32x4_shr(wasm_i32x4_shl(c4, 8), 24);
		// or-with-zero is part of the volatile workaround above; logically af is just the high byte
		v128_t af = wasm_v128_or(zero, wasm_u32x4_shr(c4, 24));

		// recover scale from alpha high bit
		// (the or-shift cascade fills every bit below the top set bit, producing a 2^k-1 mask)
		v128_t as = af;
		as = wasm_v128_or(as, wasm_i32x4_shr(as, 1));
		as = wasm_v128_or(as, wasm_i32x4_shr(as, 2));
		as = wasm_v128_or(as, wasm_i32x4_shr(as, 4));

		// expand alpha by one bit to match other components
		af = wasm_v128_or(wasm_v128_and(wasm_i32x4_shl(af, 1), as), wasm_v128_and(af, wasm_i32x4_splat(1)));

		// compute scaling factor
		v128_t ss = wasm_f32x4_div(wasm_f32x4_splat(255.f), wasm_f32x4_convert_i32x4(as));

		// convert to RGB in fixed point (inverse of the YCoCg-R transform used by the encoder)
		v128_t rf = wasm_i32x4_add(yf, wasm_i32x4_sub(cof, cgf));
		v128_t gf = wasm_i32x4_add(yf, cgf);
		v128_t bf = wasm_i32x4_sub(yf, wasm_i32x4_add(cof, cgf));

		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
		// note: the result is offset by 0x4B40_0000, but we only need the low 8 bits so we can omit the subtraction
		const v128_t fsnap = wasm_f32x4_splat(3 << 22);

		v128_t rr = wasm_f32x4_add(wasm_f32x4_mul(wasm_f32x4_convert_i32x4(rf), ss), fsnap);
		v128_t gr = wasm_f32x4_add(wasm_f32x4_mul(wasm_f32x4_convert_i32x4(gf), ss), fsnap);
		v128_t br = wasm_f32x4_add(wasm_f32x4_mul(wasm_f32x4_convert_i32x4(bf), ss), fsnap);
		v128_t ar = wasm_f32x4_add(wasm_f32x4_mul(wasm_f32x4_convert_i32x4(af), ss), fsnap);

		// repack rgba into final value
		v128_t res = wasm_v128_and(rr, wasm_i32x4_splat(0xff));
		res = wasm_v128_or(res, wasm_i32x4_shl(wasm_v128_and(gr, wasm_i32x4_splat(0xff)), 8));
		res = wasm_v128_or(res, wasm_i32x4_shl(wasm_v128_and(br, wasm_i32x4_splat(0xff)), 16));
		res = wasm_v128_or(res, wasm_i32x4_shl(ar, 24));

		wasm_v128_store(&data[i * 4], res);
	}
}
|
||||
|
||||
// WASM SIMD path: decodes the color filter (YCoCg-A -> RGBA) in place for 16-bit components
// (stride 8), processing 4 colors per iteration. Each input color is [y, co, cg, a'] where a'
// carries the quantization scale in its high bit; each output color is [r, g, b, a] in 0..65535.
static void decodeFilterColorSimd16(unsigned short* data, size_t count)
{
	// TODO: volatile here works around LLVM mis-optimizing code; https://github.com/llvm/llvm-project/issues/149457
	volatile v128_t zero = wasm_i32x4_splat(0);

	for (size_t i = 0; i < count; i += 4)
	{
		v128_t c4_0 = wasm_v128_load(&data[(i + 0) * 4]);
		v128_t c4_1 = wasm_v128_load(&data[(i + 2) * 4]);

		// gather both y/co 16-bit pairs in each 32-bit lane
		v128_t c4_yco = wasmx_unziplo_v32x4(c4_0, c4_1);
		v128_t c4_cga = wasmx_unziphi_v32x4(c4_0, c4_1);

		// unpack y/co/cg/a components (co/cg are sign extended with arithmetic shifts)
		v128_t yf = wasm_v128_and(c4_yco, wasm_i32x4_splat(0xffff));
		v128_t cof = wasm_i32x4_shr(c4_yco, 16);
		v128_t cgf = wasm_i32x4_shr(wasm_i32x4_shl(c4_cga, 16), 16);
		// or-with-zero is part of the volatile workaround above; logically af is just the high half
		v128_t af = wasm_v128_or(zero, wasm_u32x4_shr(c4_cga, 16));

		// recover scale from alpha high bit
		// (the or-shift cascade fills every bit below the top set bit, producing a 2^k-1 mask)
		v128_t as = af;
		as = wasm_v128_or(as, wasm_i32x4_shr(as, 1));
		as = wasm_v128_or(as, wasm_i32x4_shr(as, 2));
		as = wasm_v128_or(as, wasm_i32x4_shr(as, 4));
		as = wasm_v128_or(as, wasm_i32x4_shr(as, 8));

		// expand alpha by one bit to match other components
		af = wasm_v128_or(wasm_v128_and(wasm_i32x4_shl(af, 1), as), wasm_v128_and(af, wasm_i32x4_splat(1)));

		// compute scaling factor
		v128_t ss = wasm_f32x4_div(wasm_f32x4_splat(65535.f), wasm_f32x4_convert_i32x4(as));

		// convert to RGB in fixed point (inverse of the YCoCg-R transform used by the encoder)
		v128_t rf = wasm_i32x4_add(yf, wasm_i32x4_sub(cof, cgf));
		v128_t gf = wasm_i32x4_add(yf, cgf);
		v128_t bf = wasm_i32x4_sub(yf, wasm_i32x4_add(cof, cgf));

		// fast rounded signed float->int: addition triggers renormalization after which mantissa stores the integer value
		// note: the result is offset by 0x4B40_0000, but we only need the low 16 bits so we can omit the subtraction
		const v128_t fsnap = wasm_f32x4_splat(3 << 22);

		v128_t rr = wasm_f32x4_add(wasm_f32x4_mul(wasm_f32x4_convert_i32x4(rf), ss), fsnap);
		v128_t gr = wasm_f32x4_add(wasm_f32x4_mul(wasm_f32x4_convert_i32x4(gf), ss), fsnap);
		v128_t br = wasm_f32x4_add(wasm_f32x4_mul(wasm_f32x4_convert_i32x4(bf), ss), fsnap);
		v128_t ar = wasm_f32x4_add(wasm_f32x4_mul(wasm_f32x4_convert_i32x4(af), ss), fsnap);

		// mix r/b and g/a to make 16-bit unpack easier
		v128_t rbr = wasm_v128_or(wasm_v128_and(rr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(br, 16));
		v128_t gar = wasm_v128_or(wasm_v128_and(gr, wasm_i32x4_splat(0xffff)), wasm_i32x4_shl(ar, 16));

		// pack r/g/b/a using 16-bit unpacks
		v128_t res_0 = wasmx_unpacklo_v16x8(rbr, gar);
		v128_t res_1 = wasmx_unpackhi_v16x8(rbr, gar);

		wasm_v128_store(&data[(i + 0) * 4], res_0);
		wasm_v128_store(&data[(i + 2) * 4], res_1);
	}
}
|
||||
#endif
|
||||
|
||||
// optimized variant of frexp
|
||||
|
@ -872,6 +1228,25 @@ void meshopt_decodeFilterExp(void* buffer, size_t count, size_t stride)
|
|||
#endif
|
||||
}
|
||||
|
||||
// Decodes the color filter (YCoCg-A -> RGBA) in place for count colors stored in buffer.
// stride selects the component width: 4 = 8-bit components, 8 = 16-bit components.
// Dispatches to a SIMD implementation when one is compiled in, otherwise to the scalar template.
void meshopt_decodeFilterColor(void* buffer, size_t count, size_t stride)
{
	using namespace meshopt;

	assert(stride == 4 || stride == 8);

#if defined(SIMD_SSE) || defined(SIMD_NEON) || defined(SIMD_WASM)
	// NOTE(review): the trailing 4 presumably tells dispatchSimd the SIMD batch width
	// (the Simd kernels consume 4 colors per iteration) — confirm against dispatchSimd.
	if (stride == 4)
		dispatchSimd(decodeFilterColorSimd8, static_cast<unsigned char*>(buffer), count, 4);
	else
		dispatchSimd(decodeFilterColorSimd16, static_cast<unsigned short*>(buffer), count, 4);
#else
	if (stride == 4)
		decodeFilterColor<signed char>(static_cast<unsigned char*>(buffer), count);
	else
		decodeFilterColor<short>(static_cast<unsigned short*>(buffer), count);
#endif
}
|
||||
|
||||
void meshopt_encodeFilterOct(void* destination, size_t count, size_t stride, int bits, const float* data)
|
||||
{
|
||||
assert(stride == 4 || stride == 8);
|
||||
|
@ -1042,6 +1417,51 @@ void meshopt_encodeFilterExp(void* destination_, size_t count, size_t stride, in
|
|||
}
|
||||
}
|
||||
|
||||
void meshopt_encodeFilterColor(void* destination, size_t count, size_t stride, int bits, const float* data)
|
||||
{
|
||||
assert(stride == 4 || stride == 8);
|
||||
assert(bits >= 2 && bits <= 16);
|
||||
|
||||
unsigned char* d8 = static_cast<unsigned char*>(destination);
|
||||
unsigned short* d16 = static_cast<unsigned short*>(destination);
|
||||
|
||||
for (size_t i = 0; i < count; ++i)
|
||||
{
|
||||
const float* c = &data[i * 4];
|
||||
|
||||
int fr = meshopt_quantizeUnorm(c[0], bits);
|
||||
int fg = meshopt_quantizeUnorm(c[1], bits);
|
||||
int fb = meshopt_quantizeUnorm(c[2], bits);
|
||||
|
||||
// YCoCg-R encoding with truncated Co/Cg ensures that decoding can be done using integers
|
||||
int fco = (fr - fb) / 2;
|
||||
int tmp = fb + fco;
|
||||
int fcg = (fg - tmp) / 2;
|
||||
int fy = tmp + fcg;
|
||||
|
||||
// validate that R/G/B can be reconstructed with K bit integers
|
||||
assert(unsigned((fy + fco - fcg) | (fy + fcg) | (fy - fco - fcg)) < (1u << bits));
|
||||
|
||||
// alpha: K-1-bit encoding with high bit set to 1
|
||||
int fa = meshopt_quantizeUnorm(c[3], bits - 1) | (1 << (bits - 1));
|
||||
|
||||
if (stride == 4)
|
||||
{
|
||||
d8[i * 4 + 0] = (unsigned char)(fy);
|
||||
d8[i * 4 + 1] = (unsigned char)(fco);
|
||||
d8[i * 4 + 2] = (unsigned char)(fcg);
|
||||
d8[i * 4 + 3] = (unsigned char)(fa);
|
||||
}
|
||||
else
|
||||
{
|
||||
d16[i * 4 + 0] = (unsigned short)(fy);
|
||||
d16[i * 4 + 1] = (unsigned short)(fco);
|
||||
d16[i * 4 + 2] = (unsigned short)(fcg);
|
||||
d16[i * 4 + 3] = (unsigned short)(fa);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#undef SIMD_SSE
|
||||
#undef SIMD_NEON
|
||||
#undef SIMD_WASM
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue