mirror of
				https://github.com/godotengine/godot.git
				synced 2025-10-31 13:41:03 +00:00 
			
		
		
		
	
		
			
	
	
		
			1428 lines
		
	
	
	
		
			43 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
		
		
			
		
	
	
			1428 lines
		
	
	
	
		
			43 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
|   | // SPDX-License-Identifier: Apache-2.0
 | ||
|  | // ----------------------------------------------------------------------------
 | ||
|  | // Copyright 2011-2023 Arm Limited
 | ||
|  | //
 | ||
|  | // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 | ||
|  | // use this file except in compliance with the License. You may obtain a copy
 | ||
|  | // of the License at:
 | ||
|  | //
 | ||
|  | //     http://www.apache.org/licenses/LICENSE-2.0
 | ||
|  | //
 | ||
|  | // Unless required by applicable law or agreed to in writing, software
 | ||
|  | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 | ||
|  | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 | ||
|  | // License for the specific language governing permissions and limitations
 | ||
|  | // under the License.
 | ||
|  | // ----------------------------------------------------------------------------
 | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief Functions for the library entrypoint. | ||
|  |  */ | ||
|  | 
 | ||
|  | #include <array>
 | ||
|  | #include <cstring>
 | ||
|  | #include <new>
 | ||
|  | 
 | ||
|  | #include "astcenc.h"
 | ||
|  | #include "astcenc_internal_entry.h"
 | ||
|  | #include "astcenc_diagnostic_trace.h"
 | ||
|  | 
 | ||
/**
 * @brief Record of the quality tuning parameter values for a single preset.
 *
 * See the @c astcenc_config structure for detailed parameter documentation.
 *
 * Note that the mse_overshoot entries are scaling factors relative to the base MSE to hit db_limit.
 * A 20% overshoot is harder to hit for a higher base db_limit, so we may actually use lower ratios
 * for the more thorough search presets because the underlying db_limit is so much higher.
 *
 * The preset tables in this file fill this structure using positional aggregate initialization,
 * so the declaration order of the fields must not be changed.
 */
struct astcenc_preset_config
{
	/** @brief The quality level that this preset is anchored at. */
	float quality;

	/** @brief Preset value for the config field of the same name. */
	unsigned int tune_partition_count_limit;

	/** @brief Preset value for the config field of the same name. */
	unsigned int tune_2partition_index_limit;

	/** @brief Preset value for the config field of the same name. */
	unsigned int tune_3partition_index_limit;

	/** @brief Preset value for the config field of the same name. */
	unsigned int tune_4partition_index_limit;

	/** @brief Preset value for the config field of the same name. */
	unsigned int tune_block_mode_limit;

	/** @brief Preset value for the config field of the same name. */
	unsigned int tune_refinement_limit;

	/** @brief Preset value for the config field of the same name. */
	unsigned int tune_candidate_limit;

	/** @brief Preset value for the config field of the same name. */
	unsigned int tune_2partitioning_candidate_limit;

	/** @brief Preset value for the config field of the same name. */
	unsigned int tune_3partitioning_candidate_limit;

	/** @brief Preset value for the config field of the same name. */
	unsigned int tune_4partitioning_candidate_limit;

	/** @brief First dB limit base; the config init code subtracts 35 * log10(texels) from it. */
	float tune_db_limit_a_base;

	/** @brief Second dB limit base; the config init code subtracts 19 * log10(texels) from it. */
	float tune_db_limit_b_base;

	/** @brief Preset value for the config field of the same name. */
	float tune_mse_overshoot;

	/** @brief Preset value for the config field of the same name. */
	float tune_2_partition_early_out_limit_factor;

	/** @brief Preset value for the config field of the same name. */
	float tune_3_partition_early_out_limit_factor;

	/** @brief Preset value for the config field of the same name. */
	float tune_2_plane_early_out_limit_correlation;
};
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief The static presets for high bandwidth encodings (x < 25 texels per block). | ||
|  |  */ | ||
|  | static const std::array<astcenc_preset_config, 6> preset_configs_high {{ | ||
|  | 	{ | ||
|  | 		ASTCENC_PRE_FASTEST, | ||
|  | 		2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f | ||
|  | 	}, { | ||
|  | 		ASTCENC_PRE_FAST, | ||
|  | 		3, 18, 10, 8, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.90f | ||
|  | 	}, { | ||
|  | 		ASTCENC_PRE_MEDIUM, | ||
|  | 		4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 2.5f, 1.1f, 1.05f, 0.95f | ||
|  | 	}, { | ||
|  | 		ASTCENC_PRE_THOROUGH, | ||
|  | 		4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.35f, 1.15f, 0.97f | ||
|  | 	}, { | ||
|  | 		ASTCENC_PRE_VERYTHOROUGH, | ||
|  | 		4, 256, 128, 64, 98, 4, 6, 20, 14, 8, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f | ||
|  | 	}, { | ||
|  | 		ASTCENC_PRE_EXHAUSTIVE, | ||
|  | 		4, 512, 512, 512, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f | ||
|  | 	} | ||
|  | }}; | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief The static presets for medium bandwidth encodings (25 <= x < 64 texels per block). | ||
|  |  */ | ||
|  | static const std::array<astcenc_preset_config, 6> preset_configs_mid {{ | ||
|  | 	{ | ||
|  | 		ASTCENC_PRE_FASTEST, | ||
|  | 		2, 10, 6, 4, 43, 2, 2, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.80f | ||
|  | 	}, { | ||
|  | 		ASTCENC_PRE_FAST, | ||
|  | 		3, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.2f, 63.2f, 3.5f, 1.0f, 1.0f, 0.85f | ||
|  | 	}, { | ||
|  | 		ASTCENC_PRE_MEDIUM, | ||
|  | 		4, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.0f, 1.1f, 1.05f, 0.90f | ||
|  | 	}, { | ||
|  | 		ASTCENC_PRE_THOROUGH, | ||
|  | 		4, 82, 60, 30, 94, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.4f, 1.2f, 0.95f | ||
|  | 	}, { | ||
|  | 		ASTCENC_PRE_VERYTHOROUGH, | ||
|  | 		4, 256, 128, 64, 98, 4, 6, 12, 8, 3, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f | ||
|  | 	}, { | ||
|  | 		ASTCENC_PRE_EXHAUSTIVE, | ||
|  | 		4, 256, 256, 256, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f | ||
|  | 	} | ||
|  | }}; | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief The static presets for low bandwidth encodings (64 <= x texels per block). | ||
|  |  */ | ||
|  | static const std::array<astcenc_preset_config, 6> preset_configs_low {{ | ||
|  | 	{ | ||
|  | 		ASTCENC_PRE_FASTEST, | ||
|  | 		2, 10, 6, 4, 40, 2, 2, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.80f | ||
|  | 	}, { | ||
|  | 		ASTCENC_PRE_FAST, | ||
|  | 		2, 18, 12, 10, 55, 3, 3, 2, 2, 2, 85.0f, 63.0f, 3.5f, 1.0f, 1.0f, 0.85f | ||
|  | 	}, { | ||
|  | 		ASTCENC_PRE_MEDIUM, | ||
|  | 		3, 34, 28, 16, 77, 3, 3, 2, 2, 2, 95.0f, 70.0f, 3.5f, 1.1f, 1.05f, 0.90f | ||
|  | 	}, { | ||
|  | 		ASTCENC_PRE_THOROUGH, | ||
|  | 		4, 82, 60, 30, 93, 4, 4, 3, 2, 2, 105.0f, 77.0f, 10.0f, 1.3f, 1.2f, 0.97f | ||
|  | 	}, { | ||
|  | 		ASTCENC_PRE_VERYTHOROUGH, | ||
|  | 		4, 256, 128, 64, 98, 4, 6, 9, 5, 2, 200.0f, 200.0f, 10.0f, 1.6f, 1.4f, 0.98f | ||
|  | 	}, { | ||
|  | 		ASTCENC_PRE_EXHAUSTIVE, | ||
|  | 		4, 256, 256, 256, 100, 4, 8, 32, 32, 32, 200.0f, 200.0f, 10.0f, 2.0f, 2.0f, 0.99f | ||
|  | 	} | ||
|  | }}; | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief Validate CPU floating point meets assumptions made in the codec. | ||
|  |  * | ||
|  |  * The codec is written with the assumption that a float threaded through the @c if32 union will be | ||
|  |  * stored and reloaded as a 32-bit IEEE-754 float with round-to-nearest rounding. This is always the | ||
|  |  * case in an IEEE-754 compliant system, however not every system or compilation mode is actually | ||
|  |  * IEEE-754 compliant. This normally fails if the code is compiled with fast math enabled. | ||
|  |  * | ||
|  |  * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. | ||
|  |  */ | ||
|  | static astcenc_error validate_cpu_float() | ||
|  | { | ||
|  | 	if32 p; | ||
|  | 	volatile float xprec_testval = 2.51f; | ||
|  | 	p.f = xprec_testval + 12582912.0f; | ||
|  | 	float q = p.f - 12582912.0f; | ||
|  | 
 | ||
|  | 	if (q != 3.0f) | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_BAD_CPU_FLOAT; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	return ASTCENC_SUCCESS; | ||
|  | } | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief Validate CPU ISA support meets the requirements of this build of the library. | ||
|  |  * | ||
|  |  * Each library build is statically compiled for a particular set of CPU ISA features, such as the | ||
|  |  * SIMD support or other ISA extensions such as POPCNT. This function checks that the host CPU | ||
|  |  * actually supports everything this build needs. | ||
|  |  * | ||
|  |  * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. | ||
|  |  */ | ||
|  | static astcenc_error validate_cpu_isa() | ||
|  | { | ||
|  | 	#if ASTCENC_SSE >= 41
 | ||
|  | 		if (!cpu_supports_sse41()) | ||
|  | 		{ | ||
|  | 			return ASTCENC_ERR_BAD_CPU_ISA; | ||
|  | 		} | ||
|  | 	#endif
 | ||
|  | 
 | ||
|  | 	#if ASTCENC_POPCNT >= 1
 | ||
|  | 		if (!cpu_supports_popcnt()) | ||
|  | 		{ | ||
|  | 			return ASTCENC_ERR_BAD_CPU_ISA; | ||
|  | 		} | ||
|  | 	#endif
 | ||
|  | 
 | ||
|  | 	#if ASTCENC_F16C >= 1
 | ||
|  | 		if (!cpu_supports_f16c()) | ||
|  | 		{ | ||
|  | 			return ASTCENC_ERR_BAD_CPU_ISA; | ||
|  | 		} | ||
|  | 	#endif
 | ||
|  | 
 | ||
|  | 	#if ASTCENC_AVX >= 2
 | ||
|  | 		if (!cpu_supports_avx2()) | ||
|  | 		{ | ||
|  | 			return ASTCENC_ERR_BAD_CPU_ISA; | ||
|  | 		} | ||
|  | 	#endif
 | ||
|  | 
 | ||
|  | 	return ASTCENC_SUCCESS; | ||
|  | } | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief Validate config profile. | ||
|  |  * | ||
|  |  * @param profile   The profile to check. | ||
|  |  * | ||
|  |  * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. | ||
|  |  */ | ||
|  | static astcenc_error validate_profile( | ||
|  | 	astcenc_profile profile | ||
|  | ) { | ||
|  | 	// Values in this enum are from an external user, so not guaranteed to be
 | ||
|  | 	// bounded to the enum values
 | ||
|  | 	switch (static_cast<int>(profile)) | ||
|  | 	{ | ||
|  | 	case ASTCENC_PRF_LDR_SRGB: | ||
|  | 	case ASTCENC_PRF_LDR: | ||
|  | 	case ASTCENC_PRF_HDR_RGB_LDR_A: | ||
|  | 	case ASTCENC_PRF_HDR: | ||
|  | 		return ASTCENC_SUCCESS; | ||
|  | 	default: | ||
|  | 		return ASTCENC_ERR_BAD_PROFILE; | ||
|  | 	} | ||
|  | } | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief Validate block size. | ||
|  |  * | ||
|  |  * @param block_x   The block x dimensions. | ||
|  |  * @param block_y   The block y dimensions. | ||
|  |  * @param block_z   The block z dimensions. | ||
|  |  * | ||
|  |  * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. | ||
|  |  */ | ||
|  | static astcenc_error validate_block_size( | ||
|  | 	unsigned int block_x, | ||
|  | 	unsigned int block_y, | ||
|  | 	unsigned int block_z | ||
|  | ) { | ||
|  | 	// Test if this is a legal block size at all
 | ||
|  | 	bool is_legal = (((block_z <= 1) && is_legal_2d_block_size(block_x, block_y)) || | ||
|  | 	                 ((block_z >= 2) && is_legal_3d_block_size(block_x, block_y, block_z))); | ||
|  | 	if (!is_legal) | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_BAD_BLOCK_SIZE; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Test if this build has sufficient capacity for this block size
 | ||
|  | 	bool have_capacity = (block_x * block_y * block_z) <= BLOCK_MAX_TEXELS; | ||
|  | 	if (!have_capacity) | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_NOT_IMPLEMENTED; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	return ASTCENC_SUCCESS; | ||
|  | } | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief Validate flags. | ||
|  |  * | ||
|  |  * @param flags   The flags to check. | ||
|  |  * | ||
|  |  * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. | ||
|  |  */ | ||
|  | static astcenc_error validate_flags( | ||
|  | 	unsigned int flags | ||
|  | ) { | ||
|  | 	// Flags field must not contain any unknown flag bits
 | ||
|  | 	unsigned int exMask = ~ASTCENC_ALL_FLAGS; | ||
|  | 	if (popcount(flags & exMask) != 0) | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_BAD_FLAGS; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Flags field must only contain at most a single map type
 | ||
|  | 	exMask = ASTCENC_FLG_MAP_NORMAL | ||
|  | 	       | ASTCENC_FLG_MAP_RGBM; | ||
|  | 	if (popcount(flags & exMask) > 1) | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_BAD_FLAGS; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	return ASTCENC_SUCCESS; | ||
|  | } | ||
|  | 
 | ||
|  | #if !defined(ASTCENC_DECOMPRESS_ONLY)
 | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief Validate single channel compression swizzle. | ||
|  |  * | ||
|  |  * @param swizzle   The swizzle to check. | ||
|  |  * | ||
|  |  * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. | ||
|  |  */ | ||
|  | static astcenc_error validate_compression_swz( | ||
|  | 	astcenc_swz swizzle | ||
|  | ) { | ||
|  | 	// Not all enum values are handled; SWZ_Z is invalid for compression
 | ||
|  | 	switch (static_cast<int>(swizzle)) | ||
|  | 	{ | ||
|  | 	case ASTCENC_SWZ_R: | ||
|  | 	case ASTCENC_SWZ_G: | ||
|  | 	case ASTCENC_SWZ_B: | ||
|  | 	case ASTCENC_SWZ_A: | ||
|  | 	case ASTCENC_SWZ_0: | ||
|  | 	case ASTCENC_SWZ_1: | ||
|  | 		return ASTCENC_SUCCESS; | ||
|  | 	default: | ||
|  | 		return ASTCENC_ERR_BAD_SWIZZLE; | ||
|  | 	} | ||
|  | } | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief Validate overall compression swizzle. | ||
|  |  * | ||
|  |  * @param swizzle   The swizzle to check. | ||
|  |  * | ||
|  |  * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. | ||
|  |  */ | ||
|  | static astcenc_error validate_compression_swizzle( | ||
|  | 	const astcenc_swizzle& swizzle | ||
|  | ) { | ||
|  | 	if (validate_compression_swz(swizzle.r) || | ||
|  | 	    validate_compression_swz(swizzle.g) || | ||
|  | 	    validate_compression_swz(swizzle.b) || | ||
|  | 	    validate_compression_swz(swizzle.a)) | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_BAD_SWIZZLE; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	return ASTCENC_SUCCESS; | ||
|  | } | ||
|  | #endif
 | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief Validate single channel decompression swizzle. | ||
|  |  * | ||
|  |  * @param swizzle   The swizzle to check. | ||
|  |  * | ||
|  |  * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. | ||
|  |  */ | ||
|  | static astcenc_error validate_decompression_swz( | ||
|  | 	astcenc_swz swizzle | ||
|  | ) { | ||
|  | 	// Values in this enum are from an external user, so not guaranteed to be
 | ||
|  | 	// bounded to the enum values
 | ||
|  | 	switch (static_cast<int>(swizzle)) | ||
|  | 	{ | ||
|  | 	case ASTCENC_SWZ_R: | ||
|  | 	case ASTCENC_SWZ_G: | ||
|  | 	case ASTCENC_SWZ_B: | ||
|  | 	case ASTCENC_SWZ_A: | ||
|  | 	case ASTCENC_SWZ_0: | ||
|  | 	case ASTCENC_SWZ_1: | ||
|  | 	case ASTCENC_SWZ_Z: | ||
|  | 		return ASTCENC_SUCCESS; | ||
|  | 	default: | ||
|  | 		return ASTCENC_ERR_BAD_SWIZZLE; | ||
|  | 	} | ||
|  | } | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief Validate overall decompression swizzle. | ||
|  |  * | ||
|  |  * @param swizzle   The swizzle to check. | ||
|  |  * | ||
|  |  * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. | ||
|  |  */ | ||
|  | static astcenc_error validate_decompression_swizzle( | ||
|  | 	const astcenc_swizzle& swizzle | ||
|  | ) { | ||
|  | 	if (validate_decompression_swz(swizzle.r) || | ||
|  | 	    validate_decompression_swz(swizzle.g) || | ||
|  | 	    validate_decompression_swz(swizzle.b) || | ||
|  | 	    validate_decompression_swz(swizzle.a)) | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_BAD_SWIZZLE; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	return ASTCENC_SUCCESS; | ||
|  | } | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * Validate that an incoming configuration is in-spec. | ||
|  |  * | ||
|  |  * This function can respond in two ways: | ||
|  |  * | ||
|  |  *   * Numerical inputs that have valid ranges are clamped to those valid ranges. No error is thrown | ||
|  |  *     for out-of-range inputs in this case. | ||
|  |  *   * Numerical inputs and logic inputs are are logically invalid and which make no sense | ||
|  |  *     algorithmically will return an error. | ||
|  |  * | ||
|  |  * @param[in,out] config   The input compressor configuration. | ||
|  |  * | ||
|  |  * @return Return @c ASTCENC_SUCCESS if validated, otherwise an error on failure. | ||
|  |  */ | ||
|  | static astcenc_error validate_config( | ||
|  | 	astcenc_config &config | ||
|  | ) { | ||
|  | 	astcenc_error status; | ||
|  | 
 | ||
|  | 	status = validate_profile(config.profile); | ||
|  | 	if (status != ASTCENC_SUCCESS) | ||
|  | 	{ | ||
|  | 		return status; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	status = validate_flags(config.flags); | ||
|  | 	if (status != ASTCENC_SUCCESS) | ||
|  | 	{ | ||
|  | 		return status; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	status = validate_block_size(config.block_x, config.block_y, config.block_z); | ||
|  | 	if (status != ASTCENC_SUCCESS) | ||
|  | 	{ | ||
|  | 		return status; | ||
|  | 	} | ||
|  | 
 | ||
|  | #if defined(ASTCENC_DECOMPRESS_ONLY)
 | ||
|  | 	// Decompress-only builds only support decompress-only contexts
 | ||
|  | 	if (!(config.flags & ASTCENC_FLG_DECOMPRESS_ONLY)) | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_BAD_PARAM; | ||
|  | 	} | ||
|  | #endif
 | ||
|  | 
 | ||
|  | 	config.rgbm_m_scale = astc::max(config.rgbm_m_scale, 1.0f); | ||
|  | 
 | ||
|  | 	config.tune_partition_count_limit = astc::clamp(config.tune_partition_count_limit, 1u, 4u); | ||
|  | 	config.tune_2partition_index_limit = astc::clamp(config.tune_2partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); | ||
|  | 	config.tune_3partition_index_limit = astc::clamp(config.tune_3partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); | ||
|  | 	config.tune_4partition_index_limit = astc::clamp(config.tune_4partition_index_limit, 1u, BLOCK_MAX_PARTITIONINGS); | ||
|  | 	config.tune_block_mode_limit = astc::clamp(config.tune_block_mode_limit, 1u, 100u); | ||
|  | 	config.tune_refinement_limit = astc::max(config.tune_refinement_limit, 1u); | ||
|  | 	config.tune_candidate_limit = astc::clamp(config.tune_candidate_limit, 1u, TUNE_MAX_TRIAL_CANDIDATES); | ||
|  | 	config.tune_2partitioning_candidate_limit = astc::clamp(config.tune_2partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); | ||
|  | 	config.tune_3partitioning_candidate_limit = astc::clamp(config.tune_3partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); | ||
|  | 	config.tune_4partitioning_candidate_limit = astc::clamp(config.tune_4partitioning_candidate_limit, 1u, TUNE_MAX_PARTITIONING_CANDIDATES); | ||
|  | 	config.tune_db_limit = astc::max(config.tune_db_limit, 0.0f); | ||
|  | 	config.tune_mse_overshoot = astc::max(config.tune_mse_overshoot, 1.0f); | ||
|  | 	config.tune_2_partition_early_out_limit_factor = astc::max(config.tune_2_partition_early_out_limit_factor, 0.0f); | ||
|  | 	config.tune_3_partition_early_out_limit_factor = astc::max(config.tune_3_partition_early_out_limit_factor, 0.0f); | ||
|  | 	config.tune_2_plane_early_out_limit_correlation = astc::max(config.tune_2_plane_early_out_limit_correlation, 0.0f); | ||
|  | 
 | ||
|  | 	// Specifying a zero weight color component is not allowed; force to small value
 | ||
|  | 	float max_weight = astc::max(astc::max(config.cw_r_weight, config.cw_g_weight), | ||
|  | 	                             astc::max(config.cw_b_weight, config.cw_a_weight)); | ||
|  | 	if (max_weight > 0.0f) | ||
|  | 	{ | ||
|  | 		max_weight /= 1000.0f; | ||
|  | 		config.cw_r_weight = astc::max(config.cw_r_weight, max_weight); | ||
|  | 		config.cw_g_weight = astc::max(config.cw_g_weight, max_weight); | ||
|  | 		config.cw_b_weight = astc::max(config.cw_b_weight, max_weight); | ||
|  | 		config.cw_a_weight = astc::max(config.cw_a_weight, max_weight); | ||
|  | 	} | ||
|  | 	// If all color components error weights are zero then return an error
 | ||
|  | 	else | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_BAD_PARAM; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	return ASTCENC_SUCCESS; | ||
|  | } | ||
|  | 
 | ||
|  | /* See header for documentation. */ | ||
|  | astcenc_error astcenc_config_init( | ||
|  | 	astcenc_profile profile, | ||
|  | 	unsigned int block_x, | ||
|  | 	unsigned int block_y, | ||
|  | 	unsigned int block_z, | ||
|  | 	float quality, | ||
|  | 	unsigned int flags, | ||
|  | 	astcenc_config* configp | ||
|  | ) { | ||
|  | 	astcenc_error status; | ||
|  | 
 | ||
|  | 	// Check basic library compatibility options here so they are checked early. Note, these checks
 | ||
|  | 	// are repeated in context_alloc for cases where callers use a manually defined config struct
 | ||
|  | 	status = validate_cpu_isa(); | ||
|  | 	if (status != ASTCENC_SUCCESS) | ||
|  | 	{ | ||
|  | 		return status; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	status = validate_cpu_float(); | ||
|  | 	if (status != ASTCENC_SUCCESS) | ||
|  | 	{ | ||
|  | 		return status; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Zero init all config fields; although most of will be over written
 | ||
|  | 	astcenc_config& config = *configp; | ||
|  | 	std::memset(&config, 0, sizeof(config)); | ||
|  | 
 | ||
|  | 	// Process the block size
 | ||
|  | 	block_z = astc::max(block_z, 1u); // For 2D blocks Z==0 is accepted, but convert to 1
 | ||
|  | 	status = validate_block_size(block_x, block_y, block_z); | ||
|  | 	if (status != ASTCENC_SUCCESS) | ||
|  | 	{ | ||
|  | 		return status; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	config.block_x = block_x; | ||
|  | 	config.block_y = block_y; | ||
|  | 	config.block_z = block_z; | ||
|  | 
 | ||
|  | 	float texels = static_cast<float>(block_x * block_y * block_z); | ||
|  | 	float ltexels = logf(texels) / logf(10.0f); | ||
|  | 
 | ||
|  | 	// Process the performance quality level or preset; note that this must be done before we
 | ||
|  | 	// process any additional settings, such as color profile and flags, which may replace some of
 | ||
|  | 	// these settings with more use case tuned values
 | ||
|  | 	if (quality < ASTCENC_PRE_FASTEST || | ||
|  | 	    quality > ASTCENC_PRE_EXHAUSTIVE) | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_BAD_QUALITY; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	static const std::array<astcenc_preset_config, 6>* preset_configs; | ||
|  | 	int texels_int = block_x * block_y * block_z; | ||
|  | 	if (texels_int < 25) | ||
|  | 	{ | ||
|  | 		preset_configs = &preset_configs_high; | ||
|  | 	} | ||
|  | 	else if (texels_int < 64) | ||
|  | 	{ | ||
|  | 		preset_configs = &preset_configs_mid; | ||
|  | 	} | ||
|  | 	else | ||
|  | 	{ | ||
|  | 		preset_configs = &preset_configs_low; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Determine which preset to use, or which pair to interpolate
 | ||
|  | 	size_t start; | ||
|  | 	size_t end; | ||
|  | 	for (end = 0; end < preset_configs->size(); end++) | ||
|  | 	{ | ||
|  | 		if ((*preset_configs)[end].quality >= quality) | ||
|  | 		{ | ||
|  | 			break; | ||
|  | 		} | ||
|  | 	} | ||
|  | 
 | ||
|  | 	start = end == 0 ? 0 : end - 1; | ||
|  | 
 | ||
|  | 	// Start and end node are the same - so just transfer the values.
 | ||
|  | 	if (start == end) | ||
|  | 	{ | ||
|  | 		config.tune_partition_count_limit = (*preset_configs)[start].tune_partition_count_limit; | ||
|  | 		config.tune_2partition_index_limit = (*preset_configs)[start].tune_2partition_index_limit; | ||
|  | 		config.tune_3partition_index_limit = (*preset_configs)[start].tune_3partition_index_limit; | ||
|  | 		config.tune_4partition_index_limit = (*preset_configs)[start].tune_4partition_index_limit; | ||
|  | 		config.tune_block_mode_limit = (*preset_configs)[start].tune_block_mode_limit; | ||
|  | 		config.tune_refinement_limit = (*preset_configs)[start].tune_refinement_limit; | ||
|  | 		config.tune_candidate_limit = astc::min((*preset_configs)[start].tune_candidate_limit, TUNE_MAX_TRIAL_CANDIDATES); | ||
|  | 		config.tune_2partitioning_candidate_limit = astc::min((*preset_configs)[start].tune_2partitioning_candidate_limit, TUNE_MAX_PARTITIONING_CANDIDATES); | ||
|  | 		config.tune_3partitioning_candidate_limit = astc::min((*preset_configs)[start].tune_3partitioning_candidate_limit, TUNE_MAX_PARTITIONING_CANDIDATES); | ||
|  | 		config.tune_4partitioning_candidate_limit = astc::min((*preset_configs)[start].tune_4partitioning_candidate_limit, TUNE_MAX_PARTITIONING_CANDIDATES); | ||
|  | 		config.tune_db_limit = astc::max((*preset_configs)[start].tune_db_limit_a_base - 35 * ltexels, | ||
|  | 		                                 (*preset_configs)[start].tune_db_limit_b_base - 19 * ltexels); | ||
|  | 
 | ||
|  | 		config.tune_mse_overshoot = (*preset_configs)[start].tune_mse_overshoot; | ||
|  | 
 | ||
|  | 		config.tune_2_partition_early_out_limit_factor = (*preset_configs)[start].tune_2_partition_early_out_limit_factor; | ||
|  | 		config.tune_3_partition_early_out_limit_factor =(*preset_configs)[start].tune_3_partition_early_out_limit_factor; | ||
|  | 		config.tune_2_plane_early_out_limit_correlation = (*preset_configs)[start].tune_2_plane_early_out_limit_correlation; | ||
|  | 	} | ||
|  | 	// Start and end node are not the same - so interpolate between them
 | ||
|  | 	else | ||
|  | 	{ | ||
|  | 		auto& node_a = (*preset_configs)[start]; | ||
|  | 		auto& node_b = (*preset_configs)[end]; | ||
|  | 
 | ||
|  | 		float wt_range = node_b.quality - node_a.quality; | ||
|  | 		assert(wt_range > 0); | ||
|  | 
 | ||
|  | 		// Compute interpolation factors
 | ||
|  | 		float wt_node_a = (node_b.quality - quality) / wt_range; | ||
|  | 		float wt_node_b = (quality - node_a.quality) / wt_range; | ||
|  | 
 | ||
|  | 		#define LERP(param) ((node_a.param * wt_node_a) + (node_b.param * wt_node_b))
 | ||
|  | 		#define LERPI(param) astc::flt2int_rtn(\
 | ||
|  | 		                         (static_cast<float>(node_a.param) * wt_node_a) + \ | ||
|  | 		                         (static_cast<float>(node_b.param) * wt_node_b)) | ||
|  | 		#define LERPUI(param) static_cast<unsigned int>(LERPI(param))
 | ||
|  | 
 | ||
|  | 		config.tune_partition_count_limit = LERPI(tune_partition_count_limit); | ||
|  | 		config.tune_2partition_index_limit = LERPI(tune_2partition_index_limit); | ||
|  | 		config.tune_3partition_index_limit = LERPI(tune_3partition_index_limit); | ||
|  | 		config.tune_4partition_index_limit = LERPI(tune_4partition_index_limit); | ||
|  | 		config.tune_block_mode_limit = LERPI(tune_block_mode_limit); | ||
|  | 		config.tune_refinement_limit = LERPI(tune_refinement_limit); | ||
|  | 		config.tune_candidate_limit = astc::min(LERPUI(tune_candidate_limit), | ||
|  | 		                                        TUNE_MAX_TRIAL_CANDIDATES); | ||
|  | 		config.tune_2partitioning_candidate_limit = astc::min(LERPUI(tune_2partitioning_candidate_limit), | ||
|  | 		                                                      BLOCK_MAX_PARTITIONINGS); | ||
|  | 		config.tune_3partitioning_candidate_limit = astc::min(LERPUI(tune_3partitioning_candidate_limit), | ||
|  | 		                                                      BLOCK_MAX_PARTITIONINGS); | ||
|  | 		config.tune_4partitioning_candidate_limit = astc::min(LERPUI(tune_4partitioning_candidate_limit), | ||
|  | 		                                                      BLOCK_MAX_PARTITIONINGS); | ||
|  | 		config.tune_db_limit = astc::max(LERP(tune_db_limit_a_base) - 35 * ltexels, | ||
|  | 		                                 LERP(tune_db_limit_b_base) - 19 * ltexels); | ||
|  | 
 | ||
|  | 		config.tune_mse_overshoot = LERP(tune_mse_overshoot); | ||
|  | 
 | ||
|  | 		config.tune_2_partition_early_out_limit_factor = LERP(tune_2_partition_early_out_limit_factor); | ||
|  | 		config.tune_3_partition_early_out_limit_factor = LERP(tune_3_partition_early_out_limit_factor); | ||
|  | 		config.tune_2_plane_early_out_limit_correlation = LERP(tune_2_plane_early_out_limit_correlation); | ||
|  | 		#undef LERP
 | ||
|  | 		#undef LERPI
 | ||
|  | 		#undef LERPUI
 | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Set heuristics to the defaults for each color profile
 | ||
|  | 	config.cw_r_weight = 1.0f; | ||
|  | 	config.cw_g_weight = 1.0f; | ||
|  | 	config.cw_b_weight = 1.0f; | ||
|  | 	config.cw_a_weight = 1.0f; | ||
|  | 
 | ||
|  | 	config.a_scale_radius = 0; | ||
|  | 
 | ||
|  | 	config.rgbm_m_scale = 0.0f; | ||
|  | 
 | ||
|  | 	config.profile = profile; | ||
|  | 
 | ||
|  | 	// Values in this enum are from an external user, so not guaranteed to be
 | ||
|  | 	// bounded to the enum values
 | ||
|  | 	switch (static_cast<int>(profile)) | ||
|  | 	{ | ||
|  | 	case ASTCENC_PRF_LDR: | ||
|  | 	case ASTCENC_PRF_LDR_SRGB: | ||
|  | 		break; | ||
|  | 	case ASTCENC_PRF_HDR_RGB_LDR_A: | ||
|  | 	case ASTCENC_PRF_HDR: | ||
|  | 		config.tune_db_limit = 999.0f; | ||
|  | 		break; | ||
|  | 	default: | ||
|  | 		return ASTCENC_ERR_BAD_PROFILE; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Flags field must not contain any unknown flag bits
 | ||
|  | 	status = validate_flags(flags); | ||
|  | 	if (status != ASTCENC_SUCCESS) | ||
|  | 	{ | ||
|  | 		return status; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	if (flags & ASTCENC_FLG_MAP_NORMAL) | ||
|  | 	{ | ||
|  | 		// Normal map encoding uses L+A blocks, so allow one more partitioning
 | ||
|  | 		// than normal. We need need fewer bits for endpoints, so more likely
 | ||
|  | 		// to be able to use more partitions than an RGB/RGBA block
 | ||
|  | 		config.tune_partition_count_limit = astc::min(config.tune_partition_count_limit + 1u, 4u); | ||
|  | 
 | ||
|  | 		config.cw_g_weight = 0.0f; | ||
|  | 		config.cw_b_weight = 0.0f; | ||
|  | 		config.tune_2_partition_early_out_limit_factor *= 1.5f; | ||
|  | 		config.tune_3_partition_early_out_limit_factor *= 1.5f; | ||
|  | 		config.tune_2_plane_early_out_limit_correlation = 0.99f; | ||
|  | 
 | ||
|  | 		// Normals are prone to blocking artifacts on smooth curves
 | ||
|  | 		// so force compressor to try harder here ...
 | ||
|  | 		config.tune_db_limit *= 1.03f; | ||
|  | 	} | ||
|  | 	else if (flags & ASTCENC_FLG_MAP_RGBM) | ||
|  | 	{ | ||
|  | 		config.rgbm_m_scale = 5.0f; | ||
|  | 		config.cw_a_weight = 2.0f * config.rgbm_m_scale; | ||
|  | 	} | ||
|  | 	else // (This is color data)
 | ||
|  | 	{ | ||
|  | 		// This is a very basic perceptual metric for RGB color data, which weights error
 | ||
|  | 		// significance by the perceptual luminance contribution of each color channel. For
 | ||
|  | 		// luminance the usual weights to compute luminance from a linear RGB value are as
 | ||
|  | 		// follows:
 | ||
|  | 		//
 | ||
|  | 		//     l = r * 0.3 + g * 0.59 + b * 0.11
 | ||
|  | 		//
 | ||
|  | 		// ... but we scale these up to keep a better balance between color and alpha. Note
 | ||
|  | 		// that if the content is using alpha we'd recommend using the -a option to weight
 | ||
|  | 		// the color contribution by the alpha transparency.
 | ||
|  | 		if (flags & ASTCENC_FLG_USE_PERCEPTUAL) | ||
|  | 		{ | ||
|  | 			config.cw_r_weight = 0.30f * 2.25f; | ||
|  | 			config.cw_g_weight = 0.59f * 2.25f; | ||
|  | 			config.cw_b_weight = 0.11f * 2.25f; | ||
|  | 		} | ||
|  | 	} | ||
|  | 	config.flags = flags; | ||
|  | 
 | ||
|  | 	return ASTCENC_SUCCESS; | ||
|  | } | ||
|  | 
 | ||
|  | /* See header for documentation. */ | ||
|  | astcenc_error astcenc_context_alloc( | ||
|  | 	const astcenc_config* configp, | ||
|  | 	unsigned int thread_count, | ||
|  | 	astcenc_context** context | ||
|  | ) { | ||
|  | 	astcenc_error status; | ||
|  | 	const astcenc_config& config = *configp; | ||
|  | 
 | ||
|  | 	status = validate_cpu_isa(); | ||
|  | 	if (status != ASTCENC_SUCCESS) | ||
|  | 	{ | ||
|  | 		return status; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	status = validate_cpu_float(); | ||
|  | 	if (status != ASTCENC_SUCCESS) | ||
|  | 	{ | ||
|  | 		return status; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	if (thread_count == 0) | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_BAD_PARAM; | ||
|  | 	} | ||
|  | 
 | ||
|  | #if defined(ASTCENC_DIAGNOSTICS)
 | ||
|  | 	// Force single threaded compressor use in diagnostic mode.
 | ||
|  | 	if (thread_count != 1) | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_BAD_PARAM; | ||
|  | 	} | ||
|  | #endif
 | ||
|  | 
 | ||
|  | 	astcenc_context* ctxo = new astcenc_context; | ||
|  | 	astcenc_contexti* ctx = &ctxo->context; | ||
|  | 	ctx->thread_count = thread_count; | ||
|  | 	ctx->config = config; | ||
|  | 	ctx->working_buffers = nullptr; | ||
|  | 
 | ||
|  | 	// These are allocated per-compress, as they depend on image size
 | ||
|  | 	ctx->input_alpha_averages = nullptr; | ||
|  | 
 | ||
|  | 	// Copy the config first and validate the copy (we may modify it)
 | ||
|  | 	status = validate_config(ctx->config); | ||
|  | 	if (status != ASTCENC_SUCCESS) | ||
|  | 	{ | ||
|  | 		delete ctxo; | ||
|  | 		return status; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	ctx->bsd = aligned_malloc<block_size_descriptor>(sizeof(block_size_descriptor), ASTCENC_VECALIGN); | ||
|  | 	bool can_omit_modes = static_cast<bool>(config.flags & ASTCENC_FLG_SELF_DECOMPRESS_ONLY); | ||
|  | 	init_block_size_descriptor(config.block_x, config.block_y, config.block_z, | ||
|  | 	                           can_omit_modes, | ||
|  | 	                           config.tune_partition_count_limit, | ||
|  | 	                           static_cast<float>(config.tune_block_mode_limit) / 100.0f, | ||
|  | 	                           *ctx->bsd); | ||
|  | 
 | ||
|  | #if !defined(ASTCENC_DECOMPRESS_ONLY)
 | ||
|  | 	// Do setup only needed by compression
 | ||
|  | 	if (!(status & ASTCENC_FLG_DECOMPRESS_ONLY)) | ||
|  | 	{ | ||
|  | 		// Turn a dB limit into a per-texel error for faster use later
 | ||
|  | 		if ((ctx->config.profile == ASTCENC_PRF_LDR) || (ctx->config.profile == ASTCENC_PRF_LDR_SRGB)) | ||
|  | 		{ | ||
|  | 			ctx->config.tune_db_limit = astc::pow(0.1f, ctx->config.tune_db_limit * 0.1f) * 65535.0f * 65535.0f; | ||
|  | 		} | ||
|  | 		else | ||
|  | 		{ | ||
|  | 			ctx->config.tune_db_limit = 0.0f; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		size_t worksize = sizeof(compression_working_buffers) * thread_count; | ||
|  | 		ctx->working_buffers = aligned_malloc<compression_working_buffers>(worksize, ASTCENC_VECALIGN); | ||
|  | 		static_assert((sizeof(compression_working_buffers) % ASTCENC_VECALIGN) == 0, | ||
|  | 		              "compression_working_buffers size must be multiple of vector alignment"); | ||
|  | 		if (!ctx->working_buffers) | ||
|  | 		{ | ||
|  | 			aligned_free<block_size_descriptor>(ctx->bsd); | ||
|  | 			delete ctxo; | ||
|  | 			*context = nullptr; | ||
|  | 			return ASTCENC_ERR_OUT_OF_MEM; | ||
|  | 		} | ||
|  | 	} | ||
|  | #endif
 | ||
|  | 
 | ||
|  | #if defined(ASTCENC_DIAGNOSTICS)
 | ||
|  | 	ctx->trace_log = new TraceLog(ctx->config.trace_file_path); | ||
|  | 	if (!ctx->trace_log->m_file) | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_DTRACE_FAILURE; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	trace_add_data("block_x", config.block_x); | ||
|  | 	trace_add_data("block_y", config.block_y); | ||
|  | 	trace_add_data("block_z", config.block_z); | ||
|  | #endif
 | ||
|  | 
 | ||
|  | 	*context = ctxo; | ||
|  | 
 | ||
|  | #if !defined(ASTCENC_DECOMPRESS_ONLY)
 | ||
|  | 	prepare_angular_tables(); | ||
|  | #endif
 | ||
|  | 
 | ||
|  | 	return ASTCENC_SUCCESS; | ||
|  | } | ||
|  | 
 | ||
|  | /* See header dor documentation. */ | ||
|  | void astcenc_context_free( | ||
|  | 	astcenc_context* ctxo | ||
|  | ) { | ||
|  | 	if (ctxo) | ||
|  | 	{ | ||
|  | 		astcenc_contexti* ctx = &ctxo->context; | ||
|  | 		aligned_free<compression_working_buffers>(ctx->working_buffers); | ||
|  | 		aligned_free<block_size_descriptor>(ctx->bsd); | ||
|  | #if defined(ASTCENC_DIAGNOSTICS)
 | ||
|  | 		delete ctx->trace_log; | ||
|  | #endif
 | ||
|  | 		delete ctxo; | ||
|  | 	} | ||
|  | } | ||
|  | 
 | ||
|  | #if !defined(ASTCENC_DECOMPRESS_ONLY)
 | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief Compress an image, after any preflight has completed. | ||
|  |  * | ||
|  |  * @param[out] ctxo           The compressor context. | ||
|  |  * @param      thread_index   The thread index. | ||
|  |  * @param      image          The intput image. | ||
|  |  * @param      swizzle        The input swizzle. | ||
|  |  * @param[out] buffer         The output array for the compressed data. | ||
|  |  */ | ||
|  | static void compress_image( | ||
|  | 	astcenc_context& ctxo, | ||
|  | 	unsigned int thread_index, | ||
|  | 	const astcenc_image& image, | ||
|  | 	const astcenc_swizzle& swizzle, | ||
|  | 	uint8_t* buffer | ||
|  | ) { | ||
|  | 	astcenc_contexti& ctx = ctxo.context; | ||
|  | 	const block_size_descriptor& bsd = *ctx.bsd; | ||
|  | 	astcenc_profile decode_mode = ctx.config.profile; | ||
|  | 
 | ||
|  | 	image_block blk; | ||
|  | 
 | ||
|  | 	int block_x = bsd.xdim; | ||
|  | 	int block_y = bsd.ydim; | ||
|  | 	int block_z = bsd.zdim; | ||
|  | 	blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z); | ||
|  | 
 | ||
|  | 	int dim_x = image.dim_x; | ||
|  | 	int dim_y = image.dim_y; | ||
|  | 	int dim_z = image.dim_z; | ||
|  | 
 | ||
|  | 	int xblocks = (dim_x + block_x - 1) / block_x; | ||
|  | 	int yblocks = (dim_y + block_y - 1) / block_y; | ||
|  | 	int zblocks = (dim_z + block_z - 1) / block_z; | ||
|  | 	int block_count = zblocks * yblocks * xblocks; | ||
|  | 
 | ||
|  | 	int row_blocks = xblocks; | ||
|  | 	int plane_blocks = xblocks * yblocks; | ||
|  | 
 | ||
|  | 	// Populate the block channel weights
 | ||
|  | 	blk.channel_weight = vfloat4(ctx.config.cw_r_weight, | ||
|  | 	                             ctx.config.cw_g_weight, | ||
|  | 	                             ctx.config.cw_b_weight, | ||
|  | 	                             ctx.config.cw_a_weight); | ||
|  | 
 | ||
|  | 	// Use preallocated scratch buffer
 | ||
|  | 	auto& temp_buffers = ctx.working_buffers[thread_index]; | ||
|  | 
 | ||
|  | 	// Only the first thread actually runs the initializer
 | ||
|  | 	ctxo.manage_compress.init(block_count); | ||
|  | 
 | ||
|  | 	// Determine if we can use an optimized load function
 | ||
|  | 	bool needs_swz = (swizzle.r != ASTCENC_SWZ_R) || (swizzle.g != ASTCENC_SWZ_G) || | ||
|  | 	                 (swizzle.b != ASTCENC_SWZ_B) || (swizzle.a != ASTCENC_SWZ_A); | ||
|  | 
 | ||
|  | 	bool needs_hdr = (decode_mode == ASTCENC_PRF_HDR) || | ||
|  | 	                 (decode_mode == ASTCENC_PRF_HDR_RGB_LDR_A); | ||
|  | 
 | ||
|  | 	bool use_fast_load = !needs_swz && !needs_hdr && | ||
|  | 	                     block_z == 1 && image.data_type == ASTCENC_TYPE_U8; | ||
|  | 
 | ||
|  | 	auto load_func = load_image_block; | ||
|  | 	if (use_fast_load) | ||
|  | 	{ | ||
|  | 		load_func = load_image_block_fast_ldr; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// All threads run this processing loop until there is no work remaining
 | ||
|  | 	while (true) | ||
|  | 	{ | ||
|  | 		unsigned int count; | ||
|  | 		unsigned int base = ctxo.manage_compress.get_task_assignment(16, count); | ||
|  | 		if (!count) | ||
|  | 		{ | ||
|  | 			break; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		for (unsigned int i = base; i < base + count; i++) | ||
|  | 		{ | ||
|  | 			// Decode i into x, y, z block indices
 | ||
|  | 			int z = i / plane_blocks; | ||
|  | 			unsigned int rem = i - (z * plane_blocks); | ||
|  | 			int y = rem / row_blocks; | ||
|  | 			int x = rem - (y * row_blocks); | ||
|  | 
 | ||
|  | 			// Test if we can apply some basic alpha-scale RDO
 | ||
|  | 			bool use_full_block = true; | ||
|  | 			if (ctx.config.a_scale_radius != 0 && block_z == 1) | ||
|  | 			{ | ||
|  | 				int start_x = x * block_x; | ||
|  | 				int end_x = astc::min(dim_x, start_x + block_x); | ||
|  | 
 | ||
|  | 				int start_y = y * block_y; | ||
|  | 				int end_y = astc::min(dim_y, start_y + block_y); | ||
|  | 
 | ||
|  | 				// SATs accumulate error, so don't test exactly zero. Test for
 | ||
|  | 				// less than 1 alpha in the expanded block footprint that
 | ||
|  | 				// includes the alpha radius.
 | ||
|  | 				int x_footprint = block_x + 2 * (ctx.config.a_scale_radius - 1); | ||
|  | 
 | ||
|  | 				int y_footprint = block_y + 2 * (ctx.config.a_scale_radius - 1); | ||
|  | 
 | ||
|  | 				float footprint = static_cast<float>(x_footprint * y_footprint); | ||
|  | 				float threshold = 0.9f / (255.0f * footprint); | ||
|  | 
 | ||
|  | 				// Do we have any alpha values?
 | ||
|  | 				use_full_block = false; | ||
|  | 				for (int ay = start_y; ay < end_y; ay++) | ||
|  | 				{ | ||
|  | 					for (int ax = start_x; ax < end_x; ax++) | ||
|  | 					{ | ||
|  | 						float a_avg = ctx.input_alpha_averages[ay * dim_x + ax]; | ||
|  | 						if (a_avg > threshold) | ||
|  | 						{ | ||
|  | 							use_full_block = true; | ||
|  | 							ax = end_x; | ||
|  | 							ay = end_y; | ||
|  | 						} | ||
|  | 					} | ||
|  | 				} | ||
|  | 			} | ||
|  | 
 | ||
|  | 			// Fetch the full block for compression
 | ||
|  | 			if (use_full_block) | ||
|  | 			{ | ||
|  | 				load_func(decode_mode, image, blk, bsd, x * block_x, y * block_y, z * block_z, swizzle); | ||
|  | 
 | ||
|  | 				// Scale RGB error contribution by the maximum alpha in the block
 | ||
|  | 				// This encourages preserving alpha accuracy in regions with high
 | ||
|  | 				// transparency, and can buy up to 0.5 dB PSNR.
 | ||
|  | 				if (ctx.config.flags & ASTCENC_FLG_USE_ALPHA_WEIGHT) | ||
|  | 				{ | ||
|  | 					float alpha_scale = blk.data_max.lane<3>() * (1.0f / 65535.0f); | ||
|  | 					blk.channel_weight = vfloat4(ctx.config.cw_r_weight * alpha_scale, | ||
|  | 					                             ctx.config.cw_g_weight * alpha_scale, | ||
|  | 					                             ctx.config.cw_b_weight * alpha_scale, | ||
|  | 					                             ctx.config.cw_a_weight); | ||
|  | 				} | ||
|  | 			} | ||
|  | 			// Apply alpha scale RDO - substitute constant color block
 | ||
|  | 			else | ||
|  | 			{ | ||
|  | 				blk.origin_texel = vfloat4::zero(); | ||
|  | 				blk.data_min = vfloat4::zero(); | ||
|  | 				blk.data_mean = vfloat4::zero(); | ||
|  | 				blk.data_max = vfloat4::zero(); | ||
|  | 				blk.grayscale = true; | ||
|  | 			} | ||
|  | 
 | ||
|  | 			int offset = ((z * yblocks + y) * xblocks + x) * 16; | ||
|  | 			uint8_t *bp = buffer + offset; | ||
|  | 			physical_compressed_block* pcb = reinterpret_cast<physical_compressed_block*>(bp); | ||
|  | 			compress_block(ctx, blk, *pcb, temp_buffers); | ||
|  | 		} | ||
|  | 
 | ||
|  | 		ctxo.manage_compress.complete_task_assignment(count); | ||
|  | 	} | ||
|  | } | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief Compute regional averages in an image. | ||
|  |  * | ||
|  |  * This function can be called by multiple threads, but only after a single | ||
|  |  * thread calls the setup function @c init_compute_averages(). | ||
|  |  * | ||
|  |  * Results are written back into @c img->input_alpha_averages. | ||
|  |  * | ||
|  |  * @param[out] ctx   The context. | ||
|  |  * @param      ag    The average and variance arguments created during setup. | ||
|  |  */ | ||
|  | static void compute_averages( | ||
|  | 	astcenc_context& ctx, | ||
|  | 	const avg_args &ag | ||
|  | ) { | ||
|  | 	pixel_region_args arg = ag.arg; | ||
|  | 	arg.work_memory = new vfloat4[ag.work_memory_size]; | ||
|  | 
 | ||
|  | 	int size_x = ag.img_size_x; | ||
|  | 	int size_y = ag.img_size_y; | ||
|  | 	int size_z = ag.img_size_z; | ||
|  | 
 | ||
|  | 	int step_xy = ag.blk_size_xy; | ||
|  | 	int step_z = ag.blk_size_z; | ||
|  | 
 | ||
|  | 	int y_tasks = (size_y + step_xy - 1) / step_xy; | ||
|  | 
 | ||
|  | 	// All threads run this processing loop until there is no work remaining
 | ||
|  | 	while (true) | ||
|  | 	{ | ||
|  | 		unsigned int count; | ||
|  | 		unsigned int base = ctx.manage_avg.get_task_assignment(16, count); | ||
|  | 		if (!count) | ||
|  | 		{ | ||
|  | 			break; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		for (unsigned int i = base; i < base + count; i++) | ||
|  | 		{ | ||
|  | 			int z = (i / (y_tasks)) * step_z; | ||
|  | 			int y = (i - (z * y_tasks)) * step_xy; | ||
|  | 
 | ||
|  | 			arg.size_z = astc::min(step_z, size_z - z); | ||
|  | 			arg.offset_z = z; | ||
|  | 
 | ||
|  | 			arg.size_y = astc::min(step_xy, size_y - y); | ||
|  | 			arg.offset_y = y; | ||
|  | 
 | ||
|  | 			for (int x = 0; x < size_x; x += step_xy) | ||
|  | 			{ | ||
|  | 				arg.size_x = astc::min(step_xy, size_x - x); | ||
|  | 				arg.offset_x = x; | ||
|  | 				compute_pixel_region_variance(ctx.context, arg); | ||
|  | 			} | ||
|  | 		} | ||
|  | 
 | ||
|  | 		ctx.manage_avg.complete_task_assignment(count); | ||
|  | 	} | ||
|  | 
 | ||
|  | 	delete[] arg.work_memory; | ||
|  | } | ||
|  | 
 | ||
|  | #endif
 | ||
|  | 
 | ||
|  | /* See header for documentation. */ | ||
|  | astcenc_error astcenc_compress_image( | ||
|  | 	astcenc_context* ctxo, | ||
|  | 	astcenc_image* imagep, | ||
|  | 	const astcenc_swizzle* swizzle, | ||
|  | 	uint8_t* data_out, | ||
|  | 	size_t data_len, | ||
|  | 	unsigned int thread_index | ||
|  | ) { | ||
|  | #if defined(ASTCENC_DECOMPRESS_ONLY)
 | ||
|  | 	(void)ctxo; | ||
|  | 	(void)imagep; | ||
|  | 	(void)swizzle; | ||
|  | 	(void)data_out; | ||
|  | 	(void)data_len; | ||
|  | 	(void)thread_index; | ||
|  | 	return ASTCENC_ERR_BAD_CONTEXT; | ||
|  | #else
 | ||
|  | 	astcenc_contexti* ctx = &ctxo->context; | ||
|  | 	astcenc_error status; | ||
|  | 	astcenc_image& image = *imagep; | ||
|  | 
 | ||
|  | 	if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_BAD_CONTEXT; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	status = validate_compression_swizzle(*swizzle); | ||
|  | 	if (status != ASTCENC_SUCCESS) | ||
|  | 	{ | ||
|  | 		return status; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	if (thread_index >= ctx->thread_count) | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_BAD_PARAM; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	unsigned int block_x = ctx->config.block_x; | ||
|  | 	unsigned int block_y = ctx->config.block_y; | ||
|  | 	unsigned int block_z = ctx->config.block_z; | ||
|  | 
 | ||
|  | 	unsigned int xblocks = (image.dim_x + block_x - 1) / block_x; | ||
|  | 	unsigned int yblocks = (image.dim_y + block_y - 1) / block_y; | ||
|  | 	unsigned int zblocks = (image.dim_z + block_z - 1) / block_z; | ||
|  | 
 | ||
|  | 	// Check we have enough output space (16 bytes per block)
 | ||
|  | 	size_t size_needed = xblocks * yblocks * zblocks * 16; | ||
|  | 	if (data_len < size_needed) | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_OUT_OF_MEM; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// If context thread count is one then implicitly reset
 | ||
|  | 	if (ctx->thread_count == 1) | ||
|  | 	{ | ||
|  | 		astcenc_compress_reset(ctxo); | ||
|  | 	} | ||
|  | 
 | ||
|  | 	if (ctx->config.a_scale_radius != 0) | ||
|  | 	{ | ||
|  | 		// First thread to enter will do setup, other threads will subsequently
 | ||
|  | 		// enter the critical section but simply skip over the initialization
 | ||
|  | 		auto init_avg = [ctx, &image, swizzle]() { | ||
|  | 			// Perform memory allocations for the destination buffers
 | ||
|  | 			size_t texel_count = image.dim_x * image.dim_y * image.dim_z; | ||
|  | 			ctx->input_alpha_averages = new float[texel_count]; | ||
|  | 
 | ||
|  | 			return init_compute_averages( | ||
|  | 				image, ctx->config.a_scale_radius, *swizzle, | ||
|  | 				ctx->avg_preprocess_args); | ||
|  | 		}; | ||
|  | 
 | ||
|  | 		// Only the first thread actually runs the initializer
 | ||
|  | 		ctxo->manage_avg.init(init_avg); | ||
|  | 
 | ||
|  | 		// All threads will enter this function and dynamically grab work
 | ||
|  | 		compute_averages(*ctxo, ctx->avg_preprocess_args); | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Wait for compute_averages to complete before compressing
 | ||
|  | 	ctxo->manage_avg.wait(); | ||
|  | 
 | ||
|  | 	compress_image(*ctxo, thread_index, image, *swizzle, data_out); | ||
|  | 
 | ||
|  | 	// Wait for compress to complete before freeing memory
 | ||
|  | 	ctxo->manage_compress.wait(); | ||
|  | 
 | ||
|  | 	auto term_compress = [ctx]() { | ||
|  | 		delete[] ctx->input_alpha_averages; | ||
|  | 		ctx->input_alpha_averages = nullptr; | ||
|  | 	}; | ||
|  | 
 | ||
|  | 	// Only the first thread to arrive actually runs the term
 | ||
|  | 	ctxo->manage_compress.term(term_compress); | ||
|  | 
 | ||
|  | 	return ASTCENC_SUCCESS; | ||
|  | #endif
 | ||
|  | } | ||
|  | 
 | ||
|  | /* See header for documentation. */ | ||
|  | astcenc_error astcenc_compress_reset( | ||
|  | 	astcenc_context* ctxo | ||
|  | ) { | ||
|  | #if defined(ASTCENC_DECOMPRESS_ONLY)
 | ||
|  | 	(void)ctxo; | ||
|  | 	return ASTCENC_ERR_BAD_CONTEXT; | ||
|  | #else
 | ||
|  | 	astcenc_contexti* ctx = &ctxo->context; | ||
|  | 	if (ctx->config.flags & ASTCENC_FLG_DECOMPRESS_ONLY) | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_BAD_CONTEXT; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	ctxo->manage_avg.reset(); | ||
|  | 	ctxo->manage_compress.reset(); | ||
|  | 	return ASTCENC_SUCCESS; | ||
|  | #endif
 | ||
|  | } | ||
|  | 
 | ||
|  | /* See header for documentation. */ | ||
|  | astcenc_error astcenc_decompress_image( | ||
|  | 	astcenc_context* ctxo, | ||
|  | 	const uint8_t* data, | ||
|  | 	size_t data_len, | ||
|  | 	astcenc_image* image_outp, | ||
|  | 	const astcenc_swizzle* swizzle, | ||
|  | 	unsigned int thread_index | ||
|  | ) { | ||
|  | 	astcenc_error status; | ||
|  | 	astcenc_image& image_out = *image_outp; | ||
|  | 	astcenc_contexti* ctx = &ctxo->context; | ||
|  | 
 | ||
|  | 	// Today this doesn't matter (working set on stack) but might in future ...
 | ||
|  | 	if (thread_index >= ctx->thread_count) | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_BAD_PARAM; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	status = validate_decompression_swizzle(*swizzle); | ||
|  | 	if (status != ASTCENC_SUCCESS) | ||
|  | 	{ | ||
|  | 		return status; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	unsigned int block_x = ctx->config.block_x; | ||
|  | 	unsigned int block_y = ctx->config.block_y; | ||
|  | 	unsigned int block_z = ctx->config.block_z; | ||
|  | 
 | ||
|  | 	unsigned int xblocks = (image_out.dim_x + block_x - 1) / block_x; | ||
|  | 	unsigned int yblocks = (image_out.dim_y + block_y - 1) / block_y; | ||
|  | 	unsigned int zblocks = (image_out.dim_z + block_z - 1) / block_z; | ||
|  | 
 | ||
|  | 	int row_blocks = xblocks; | ||
|  | 	int plane_blocks = xblocks * yblocks; | ||
|  | 
 | ||
|  | 	// Check we have enough output space (16 bytes per block)
 | ||
|  | 	size_t size_needed = xblocks * yblocks * zblocks * 16; | ||
|  | 	if (data_len < size_needed) | ||
|  | 	{ | ||
|  | 		return ASTCENC_ERR_OUT_OF_MEM; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	image_block blk; | ||
|  | 	blk.texel_count = static_cast<uint8_t>(block_x * block_y * block_z); | ||
|  | 
 | ||
|  | 	// If context thread count is one then implicitly reset
 | ||
|  | 	if (ctx->thread_count == 1) | ||
|  | 	{ | ||
|  | 		astcenc_decompress_reset(ctxo); | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Only the first thread actually runs the initializer
 | ||
|  | 	ctxo->manage_decompress.init(zblocks * yblocks * xblocks); | ||
|  | 
 | ||
|  | 	// All threads run this processing loop until there is no work remaining
 | ||
|  | 	while (true) | ||
|  | 	{ | ||
|  | 		unsigned int count; | ||
|  | 		unsigned int base = ctxo->manage_decompress.get_task_assignment(128, count); | ||
|  | 		if (!count) | ||
|  | 		{ | ||
|  | 			break; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		for (unsigned int i = base; i < base + count; i++) | ||
|  | 		{ | ||
|  | 			// Decode i into x, y, z block indices
 | ||
|  | 			int z = i / plane_blocks; | ||
|  | 			unsigned int rem = i - (z * plane_blocks); | ||
|  | 			int y = rem / row_blocks; | ||
|  | 			int x = rem - (y * row_blocks); | ||
|  | 
 | ||
|  | 			unsigned int offset = (((z * yblocks + y) * xblocks) + x) * 16; | ||
|  | 			const uint8_t* bp = data + offset; | ||
|  | 
 | ||
|  | 			const physical_compressed_block& pcb = *reinterpret_cast<const physical_compressed_block*>(bp); | ||
|  | 			symbolic_compressed_block scb; | ||
|  | 
 | ||
|  | 			physical_to_symbolic(*ctx->bsd, pcb, scb); | ||
|  | 
 | ||
|  | 			decompress_symbolic_block(ctx->config.profile, *ctx->bsd, | ||
|  | 			                          x * block_x, y * block_y, z * block_z, | ||
|  | 			                          scb, blk); | ||
|  | 
 | ||
|  | 			store_image_block(image_out, blk, *ctx->bsd, | ||
|  | 			                  x * block_x, y * block_y, z * block_z, *swizzle); | ||
|  | 		} | ||
|  | 
 | ||
|  | 		ctxo->manage_decompress.complete_task_assignment(count); | ||
|  | 	} | ||
|  | 
 | ||
|  | 	return ASTCENC_SUCCESS; | ||
|  | } | ||
|  | 
 | ||
|  | /* See header for documentation. */ | ||
|  | astcenc_error astcenc_decompress_reset( | ||
|  | 	astcenc_context* ctxo | ||
|  | ) { | ||
|  | 	ctxo->manage_decompress.reset(); | ||
|  | 	return ASTCENC_SUCCESS; | ||
|  | } | ||
|  | 
 | ||
|  | /* See header for documentation. */ | ||
|  | astcenc_error astcenc_get_block_info( | ||
|  | 	astcenc_context* ctxo, | ||
|  | 	const uint8_t data[16], | ||
|  | 	astcenc_block_info* info | ||
|  | ) { | ||
|  | #if defined(ASTCENC_DECOMPRESS_ONLY)
 | ||
|  | 	(void)ctxo; | ||
|  | 	(void)data; | ||
|  | 	(void)info; | ||
|  | 	return ASTCENC_ERR_BAD_CONTEXT; | ||
|  | #else
 | ||
|  | 	astcenc_contexti* ctx = &ctxo->context; | ||
|  | 
 | ||
|  | 	// Decode the compressed data into a symbolic form
 | ||
|  | 	const physical_compressed_block&pcb = *reinterpret_cast<const physical_compressed_block*>(data); | ||
|  | 	symbolic_compressed_block scb; | ||
|  | 	physical_to_symbolic(*ctx->bsd, pcb, scb); | ||
|  | 
 | ||
|  | 	// Fetch the appropriate partition and decimation tables
 | ||
|  | 	block_size_descriptor& bsd = *ctx->bsd; | ||
|  | 
 | ||
|  | 	// Start from a clean slate
 | ||
|  | 	memset(info, 0, sizeof(*info)); | ||
|  | 
 | ||
|  | 	// Basic info we can always populate
 | ||
|  | 	info->profile = ctx->config.profile; | ||
|  | 
 | ||
|  | 	info->block_x = ctx->config.block_x; | ||
|  | 	info->block_y = ctx->config.block_y; | ||
|  | 	info->block_z = ctx->config.block_z; | ||
|  | 	info->texel_count = bsd.texel_count; | ||
|  | 
 | ||
|  | 	// Check for error blocks first
 | ||
|  | 	info->is_error_block = scb.block_type == SYM_BTYPE_ERROR; | ||
|  | 	if (info->is_error_block) | ||
|  | 	{ | ||
|  | 		return ASTCENC_SUCCESS; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Check for constant color blocks second
 | ||
|  | 	info->is_constant_block = scb.block_type == SYM_BTYPE_CONST_F16 || | ||
|  | 	                          scb.block_type == SYM_BTYPE_CONST_U16; | ||
|  | 	if (info->is_constant_block) | ||
|  | 	{ | ||
|  | 		return ASTCENC_SUCCESS; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Otherwise handle a full block ; known to be valid after conditions above have been checked
 | ||
|  | 	int partition_count = scb.partition_count; | ||
|  | 	const auto& pi = bsd.get_partition_info(partition_count, scb.partition_index); | ||
|  | 
 | ||
|  | 	const block_mode& bm = bsd.get_block_mode(scb.block_mode); | ||
|  | 	const decimation_info& di = bsd.get_decimation_info(bm.decimation_mode); | ||
|  | 
 | ||
|  | 	info->weight_x = di.weight_x; | ||
|  | 	info->weight_y = di.weight_y; | ||
|  | 	info->weight_z = di.weight_z; | ||
|  | 
 | ||
|  | 	info->is_dual_plane_block = bm.is_dual_plane != 0; | ||
|  | 
 | ||
|  | 	info->partition_count = scb.partition_count; | ||
|  | 	info->partition_index = scb.partition_index; | ||
|  | 	info->dual_plane_component = scb.plane2_component; | ||
|  | 
 | ||
|  | 	info->color_level_count = get_quant_level(scb.get_color_quant_mode()); | ||
|  | 	info->weight_level_count = get_quant_level(bm.get_weight_quant_mode()); | ||
|  | 
 | ||
|  | 	// Unpack color endpoints for each active partition
 | ||
|  | 	for (unsigned int i = 0; i < scb.partition_count; i++) | ||
|  | 	{ | ||
|  | 		bool rgb_hdr; | ||
|  | 		bool a_hdr; | ||
|  | 		vint4 endpnt[2]; | ||
|  | 
 | ||
|  | 		unpack_color_endpoints(ctx->config.profile, | ||
|  | 		                       scb.color_formats[i], | ||
|  | 		                       scb.color_values[i], | ||
|  | 		                       rgb_hdr, a_hdr, | ||
|  | 		                       endpnt[0], endpnt[1]); | ||
|  | 
 | ||
|  | 		// Store the color endpoint mode info
 | ||
|  | 		info->color_endpoint_modes[i] = scb.color_formats[i]; | ||
|  | 		info->is_hdr_block = info->is_hdr_block || rgb_hdr || a_hdr; | ||
|  | 
 | ||
|  | 		// Store the unpacked and decoded color endpoint
 | ||
|  | 		vmask4 hdr_mask(rgb_hdr, rgb_hdr, rgb_hdr, a_hdr); | ||
|  | 		for (int j = 0; j < 2; j++) | ||
|  | 		{ | ||
|  | 			vint4 color_lns = lns_to_sf16(endpnt[j]); | ||
|  | 			vint4 color_unorm = unorm16_to_sf16(endpnt[j]); | ||
|  | 			vint4 datai = select(color_unorm, color_lns, hdr_mask); | ||
|  | 			store(float16_to_float(datai), info->color_endpoints[i][j]); | ||
|  | 		} | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Unpack weights for each texel
 | ||
|  | 	int weight_plane1[BLOCK_MAX_TEXELS]; | ||
|  | 	int weight_plane2[BLOCK_MAX_TEXELS]; | ||
|  | 
 | ||
|  | 	unpack_weights(bsd, scb, di, bm.is_dual_plane, weight_plane1, weight_plane2); | ||
|  | 	for (unsigned int i = 0; i < bsd.texel_count; i++) | ||
|  | 	{ | ||
|  | 		info->weight_values_plane1[i] = static_cast<float>(weight_plane1[i]) * (1.0f / WEIGHTS_TEXEL_SUM); | ||
|  | 		if (info->is_dual_plane_block) | ||
|  | 		{ | ||
|  | 			info->weight_values_plane2[i] = static_cast<float>(weight_plane2[i]) * (1.0f / WEIGHTS_TEXEL_SUM); | ||
|  | 		} | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Unpack partition assignments for each texel
 | ||
|  | 	for (unsigned int i = 0; i < bsd.texel_count; i++) | ||
|  | 	{ | ||
|  | 		info->partition_assignment[i] = pi.partition_of_texel[i]; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	return ASTCENC_SUCCESS; | ||
|  | #endif
 | ||
|  | } | ||
|  | 
 | ||
|  | /* See header for documentation. */ | ||
|  | const char* astcenc_get_error_string( | ||
|  | 	astcenc_error status | ||
|  | ) { | ||
|  | 	// Values in this enum are from an external user, so not guaranteed to be
 | ||
|  | 	// bounded to the enum values
 | ||
|  | 	switch (static_cast<int>(status)) | ||
|  | 	{ | ||
|  | 	case ASTCENC_SUCCESS: | ||
|  | 		return "ASTCENC_SUCCESS"; | ||
|  | 	case ASTCENC_ERR_OUT_OF_MEM: | ||
|  | 		return "ASTCENC_ERR_OUT_OF_MEM"; | ||
|  | 	case ASTCENC_ERR_BAD_CPU_FLOAT: | ||
|  | 		return "ASTCENC_ERR_BAD_CPU_FLOAT"; | ||
|  | 	case ASTCENC_ERR_BAD_CPU_ISA: | ||
|  | 		return "ASTCENC_ERR_BAD_CPU_ISA"; | ||
|  | 	case ASTCENC_ERR_BAD_PARAM: | ||
|  | 		return "ASTCENC_ERR_BAD_PARAM"; | ||
|  | 	case ASTCENC_ERR_BAD_BLOCK_SIZE: | ||
|  | 		return "ASTCENC_ERR_BAD_BLOCK_SIZE"; | ||
|  | 	case ASTCENC_ERR_BAD_PROFILE: | ||
|  | 		return "ASTCENC_ERR_BAD_PROFILE"; | ||
|  | 	case ASTCENC_ERR_BAD_QUALITY: | ||
|  | 		return "ASTCENC_ERR_BAD_QUALITY"; | ||
|  | 	case ASTCENC_ERR_BAD_FLAGS: | ||
|  | 		return "ASTCENC_ERR_BAD_FLAGS"; | ||
|  | 	case ASTCENC_ERR_BAD_SWIZZLE: | ||
|  | 		return "ASTCENC_ERR_BAD_SWIZZLE"; | ||
|  | 	case ASTCENC_ERR_BAD_CONTEXT: | ||
|  | 		return "ASTCENC_ERR_BAD_CONTEXT"; | ||
|  | 	case ASTCENC_ERR_NOT_IMPLEMENTED: | ||
|  | 		return "ASTCENC_ERR_NOT_IMPLEMENTED"; | ||
|  | #if defined(ASTCENC_DIAGNOSTICS)
 | ||
|  | 	case ASTCENC_ERR_DTRACE_FAILURE: | ||
|  | 		return "ASTCENC_ERR_DTRACE_FAILURE"; | ||
|  | #endif
 | ||
|  | 	default: | ||
|  | 		return nullptr; | ||
|  | 	} | ||
|  | } |