mirror of
				https://github.com/godotengine/godot.git
				synced 2025-10-31 21:51:22 +00:00 
			
		
		
		
	
		
			
	
	
		
			482 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
		
		
			
		
	
	
			482 lines
		
	
	
	
		
			13 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
|   | // SPDX-License-Identifier: Apache-2.0
 | ||
|  | // ----------------------------------------------------------------------------
 | ||
|  | // Copyright 2011-2023 Arm Limited
 | ||
|  | //
 | ||
|  | // Licensed under the Apache License, Version 2.0 (the "License"); you may not
 | ||
|  | // use this file except in compliance with the License. You may obtain a copy
 | ||
|  | // of the License at:
 | ||
|  | //
 | ||
|  | //     http://www.apache.org/licenses/LICENSE-2.0
 | ||
|  | //
 | ||
|  | // Unless required by applicable law or agreed to in writing, software
 | ||
|  | // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 | ||
|  | // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 | ||
|  | // License for the specific language governing permissions and limitations
 | ||
|  | // under the License.
 | ||
|  | // ----------------------------------------------------------------------------
 | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief Functions for generating partition tables on demand. | ||
|  |  */ | ||
|  | 
 | ||
|  | #include "astcenc_internal.h"
 | ||
|  | 
 | ||
|  | /** @brief The number of 64-bit words needed to represent a canonical partition bit pattern. */ | ||
|  | #define BIT_PATTERN_WORDS (((ASTCENC_BLOCK_MAX_TEXELS * 2) + 63) / 64)
 | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief Generate a canonical representation of a partition pattern. | ||
|  |  * | ||
|  |  * The returned value stores two bits per texel, for up to 6x6x6 texels, where the two bits store | ||
|  |  * the remapped texel index. Remapping ensures that we only match on the partition pattern, | ||
|  |  * independent of the partition order generated by the hash. | ||
|  |  * | ||
|  |  * @param      texel_count          The number of texels in the block. | ||
|  |  * @param      partition_of_texel   The partition assignments, in hash order. | ||
|  |  * @param[out] bit_pattern          The output bit pattern representation. | ||
|  |  */ | ||
|  | static void generate_canonical_partitioning( | ||
|  | 	unsigned int texel_count, | ||
|  | 	const uint8_t* partition_of_texel, | ||
|  | 	uint64_t bit_pattern[BIT_PATTERN_WORDS] | ||
|  | ) { | ||
|  | 	// Clear the pattern
 | ||
|  | 	for (unsigned int i = 0; i < BIT_PATTERN_WORDS; i++) | ||
|  | 	{ | ||
|  | 		bit_pattern[i] = 0; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Store a mapping to reorder the raw partitions so that the partitions are ordered such
 | ||
|  | 	// that the lowest texel index in partition N is smaller than the lowest texel index in
 | ||
|  | 	// partition N + 1.
 | ||
|  | 	int mapped_index[BLOCK_MAX_PARTITIONS]; | ||
|  | 	int map_weight_count = 0; | ||
|  | 
 | ||
|  | 	for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++) | ||
|  | 	{ | ||
|  | 		mapped_index[i] = -1; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	for (unsigned int i = 0; i < texel_count; i++) | ||
|  | 	{ | ||
|  | 		int index = partition_of_texel[i]; | ||
|  | 		if (mapped_index[index] < 0) | ||
|  | 		{ | ||
|  | 			mapped_index[index] = map_weight_count++; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		uint64_t xlat_index = mapped_index[index]; | ||
|  | 		bit_pattern[i >> 5] |= xlat_index << (2 * (i & 0x1F)); | ||
|  | 	} | ||
|  | } | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief Compare two canonical patterns to see if they are the same. | ||
|  |  * | ||
|  |  * @param part1   The first canonical bit pattern to check. | ||
|  |  * @param part2   The second canonical bit pattern to check. | ||
|  |  * | ||
|  |  * @return @c true if the patterns are the same, @c false otherwise. | ||
|  |  */ | ||
|  | static bool compare_canonical_partitionings( | ||
|  | 	const uint64_t part1[BIT_PATTERN_WORDS], | ||
|  | 	const uint64_t part2[BIT_PATTERN_WORDS] | ||
|  | ) { | ||
|  | 	return (part1[0] == part2[0]) | ||
|  | #if BIT_PATTERN_WORDS > 1
 | ||
|  | 	    && (part1[1] == part2[1]) | ||
|  | #endif
 | ||
|  | #if BIT_PATTERN_WORDS > 2
 | ||
|  | 	    && (part1[2] == part2[2]) | ||
|  | #endif
 | ||
|  | #if BIT_PATTERN_WORDS > 3
 | ||
|  | 	    && (part1[3] == part2[3]) | ||
|  | #endif
 | ||
|  | #if BIT_PATTERN_WORDS > 4
 | ||
|  | 	    && (part1[4] == part2[4]) | ||
|  | #endif
 | ||
|  | #if BIT_PATTERN_WORDS > 5
 | ||
|  | 	    && (part1[5] == part2[5]) | ||
|  | #endif
 | ||
|  | #if BIT_PATTERN_WORDS > 6
 | ||
|  | 	    && (part1[6] == part2[6]) | ||
|  | #endif
 | ||
|  | 	    ; | ||
|  | } | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief Hash function used for procedural partition assignment. | ||
|  |  * | ||
|  |  * @param inp   The hash seed. | ||
|  |  * | ||
|  |  * @return The hashed value. | ||
|  |  */ | ||
|  | static uint32_t hash52( | ||
|  | 	uint32_t inp | ||
|  | ) { | ||
|  | 	inp ^= inp >> 15; | ||
|  | 
 | ||
|  | 	// (2^4 + 1) * (2^7 + 1) * (2^17 - 1)
 | ||
|  | 	inp *= 0xEEDE0891; | ||
|  | 	inp ^= inp >> 5; | ||
|  | 	inp += inp << 16; | ||
|  | 	inp ^= inp >> 7; | ||
|  | 	inp ^= inp >> 3; | ||
|  | 	inp ^= inp << 6; | ||
|  | 	inp ^= inp >> 17; | ||
|  | 	return inp; | ||
|  | } | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief Select texel assignment for a single coordinate. | ||
|  |  * | ||
|  |  * @param seed              The seed - the partition index from the block. | ||
|  |  * @param x                 The texel X coordinate in the block. | ||
|  |  * @param y                 The texel Y coordinate in the block. | ||
|  |  * @param z                 The texel Z coordinate in the block. | ||
|  |  * @param partition_count   The total partition count of this encoding. | ||
|  |  * @param small_block       @c true if the block has fewer than 32 texels. | ||
|  |  * | ||
|  |  * @return The assigned partition index for this texel. | ||
|  |  */ | ||
|  | static uint8_t select_partition( | ||
|  | 	int seed, | ||
|  | 	int x, | ||
|  | 	int y, | ||
|  | 	int z, | ||
|  | 	int partition_count, | ||
|  | 	bool small_block | ||
|  | ) { | ||
|  | 	// For small blocks bias the coordinates to get better distribution
 | ||
|  | 	if (small_block) | ||
|  | 	{ | ||
|  | 		x <<= 1; | ||
|  | 		y <<= 1; | ||
|  | 		z <<= 1; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	seed += (partition_count - 1) * 1024; | ||
|  | 
 | ||
|  | 	uint32_t rnum = hash52(seed); | ||
|  | 
 | ||
|  | 	uint8_t seed1 = rnum & 0xF; | ||
|  | 	uint8_t seed2 = (rnum >> 4) & 0xF; | ||
|  | 	uint8_t seed3 = (rnum >> 8) & 0xF; | ||
|  | 	uint8_t seed4 = (rnum >> 12) & 0xF; | ||
|  | 	uint8_t seed5 = (rnum >> 16) & 0xF; | ||
|  | 	uint8_t seed6 = (rnum >> 20) & 0xF; | ||
|  | 	uint8_t seed7 = (rnum >> 24) & 0xF; | ||
|  | 	uint8_t seed8 = (rnum >> 28) & 0xF; | ||
|  | 	uint8_t seed9 = (rnum >> 18) & 0xF; | ||
|  | 	uint8_t seed10 = (rnum >> 22) & 0xF; | ||
|  | 	uint8_t seed11 = (rnum >> 26) & 0xF; | ||
|  | 	uint8_t seed12 = ((rnum >> 30) | (rnum << 2)) & 0xF; | ||
|  | 
 | ||
|  | 	// Squaring all the seeds in order to bias their distribution towards lower values.
 | ||
|  | 	seed1 *= seed1; | ||
|  | 	seed2 *= seed2; | ||
|  | 	seed3 *= seed3; | ||
|  | 	seed4 *= seed4; | ||
|  | 	seed5 *= seed5; | ||
|  | 	seed6 *= seed6; | ||
|  | 	seed7 *= seed7; | ||
|  | 	seed8 *= seed8; | ||
|  | 	seed9 *= seed9; | ||
|  | 	seed10 *= seed10; | ||
|  | 	seed11 *= seed11; | ||
|  | 	seed12 *= seed12; | ||
|  | 
 | ||
|  | 	int sh1, sh2; | ||
|  | 	if (seed & 1) | ||
|  | 	{ | ||
|  | 		sh1 = (seed & 2 ? 4 : 5); | ||
|  | 		sh2 = (partition_count == 3 ? 6 : 5); | ||
|  | 	} | ||
|  | 	else | ||
|  | 	{ | ||
|  | 		sh1 = (partition_count == 3 ? 6 : 5); | ||
|  | 		sh2 = (seed & 2 ? 4 : 5); | ||
|  | 	} | ||
|  | 
 | ||
|  | 	int sh3 = (seed & 0x10) ? sh1 : sh2; | ||
|  | 
 | ||
|  | 	seed1 >>= sh1; | ||
|  | 	seed2 >>= sh2; | ||
|  | 	seed3 >>= sh1; | ||
|  | 	seed4 >>= sh2; | ||
|  | 	seed5 >>= sh1; | ||
|  | 	seed6 >>= sh2; | ||
|  | 	seed7 >>= sh1; | ||
|  | 	seed8 >>= sh2; | ||
|  | 
 | ||
|  | 	seed9 >>= sh3; | ||
|  | 	seed10 >>= sh3; | ||
|  | 	seed11 >>= sh3; | ||
|  | 	seed12 >>= sh3; | ||
|  | 
 | ||
|  | 	int a = seed1 * x + seed2 * y + seed11 * z + (rnum >> 14); | ||
|  | 	int b = seed3 * x + seed4 * y + seed12 * z + (rnum >> 10); | ||
|  | 	int c = seed5 * x + seed6 * y + seed9 * z + (rnum >> 6); | ||
|  | 	int d = seed7 * x + seed8 * y + seed10 * z + (rnum >> 2); | ||
|  | 
 | ||
|  | 	// Apply the saw
 | ||
|  | 	a &= 0x3F; | ||
|  | 	b &= 0x3F; | ||
|  | 	c &= 0x3F; | ||
|  | 	d &= 0x3F; | ||
|  | 
 | ||
|  | 	// Remove some of the components if we are to output < 4 partitions.
 | ||
|  | 	if (partition_count <= 3) | ||
|  | 	{ | ||
|  | 		d = 0; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	if (partition_count <= 2) | ||
|  | 	{ | ||
|  | 		c = 0; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	if (partition_count <= 1) | ||
|  | 	{ | ||
|  | 		b = 0; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	uint8_t partition; | ||
|  | 	if (a >= b && a >= c && a >= d) | ||
|  | 	{ | ||
|  | 		partition = 0; | ||
|  | 	} | ||
|  | 	else if (b >= c && b >= d) | ||
|  | 	{ | ||
|  | 		partition = 1; | ||
|  | 	} | ||
|  | 	else if (c >= d) | ||
|  | 	{ | ||
|  | 		partition = 2; | ||
|  | 	} | ||
|  | 	else | ||
|  | 	{ | ||
|  | 		partition = 3; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	return partition; | ||
|  | } | ||
|  | 
 | ||
|  | /**
 | ||
|  |  * @brief Generate a single partition info structure. | ||
|  |  * | ||
|  |  * @param[out] bsd                     The block size information. | ||
|  |  * @param      partition_count         The partition count of this partitioning. | ||
|  |  * @param      partition_index         The partition index / seed of this partitioning. | ||
|  |  * @param      partition_remap_index   The remapped partition index of this partitioning. | ||
|  |  * @param[out] pi                      The partition info structure to populate. | ||
|  |  * | ||
|  |  * @return True if this is a useful partition index, False if we can skip it. | ||
|  |  */ | ||
|  | static bool generate_one_partition_info_entry( | ||
|  | 	block_size_descriptor& bsd, | ||
|  | 	unsigned int partition_count, | ||
|  | 	unsigned int partition_index, | ||
|  | 	unsigned int partition_remap_index, | ||
|  | 	partition_info& pi | ||
|  | ) { | ||
|  | 	int texels_per_block = bsd.texel_count; | ||
|  | 	bool small_block = texels_per_block < 32; | ||
|  | 
 | ||
|  | 	uint8_t *partition_of_texel = pi.partition_of_texel; | ||
|  | 
 | ||
|  | 	// Assign texels to partitions
 | ||
|  | 	int texel_idx = 0; | ||
|  | 	int counts[BLOCK_MAX_PARTITIONS] { 0 }; | ||
|  | 	for (unsigned int z = 0; z < bsd.zdim; z++) | ||
|  | 	{ | ||
|  | 		for (unsigned int y = 0; y <  bsd.ydim; y++) | ||
|  | 		{ | ||
|  | 			for (unsigned int x = 0; x <  bsd.xdim; x++) | ||
|  | 			{ | ||
|  | 				uint8_t part = select_partition(partition_index, x, y, z, partition_count, small_block); | ||
|  | 				pi.texels_of_partition[part][counts[part]++] = static_cast<uint8_t>(texel_idx++); | ||
|  | 				*partition_of_texel++ = part; | ||
|  | 			} | ||
|  | 		} | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Fill loop tail so we can overfetch later
 | ||
|  | 	for (unsigned int i = 0; i < partition_count; i++) | ||
|  | 	{ | ||
|  | 		int ptex_count = counts[i]; | ||
|  | 		int ptex_count_simd = round_up_to_simd_multiple_vla(ptex_count); | ||
|  | 		for (int j = ptex_count; j < ptex_count_simd; j++) | ||
|  | 		{ | ||
|  | 			pi.texels_of_partition[i][j] = pi.texels_of_partition[i][ptex_count - 1]; | ||
|  | 		} | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Populate the actual procedural partition count
 | ||
|  | 	if (counts[0] == 0) | ||
|  | 	{ | ||
|  | 		pi.partition_count = 0; | ||
|  | 	} | ||
|  | 	else if (counts[1] == 0) | ||
|  | 	{ | ||
|  | 		pi.partition_count = 1; | ||
|  | 	} | ||
|  | 	else if (counts[2] == 0) | ||
|  | 	{ | ||
|  | 		pi.partition_count = 2; | ||
|  | 	} | ||
|  | 	else if (counts[3] == 0) | ||
|  | 	{ | ||
|  | 		pi.partition_count = 3; | ||
|  | 	} | ||
|  | 	else | ||
|  | 	{ | ||
|  | 		pi.partition_count = 4; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Populate the partition index
 | ||
|  | 	pi.partition_index = static_cast<uint16_t>(partition_index); | ||
|  | 
 | ||
|  | 	// Populate the coverage bitmaps for 2/3/4 partitions
 | ||
|  | 	uint64_t* bitmaps { nullptr }; | ||
|  | 	if (partition_count == 2) | ||
|  | 	{ | ||
|  | 		bitmaps = bsd.coverage_bitmaps_2[partition_remap_index]; | ||
|  | 	} | ||
|  | 	else if (partition_count == 3) | ||
|  | 	{ | ||
|  | 		bitmaps = bsd.coverage_bitmaps_3[partition_remap_index]; | ||
|  | 	} | ||
|  | 	else if (partition_count == 4) | ||
|  | 	{ | ||
|  | 		bitmaps = bsd.coverage_bitmaps_4[partition_remap_index]; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONS; i++) | ||
|  | 	{ | ||
|  | 		pi.partition_texel_count[i] = static_cast<uint8_t>(counts[i]); | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Valid partitionings have texels in all of the requested partitions
 | ||
|  | 	bool valid = pi.partition_count == partition_count; | ||
|  | 
 | ||
|  | 	if (bitmaps) | ||
|  | 	{ | ||
|  | 		// Populate the partition coverage bitmap
 | ||
|  | 		for (unsigned int i = 0; i < partition_count; i++) | ||
|  | 		{ | ||
|  | 			bitmaps[i] = 0ULL; | ||
|  | 		} | ||
|  | 
 | ||
|  | 		unsigned int texels_to_process = astc::min(bsd.texel_count, BLOCK_MAX_KMEANS_TEXELS); | ||
|  | 		for (unsigned int i = 0; i < texels_to_process; i++) | ||
|  | 		{ | ||
|  | 			unsigned int idx = bsd.kmeans_texels[i]; | ||
|  | 			bitmaps[pi.partition_of_texel[idx]] |= 1ULL << i; | ||
|  | 		} | ||
|  | 	} | ||
|  | 
 | ||
|  | 	return valid; | ||
|  | } | ||
|  | 
 | ||
|  | static void build_partition_table_for_one_partition_count( | ||
|  | 	block_size_descriptor& bsd, | ||
|  | 	bool can_omit_partitionings, | ||
|  | 	unsigned int partition_count_cutoff, | ||
|  | 	unsigned int partition_count, | ||
|  | 	partition_info* ptab, | ||
|  | 	uint64_t* canonical_patterns | ||
|  | ) { | ||
|  | 	unsigned int next_index = 0; | ||
|  | 	bsd.partitioning_count_selected[partition_count - 1] = 0; | ||
|  | 	bsd.partitioning_count_all[partition_count - 1] = 0; | ||
|  | 
 | ||
|  | 	// Skip tables larger than config max partition count if we can omit modes
 | ||
|  | 	if (can_omit_partitionings && (partition_count > partition_count_cutoff)) | ||
|  | 	{ | ||
|  | 		return; | ||
|  | 	} | ||
|  | 
 | ||
|  | 	// Iterate through twice
 | ||
|  | 	//   - Pass 0: Keep selected partitionings
 | ||
|  | 	//   - Pass 1: Keep non-selected partitionings (skip if in omit mode)
 | ||
|  | 	unsigned int max_iter = can_omit_partitionings ? 1 : 2; | ||
|  | 
 | ||
|  | 	// Tracker for things we built in the first iteration
 | ||
|  | 	uint8_t build[BLOCK_MAX_PARTITIONINGS] { 0 }; | ||
|  | 	for (unsigned int x = 0; x < max_iter; x++) | ||
|  | 	{ | ||
|  | 		for (unsigned int i = 0; i < BLOCK_MAX_PARTITIONINGS; i++) | ||
|  | 		{ | ||
|  | 			// Don't include things we built in the first pass
 | ||
|  | 			if ((x == 1) && build[i]) | ||
|  | 			{ | ||
|  | 				continue; | ||
|  | 			} | ||
|  | 
 | ||
|  | 			bool keep_useful = generate_one_partition_info_entry(bsd, partition_count, i, next_index, ptab[next_index]); | ||
|  | 			if ((x == 0) && !keep_useful) | ||
|  | 			{ | ||
|  | 				continue; | ||
|  | 			} | ||
|  | 
 | ||
|  | 			generate_canonical_partitioning(bsd.texel_count, ptab[next_index].partition_of_texel, canonical_patterns + next_index * BIT_PATTERN_WORDS); | ||
|  | 			bool keep_canonical = true; | ||
|  | 			for (unsigned int j = 0; j < next_index; j++) | ||
|  | 			{ | ||
|  | 				bool match = compare_canonical_partitionings(canonical_patterns + next_index * BIT_PATTERN_WORDS, canonical_patterns +  j * BIT_PATTERN_WORDS); | ||
|  | 				if (match) | ||
|  | 				{ | ||
|  | 					keep_canonical = false; | ||
|  | 					break; | ||
|  | 				} | ||
|  | 			} | ||
|  | 
 | ||
|  | 			if (keep_useful && keep_canonical) | ||
|  | 			{ | ||
|  | 				if (x == 0) | ||
|  | 				{ | ||
|  | 					bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index); | ||
|  | 					bsd.partitioning_count_selected[partition_count - 1]++; | ||
|  | 					bsd.partitioning_count_all[partition_count - 1]++; | ||
|  | 					build[i] = 1; | ||
|  | 					next_index++; | ||
|  | 				} | ||
|  | 			} | ||
|  | 			else | ||
|  | 			{ | ||
|  | 				if (x == 1) | ||
|  | 				{ | ||
|  | 					bsd.partitioning_packed_index[partition_count - 2][i] = static_cast<uint16_t>(next_index); | ||
|  | 					bsd.partitioning_count_all[partition_count - 1]++; | ||
|  | 					next_index++; | ||
|  | 				} | ||
|  | 			} | ||
|  | 		} | ||
|  | 	} | ||
|  | } | ||
|  | 
 | ||
|  | /* See header for documentation. */ | ||
|  | void init_partition_tables( | ||
|  | 	block_size_descriptor& bsd, | ||
|  | 	bool can_omit_partitionings, | ||
|  | 	unsigned int partition_count_cutoff | ||
|  | ) { | ||
|  | 	partition_info* par_tab2 = bsd.partitionings; | ||
|  | 	partition_info* par_tab3 = par_tab2 + BLOCK_MAX_PARTITIONINGS; | ||
|  | 	partition_info* par_tab4 = par_tab3 + BLOCK_MAX_PARTITIONINGS; | ||
|  | 	partition_info* par_tab1 = par_tab4 + BLOCK_MAX_PARTITIONINGS; | ||
|  | 
 | ||
|  | 	generate_one_partition_info_entry(bsd, 1, 0, 0, *par_tab1); | ||
|  | 	bsd.partitioning_count_selected[0] = 1; | ||
|  | 	bsd.partitioning_count_all[0] = 1; | ||
|  | 
 | ||
|  | 	uint64_t* canonical_patterns = new uint64_t[BLOCK_MAX_PARTITIONINGS * BIT_PATTERN_WORDS]; | ||
|  | 
 | ||
|  | 	build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 2, par_tab2, canonical_patterns); | ||
|  | 	build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 3, par_tab3, canonical_patterns); | ||
|  | 	build_partition_table_for_one_partition_count(bsd, can_omit_partitionings, partition_count_cutoff, 4, par_tab4, canonical_patterns); | ||
|  | 
 | ||
|  | 	delete[] canonical_patterns; | ||
|  | } |