basis_universal: Update to 1.60

This commit is contained in:
Jakub Marcowski 2025-03-11 10:51:10 +01:00
parent 78c9f8ddd9
commit 246b062bd6
No known key found for this signature in database
GPG key ID: 10D9E07CFFBC0E6F
43 changed files with 29563 additions and 6572 deletions

View file

@ -353,9 +353,7 @@ namespace basisu
const uint32_t first_index = block_index_iter;
const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->add_job([this, first_index, last_index] {
#endif
for (uint32_t block_index = first_index; block_index < last_index; block_index++)
{
@ -388,15 +386,11 @@ namespace basisu
m_block_selector_cluster_index[block_index] = best_index;
}
#ifndef __EMSCRIPTEN__
});
#endif
}
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->wait_for_all();
#endif
m_encoded_blocks.resize(m_total_blocks);
for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
@ -425,9 +419,7 @@ namespace basisu
const uint32_t first_index = block_index_iter;
const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->add_job([this, first_index, last_index, pass] {
#endif
for (uint32_t block_index = first_index; block_index < last_index; block_index++)
{
@ -475,15 +467,11 @@ namespace basisu
} // block_index
#ifndef __EMSCRIPTEN__
});
#endif
}
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->wait_for_all();
#endif
m_endpoint_clusters.resize(0);
m_endpoint_clusters.resize(endpoints.size());
@ -501,9 +489,7 @@ namespace basisu
const uint32_t first_index = block_index_iter;
const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->add_job([this, first_index, last_index] {
#endif
for (uint32_t block_index = first_index; block_index < last_index; block_index++)
{
@ -535,15 +521,11 @@ namespace basisu
m_block_selector_cluster_index[block_index] = best_index;
}
#ifndef __EMSCRIPTEN__
});
#endif
}
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->wait_for_all();
#endif
m_encoded_blocks.resize(m_total_blocks);
for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
@ -573,7 +555,7 @@ namespace basisu
debug_printf("introduce_special_selector_clusters\n");
uint32_t total_blocks_relocated = 0;
const uint32_t initial_selector_clusters = (uint32_t)m_selector_cluster_block_indices.size();
const uint32_t initial_selector_clusters = m_selector_cluster_block_indices.size_u32();
bool_vec block_relocated_flags(m_total_blocks);
@ -595,7 +577,7 @@ namespace basisu
debug_printf("Introducing sel %u\n", sel);
const uint32_t new_selector_cluster_index = (uint32_t)m_optimized_cluster_selectors.size();
const uint32_t new_selector_cluster_index = m_optimized_cluster_selectors.size_u32();
m_optimized_cluster_selectors.push_back(blk);
@ -675,7 +657,7 @@ namespace basisu
{
debug_printf("optimize_selector_codebook\n");
const uint32_t orig_total_selector_clusters = (uint32_t)m_optimized_cluster_selectors.size();
const uint32_t orig_total_selector_clusters = m_optimized_cluster_selectors.size_u32();
bool_vec selector_cluster_was_used(m_optimized_cluster_selectors.size());
for (uint32_t i = 0; i < m_total_blocks; i++)
@ -787,9 +769,7 @@ namespace basisu
const uint32_t first_index = block_index_iter;
const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->add_job([this, first_index, last_index] {
#endif
for (uint32_t block_index = first_index; block_index < last_index; block_index++)
{
@ -830,15 +810,11 @@ namespace basisu
blk.set_selector(x, y, selectors[x + y * 4]);
}
#ifndef __EMSCRIPTEN__
});
#endif
}
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->wait_for_all();
#endif
} // use_cpu
@ -859,9 +835,7 @@ namespace basisu
const uint32_t first_index = block_index_iter;
const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] {
#endif
for (uint32_t block_index = first_index; block_index < last_index; block_index++)
{
@ -883,15 +857,11 @@ namespace basisu
} // block_index;
#ifndef __EMSCRIPTEN__
} );
#endif
} // block_index_iter
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->wait_for_all();
#endif
}
void basisu_frontend::generate_endpoint_clusters()
@ -970,7 +940,7 @@ namespace basisu
}
if (m_params.m_debug_stats)
debug_printf("Total endpoint clusters: %u, parent clusters: %u\n", (uint32_t)m_endpoint_clusters.size(), (uint32_t)m_endpoint_parent_clusters.size());
debug_printf("Total endpoint clusters: %u, parent clusters: %u\n", m_endpoint_clusters.size_u32(), m_endpoint_parent_clusters.size_u32());
}
// Iterate through each array of endpoint cluster block indices and set the m_block_endpoint_clusters_indices[][] array to indicate which cluster index each block uses.
@ -1040,11 +1010,9 @@ namespace basisu
for (uint32_t cluster_index_iter = 0; cluster_index_iter < m_endpoint_clusters.size(); cluster_index_iter += N)
{
const uint32_t first_index = cluster_index_iter;
const uint32_t last_index = minimum<uint32_t>((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N);
const uint32_t last_index = minimum<uint32_t>(m_endpoint_clusters.size_u32(), cluster_index_iter + N);
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->add_job( [this, first_index, last_index] {
#endif
for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
{
@ -1112,15 +1080,11 @@ namespace basisu
}
} // cluster_index
#ifndef __EMSCRIPTEN__
} );
#endif
} // cluster_index_iter
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->wait_for_all();
#endif
vector_sort(m_subblock_endpoint_quant_err_vec);
}
@ -1131,19 +1095,19 @@ namespace basisu
generate_block_endpoint_clusters();
int num_new_endpoint_clusters = m_params.m_max_endpoint_clusters - (uint32_t)m_endpoint_clusters.size();
int num_new_endpoint_clusters = m_params.m_max_endpoint_clusters - m_endpoint_clusters.size_u32();
if (num_new_endpoint_clusters <= 0)
return;
compute_endpoint_subblock_error_vec();
const uint32_t num_orig_endpoint_clusters = (uint32_t)m_endpoint_clusters.size();
const uint32_t num_orig_endpoint_clusters = m_endpoint_clusters.size_u32();
std::unordered_set<uint32_t> training_vector_was_relocated;
uint_vec cluster_sizes(num_orig_endpoint_clusters);
for (uint32_t i = 0; i < num_orig_endpoint_clusters; i++)
cluster_sizes[i] = (uint32_t)m_endpoint_clusters[i].size();
cluster_sizes[i] = m_endpoint_clusters[i].size_u32();
std::unordered_set<uint32_t> ignore_cluster;
@ -1259,7 +1223,7 @@ namespace basisu
// TODO: Get this working when step>0
if (m_params.m_pOpenCL_context && !step)
{
const uint32_t total_clusters = m_endpoint_clusters.size();
const uint32_t total_clusters = (uint32_t)m_endpoint_clusters.size();
basisu::vector<cl_pixel_cluster> pixel_clusters(total_clusters);
@ -1342,7 +1306,7 @@ namespace basisu
} // cluster_indices_iter
uint32_t* pSorted = radix_sort(colors.size(), colors.data(), colors2.data(), 0, 3);
uint32_t* pSorted = radix_sort((uint32_t)colors.size(), colors.data(), colors2.data(), 0, 3);
const uint64_t first_pixel_index = input_pixels.size();
@ -1522,9 +1486,7 @@ namespace basisu
const uint32_t first_index = cluster_index_iter;
const uint32_t last_index = minimum<uint32_t>((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N);
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->add_job([this, first_index, last_index, step] {
#endif
for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
{
@ -1643,15 +1605,11 @@ namespace basisu
} // cluster_index
#ifndef __EMSCRIPTEN__
});
#endif
} // cluster_index_iter
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->wait_for_all();
#endif
}
debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
@ -1726,14 +1684,14 @@ namespace basisu
{
// For the OpenCL kernel, we order the parent endpoint clusters by smallest to largest for efficiency.
// We also prepare an array of block info structs that point into this new parent endpoint cluster array.
const uint32_t total_parent_clusters = m_endpoint_clusters_within_each_parent_cluster.size();
const uint32_t total_parent_clusters = (uint32_t)m_endpoint_clusters_within_each_parent_cluster.size();
basisu::vector<cl_block_info_struct> cl_block_info_structs(m_total_blocks);
// the size of each parent cluster, in total clusters
uint_vec parent_cluster_sizes(total_parent_clusters);
for (uint32_t i = 0; i < total_parent_clusters; i++)
parent_cluster_sizes[i] = m_endpoint_clusters_within_each_parent_cluster[i].size();
parent_cluster_sizes[i] = (uint32_t)m_endpoint_clusters_within_each_parent_cluster[i].size();
uint_vec first_parent_cluster_ofs(total_parent_clusters);
uint32_t cur_ofs = 0;
@ -1818,9 +1776,7 @@ namespace basisu
const uint32_t first_index = block_index_iter;
const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->add_job([this, first_index, last_index, &best_cluster_indices, &block_clusters] {
#endif
for (uint32_t block_index = first_index; block_index < last_index; block_index++)
{
@ -1951,15 +1907,11 @@ namespace basisu
} // block_index
#ifndef __EMSCRIPTEN__
});
#endif
} // block_index_iter
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->wait_for_all();
#endif
} // use_cpu
@ -2104,9 +2056,7 @@ namespace basisu
const uint32_t first_index = block_index_iter;
const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->add_job([this, first_index, last_index] {
#endif
for (uint32_t block_index = first_index; block_index < last_index; block_index++)
{
@ -2131,15 +2081,11 @@ namespace basisu
} // block_index
#ifndef __EMSCRIPTEN__
});
#endif
} // block_index_iter
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->wait_for_all();
#endif
} // use_cpu
@ -2204,9 +2150,7 @@ namespace basisu
const uint32_t first_index = block_index_iter;
const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] {
#endif
for (uint32_t block_index = first_index; block_index < last_index; block_index++)
{
@ -2233,15 +2177,11 @@ namespace basisu
} // block_index
#ifndef __EMSCRIPTEN__
} );
#endif
} // block_index_iter
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->wait_for_all();
#endif
vec16F_clusterizer selector_clusterizer;
for (uint32_t i = 0; i < m_total_blocks; i++)
@ -2335,9 +2275,7 @@ namespace basisu
const uint32_t first_index = cluster_index_iter;
const uint32_t last_index = minimum<uint32_t>((uint32_t)total_selector_clusters, cluster_index_iter + N);
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->add_job([this, first_index, last_index] {
#endif
for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
{
@ -2406,15 +2344,11 @@ namespace basisu
} // cluster_index
#ifndef __EMSCRIPTEN__
});
#endif
} // cluster_index_iter
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->wait_for_all();
#endif
debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
@ -2506,7 +2440,7 @@ namespace basisu
if ((m_params.m_pOpenCL_context) && m_use_hierarchical_selector_codebooks)
{
const uint32_t num_parent_clusters = m_selector_clusters_within_each_parent_cluster.size();
const uint32_t num_parent_clusters = m_selector_clusters_within_each_parent_cluster.size_u32();
basisu::vector<fosc_selector_struct> selector_structs;
selector_structs.reserve(m_optimized_cluster_selectors.size());
@ -2534,7 +2468,7 @@ namespace basisu
selector_cluster_indices.push_back(selector_cluster_index);
}
cur_ofs += m_selector_clusters_within_each_parent_cluster[parent_index].size();
cur_ofs += m_selector_clusters_within_each_parent_cluster[parent_index].size_u32();
}
const uint32_t total_input_selectors = cur_ofs;
@ -2549,7 +2483,7 @@ namespace basisu
block_structs[i].m_etc_color5_inten.a = (uint8_t)blk.get_inten_table(0);
block_structs[i].m_first_selector = parent_selector_cluster_offsets[parent_selector_cluster];
block_structs[i].m_num_selectors = m_selector_clusters_within_each_parent_cluster[parent_selector_cluster].size();
block_structs[i].m_num_selectors = m_selector_clusters_within_each_parent_cluster[parent_selector_cluster].size_u32();
}
uint_vec output_selector_cluster_indices(m_total_blocks);
@ -2615,10 +2549,8 @@ namespace basisu
const uint32_t first_index = block_index_iter;
const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->add_job( [this, first_index, last_index, &unpacked_optimized_cluster_selectors] {
#endif
int prev_best_cluster_index = 0;
for (uint32_t block_index = first_index; block_index < last_index; block_index++)
@ -2756,15 +2688,11 @@ namespace basisu
} // block_index
#ifndef __EMSCRIPTEN__
} );
#endif
} // block_index_iter
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->wait_for_all();
#endif
for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++)
{
@ -3081,9 +3009,7 @@ namespace basisu
const uint32_t first_index = cluster_index_iter;
const uint32_t last_index = minimum<uint32_t>((uint32_t)new_endpoint_cluster_block_indices.size(), cluster_index_iter + N);
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->add_job( [this, first_index, last_index, &cluster_improved, &cluster_valid, &new_endpoint_cluster_block_indices, &pBlock_selector_indices ] {
#endif
for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
{
@ -3173,15 +3099,11 @@ namespace basisu
} // cluster_index
#ifndef __EMSCRIPTEN__
} );
#endif
} // cluster_index_iter
#ifndef __EMSCRIPTEN__
m_params.m_pJob_pool->wait_for_all();
#endif
uint32_t total_unused_clusters = 0;
uint32_t total_improved_clusters = 0;