2025-03-11 10:51:10 +01:00
// File: basisu_astc_hdr_6x6_enc.cpp
# include "basisu_astc_hdr_6x6_enc.h"
# include "basisu_enc.h"
# include "basisu_astc_hdr_common.h"
# include "basisu_math.h"
# include "basisu_resampler.h"
# include "basisu_resampler_filters.h"
# define MINIZ_HEADER_FILE_ONLY
# define MINIZ_NO_ZLIB_COMPATIBLE_NAMES
# include "basisu_miniz.h"
# include "3rdparty/android_astc_decomp.h"
# include <array>
using namespace basisu ;
using namespace buminiz ;
using namespace basist : : astc_6x6_hdr ;
namespace astc_6x6_hdr
{
// Atomically raises atomic_var to new_value if new_value is larger; otherwise leaves it untouched.
//   atomic_var - the shared maximum being tracked.
//   new_value  - candidate value to fold into the maximum.
// All accesses use relaxed ordering: only the value itself is synchronized, nothing else is published through it.
static void atomic_max(std::atomic<uint32_t>& atomic_var, uint32_t new_value)
{
	uint32_t current = atomic_var.load(std::memory_order_relaxed);

	// Only attempt the exchange while the stored value is actually smaller. This avoids a needless
	// read-modify-write (and the associated cache line contention) when the maximum is already large enough.
	// On failure compare_exchange_weak() refreshes 'current' with the latest stored value, so the
	// comparison is re-evaluated against up-to-date data on every retry (including spurious failures).
	while ((current < new_value) &&
		!atomic_var.compare_exchange_weak(current, new_value, std::memory_order_relaxed, std::memory_order_relaxed))
	{
	}
}
void astc_hdr_6x6_global_config : : set_user_level ( int level )
{
level = basisu : : clamp < int > ( level , 0 , ASTC_HDR_6X6_MAX_USER_COMP_LEVEL ) ;
m_master_comp_level = 0 ;
m_highest_comp_level = 0 ;
m_num_reuse_xy_deltas = NUM_REUSE_XY_DELTAS ;
m_extra_patterns_flag = false ;
m_brute_force_partition_matching = false ;
switch ( level )
{
case 0 :
{
// Both reduce compression a lot when lambda>0
m_favor_higher_compression = false ;
m_num_reuse_xy_deltas = NUM_REUSE_XY_DELTAS / 2 ;
break ;
}
case 1 :
{
m_master_comp_level = 0 ;
m_highest_comp_level = 0 ;
break ;
}
case 2 :
{
m_master_comp_level = 0 ;
m_highest_comp_level = 1 ;
break ;
}
case 3 :
{
m_master_comp_level = 1 ;
m_highest_comp_level = 1 ;
break ;
}
case 4 :
{
m_master_comp_level = 1 ;
m_highest_comp_level = 2 ;
break ;
}
case 5 :
{
m_master_comp_level = 1 ;
m_highest_comp_level = 3 ;
break ;
}
case 6 :
{
m_master_comp_level = 1 ;
m_highest_comp_level = 4 ;
break ;
}
case 7 :
{
m_master_comp_level = 2 ;
m_highest_comp_level = 2 ;
break ;
}
case 8 :
{
m_master_comp_level = 2 ;
m_highest_comp_level = 3 ;
break ;
}
case 9 :
{
m_master_comp_level = 2 ;
m_highest_comp_level = 4 ;
break ;
}
case 10 :
{
m_master_comp_level = 3 ;
m_highest_comp_level = 3 ;
break ;
}
case 11 :
{
m_master_comp_level = 3 ;
m_highest_comp_level = 4 ;
break ;
}
case 12 :
default :
{
m_master_comp_level = 4 ;
m_highest_comp_level = 4 ;
m_extra_patterns_flag = true ;
m_brute_force_partition_matching = true ;
break ;
}
}
}
// Constants of the PQ (SMPTE ST 2084) curve evaluated by forwardPQ() (and by the disabled inversePQ()).
const float m1 = 0.1593017578125f ; // 2610 / 2^14, i.e. (2610 / 4096) / 4
const float m2 = 78.84375f ; // 2523 / 32, i.e. (2523 / 4096) * 128
const float c1 = 0.8359375f ; // 3424 / (2^12)
const float c2 = 18.8515625f ; // (2413 / 128)
const float c3 = 18.6875f ; // (2392 / 128)
// Exact (powf-based) forward PQ curve.
//   Y - absolute luminance in nits (cd per square meter); 10,000 nits maps to 1.0.
// Returns the PQ-encoded value.
static float forwardPQ(float Y)
{
	// 10,000 is the absolute scale of the curve, so normalize against it first.
	const float norm_lum = Y * (1.0f / 10000.0f);

	const float lum_pow = powf(norm_lum, m1);

	const float numer = c1 + c2 * lum_pow;
	const float denom = 1 + c3 * lum_pow;

	return powf(numer / denom, m2);
}
#if 0
// Disabled (compiled out) inverse of forwardPQ(): maps a PQ-encoded value E back to luminance.
// The result is scaled by 10000, mirroring the 1/10000 normalization applied by forwardPQ().
static float inversePQ ( float E )
{
float N = powf ( E , 1.0f / m2 ) ;
// (N - c1) is clamped at zero before the division so the following powf() isn't fed a negative numerator.
float num = basisu : : maximum < float > ( ( N - c1 ) , 0.0f ) / ( c2 - c3 * N ) ;
float L = powf ( num , 1.0f / m1 ) ;
return L * 10000.0f ;
}
# endif
// PQ function approximation: convert input to bfloat16, look up in tables, bilinear interpolation between table entries.
// max_er: 0.000023007392883, max_rel_er: 0.000023472490284, avg_er: 0.000004330495689, 6-7x faster on x86
// Highest error is for values less than SMALLEST_PQ_VAL_IN.
//
// Approximation is round trip lossless for 10-12 bits at [0,10000] nits:
// for x [0,1023] (SCALE=1023) or for x [0,4095] (SCALE=4096):
// round(forwardPQTab(inversePQ(x / SCALE)) * SCALE) == x
//
// bfloat16 has enough precision to handle 8-bit sRGB to linear conversions:
// round(linear_to_srgb(bfloat16_to_float(float_to_bfloat16(srgb_to_linear(isRGB/255.0f))))*255.0) is lossless
// Inclusive range of bfloat16 exponents covered by the lookup table. forwardPQTab() handles inputs whose
// exponent falls outside this range separately.
const int PQ_APPROX_MIN_EXP = - 16 , PQ_APPROX_MAX_EXP = 16 ;
// Number of table rows: one per exponent in [PQ_APPROX_MIN_EXP, PQ_APPROX_MAX_EXP].
const int PQ_APPROX_EXP_RANGE = ( PQ_APPROX_MAX_EXP - PQ_APPROX_MIN_EXP + 1 ) ;
// Input scale of the linear ramp forwardPQTab() uses for inputs whose exponent is below PQ_APPROX_MIN_EXP.
const float SMALLEST_PQ_VAL_IN = 0.000015258829080f ;
const float SMALLEST_PQ_VAL = 0.000551903737f ; // forwardPQ(SMALLEST_PQ_VAL_IN)
// Value forwardPQTab() returns for inputs whose exponent is above PQ_APPROX_MAX_EXP.
const float LARGEST_PQ_VAL = 1.251312f ;
// Indexed as [exponent - PQ_APPROX_MIN_EXP][mantissa (0-127)]; each entry is forwardPQ() of the corresponding
// bfloat16 value. Filled in by init_pq_tables().
float g_pq_approx_tabs [ PQ_APPROX_EXP_RANGE ] [ 128 ] ;
static void init_pq_tables ( )
{
for ( int exp = PQ_APPROX_MIN_EXP ; exp < = PQ_APPROX_MAX_EXP ; exp + + )
{
for ( int mant = 0 ; mant < 128 ; mant + + )
{
bfloat16 b = bfloat16_init ( 1 , exp , mant ) ;
float bf = bfloat16_to_float ( b ) ;
float pq = forwardPQ ( bf ) ;
g_pq_approx_tabs [ exp - PQ_APPROX_MIN_EXP ] [ mant ] = pq ;
}
}
//fmt_printf("{.15} {.15}\n", g_pq_approx_tabs[0][0], inversePQ(g_pq_approx_tabs[0][0]));
//fmt_printf("{.15}\n", forwardPQ(SMALLEST_PQ_VAL_IN));
}
static inline float forwardPQTab ( float v )
{
assert ( g_pq_approx_tabs [ 0 ] [ 0 ] ) ;
assert ( v > = 0.0f ) ;
if ( v = = 0.0f )
return 0.0f ;
bfloat16 bf = float_to_bfloat16 ( v , false ) ;
assert ( v > = bfloat16_to_float ( bf ) ) ;
int exp = bfloat16_get_exp ( bf ) ;
if ( exp < PQ_APPROX_MIN_EXP )
{
// not accurate but should be good enough for our uses
return lerp ( 0.0f , SMALLEST_PQ_VAL , minimum ( 1.0f , v / SMALLEST_PQ_VAL_IN ) ) ;
}
else if ( exp > PQ_APPROX_MAX_EXP )
return LARGEST_PQ_VAL ;
int mant = bfloat16_get_mantissa ( bf ) ;
float a = g_pq_approx_tabs [ exp - PQ_APPROX_MIN_EXP ] [ mant ] ;
float bf_f32 = bfloat16_to_float ( bf ) ;
int next_mant = mant + 1 ;
int next_exp = exp ;
if ( next_mant = = 128 )
{
next_mant = 0 ;
next_exp + + ;
if ( next_exp > PQ_APPROX_MAX_EXP )
return a ;
}
float b = g_pq_approx_tabs [ next_exp - PQ_APPROX_MIN_EXP ] [ next_mant ] ;
bfloat16 next_bf = bfloat16_init ( 1 , next_exp , next_mant ) ;
float next_bf_f32 = bfloat16_to_float ( next_bf ) ;
assert ( v < = next_bf_f32 ) ;
float lerp_factor = ( v - bf_f32 ) / ( next_bf_f32 - bf_f32 ) ;
assert ( ( lerp_factor > = 0 ) & & ( lerp_factor < = 1.0f ) ) ;
return lerp ( a , b , lerp_factor ) ;
}
// 100 nits = ~.5 i
// This converts absolute linear RGB light in either REC 709 or REC2020/BT2100 color gamut to ICtCp, a coding space where Ct is scaled by 2.
// To convert to perceptual ITP for error/distance calculations, multiply the result Ct by .5 (or set itp_flag to true).
// Assumes REC 709 input, or REC 2020/BT.2100 RGB input if rec2020_bt2100_color_gamut is true.
//
// ITP info:
// https://www.portrait.com/resource-center/ictcp-color-difference-metric/
// https://professional.dolby.com/siteassets/pdfs/measuringperceptualcolorvolume_v07.253.pdf (see scale to JND's)
// This also converts from an ICtCp coding space to threshold or perceptually uniform space ITP.
//
// Linear REC709 to REC2020/BT.2100 gamut conversion:
// rgb_2100[0] = rgb_in[0] * 0.6274f + rgb_in[1] * 0.3293f + rgb_in[2] * 0.0433f;
// rgb_2100[1] = rgb_in[0] * 0.0691f + rgb_in[1] * 0.9195f + rgb_in[2] * 0.0114f;
// rgb_2100[2] = rgb_in[0] * 0.0164f + rgb_in[1] * 0.0880f + rgb_in[2] * 0.8956f;
// const float S = 1.0f / 4096.0f;
// l = (1688.0f * S) * rgb_2100[0] + (2146.0f * S) * rgb_2100[1] + (262.0f * S) * rgb_2100[2];
// m = (683.0f * S) * rgb_2100[0] + (2951.0f * S) * rgb_2100[1] + (462.0f * S) * rgb_2100[2];
// s = (99.0f * S) * rgb_2100[0] + (309.0f * S) * rgb_2100[1] + (3688.0f * S) * rgb_2100[2];
static void linear_rgb_to_ictcp ( const vec3F & rgb_in , vec3F & ictcp , bool itp_flag = false , bool rec2020_bt2100_color_gamut = false )
{
vec3F rgb_2100 ( rgb_in ) ;
float l , m , s ;
if ( ! rec2020_bt2100_color_gamut )
{
// Assume REC 709 input color gamut
// (REC2020_to_LMS * REC709_to_2020) * input_color
l = rgb_2100 [ 0 ] * 0.2958097f + rgb_2100 [ 1 ] * 0.6230863f + rgb_2100 [ 2 ] * 0.0811040f ;
m = rgb_2100 [ 0 ] * 0.1562512f + rgb_2100 [ 1 ] * 0.7272980f + rgb_2100 [ 2 ] * 0.1164508f ;
s = rgb_2100 [ 0 ] * 0.0351435f + rgb_2100 [ 1 ] * 0.1565601f + rgb_2100 [ 2 ] * 0.8082964f ;
}
else
{
// Assumes REC2020/BT.2100 input color gamut (this is from the spec)
l = 0.412109375f * rgb_2100 [ 0 ] + 0.52392578125f * rgb_2100 [ 1 ] + 0.06396484375f * rgb_2100 [ 2 ] ;
m = 0.166748046875f * rgb_2100 [ 0 ] + 0.720458984375f * rgb_2100 [ 1 ] + 0.11279296875f * rgb_2100 [ 2 ] ;
s = 0.024169921875f * rgb_2100 [ 0 ] + 0.075439453125f * rgb_2100 [ 1 ] + 0.900390625f * rgb_2100 [ 2 ] ;
}
float ld = forwardPQTab ( l ) ;
float md = forwardPQTab ( m ) ;
float sd = forwardPQTab ( s ) ;
ictcp [ 0 ] = .5f * ld + .5f * md ;
// if ITP scale Ct by .5 (the ICtCp spec scaled Ct to better exploit the full scaled output, which is not perceptually linear)
if ( itp_flag )
ictcp [ 1 ] = 0.806884765625f * ld + - 1.6617431640625f * md + 0.8548583984375f * sd ;
else
ictcp [ 1 ] = 1.61376953125f * ld + - 3.323486328125f * md + 1.709716796875f * sd ;
ictcp [ 2 ] = 4.378173828125f * ld + - 4.24560546875f * md + - 0.132568359375f * sd ;
}
// Convenience wrapper: converts linear RGB to ITP by calling linear_rgb_to_ictcp() with itp_flag set to true.
// The input color gamut selection is taken from cfg.m_rec2020_bt2100_color_gamut.
static inline void linear_rgb_to_itp ( const vec3F & rgb_in , vec3F & itp , const astc_hdr_6x6_global_config & cfg )
{
linear_rgb_to_ictcp ( rgb_in , itp , true , cfg . m_rec2020_bt2100_color_gamut ) ;
}
#if 0
// Disabled (compiled out) inverse of linear_rgb_to_ictcp().
// Outputs rec2020/bt2100 color gamut (i.e. this doesn't convert back to REC709 gamut).
//   ictcp    - input [I, Ct, Cp] (or ITP if itp_flag is true).
//   rgb      - receives the linear RGB result.
//   itp_flag - if true, Ct is doubled first to undo the half scaling applied for ITP.
static void ictcp_to_linear_rgb ( const vec3F & ictcp , vec3F & rgb , bool itp_flag = false )
{
float ct = ictcp [ 1 ] ;
if ( itp_flag )
ct * = 2.0f ;
// Recover the three PQ-encoded components (ld, md, sd) from I/Ct/Cp.
float ld = ictcp [ 0 ] + ct * 0.008609037037932726f + ictcp [ 2 ] * 0.11102962500302596f ;
float md = ictcp [ 0 ] + ct * - 0.008609037037932726f + ictcp [ 2 ] * - 0.11102962500302596f ;
float sd = ictcp [ 0 ] + ct * 0.5600313357106792f + ictcp [ 2 ] * - 0.32062717498731885f ;
// Undo the PQ encoding of each component.
float l = inversePQ ( ld ) ;
float m = inversePQ ( md ) ;
float s = inversePQ ( sd ) ;
// Linear transform of (l, m, s) back to RGB.
rgb [ 0 ] = l * 3.436606694333079f + m * - 2.5064521186562705f + s * 0.06984542432319149f ;
rgb [ 1 ] = l * - 0.7913295555989289f + m * 1.983600451792291f + s * - 0.192270896193362f ;
rgb [ 2 ] = l * - 0.025949899690592672f + m * - 0.09891371471172646f + s * 1.1248636144023192f ;
}
# endif
struct half_vec3
{
basist : : half_float m_vals [ 3 ] ;
inline half_vec3 ( ) { }
inline half_vec3 ( basist : : half_float x , basist : : half_float y , basist : : half_float z )
{
m_vals [ 0 ] = x ;
m_vals [ 1 ] = y ;
m_vals [ 2 ] = z ;
}
inline half_vec3 ( const half_vec3 & other )
{
* this = other ;
}
inline half_vec3 & operator = ( const half_vec3 & rhs )
{
m_vals [ 0 ] = rhs . m_vals [ 0 ] ;
m_vals [ 1 ] = rhs . m_vals [ 1 ] ;
m_vals [ 2 ] = rhs . m_vals [ 2 ] ;
return * this ;
}
inline void clear ( )
{
clear_obj ( m_vals ) ;
}
inline half_vec3 & set ( basist : : half_float x , basist : : half_float y , basist : : half_float z )
{
m_vals [ 0 ] = x ;
m_vals [ 1 ] = y ;
m_vals [ 2 ] = z ;
return * this ;
}
inline half_vec3 & set ( float x , float y , float z )
{
m_vals [ 0 ] = basist : : float_to_half ( x ) ;
m_vals [ 1 ] = basist : : float_to_half ( y ) ;
m_vals [ 2 ] = basist : : float_to_half ( z ) ;
return * this ;
}
template < typename T >
inline half_vec3 & set_vec ( const T & vec )
{
m_vals [ 0 ] = basist : : float_to_half ( vec [ 0 ] ) ;
m_vals [ 1 ] = basist : : float_to_half ( vec [ 1 ] ) ;
m_vals [ 2 ] = basist : : float_to_half ( vec [ 2 ] ) ;
return * this ;
}
template < typename T >
inline T get_vec ( ) const
{
return T ( basist : : half_to_float ( m_vals [ 0 ] ) , basist : : half_to_float ( m_vals [ 1 ] ) , basist : : half_to_float ( m_vals [ 2 ] ) ) ;
}
inline basist : : half_float operator [ ] ( uint32_t c ) const { assert ( c < 3 ) ; return m_vals [ c ] ; }
inline basist : : half_float & operator [ ] ( uint32_t c ) { assert ( c < 3 ) ; return m_vals [ c ] ; }
float get_float_comp ( uint32_t c ) const
{
assert ( c < 3 ) ;
return basist : : half_to_float ( m_vals [ c ] ) ;
}
half_vec3 & set_float_comp ( uint32_t c , float v )
{
assert ( c < 3 ) ;
m_vals [ c ] = basist : : float_to_half ( v ) ;
return * this ;
}
} ;
struct half_vec4
{
basist : : half_float m_vals [ 4 ] ;
inline half_vec4 ( ) { }
inline half_vec4 ( basist : : half_float x , basist : : half_float y , basist : : half_float z , basist : : half_float w )
{
m_vals [ 0 ] = x ;
m_vals [ 1 ] = y ;
m_vals [ 2 ] = z ;
m_vals [ 3 ] = w ;
}
inline half_vec4 ( const half_vec4 & other )
{
* this = other ;
}
inline half_vec4 & operator = ( const half_vec4 & rhs )
{
m_vals [ 0 ] = rhs . m_vals [ 0 ] ;
m_vals [ 1 ] = rhs . m_vals [ 1 ] ;
m_vals [ 2 ] = rhs . m_vals [ 2 ] ;
m_vals [ 3 ] = rhs . m_vals [ 3 ] ;
return * this ;
}
inline void clear ( )
{
clear_obj ( m_vals ) ;
}
inline half_vec4 & set ( basist : : half_float x , basist : : half_float y , basist : : half_float z , basist : : half_float w )
{
m_vals [ 0 ] = x ;
m_vals [ 1 ] = y ;
m_vals [ 2 ] = z ;
m_vals [ 3 ] = w ;
return * this ;
}
inline half_vec4 & set ( float x , float y , float z , float w )
{
m_vals [ 0 ] = basist : : float_to_half ( x ) ;
m_vals [ 1 ] = basist : : float_to_half ( y ) ;
m_vals [ 2 ] = basist : : float_to_half ( z ) ;
m_vals [ 3 ] = basist : : float_to_half ( w ) ;
return * this ;
}
template < typename T >
inline half_vec4 & set_vec ( const T & vec )
{
m_vals [ 0 ] = basist : : float_to_half ( vec [ 0 ] ) ;
m_vals [ 1 ] = basist : : float_to_half ( vec [ 1 ] ) ;
m_vals [ 2 ] = basist : : float_to_half ( vec [ 2 ] ) ;
m_vals [ 3 ] = basist : : float_to_half ( vec [ 3 ] ) ;
return * this ;
}
template < typename T >
inline T get_vec ( ) const
{
return T ( basist : : half_to_float ( m_vals [ 0 ] ) , basist : : half_to_float ( m_vals [ 1 ] ) , basist : : half_to_float ( m_vals [ 2 ] ) , basist : : half_to_float ( m_vals [ 3 ] ) ) ;
}
inline basist : : half_float operator [ ] ( uint32_t c ) const { assert ( c < 4 ) ; return m_vals [ c ] ; }
inline basist : : half_float & operator [ ] ( uint32_t c ) { assert ( c < 4 ) ; return m_vals [ c ] ; }
float get_float_comp ( uint32_t c ) const
{
assert ( c < 4 ) ;
return basist : : half_to_float ( m_vals [ c ] ) ;
}
half_vec4 & set_float_comp ( uint32_t c , float v )
{
assert ( c < 4 ) ;
m_vals [ c ] = basist : : float_to_half ( v ) ;
return * this ;
}
} ;
// Maximum block dimensions, in texels.
const uint32_t MAX_BLOCK_W = 6 , MAX_BLOCK_H = 6 ;
// Bundles one candidate logical ASTC block with its error value and a validity flag.
// NOTE(review): how m_err is measured and when m_valid is set is decided by the code that fills this in - confirm at the use sites.
struct trial_result
{
astc_helpers : : log_astc_block m_log_blk ; // the candidate logical block
double m_err ; // error value associated with m_log_blk
bool m_valid ; // whether this entry holds a usable result
} ;
//----------------------------------------------------------
// Number of distinct ways to relabel three subsets (3! = 6).
const uint32_t NUM_PART3_MAPPINGS = 6 ;
// All six permutations of the labels {0, 1, 2}. Row [mapping] gives, for each original label, the label it is
// replaced with; used by partition_pattern_vec::get_permuted3().
static uint8_t g_part3_mapping [ NUM_PART3_MAPPINGS ] [ 3 ] =
{
{ 0 , 1 , 2 } ,
{ 1 , 2 , 0 } ,
{ 2 , 0 , 1 } ,
{ 0 , 2 , 1 } ,
{ 1 , 0 , 2 } ,
{ 2 , 1 , 0 }
} ;
struct partition_pattern_vec
{
uint8_t m_parts [ 6 * 6 ] ;
partition_pattern_vec ( )
{
clear ( ) ;
}
partition_pattern_vec ( const partition_pattern_vec & other )
{
* this = other ;
}
void clear ( )
{
memset ( m_parts , 0 , sizeof ( m_parts ) ) ;
}
partition_pattern_vec & operator = ( const partition_pattern_vec & rhs )
{
if ( this = = & rhs )
return * this ;
memcpy ( m_parts , rhs . m_parts , 36 ) ;
return * this ;
}
uint8_t operator [ ] ( uint32_t i ) const { assert ( i < 36 ) ; return m_parts [ i ] ; }
uint8_t & operator [ ] ( uint32_t i ) { assert ( i < 36 ) ; return m_parts [ i ] ; }
uint8_t operator ( ) ( uint32_t x , uint32_t y ) const { assert ( ( x < 6 ) & & ( y < 6 ) ) ; return m_parts [ x + y * 6 ] ; }
uint8_t & operator ( ) ( uint32_t x , uint32_t y ) { assert ( ( x < 6 ) & & ( y < 6 ) ) ; return m_parts [ x + y * 6 ] ; }
int get_squared_distance ( const partition_pattern_vec & other ) const
{
int total_dist = 0 ;
for ( uint32_t i = 0 ; i < 36 ; i + + )
total_dist + = iabs ( ( int ) m_parts [ i ] - ( int ) other . m_parts [ i ] ) ;
return total_dist ;
}
float get_distance ( const partition_pattern_vec & other ) const
{
return sqrtf ( ( float ) get_squared_distance ( other ) ) ;
}
partition_pattern_vec get_permuted2 ( uint32_t permute_index ) const
{
assert ( permute_index < = 1 ) ;
partition_pattern_vec res ;
for ( uint32_t i = 0 ; i < 36 ; i + + )
{
assert ( m_parts [ i ] < = 1 ) ;
res . m_parts [ i ] = ( uint8_t ) ( m_parts [ i ] ^ permute_index ) ;
}
return res ;
}
partition_pattern_vec get_permuted3 ( uint32_t permute_index ) const
{
assert ( permute_index < = 5 ) ;
partition_pattern_vec res ;
for ( uint32_t i = 0 ; i < 36 ; i + + )
{
assert ( m_parts [ i ] < = 2 ) ;
res . m_parts [ i ] = g_part3_mapping [ permute_index ] [ m_parts [ i ] ] ;
}
return res ;
}
partition_pattern_vec get_canonicalized ( ) const
{
partition_pattern_vec res ;
int new_labels [ 3 ] = { - 1 , - 1 , - 1 } ;
uint32_t next_index = 0 ;
for ( uint32_t i = 0 ; i < 36 ; i + + )
{
uint32_t p = m_parts [ i ] ;
if ( new_labels [ p ] = = - 1 )
new_labels [ p ] = next_index + + ;
res . m_parts [ i ] = ( uint8_t ) new_labels [ p ] ;
}
return res ;
}
bool operator = = ( const partition_pattern_vec & rhs ) const
{
return memcmp ( m_parts , rhs . m_parts , sizeof ( m_parts ) ) = = 0 ;
}
operator size_t ( ) const
{
return basisu : : hash_hsieh ( m_parts , sizeof ( m_parts ) ) ;
}
} ;
// One node of a vp_tree.
struct vp_tree_node
{
partition_pattern_vec m_vantage_point ; // copy of the pattern chosen as this node's vantage point
uint32_t m_point_index ; // index of the vantage point within the pattern array passed to vp_tree::init()
float m_dist ; // split distance: patterns at distance <= m_dist from the vantage point go to the inner subtree
int m_inner_node , m_outer_node ; // child node indices, or -1 if there is no such child
} ;
// When nonzero, enables the code guarded by "# if BRUTE_FORCE_PART_SEARCH" in estimate_partition2_6x6().
# define BRUTE_FORCE_PART_SEARCH (0)
// Vantage point tree over a set of partition_pattern_vec patterns, used to find the patterns nearest to a
// desired pattern. Distances are measured with partition_pattern_vec::get_distance(). Queries try every
// relabeling of the desired pattern (2 for two subsets, NUM_PART3_MAPPINGS for three) and use the closest one.
class vp_tree
{
public :
vp_tree ( )
{
}
// Removes all nodes.
void clear ( )
{
m_nodes . clear ( ) ;
}
// Builds the tree over the n patterns in pUnique_pats. Returns false on failure.
// This requires no redundant patterns, i.e. all must be unique.
bool init ( uint32_t n , const partition_pattern_vec * pUnique_pats )
{
clear ( ) ;
// Start with every pattern index as a candidate for the root.
uint_vec pat_indices ( n ) ;
for ( uint32_t i = 0 ; i < n ; i + + )
pat_indices [ i ] = i ;
std : : pair < int , float > root_idx = find_best_vantage_point ( n , pUnique_pats , pat_indices ) ;
if ( root_idx . first = = - 1 )
return false ;
m_nodes . resize ( 1 ) ;
m_nodes [ 0 ] . m_vantage_point = pUnique_pats [ root_idx . first ] ;
m_nodes [ 0 ] . m_point_index = root_idx . first ;
m_nodes [ 0 ] . m_dist = root_idx . second ;
m_nodes [ 0 ] . m_inner_node = - 1 ;
m_nodes [ 0 ] . m_outer_node = - 1 ;
// Split the remaining patterns by their distance to the root's vantage point.
uint_vec inner_list , outer_list ;
inner_list . reserve ( n / 2 ) ;
outer_list . reserve ( n / 2 ) ;
for ( uint32_t pat_index = 0 ; pat_index < n ; pat_index + + )
{
if ( ( int ) pat_index = = root_idx . first )
continue ;
const float dist = m_nodes [ 0 ] . m_vantage_point . get_distance ( pUnique_pats [ pat_index ] ) ;
if ( dist < = root_idx . second )
inner_list . push_back ( pat_index ) ;
else
outer_list . push_back ( pat_index ) ;
}
if ( inner_list . size ( ) )
{
m_nodes [ 0 ] . m_inner_node = create_node ( n , pUnique_pats , inner_list ) ;
if ( m_nodes [ 0 ] . m_inner_node < 0 )
return false ;
}
if ( outer_list . size ( ) )
{
m_nodes [ 0 ] . m_outer_node = create_node ( n , pUnique_pats , outer_list ) ;
if ( m_nodes [ 0 ] . m_outer_node < 0 )
return false ;
}
return true ;
}
// A single search result.
struct result
{
uint32_t m_pat_index ; // index of the matched pattern (vp_tree_node::m_point_index)
uint32_t m_mapping_index ; // which relabeling of the desired pattern gave the smallest distance
float m_dist ; // that smallest distance
bool operator < ( const result & rhs ) const { return m_dist < rhs . m_dist ; }
bool operator > ( const result & rhs ) const { return m_dist > rhs . m_dist ; }
} ;
// Fixed capacity binary max-heap of results keyed on m_dist. Elements are stored 1-based (slot 0 is unused),
// so top() is the kept result with the largest distance.
class result_queue
{
enum { MaxSupportedSize = 256 + 1 } ;
public :
result_queue ( ) :
m_cur_size ( 0 )
{
}
size_t get_size ( ) const
{
return m_cur_size ;
}
bool empty ( ) const
{
return ! m_cur_size ;
}
// Raw heap storage; valid entries are at indices [1, get_size()].
typedef std : : array < result , MaxSupportedSize + 1 > result_array_type ;
const result_array_type & get_elements ( ) const { return m_elements ; }
result_array_type & get_elements ( ) { return m_elements ; }
void clear ( )
{
m_cur_size = 0 ;
}
// No-op: storage is a fixed size array.
void reserve ( uint32_t n )
{
BASISU_NOTE_UNUSED ( n ) ;
}
// The kept result with the largest distance. The queue must not be empty.
const result & top ( ) const
{
assert ( m_cur_size ) ;
return m_elements [ 1 ] ;
}
// Adds val, then if more than max_size results are held discards the one with the largest distance, so at
// most max_size of the smallest-distance results are kept. Returns false (without inserting) if the heap is
// already at MaxSupportedSize.
bool insert ( const result & val , uint32_t max_size )
{
assert ( max_size < MaxSupportedSize ) ;
if ( m_cur_size > = MaxSupportedSize )
return false ;
m_elements [ + + m_cur_size ] = val ;
up_heap ( m_cur_size ) ;
if ( m_cur_size > max_size )
pop ( ) ;
return true ;
}
// Removes the result with the largest distance. Returns false if the queue was empty.
bool pop ( )
{
if ( m_cur_size = = 0 )
return false ;
m_elements [ 1 ] = m_elements [ m_cur_size - - ] ;
down_heap ( 1 ) ;
return true ;
}
// Largest distance currently kept, or 0 if the queue is empty.
float get_highest_dist ( ) const
{
if ( ! m_cur_size )
return 0.0f ;
return top ( ) . m_dist ;
}
private :
result_array_type m_elements ;
size_t m_cur_size ;
// Moves the element at index towards the root while it is larger than its parent.
void up_heap ( size_t index )
{
while ( ( index > 1 ) & & ( m_elements [ index ] > m_elements [ index > > 1 ] ) )
{
std : : swap ( m_elements [ index ] , m_elements [ index > > 1 ] ) ;
index > > = 1 ;
}
}
// Moves the element at index towards the leaves while either child is larger.
void down_heap ( size_t index )
{
for ( ; ; )
{
size_t largest = index , left_child = 2 * index , right_child = 2 * index + 1 ;
if ( ( left_child < = m_cur_size ) & & ( m_elements [ left_child ] > m_elements [ largest ] ) )
largest = left_child ;
if ( ( right_child < = m_cur_size ) & & ( m_elements [ right_child ] > m_elements [ largest ] ) )
largest = right_child ;
if ( largest = = index )
break ;
std : : swap ( m_elements [ index ] , m_elements [ largest ] ) ;
index = largest ;
}
}
} ;
// Searches the tree for up to max_results patterns closest to desired_pat.
//   num_subsets - 2 or 3 (asserted); selects which set of relabelings of desired_pat is tried.
//   results     - cleared, then filled with the matches. Left empty if the tree has no nodes.
void find_nearest ( uint32_t num_subsets , const partition_pattern_vec & desired_pat , result_queue & results , uint32_t max_results )
{
assert ( ( num_subsets > = 2 ) & & ( num_subsets < = 3 ) ) ;
results . clear ( ) ;
if ( ! m_nodes . size ( ) )
return ;
// Precompute every relabeling of the desired pattern.
uint32_t num_desired_pats ;
partition_pattern_vec desired_pats [ NUM_PART3_MAPPINGS ] ;
if ( num_subsets = = 2 )
{
num_desired_pats = 2 ;
for ( uint32_t i = 0 ; i < 2 ; i + + )
desired_pats [ i ] = desired_pat . get_permuted2 ( i ) ;
}
else
{
num_desired_pats = NUM_PART3_MAPPINGS ;
for ( uint32_t i = 0 ; i < NUM_PART3_MAPPINGS ; i + + )
desired_pats [ i ] = desired_pat . get_permuted3 ( i ) ;
}
#if 0
find_nearest_at_node ( 0 , num_desired_pats , desired_pats , results , max_results ) ;
# else
find_nearest_at_node_non_recursive ( 0 , num_desired_pats , desired_pats , results , max_results ) ;
# endif
}
private :
basisu : : vector < vp_tree_node > m_nodes ;
// Recursive search starting at node_index (currently unused: find_nearest() calls the non-recursive variant).
// Inserts the node's own vantage point into results, then descends into the child on the query's side of the
// split first. The other child is only visited if results isn't full yet, or if the query's distance to the
// split boundary doesn't exceed the current largest kept distance.
void find_nearest_at_node ( int node_index , uint32_t num_desired_pats , const partition_pattern_vec * pDesired_pats , result_queue & results , uint32_t max_results )
{
// Find the relabeling of the desired pattern that is closest to this node's vantage point.
float best_dist_to_vantage = BIG_FLOAT_VAL ;
uint32_t best_mapping = 0 ;
for ( uint32_t i = 0 ; i < num_desired_pats ; i + + )
{
float dist = pDesired_pats [ i ] . get_distance ( m_nodes [ node_index ] . m_vantage_point ) ;
if ( dist < best_dist_to_vantage )
{
best_dist_to_vantage = dist ;
best_mapping = i ;
}
}
result r ;
r . m_dist = best_dist_to_vantage ;
r . m_mapping_index = best_mapping ;
r . m_pat_index = m_nodes [ node_index ] . m_point_index ;
results . insert ( r , max_results ) ;
if ( best_dist_to_vantage < = m_nodes [ node_index ] . m_dist )
{
// inner first
if ( m_nodes [ node_index ] . m_inner_node > = 0 )
find_nearest_at_node ( m_nodes [ node_index ] . m_inner_node , num_desired_pats , pDesired_pats , results , max_results ) ;
if ( m_nodes [ node_index ] . m_outer_node > = 0 )
{
if ( ( results . get_size ( ) < max_results ) | |
( ( m_nodes [ node_index ] . m_dist - best_dist_to_vantage ) < = results . get_highest_dist ( ) )
)
{
find_nearest_at_node ( m_nodes [ node_index ] . m_outer_node , num_desired_pats , pDesired_pats , results , max_results ) ;
}
}
}
else
{
// outer first
if ( m_nodes [ node_index ] . m_outer_node > = 0 )
find_nearest_at_node ( m_nodes [ node_index ] . m_outer_node , num_desired_pats , pDesired_pats , results , max_results ) ;
if ( m_nodes [ node_index ] . m_inner_node > = 0 )
{
if ( ( results . get_size ( ) < max_results ) | |
( ( best_dist_to_vantage - m_nodes [ node_index ] . m_dist ) < = results . get_highest_dist ( ) )
)
{
find_nearest_at_node ( m_nodes [ node_index ] . m_inner_node , num_desired_pats , pDesired_pats , results , max_results ) ;
}
}
}
}
// Iterative variant of find_nearest_at_node() driven by an explicit stack of node indices.
// The near-side child is pushed last so it is popped (visited) first. Note the far-side child is tested
// against the current largest kept distance at push time, i.e. before the near-side subtree has been searched.
void find_nearest_at_node_non_recursive ( int init_node_index , uint32_t num_desired_pats , const partition_pattern_vec * pDesired_pats , result_queue & results , uint32_t max_results )
{
uint_vec node_stack ;
node_stack . reserve ( 16 ) ;
node_stack . push_back ( init_node_index ) ;
do
{
const uint32_t node_index = node_stack . back ( ) ;
node_stack . pop_back ( ) ;
// Find the relabeling of the desired pattern that is closest to this node's vantage point.
float best_dist_to_vantage = BIG_FLOAT_VAL ;
uint32_t best_mapping = 0 ;
for ( uint32_t i = 0 ; i < num_desired_pats ; i + + )
{
float dist = pDesired_pats [ i ] . get_distance ( m_nodes [ node_index ] . m_vantage_point ) ;
if ( dist < best_dist_to_vantage )
{
best_dist_to_vantage = dist ;
best_mapping = i ;
}
}
result r ;
r . m_dist = best_dist_to_vantage ;
r . m_mapping_index = best_mapping ;
r . m_pat_index = m_nodes [ node_index ] . m_point_index ;
results . insert ( r , max_results ) ;
if ( best_dist_to_vantage < = m_nodes [ node_index ] . m_dist )
{
if ( m_nodes [ node_index ] . m_outer_node > = 0 )
{
if ( ( results . get_size ( ) < max_results ) | |
( ( m_nodes [ node_index ] . m_dist - best_dist_to_vantage ) < = results . get_highest_dist ( ) )
)
{
node_stack . push_back ( m_nodes [ node_index ] . m_outer_node ) ;
}
}
// inner first
if ( m_nodes [ node_index ] . m_inner_node > = 0 )
{
node_stack . push_back ( m_nodes [ node_index ] . m_inner_node ) ;
}
}
else
{
if ( m_nodes [ node_index ] . m_inner_node > = 0 )
{
if ( ( results . get_size ( ) < max_results ) | |
( ( best_dist_to_vantage - m_nodes [ node_index ] . m_dist ) < = results . get_highest_dist ( ) )
)
{
node_stack . push_back ( m_nodes [ node_index ] . m_inner_node ) ;
}
}
// outer first
if ( m_nodes [ node_index ] . m_outer_node > = 0 )
{
node_stack . push_back ( m_nodes [ node_index ] . m_outer_node ) ;
}
}
} while ( ! node_stack . empty ( ) ) ;
}
// Recursively builds the subtree holding the patterns listed in pat_indices.
// Note the results of the recursive calls are stored as-is; a -1 from a child is not propagated upwards.
// returns the index of the new node, or -1 on error
int create_node ( uint32_t n , const partition_pattern_vec * pUnique_pats , const uint_vec & pat_indices )
{
std : : pair < int , float > root_idx = find_best_vantage_point ( n , pUnique_pats , pat_indices ) ;
if ( root_idx . first < 0 )
return - 1 ;
m_nodes . resize ( m_nodes . size ( ) + 1 ) ;
// Nodes are always addressed by index, never by reference, because the recursive calls below grow m_nodes.
const uint32_t new_node_index = m_nodes . size_u32 ( ) - 1 ;
m_nodes [ new_node_index ] . m_vantage_point = pUnique_pats [ root_idx . first ] ;
m_nodes [ new_node_index ] . m_point_index = root_idx . first ;
m_nodes [ new_node_index ] . m_dist = root_idx . second ;
m_nodes [ new_node_index ] . m_inner_node = - 1 ;
m_nodes [ new_node_index ] . m_outer_node = - 1 ;
// Split the remaining patterns by their distance to the new vantage point.
uint_vec inner_list , outer_list ;
inner_list . reserve ( pat_indices . size_u32 ( ) / 2 ) ;
outer_list . reserve ( pat_indices . size_u32 ( ) / 2 ) ;
for ( uint32_t pat_indices_iter = 0 ; pat_indices_iter < pat_indices . size ( ) ; pat_indices_iter + + )
{
const uint32_t pat_index = pat_indices [ pat_indices_iter ] ;
if ( ( int ) pat_index = = root_idx . first )
continue ;
const float dist = m_nodes [ new_node_index ] . m_vantage_point . get_distance ( pUnique_pats [ pat_index ] ) ;
if ( dist < = root_idx . second )
inner_list . push_back ( pat_index ) ;
else
outer_list . push_back ( pat_index ) ;
}
if ( inner_list . size ( ) )
m_nodes [ new_node_index ] . m_inner_node = create_node ( n , pUnique_pats , inner_list ) ;
if ( outer_list . size ( ) )
m_nodes [ new_node_index ] . m_outer_node = create_node ( n , pUnique_pats , outer_list ) ;
return new_node_index ;
}
// Tries every pattern in pat_indices as the vantage point. For each candidate the split distance is the median
// of its distances to the other patterns, and the candidate is scored by how evenly that distance divides the
// others (min(inner, outer) / max(inner, outer)). Ties are broken in favor of the larger s.m_var reported by
// stats::calc() over the distances. pat_indices must not be empty (asserted); a single pattern is returned
// directly with a split distance of 0.
// returns the pattern index of the vantage point (-1 on error), and the optimal split distance
std : : pair < int , float > find_best_vantage_point ( uint32_t num_unique_pats , const partition_pattern_vec * pUnique_pats , const uint_vec & pat_indices )
{
BASISU_NOTE_UNUSED ( num_unique_pats ) ;
const uint32_t n = pat_indices . size_u32 ( ) ;
assert ( n ) ;
if ( n = = 1 )
return std : : pair ( pat_indices [ 0 ] , 0.0f ) ;
float best_split_metric = - 1.0f ;
int best_split_pat = - 1 ;
float best_split_dist = 0.0f ;
float best_split_var = 0.0f ;
// Scratch buffers, reused across candidates.
basisu : : vector < std : : pair < float , uint32_t > > dists ;
dists . reserve ( n ) ;
float_vec float_dists ;
float_dists . reserve ( n ) ;
for ( uint32_t pat_indices_iter = 0 ; pat_indices_iter < n ; pat_indices_iter + + )
{
const uint32_t split_pat_index = pat_indices [ pat_indices_iter ] ;
assert ( split_pat_index < num_unique_pats ) ;
const partition_pattern_vec & trial_vantage = pUnique_pats [ split_pat_index ] ;
dists . resize ( 0 ) ;
float_dists . resize ( 0 ) ;
// Gather the distances from the candidate to every other pattern in the set.
for ( uint32_t j = 0 ; j < n ; j + + )
{
const uint32_t pat_index = pat_indices [ j ] ;
assert ( pat_index < num_unique_pats ) ;
if ( pat_index = = split_pat_index )
continue ;
float dist = trial_vantage . get_distance ( pUnique_pats [ pat_index ] ) ;
dists . emplace_back ( std : : pair ( dist , pat_index ) ) ;
float_dists . push_back ( dist ) ;
}
stats < double > s ;
s . calc ( float_dists . size_u32 ( ) , float_dists . data ( ) ) ;
std : : sort ( dists . begin ( ) , dists . end ( ) , [ ] ( const auto & a , const auto & b ) {
return a . first < b . first ;
} ) ;
// Median distance (average of the two middle values when the count is even).
const uint32_t num_dists = dists . size_u32 ( ) ;
float split_dist = dists [ num_dists / 2 ] . first ;
if ( ( num_dists & 1 ) = = 0 )
split_dist = ( split_dist + dists [ ( num_dists / 2 ) - 1 ] . first ) * .5f ;
// Count how many patterns would land on each side of the split.
uint32_t total_inner = 0 , total_outer = 0 ;
for ( uint32_t j = 0 ; j < n ; j + + )
{
const uint32_t pat_index = pat_indices [ j ] ;
if ( pat_index = = split_pat_index )
continue ;
float dist = trial_vantage . get_distance ( pUnique_pats [ pat_index ] ) ;
if ( dist < = split_dist )
total_inner + + ;
else
total_outer + + ;
}
// 1.0 means a perfectly balanced split, 0.0 means everything landed on one side.
float split_metric = ( float ) minimum ( total_inner , total_outer ) / ( float ) maximum ( total_inner , total_outer ) ;
if ( ( split_metric > best_split_metric ) | |
( ( split_metric = = best_split_metric ) & & ( s . m_var > best_split_var ) ) )
{
best_split_metric = split_metric ;
best_split_dist = split_dist ;
best_split_pat = split_pat_index ;
best_split_var = ( float ) s . m_var ;
}
}
return std : : pair ( best_split_pat , best_split_dist ) ;
}
} ;
struct partition
{
uint64_t m_p ;
inline partition ( ) :
m_p ( 0 )
{
}
inline partition ( uint64_t p ) :
m_p ( p )
{
assert ( p < ( 1ULL < < 36 ) ) ;
}
inline partition & operator = ( uint64_t p )
{
assert ( p < ( 1ULL < < 36 ) ) ;
m_p = p ;
return * this ;
}
inline bool operator < ( const partition & p ) const
{
return m_p < p . m_p ;
}
inline bool operator = = ( const partition & p ) const
{
return m_p = = p . m_p ;
}
inline operator size_t ( ) const
{
return hash_hsieh ( ( const uint8_t * ) & m_p , sizeof ( m_p ) ) ;
}
} ;
// The 6x6 texel pattern of each unique 2-subset partition; filled in by init_partitions2_6x6().
partition_pattern_vec g_partitions2 [ NUM_UNIQUE_PARTITIONS2 ] ;
// Maps a partition seed (0-1023) to its index in g_partitions2. Only the seeds listed in
// g_part2_unique_index_to_seed are written by init_partitions2_6x6(); all other entries stay 0.
int g_part2_seed_to_unique_index [ 1024 ] ;
// Search tree over g_partitions2, built by init_partitions2_6x6().
vp_tree g_part2_vp_tree ;
// Approximately normalizes axis by scaling it with bu_math::inv_sqrt() of its norm().
// If the norm is smaller than SMALL_FLOAT_VAL, returns a vector with all components set to 0.577350269 (1/sqrt(3)) instead.
static inline vec3F vec3F_norm_approx(vec3F axis)
{
	const float l = axis.norm();

	if (fabs(l) >= SMALL_FLOAT_VAL)
		axis = axis * bu_math::inv_sqrt(l);
	else
		axis = vec3F(0.577350269f);

	return axis;
}
// Builds the two-subset partition tables for 6x6 blocks: fills g_partitions2 and
// g_part2_seed_to_unique_index, then initializes g_part2_vp_tree over the patterns.
static void init_partitions2_6x6 ( )
{
#if 0
// Disabled reference path: derives the unique patterns by scanning all 1024 seeds instead of
// reading them from g_part2_unique_index_to_seed.
// Maps each pattern's 36 bits to the 10-bit ASTC seed index that first produced it.
typedef basisu : : hash_map < uint64_t , uint32_t > partition2_hash_map ;
partition2_hash_map phash ;
phash . reserve ( 1024 ) ;
for ( uint32_t i = 0 ; i < 1024 ; i + + )
{
// Pack the 6x6 pattern (and its subset-swapped inverse) into 36 bits, one bit per texel.
uint64_t p_bits = 0 ;
uint64_t p_bits_inv = 0 ;
for ( uint32_t y = 0 ; y < 6 ; y + + )
{
for ( uint32_t x = 0 ; x < 6 ; x + + )
{
uint64_t p = astc_helpers : : compute_texel_partition ( i , x , y , 0 , 2 , false ) ;
assert ( p < 2 ) ;
p_bits | = ( p < < ( x + y * 6 ) ) ;
p_bits_inv | = ( ( 1 - p ) < < ( x + y * 6 ) ) ;
}
}
// Skip patterns where every texel lands in the same subset (all zeros or all ones).
if ( ! p_bits )
continue ;
if ( p_bits = = ( ( 1ULL < < 36 ) - 1 ) )
continue ;
assert ( p_bits < ( 1ULL < < 36 ) ) ;
assert ( p_bits_inv < ( 1ULL < < 36 ) ) ;
// Only insert patterns that are new both as-is and with the two subsets swapped.
if ( phash . contains ( p_bits ) )
{
}
else if ( phash . contains ( p_bits_inv ) )
{
}
else
{
auto res = phash . insert ( p_bits , i ) ;
assert ( res . second ) ;
BASISU_NOTE_UNUSED ( res ) ;
}
}
// Unpack every surviving pattern into g_partitions2 and record the seed -> unique index mapping.
uint32_t num_unique_partitions2 = 0 ;
for ( const auto & r : phash )
{
assert ( r . second < 1024 ) ;
const uint32_t unique_index = num_unique_partitions2 ;
assert ( unique_index < NUM_UNIQUE_PARTITIONS2 ) ;
partition_pattern_vec pat_vec ;
for ( uint32_t i = 0 ; i < 36 ; i + + )
pat_vec [ i ] = ( uint8_t ) ( ( r . first > > i ) & 1 ) ;
g_partitions2 [ unique_index ] = pat_vec ;
assert ( g_part2_unique_index_to_seed [ unique_index ] = = r . second ) ;
g_part2_seed_to_unique_index [ r . second ] = unique_index ;
num_unique_partitions2 + + ;
}
assert ( num_unique_partitions2 = = NUM_UNIQUE_PARTITIONS2 ) ;
# else
// Active path: walk the g_part2_unique_index_to_seed table and rasterize each seed's pattern.
for ( uint32_t unique_index = 0 ; unique_index < NUM_UNIQUE_PARTITIONS2 ; unique_index + + )
{
const uint32_t seed_index = g_part2_unique_index_to_seed [ unique_index ] ;
assert ( seed_index < 1024 ) ;
// NOTE(review): this assert relies on the table still holding its zero-initialized contents, so it
// would fire if this function ran a second time. Seeds absent from the table keep the value 0
// (never -1), so the "!= -1" assert in code_block() cannot detect an unknown two-subset seed.
assert ( g_part2_seed_to_unique_index [ seed_index ] = = 0 ) ;
g_part2_seed_to_unique_index [ seed_index ] = unique_index ;
partition_pattern_vec & pat_vec = g_partitions2 [ unique_index ] ;
// Store the subset index (0 or 1) of each texel in row-major order.
for ( uint32_t y = 0 ; y < 6 ; y + + )
{
for ( uint32_t x = 0 ; x < 6 ; x + + )
{
uint8_t p = ( uint8_t ) astc_helpers : : compute_texel_partition ( seed_index , x , y , 0 , 2 , false ) ;
assert ( p < 2 ) ;
pat_vec [ x + y * 6 ] = p ;
}
}
}
# endif
// Build the nearest-pattern search structure over all unique patterns.
g_part2_vp_tree . init ( NUM_UNIQUE_PARTITIONS2 , g_partitions2 ) ;
}
static bool estimate_partition2_6x6 (
const basist : : half_float pBlock_pixels_half [ ] [ 3 ] ,
int * pBest_parts , uint32_t num_best_parts )
{
const uint32_t BLOCK_W = 6 , BLOCK_H = 6 , BLOCK_T = BLOCK_W * BLOCK_H ;
vec3F training_vecs [ BLOCK_T ] , mean ( 0.0f ) ;
for ( uint32_t i = 0 ; i < BLOCK_T ; i + + )
{
vec3F & v = training_vecs [ i ] ;
v [ 0 ] = ( float ) pBlock_pixels_half [ i ] [ 0 ] ;
v [ 1 ] = ( float ) pBlock_pixels_half [ i ] [ 1 ] ;
v [ 2 ] = ( float ) pBlock_pixels_half [ i ] [ 2 ] ;
mean + = v ;
}
mean * = ( 1.0f / ( float ) BLOCK_T ) ;
vec3F max_vals ( - BIG_FLOAT_VAL ) ;
for ( uint32_t i = 0 ; i < BLOCK_T ; i + + )
{
vec3F & v = training_vecs [ i ] ;
max_vals = vec3F : : component_max ( max_vals , v ) ;
}
// Initialize principle axis approximation
vec3F axis ( max_vals - mean ) ;
// Incremental approx. PCA - only viable if we have a reasonably fast approximation for 1.0/sqrt(x).
for ( uint32_t i = 0 ; i < BLOCK_T ; i + + )
{
axis = vec3F_norm_approx ( axis ) ;
vec3F color ( training_vecs [ i ] - mean ) ;
float d = color . dot ( axis ) ;
axis + = color * d ;
}
if ( axis . norm ( ) < SMALL_FLOAT_VAL )
axis . set ( 0.57735027f ) ;
else
axis . normalize_in_place ( ) ;
# if BRUTE_FORCE_PART_SEARCH
int desired_parts [ BLOCK_H ] [ BLOCK_W ] ; // [y][x]
for ( uint32_t i = 0 ; i < BLOCK_T ; i + + )
{
float proj = ( training_vecs [ i ] - mean ) . dot ( axis ) ;
desired_parts [ i / BLOCK_W ] [ i % BLOCK_W ] = proj < 0.0f ;
}
# else
partition_pattern_vec desired_part ;
for ( uint32_t i = 0 ; i < BLOCK_T ; i + + )
{
float proj = ( training_vecs [ i ] - mean ) . dot ( axis ) ;
desired_part . m_parts [ i ] = proj < 0.0f ;
}
# endif
//interval_timer tm;
//tm.start();
# if BRUTE_FORCE_PART_SEARCH
uint32_t part_similarity [ NUM_UNIQUE_PARTITIONS2 ] ;
for ( uint32_t part_index = 0 ; part_index < NUM_UNIQUE_PARTITIONS2 ; part_index + + )
{
const partition_pattern_vec & pat_vec = g_partitions2 [ part_index ] ;
int total_sim_non_inv = 0 ;
int total_sim_inv = 0 ;
for ( uint32_t y = 0 ; y < BLOCK_H ; y + + )
{
for ( uint32_t x = 0 ; x < BLOCK_W ; x + + )
{
int part = pat_vec [ x + y * 6 ] ;
if ( part = = desired_parts [ y ] [ x ] )
total_sim_non_inv + + ;
if ( ( part ^ 1 ) = = desired_parts [ y ] [ x ] )
total_sim_inv + + ;
}
}
int total_sim = maximum ( total_sim_non_inv , total_sim_inv ) ;
part_similarity [ part_index ] = ( total_sim < < 16 ) | part_index ;
} // part_index;
std : : sort ( part_similarity , part_similarity + NUM_UNIQUE_PARTITIONS2 ) ;
for ( uint32_t i = 0 ; i < num_best_parts ; i + + )
pBest_parts [ i ] = part_similarity [ ( NUM_UNIQUE_PARTITIONS2 - 1 ) - i ] & 0xFFFF ;
# else
vp_tree : : result_queue results ;
results . reserve ( num_best_parts ) ;
g_part2_vp_tree . find_nearest ( 2 , desired_part , results , num_best_parts ) ;
assert ( results . get_size ( ) = = num_best_parts ) ;
const auto & elements = results . get_elements ( ) ;
for ( uint32_t i = 0 ; i < results . get_size ( ) ; i + + )
pBest_parts [ i ] = elements [ 1 + i ] . m_pat_index ;
# endif
//fmt_printf("{} ", tm.get_elapsed_ms());
return true ;
}
// Lowest comp_level at which encode_block_2_subsets()/encode_block_3_subsets() refine endpoints
// after downsampling the weight grid. With the value 0 and an unsigned comp_level, the
// "comp_level >= MIN_REFINE_LEVEL" checks are always true.
const uint32_t MIN_REFINE_LEVEL = 0 ;
// Encodes a 6x6 block with two subsets using one specific partition pattern.
//
// res[0] receives the block built from the per-subset encodes (weights downsampled when the weight
// grid is smaller than 6x6). res[1] is only marked valid when the grid is smaller than the block,
// refinement is enabled, and refine_endpoints() reports success for at least one subset.
//
// cem must be 7 or 11. unique_pat_index indexes g_partitions2 / g_part2_unique_index_to_seed.
// Returns false when a subset encode returns BIG_FLOAT_VAL or no downsample matrix exists for
// the requested grid size; returns true otherwise.
static bool encode_block_2_subsets(
	trial_result res[2],
	uint32_t grid_w, uint32_t grid_h,
	uint32_t cem,
	uint32_t weights_ise_range, uint32_t endpoints_ise_range,
	const half_vec3* pBlock_pixels_half, const vec4F* pBlock_pixels_q16,
	astc_hdr_codec_base_options& coptions,
	bool uber_mode_flag,
	int unique_pat_index,
	uint32_t comp_level,
	opt_mode_t mode11_opt_mode,
	bool refine_endpoints_flag)
{
	const uint32_t BLOCK_W = 6, BLOCK_H = 6, BLOCK_T = BLOCK_W * BLOCK_H, NUM_SUBSETS = 2;

	res[0].m_valid = false;
	res[1].m_valid = false;

	const uint32_t num_endpoint_vals = (cem == 11) ? basist::NUM_MODE11_ENDPOINTS : basist::NUM_MODE7_ENDPOINTS;

	// Logical block skeleton shared by both results.
	astc_helpers::log_astc_block best_log_blk;
	clear_obj(best_log_blk);
	best_log_blk.m_num_partitions = 2;
	for (uint32_t s = 0; s < NUM_SUBSETS; s++)
		best_log_blk.m_color_endpoint_modes[s] = (uint8_t)cem;
	best_log_blk.m_grid_width = (uint8_t)grid_w;
	best_log_blk.m_grid_height = (uint8_t)grid_h;
	best_log_blk.m_weight_ise_range = (uint8_t)weights_ise_range;
	best_log_blk.m_endpoint_ise_range = (uint8_t)endpoints_ise_range;

	partition_pattern_vec& pattern = g_partitions2[unique_pat_index];
	const uint32_t p_seed = g_part2_unique_index_to_seed[unique_pat_index];

	// Gather each subset's texels (in row-major block order) and remember their block offsets.
	vec4F part_pixels_q16[2][64];
	half_vec3 part_half_pixels[2][64];
	uint8_t part_pixel_index[2][64];
	uint32_t part_total_pixels[2] = { 0 };

	for (uint32_t texel = 0; texel < BLOCK_T; texel++)
	{
		const uint32_t subset = pattern[texel];
		const uint32_t slot = part_total_pixels[subset];

		part_pixels_q16[subset][slot] = pBlock_pixels_q16[texel];
		part_half_pixels[subset][slot] = pBlock_pixels_half[texel];
		part_pixel_index[subset][slot] = (uint8_t)texel;

		part_total_pixels[subset] = slot + 1;
	}

	// Encode every subset independently.
	uint8_t blk_endpoints[2][basist::NUM_MODE11_ENDPOINTS];
	uint8_t blk_weights[2][BLOCK_W * BLOCK_H];
	uint32_t best_submode[2];

	for (uint32_t subset = 0; subset < NUM_SUBSETS; subset++)
	{
		assert(part_total_pixels[subset]);

		double subset_err;
		if (cem == 7)
		{
			subset_err = encode_astc_hdr_block_mode_7(
				part_total_pixels[subset],
				(basist::half_float(*)[3])part_half_pixels[subset], (vec4F*)part_pixels_q16[subset],
				best_log_blk.m_weight_ise_range,
				best_submode[subset],
				BIG_FLOAT_VAL,
				blk_endpoints[subset],
				blk_weights[subset],
				coptions,
				best_log_blk.m_endpoint_ise_range);
		}
		else
		{
			assert(cem == 11);

			subset_err = encode_astc_hdr_block_mode_11(
				part_total_pixels[subset],
				(basist::half_float(*)[3])part_half_pixels[subset], (vec4F*)part_pixels_q16[subset],
				best_log_blk.m_weight_ise_range,
				best_submode[subset],
				BIG_FLOAT_VAL,
				blk_endpoints[subset],
				blk_weights[subset],
				coptions,
				false,
				best_log_blk.m_endpoint_ise_range, uber_mode_flag, false, -1, 7, false,
				mode11_opt_mode);
		}

		// BIG_FLOAT_VAL is treated as "this subset could not be encoded".
		if (subset_err == BIG_FLOAT_VAL)
			return false;
	}

	// Interleave the per-subset weights back into block (row-major) order.
	uint8_t ise_weights[BLOCK_W * BLOCK_H];
	uint32_t next_subset_weight[2] = { 0, 0 };
	for (uint32_t texel = 0; texel < BLOCK_T; texel++)
	{
		const uint32_t subset = pattern[texel];
		ise_weights[texel] = blk_weights[subset][next_subset_weight[subset]];
		next_subset_weight[subset]++;
	}

	// Stores the partition seed and both subsets' endpoints into the logical block.
	const auto store_partition_and_endpoints = [&]()
	{
		best_log_blk.m_partition_id = (uint16_t)p_seed;
		memcpy(best_log_blk.m_endpoints, blk_endpoints[0], num_endpoint_vals);
		memcpy(best_log_blk.m_endpoints + num_endpoint_vals, blk_endpoints[1], num_endpoint_vals);
	};

	// Full-resolution weight grid: the ISE weights can be used as-is.
	if ((grid_w == BLOCK_W) && (grid_h == BLOCK_H))
	{
		store_partition_and_endpoints();
		memcpy(best_log_blk.m_weights, ise_weights, BLOCK_W * BLOCK_H);

		res[0].m_valid = true;
		res[0].m_log_blk = best_log_blk;
		return true;
	}

	// Smaller weight grid: dequantize, downsample to the grid, then requantize.
	uint8_t desired_weights[BLOCK_H * BLOCK_W];
	const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(weights_ise_range).m_ISE_to_val;
	for (uint32_t texel = 0; texel < BLOCK_T; texel++)
		desired_weights[texel] = dequant_tab[ise_weights[texel]];

	uint8_t downsampled_weights[BLOCK_H * BLOCK_W];

	const float* pDownsample_matrix = get_6x6_downsample_matrix(grid_w, grid_h);
	if (!pDownsample_matrix)
	{
		assert(0);
		return false;
	}

	downsample_weight_grid(
		pDownsample_matrix,
		BLOCK_W, BLOCK_H,		// source/from dimension (block size)
		grid_w, grid_h,			// dest/to dimension (grid size)
		desired_weights,		// dequantized weights, NOT ISE symbols, [by][bx]
		downsampled_weights);	// [wy][wx]

	store_partition_and_endpoints();

	const auto& weight_to_ise = astc_helpers::g_dequant_tables.get_weight_tab(weights_ise_range).m_val_to_ise;
	const uint32_t total_grid_weights = grid_w * grid_h;
	for (uint32_t g = 0; g < total_grid_weights; g++)
		best_log_blk.m_weights[g] = weight_to_ise[downsampled_weights[g]];

	res[0].m_valid = true;
	res[0].m_log_blk = best_log_blk;

	const bool want_refinement = (refine_endpoints_flag) && (comp_level >= MIN_REFINE_LEVEL) && ((grid_w < 6) || (grid_h < 6));
	if (!want_refinement)
		return true;

	// Refine each subset's endpoints against the downsampled weights; res[0] keeps the unrefined copy.
	bool any_refined = false;
	for (uint32_t subset = 0; subset < NUM_SUBSETS; subset++)
	{
		const bool subset_refined = refine_endpoints(
			cem,
			endpoints_ise_range,
			best_log_blk.m_endpoints + subset * num_endpoint_vals, // the endpoints to optimize
			BLOCK_W, BLOCK_H, // block dimensions
			grid_w, grid_h, best_log_blk.m_weights, weights_ise_range, // weight grid
			part_total_pixels[subset], (basist::half_float(*)[3])part_half_pixels[subset], (vec4F*)part_pixels_q16[subset],
			&part_pixel_index[subset][0], // maps this subset's pixels to block offsets
			coptions, mode11_opt_mode);

		any_refined = any_refined || subset_refined;
	}

	if (any_refined)
	{
		res[1].m_valid = true;
		res[1].m_log_blk = best_log_blk;
	}

	return true;
}
// Hash map keyed by a partition pattern. init_partitions3_6x6() stores (seed index, unique index)
// pairs in it and uses it to detect patterns that were already seen.
typedef basisu : : hash_map < partition_pattern_vec , std : : pair < uint32_t , uint32_t > > partition3_hash_map ;
// Three-subset partition patterns for 6x6 blocks, indexed by "unique index".
// Filled in by init_partitions3_6x6().
partition_pattern_vec g_partitions3 [ NUM_UNIQUE_PARTITIONS3 ] ;
// Reverse lookup from a partition seed (array has 1024 entries) to the unique index;
// init_partitions3_6x6() sets entries for seeds it does not keep to -1.
int g_part3_seed_to_unique_index [ 1024 ] ;
// vp_tree initialized over g_partitions3 by init_partitions3_6x6(); queried via find_nearest()
// by estimate_partition3_6x6().
vp_tree g_part3_vp_tree ;
static void init_partitions3_6x6 ( )
{
uint32_t t = 0 ;
for ( uint32_t i = 0 ; i < 1024 ; i + + )
g_part3_seed_to_unique_index [ i ] = - 1 ;
partition3_hash_map part3_hash ;
part3_hash . reserve ( 512 ) ;
for ( uint32_t seed_index = 0 ; seed_index < 1024 ; seed_index + + )
{
partition_pattern_vec p3 ;
uint32_t part_hist [ 3 ] = { 0 } ;
for ( uint32_t y = 0 ; y < 6 ; y + + )
{
for ( uint32_t x = 0 ; x < 6 ; x + + )
{
uint64_t p = astc_helpers : : compute_texel_partition ( seed_index , x , y , 0 , 3 , false ) ;
assert ( p < 3 ) ;
p3 . m_parts [ x + y * 6 ] = ( uint8_t ) p ;
part_hist [ p ] + + ;
}
}
if ( ! part_hist [ 0 ] | | ! part_hist [ 1 ] | | ! part_hist [ 2 ] )
continue ;
uint32_t j ;
for ( j = 0 ; j < NUM_PART3_MAPPINGS ; j + + )
{
partition_pattern_vec temp_part3 ( p3 . get_permuted3 ( j ) ) ;
if ( part3_hash . contains ( temp_part3 ) )
break ;
}
if ( j < NUM_PART3_MAPPINGS )
continue ;
part3_hash . insert ( p3 , std : : make_pair ( seed_index , t ) ) ;
assert ( g_part3_unique_index_to_seed [ t ] = = seed_index ) ;
g_part3_seed_to_unique_index [ seed_index ] = t ;
g_partitions3 [ t ] = p3 ;
t + + ;
}
g_part3_vp_tree . init ( NUM_UNIQUE_PARTITIONS3 , g_partitions3 ) ;
}
static bool estimate_partition3_6x6 (
const basist : : half_float pBlock_pixels_half [ ] [ 3 ] ,
int * pBest_parts , uint32_t num_best_parts )
{
const uint32_t BLOCK_W = 6 , BLOCK_H = 6 , BLOCK_T = BLOCK_W * BLOCK_H , NUM_SUBSETS = 3 ;
assert ( num_best_parts & & ( num_best_parts < = NUM_UNIQUE_PARTITIONS3 ) ) ;
vec3F training_vecs [ BLOCK_T ] , mean ( 0.0f ) ;
float brightest_inten = 0.0f , darkest_inten = BIG_FLOAT_VAL ;
vec3F cluster_centroids [ NUM_SUBSETS ] ;
for ( uint32_t i = 0 ; i < BLOCK_T ; i + + )
{
vec3F & v = training_vecs [ i ] ;
v . set ( ( float ) pBlock_pixels_half [ i ] [ 0 ] , ( float ) pBlock_pixels_half [ i ] [ 1 ] , ( float ) pBlock_pixels_half [ i ] [ 2 ] ) ;
float inten = v . dot ( vec3F ( 1.0f ) ) ;
if ( inten < darkest_inten )
{
darkest_inten = inten ;
cluster_centroids [ 0 ] = v ;
}
if ( inten > brightest_inten )
{
brightest_inten = inten ;
cluster_centroids [ 1 ] = v ;
}
}
if ( cluster_centroids [ 0 ] = = cluster_centroids [ 1 ] )
return false ;
float furthest_dist2 = 0.0f ;
for ( uint32_t i = 0 ; i < BLOCK_T ; i + + )
{
vec3F & v = training_vecs [ i ] ;
float dist_a = v . squared_distance ( cluster_centroids [ 0 ] ) ;
if ( dist_a = = 0.0f )
continue ;
float dist_b = v . squared_distance ( cluster_centroids [ 1 ] ) ;
if ( dist_b = = 0.0f )
continue ;
float dist2 = dist_a + dist_b ;
if ( dist2 > furthest_dist2 )
{
furthest_dist2 = dist2 ;
cluster_centroids [ 2 ] = v ;
}
}
if ( ( cluster_centroids [ 0 ] = = cluster_centroids [ 2 ] ) | | ( cluster_centroids [ 1 ] = = cluster_centroids [ 2 ] ) )
return false ;
uint32_t cluster_pixels [ NUM_SUBSETS ] [ BLOCK_T ] ;
uint32_t num_cluster_pixels [ NUM_SUBSETS ] ;
vec3F new_cluster_means [ NUM_SUBSETS ] ;
const uint32_t NUM_ITERS = 4 ;
for ( uint32_t s = 0 ; s < NUM_ITERS ; s + + )
{
memset ( num_cluster_pixels , 0 , sizeof ( num_cluster_pixels ) ) ;
memset ( new_cluster_means , 0 , sizeof ( new_cluster_means ) ) ;
for ( uint32_t i = 0 ; i < BLOCK_T ; i + + )
{
float d [ NUM_SUBSETS ] = {
training_vecs [ i ] . squared_distance ( cluster_centroids [ 0 ] ) ,
training_vecs [ i ] . squared_distance ( cluster_centroids [ 1 ] ) ,
training_vecs [ i ] . squared_distance ( cluster_centroids [ 2 ] ) } ;
float min_d = d [ 0 ] ;
uint32_t min_idx = 0 ;
for ( uint32_t j = 1 ; j < NUM_SUBSETS ; j + + )
{
if ( d [ j ] < min_d )
{
min_d = d [ j ] ;
min_idx = j ;
}
}
cluster_pixels [ min_idx ] [ num_cluster_pixels [ min_idx ] ] = i ;
new_cluster_means [ min_idx ] + = training_vecs [ i ] ;
num_cluster_pixels [ min_idx ] + + ;
} // i
for ( uint32_t j = 0 ; j < NUM_SUBSETS ; j + + )
{
if ( ! num_cluster_pixels [ j ] )
return false ;
cluster_centroids [ j ] = new_cluster_means [ j ] / ( float ) num_cluster_pixels [ j ] ;
}
} // s
partition_pattern_vec desired_part ;
for ( uint32_t p = 0 ; p < NUM_SUBSETS ; p + + )
{
for ( uint32_t i = 0 ; i < num_cluster_pixels [ p ] ; i + + )
{
const uint32_t pix_index = cluster_pixels [ p ] [ i ] ;
desired_part [ pix_index ] = ( uint8_t ) p ;
}
}
# if BRUTE_FORCE_PART_SEARCH
partition_pattern_vec desired_parts [ NUM_PART3_MAPPINGS ] ;
for ( uint32_t j = 0 ; j < NUM_PART3_MAPPINGS ; j + + )
desired_parts [ j ] = desired_part . get_permuted3 ( j ) ;
uint32_t part_similarity [ NUM_UNIQUE_PARTITIONS3 ] ;
for ( uint32_t part_index = 0 ; part_index < NUM_UNIQUE_PARTITIONS3 ; part_index + + )
{
const partition_pattern_vec & pat = g_partitions3 [ part_index ] ;
uint32_t lowest_pat_dist = UINT32_MAX ;
for ( uint32_t p = 0 ; p < NUM_PART3_MAPPINGS ; p + + )
{
uint32_t dist = pat . get_squared_distance ( desired_parts [ p ] ) ;
if ( dist < lowest_pat_dist )
lowest_pat_dist = dist ;
}
part_similarity [ part_index ] = ( lowest_pat_dist < < 16 ) | part_index ;
} // part_index;
std : : sort ( part_similarity , part_similarity + NUM_UNIQUE_PARTITIONS3 ) ;
for ( uint32_t i = 0 ; i < num_best_parts ; i + + )
pBest_parts [ i ] = part_similarity [ i ] & 0xFFFF ;
# else
vp_tree : : result_queue results ;
results . reserve ( num_best_parts ) ;
g_part3_vp_tree . find_nearest ( 3 , desired_part , results , num_best_parts ) ;
assert ( results . get_size ( ) = = num_best_parts ) ;
const auto & elements = results . get_elements ( ) ;
for ( uint32_t i = 0 ; i < results . get_size ( ) ; i + + )
pBest_parts [ i ] = elements [ 1 + i ] . m_pat_index ;
# endif
return true ;
}
// Encodes a 6x6 block with three subsets, trying a list of candidate partition patterns and
// keeping the one with the lowest error.
//
// pEst_patterns/num_est_patterns: unique pattern indices (into g_partitions3) to try; when
// num_est_patterns is 0, every one of the NUM_UNIQUE_PARTITIONS3 patterns is tried.
// cem must be 7 or 11.
//
// For a full 6x6 weight grid, the candidate's error is the sum of the per-subset encoder errors.
// For a smaller grid, the weights are downsampled and requantized, the endpoints are refined, and
// the candidate's error is measured by decoding the trial block and comparing it to the source.
//
// On success res.m_log_blk/m_err hold the best candidate and res.m_valid is true. Returns
// res.m_valid, or false early if no downsample matrix exists or a trial block fails to decode.
static bool encode_block_3_subsets(
	trial_result& res,
	uint32_t cem,
	uint32_t grid_w, uint32_t grid_h,
	uint32_t weights_ise_range, uint32_t endpoints_ise_range,
	const half_vec3* pBlock_pixels_half, const vec4F* pBlock_pixels_q16,
	astc_hdr_codec_base_options& coptions,
	bool uber_mode_flag,
	const int* pEst_patterns, int num_est_patterns,
	uint32_t comp_level,
	opt_mode_t mode11_opt_mode)
{
	const uint32_t BLOCK_W = 6, BLOCK_H = 6, NUM_SUBSETS = 3;
	const uint32_t num_endpoint_vals = astc_helpers::get_num_cem_values(cem);

	res.m_valid = false;

	double best_e = BIG_FLOAT_VAL;

	astc_helpers::log_astc_block best_log_blk;
	clear_obj(best_log_blk);

	best_log_blk.m_num_partitions = NUM_SUBSETS;
	best_log_blk.m_color_endpoint_modes[0] = (uint8_t)cem;
	best_log_blk.m_color_endpoint_modes[1] = (uint8_t)cem;
	best_log_blk.m_color_endpoint_modes[2] = (uint8_t)cem;
	best_log_blk.m_grid_width = (uint8_t)grid_w;
	best_log_blk.m_grid_height = (uint8_t)grid_h;

	best_log_blk.m_weight_ise_range = (uint8_t)weights_ise_range;
	best_log_blk.m_endpoint_ise_range = (uint8_t)endpoints_ise_range;

	const uint32_t n = num_est_patterns ? num_est_patterns : NUM_UNIQUE_PARTITIONS3;

	for (uint32_t unique_p_iter = 0; unique_p_iter < n; unique_p_iter++)
	{
		const uint32_t unique_part_index = num_est_patterns ? pEst_patterns[unique_p_iter] : unique_p_iter;
		assert(unique_part_index < NUM_UNIQUE_PARTITIONS3);
		const partition_pattern_vec* pPart = &g_partitions3[unique_part_index];

		// Gather each subset's texels (in row-major block order) and remember their block offsets.
		vec4F part_pixels_q16[NUM_SUBSETS][64];
		half_vec3 part_half_pixels[NUM_SUBSETS][64];
		uint8_t part_pixel_index[NUM_SUBSETS][64];
		uint32_t part_total_pixels[NUM_SUBSETS] = { 0 };

		for (uint32_t y = 0; y < BLOCK_H; y++)
		{
			for (uint32_t x = 0; x < BLOCK_W; x++)
			{
				const uint32_t part_index = pPart->m_parts[x + y * BLOCK_W];

				uint32_t l = part_total_pixels[part_index];

				part_pixels_q16[part_index][l] = pBlock_pixels_q16[x + y * BLOCK_W];
				part_half_pixels[part_index][l] = pBlock_pixels_half[x + y * BLOCK_W];
				part_pixel_index[part_index][l] = (uint8_t)(x + y * BLOCK_W);

				part_total_pixels[part_index] = l + 1;
			} // x
		} // y

		uint8_t blk_endpoints[NUM_SUBSETS][basist::NUM_MODE11_ENDPOINTS];
		uint8_t blk_weights[NUM_SUBSETS][BLOCK_W * BLOCK_H];
		uint32_t best_submode[NUM_SUBSETS];

		// Encode every subset independently, summing the reported errors.
		double e = 0.0f;
		bool subset_failed = false;
		for (uint32_t part_iter = 0; part_iter < NUM_SUBSETS; part_iter++)
		{
			assert(part_total_pixels[part_iter]);

			double part_e;
			if (cem == 7)
			{
				part_e = encode_astc_hdr_block_mode_7(
					part_total_pixels[part_iter],
					(basist::half_float(*)[3])part_half_pixels[part_iter], (vec4F*)part_pixels_q16[part_iter],
					best_log_blk.m_weight_ise_range,
					best_submode[part_iter],
					BIG_FLOAT_VAL,
					blk_endpoints[part_iter],
					blk_weights[part_iter],
					coptions,
					best_log_blk.m_endpoint_ise_range);
			}
			else
			{
				assert(cem == 11);

				part_e = encode_astc_hdr_block_mode_11(
					part_total_pixels[part_iter],
					(basist::half_float(*)[3])part_half_pixels[part_iter], (vec4F*)part_pixels_q16[part_iter],
					best_log_blk.m_weight_ise_range,
					best_submode[part_iter],
					BIG_FLOAT_VAL,
					blk_endpoints[part_iter],
					blk_weights[part_iter],
					coptions,
					false, best_log_blk.m_endpoint_ise_range, uber_mode_flag, false,
					FIRST_MODE11_SUBMODE_INDEX, MAX_MODE11_SUBMODE_INDEX, false, mode11_opt_mode);
			}

			// As in encode_block_2_subsets(), a BIG_FLOAT_VAL return is treated as a failed encode.
			// The endpoint/weight outputs for this subset cannot be trusted then, so the pattern is
			// skipped rather than used below to index the dequantization table.
			if (part_e == BIG_FLOAT_VAL)
			{
				subset_failed = true;
				break;
			}

			e += part_e;
		} // part_iter

		if (subset_failed)
			continue;

		// Interleave the per-subset weights back into block (row-major) order.
		uint8_t ise_weights[BLOCK_W * BLOCK_H];

		uint32_t src_pixel_index[NUM_SUBSETS] = { 0 };
		for (uint32_t y = 0; y < BLOCK_H; y++)
		{
			for (uint32_t x = 0; x < BLOCK_W; x++)
			{
				const uint32_t part_index = pPart->m_parts[x + y * BLOCK_W];

				ise_weights[x + y * BLOCK_W] = blk_weights[part_index][src_pixel_index[part_index]];
				src_pixel_index[part_index]++;
			} // x
		} // y

		if ((grid_w == BLOCK_W) && (grid_h == BLOCK_H))
		{
			// Full-resolution weight grid: the summed encoder error ranks the candidate directly.
			if (e < best_e)
			{
				best_e = e;
				best_log_blk.m_partition_id = (uint16_t)g_part3_unique_index_to_seed[unique_part_index];

				for (uint32_t p = 0; p < NUM_SUBSETS; p++)
					memcpy(best_log_blk.m_endpoints + num_endpoint_vals * p, blk_endpoints[p], num_endpoint_vals);

				memcpy(best_log_blk.m_weights, ise_weights, BLOCK_W * BLOCK_H);
			}
		}
		else
		{
			// Smaller weight grid: dequantize, downsample to the grid, then requantize.
			uint8_t desired_weights[BLOCK_H * BLOCK_W];

			const auto& dequant_tab = astc_helpers::g_dequant_tables.get_weight_tab(weights_ise_range).m_ISE_to_val;

			for (uint32_t by = 0; by < BLOCK_H; by++)
				for (uint32_t bx = 0; bx < BLOCK_W; bx++)
					desired_weights[bx + by * BLOCK_W] = dequant_tab[ise_weights[bx + by * BLOCK_W]];

			uint8_t downsampled_weights[BLOCK_H * BLOCK_W];

			const float* pDownsample_matrix = get_6x6_downsample_matrix(grid_w, grid_h);
			if (!pDownsample_matrix)
			{
				assert(0);
				return false;
			}

			downsample_weight_grid(
				pDownsample_matrix,
				BLOCK_W, BLOCK_H,		// source/from dimension (block size)
				grid_w, grid_h,			// dest/to dimension (grid size)
				desired_weights,		// these are dequantized weights, NOT ISE symbols, [by][bx]
				downsampled_weights);	// [wy][wx]

			astc_helpers::log_astc_block trial_blk(best_log_blk);

			trial_blk.m_partition_id = (uint16_t)g_part3_unique_index_to_seed[unique_part_index];

			for (uint32_t p = 0; p < NUM_SUBSETS; p++)
				memcpy(trial_blk.m_endpoints + num_endpoint_vals * p, blk_endpoints[p], num_endpoint_vals);

			const auto& weight_to_ise = astc_helpers::g_dequant_tables.get_weight_tab(weights_ise_range).m_val_to_ise;

			for (uint32_t gy = 0; gy < grid_h; gy++)
				for (uint32_t gx = 0; gx < grid_w; gx++)
					trial_blk.m_weights[gx + gy * grid_w] = weight_to_ise[downsampled_weights[gx + gy * grid_w]];

			// Re-fit each subset's endpoints to the downsampled weights.
			if ((comp_level >= MIN_REFINE_LEVEL) && ((grid_w < 6) || (grid_h < 6)))
			{
				for (uint32_t part_iter = 0; part_iter < NUM_SUBSETS; part_iter++)
				{
					bool refine_status = refine_endpoints(
						cem,
						endpoints_ise_range,
						trial_blk.m_endpoints + part_iter * num_endpoint_vals, // the endpoints to optimize
						BLOCK_W, BLOCK_H, // block dimensions
						grid_w, grid_h, trial_blk.m_weights, weights_ise_range, // weight grid
						part_total_pixels[part_iter], (basist::half_float(*)[3])part_half_pixels[part_iter], (vec4F*)part_pixels_q16[part_iter],
						&part_pixel_index[part_iter][0], // maps this subset's pixels to block offsets
						coptions, mode11_opt_mode);

					BASISU_NOTE_UNUSED(refine_status);
				}
			}

			// The per-subset encoder error no longer applies after downsampling, so decode the
			// trial block and measure its error against the source texels.
			half_vec4 decoded_pixels_half4[BLOCK_H][BLOCK_W]; // [y][x]
			bool status = astc_helpers::decode_block(trial_blk, decoded_pixels_half4, BLOCK_W, BLOCK_H, astc_helpers::cDecodeModeHDR16);
			assert(status);
			if (!status)
				return false;

			half_vec3 decoded_pixels_half3[BLOCK_H][BLOCK_W];
			for (uint32_t y = 0; y < BLOCK_H; y++)
				for (uint32_t x = 0; x < BLOCK_W; x++)
					decoded_pixels_half3[y][x].set(decoded_pixels_half4[y][x][0], decoded_pixels_half4[y][x][1], decoded_pixels_half4[y][x][2]);

			double trial_err = compute_block_error(BLOCK_W * BLOCK_H, (const basist::half_float*)pBlock_pixels_half, (const basist::half_float*)decoded_pixels_half3, coptions);
			if (trial_err < best_e)
			{
				best_e = trial_err;
				best_log_blk = trial_blk;
			}
		}

	} // unique_p_iter

	if (best_e < BIG_FLOAT_VAL)
	{
		res.m_log_blk = best_log_blk;
		res.m_valid = true;
		res.m_err = best_e;
	}
	else
	{
		res.m_valid = false;
	}

	return res.m_valid;
}
// Writes `total_values` ISE symbols to `coder` and returns the number of bits written.
//
// Each symbol is split into its low "bits" part and a trit/quint part, using the bit/trit/quint
// description of `endpoint_range` from astc_helpers::g_ise_range_table. The trit/quint digits are
// packed first: 5 trits per 8-bit group or 3 quints per 7-bit group, followed by a shorter group
// for any leftover digits. The plain bit fields of all symbols follow.
static uint32_t encode_values(bitwise_coder& coder, uint32_t total_values, const uint8_t* pVals, uint32_t endpoint_range)
{
	const uint32_t MAX_VALS = 64;
	assert((total_values) && (total_values <= MAX_VALS));

	const uint32_t ep_bits = astc_helpers::g_ise_range_table[endpoint_range][0];
	const uint32_t ep_trits = astc_helpers::g_ise_range_table[endpoint_range][1];
	const uint32_t ep_quints = astc_helpers::g_ise_range_table[endpoint_range][2];

	// Digit base and the multiplier value that marks a completed group (3^5 or 5^3).
	// A base of 0 means the range has no trit/quint component.
	const uint32_t digit_base = ep_trits ? 3 : (ep_quints ? 5 : 0);
	const uint32_t full_group_mul = ep_trits ? 243 : 125;

	uint32_t packed_groups[(MAX_VALS + 2) / 3];
	uint32_t num_packed_groups = 0;
	uint32_t group_accum = 0, group_mul = 1;

	for (uint32_t i = 0; i < total_values; i++)
	{
		if (!digit_base)
			continue;

		const uint32_t digit = (uint32_t)pVals[i] >> ep_bits;
		assert(digit < digit_base);

		group_accum += digit * group_mul;
		group_mul *= digit_base;

		if (group_mul == full_group_mul)
		{
			assert(num_packed_groups < BASISU_ARRAY_SIZE(packed_groups));
			packed_groups[num_packed_groups++] = group_accum;

			group_accum = 0;
			group_mul = 1;
		}
	}

	uint32_t total_bits_output = 0;

	// Completed groups.
	const uint32_t full_group_bits = ep_trits ? 8 : 7;
	for (uint32_t g = 0; g < num_packed_groups; g++)
	{
		coder.put_bits(packed_groups[g], full_group_bits);
		total_bits_output += full_group_bits;
	}

	// Trailing partial group, sized by how many digits it holds.
	if (group_mul > 1)
	{
		uint32_t partial_bits;
		if (ep_trits)
		{
			switch (group_mul)
			{
			case 3:  partial_bits = 2; break;
			case 9:  partial_bits = 4; break;
			case 27: partial_bits = 5; break;
			default: partial_bits = 7; break; // 81
			}
		}
		else
		{
			partial_bits = (group_mul == 5) ? 3 : 5; // 5 or 25
		}

		coder.put_bits(group_accum, partial_bits);
		total_bits_output += partial_bits;
	}

	// Low bits of every symbol, in order.
	for (uint32_t i = 0; i < total_values; i++)
	{
		const uint32_t val = pVals[i];
		const uint32_t low_bits = val & ((1 << ep_bits) - 1);

		coder.put_bits(low_bits, ep_bits);
		total_bits_output += ep_bits;
	}

	return total_bits_output;
}
static inline uint32_t get_num_endpoint_vals ( uint32_t cem )
{
assert ( ( cem = = 7 ) | | ( cem = = 11 ) ) ;
return ( cem = = 11 ) ? basist : : NUM_MODE11_ENDPOINTS : basist : : NUM_MODE7_ENDPOINTS ;
}
// Serializes one logical ASTC block to `coder`.
//
// Layout: block mode index, endpoint mode, then the endpoint payload, then the weights.
//  - cUseLeftDelta / cUseUpperDelta: one NUM_ENDPOINT_DELTA_BITS-wide delta per endpoint value,
//    read from pEP_deltas (single-partition blocks only, asserted).
//  - cRaw: for 2 or 3 partitions the unique partition index is written first, followed by all
//    endpoint values.
//  - any other endpoint mode: no endpoint payload is written.
static void code_block(bitwise_coder& coder,
	const astc_helpers::log_astc_block& log_blk,
	block_mode block_mode_index,
	endpoint_mode em, const uint8_t* pEP_deltas)
{
	coder.put_truncated_binary((uint32_t)block_mode_index, (uint32_t)block_mode::cBMTotalModes);
	coder.put_truncated_binary((uint32_t)em, (uint32_t)endpoint_mode::cTotal);

	const uint32_t num_endpoint_vals = get_num_endpoint_vals(log_blk.m_color_endpoint_modes[0]);

	switch (em)
	{
	case endpoint_mode::cUseLeftDelta:
	case endpoint_mode::cUseUpperDelta:
	{
		assert(log_blk.m_num_partitions == 1);

		for (uint32_t val_index = 0; val_index < num_endpoint_vals; val_index++)
			coder.put_bits(pEP_deltas[val_index], NUM_ENDPOINT_DELTA_BITS);

		break;
	}
	case endpoint_mode::cRaw:
	{
		// Multi-partition blocks store the compact unique pattern index rather than the raw seed.
		switch (log_blk.m_num_partitions)
		{
		case 2:
		{
			const int unique_partition_index = g_part2_seed_to_unique_index[log_blk.m_partition_id];
			assert(unique_partition_index != -1);

			coder.put_truncated_binary(unique_partition_index, NUM_UNIQUE_PARTITIONS2);
			break;
		}
		case 3:
		{
			const int unique_partition_index = g_part3_seed_to_unique_index[log_blk.m_partition_id];
			assert(unique_partition_index != -1);

			coder.put_truncated_binary(unique_partition_index, NUM_UNIQUE_PARTITIONS3);
			break;
		}
		default:
			break;
		}

		encode_values(coder, num_endpoint_vals * log_blk.m_num_partitions, log_blk.m_endpoints, log_blk.m_endpoint_ise_range);
		break;
	}
	default:
		break;
	}

	// Weights are always written; a dual-plane block carries two weights per grid point.
	encode_values(coder, log_blk.m_grid_width * log_blk.m_grid_height * (log_blk.m_dual_plane ? 2 : 1), log_blk.m_weights, log_blk.m_weight_ise_range);
}
// Tunable parameters for the smooth-region map: a std-dev threshold and a maximum MSE scale for
// each of three region sizes, plus two switches. A freshly constructed object holds the defaults
// assigned by clear().
struct smooth_map_params
{
	bool m_no_mse_scaling;

	float m_max_smooth_std_dev;
	float m_smooth_max_mse_scale;

	float m_max_med_smooth_std_dev;
	float m_med_smooth_max_mse_scale;

	float m_max_ultra_smooth_std_dev;
	float m_ultra_smooth_max_mse_scale;

	bool m_debug_images;

	smooth_map_params()
	{
		clear();
	}

	// Resets every field to its built-in default.
	void clear()
	{
		// Switches
		m_no_mse_scaling = false;
		m_debug_images = true;

		// Smooth: 3x3 region
		m_max_smooth_std_dev = 100.0f;
		m_smooth_max_mse_scale = 13000.0f;

		// Medium smooth: 7x7 region
		m_max_med_smooth_std_dev = 9.0f;
		m_med_smooth_max_mse_scale = 15000.0f;

		// Ultra smooth: 11x11 region.
		// Scales tried before settling on the current one: 4500, 10000, 50000, 100000, 400000, 800000.
		m_max_ultra_smooth_std_dev = 4.0f;
		m_ultra_smooth_max_mse_scale = 2000000.0f;
	}
};
// Resampler contributor lists indexed by destination width/height; entries 1-6 are filled by
// init_contrib_lists() and entry 0 is never written there.
Resampler : : Contrib_List * g_contrib_lists [ 7 ] ; // 1-6
static void init_contrib_lists ( )
{
for ( uint32_t dst_width = 1 ; dst_width < = 6 ; dst_width + + )
//g_contrib_lists[dst_width] = Resampler::make_clist(6, 6, basisu::Resampler::BOUNDARY_CLAMP, gaussian_filter, BASISU_GAUSSIAN_FILTER_SUPPORT, 6.0f / (float)dst_width, 0.0f);
g_contrib_lists [ dst_width ] = Resampler : : make_clist ( 6 , 6 , basisu : : Resampler : : BOUNDARY_CLAMP , gaussian_filter , BASISU_BELL_FILTER_SUPPORT , 6.0f / ( float ) dst_width , 0.0f ) ;
}
// Disabled (#if 0) variant of filter_block: filters a 6x6 vec3F block with the contributor lists
// for grid_x (rows) and grid_y (columns), then writes the result both as half floats and as
// qlog16 values (the 4th qlog16 component is set to 0).
#if 0
static void filter_block ( uint32_t grid_x , uint32_t grid_y , const vec3F * pSrc_block , half_vec3 * pDst_block_half3 , vec4F * pDst_block_q16 )
{
vec3F temp_block [ 6 ] [ 6 ] ; // [y][x]
// first filter rows to temp_block (a straight copy when grid_x is 6)
if ( grid_x = = 6 )
{
memcpy ( temp_block , pSrc_block , sizeof ( vec3F ) * 6 * 6 ) ;
}
else
{
Resampler : : Contrib_List * pRow_lists = g_contrib_lists [ grid_x ] ;
for ( uint32_t y = 0 ; y < 6 ; y + + )
{
for ( uint32_t x = 0 ; x < 6 ; x + + )
{
// weighted sum of the contributing source texels in this row
vec3F p ( 0.0f ) ;
for ( uint32_t i = 0 ; i < pRow_lists [ x ] . n ; i + + )
p + = pSrc_block [ y * 6 + pRow_lists [ x ] . p [ i ] . pixel ] * pRow_lists [ x ] . p [ i ] . weight ;
p . clamp ( 0.0f , basist : : ASTC_HDR_MAX_VAL ) ;
temp_block [ y ] [ x ] = p ;
} // x
} // y
}
// filter columns (no filtering when grid_y is 6, just convert temp_block to the outputs)
if ( grid_y = = 6 )
{
for ( uint32_t y = 0 ; y < 6 ; y + + )
{
for ( uint32_t x = 0 ; x < 6 ; x + + )
{
for ( uint32_t c = 0 ; c < 3 ; c + + )
{
const basist : : half_float h = basist : : float_to_half ( temp_block [ y ] [ x ] [ c ] ) ;
pDst_block_half3 [ x + y * 6 ] [ c ] = h ;
pDst_block_q16 [ x + y * 6 ] [ c ] = ( float ) half_to_qlog16 ( h ) ;
}
pDst_block_q16 [ x + y * 6 ] [ 3 ] = 0.0f ;
} // x
} // y
}
else
{
Resampler : : Contrib_List * pCol_lists = g_contrib_lists [ grid_y ] ;
for ( uint32_t x = 0 ; x < 6 ; x + + )
{
for ( uint32_t y = 0 ; y < 6 ; y + + )
{
// weighted sum of the contributing row-filtered texels in this column
vec3F p ( 0.0f ) ;
for ( uint32_t i = 0 ; i < pCol_lists [ y ] . n ; i + + )
p + = temp_block [ pCol_lists [ y ] . p [ i ] . pixel ] [ x ] * pCol_lists [ y ] . p [ i ] . weight ;
p . clamp ( 0.0f , basist : : ASTC_HDR_MAX_VAL ) ;
for ( uint32_t c = 0 ; c < 3 ; c + + )
{
const basist : : half_float h = basist : : float_to_half ( p [ c ] ) ;
pDst_block_half3 [ x + y * 6 ] [ c ] = h ;
pDst_block_q16 [ x + y * 6 ] [ c ] = ( float ) half_to_qlog16 ( h ) ;
}
pDst_block_q16 [ x + y * 6 ] [ 3 ] = 0.0f ;
} // y
} // x
}
}
# endif
static void filter_block ( uint32_t grid_x , uint32_t grid_y , const vec4F * pSrc_block , vec4F * pDst_block )
{
vec4F temp_block [ 6 ] [ 6 ] ; // [y][x]
// first filter rows to temp_block
if ( grid_x = = 6 )
{
memcpy ( temp_block , pSrc_block , sizeof ( vec4F ) * 6 * 6 ) ;
}
else
{
Resampler : : Contrib_List * pRow_lists = g_contrib_lists [ grid_x ] ;
for ( uint32_t y = 0 ; y < 6 ; y + + )
{
for ( uint32_t x = 0 ; x < 6 ; x + + )
{
vec3F p ( 0.0f ) ;
for ( uint32_t i = 0 ; i < pRow_lists [ x ] . n ; i + + )
p + = vec3F ( pSrc_block [ y * 6 + pRow_lists [ x ] . p [ i ] . pixel ] ) * pRow_lists [ x ] . p [ i ] . weight ;
p . clamp ( 0.0f , basist : : ASTC_HDR_MAX_VAL ) ;
temp_block [ y ] [ x ] = p ;
} // x
} // y
}
// filter columns
if ( grid_y = = 6 )
{
for ( uint32_t y = 0 ; y < 6 ; y + + )
{
for ( uint32_t x = 0 ; x < 6 ; x + + )
{
for ( uint32_t c = 0 ; c < 3 ; c + + )
pDst_block [ x + y * 6 ] [ c ] = temp_block [ y ] [ x ] [ c ] ;
} // x
} // y
}
else
{
Resampler : : Contrib_List * pCol_lists = g_contrib_lists [ grid_y ] ;
for ( uint32_t x = 0 ; x < 6 ; x + + )
{
for ( uint32_t y = 0 ; y < 6 ; y + + )
{
vec3F p ( 0.0f ) ;
for ( uint32_t i = 0 ; i < pCol_lists [ y ] . n ; i + + )
p + = temp_block [ pCol_lists [ y ] . p [ i ] . pixel ] [ x ] * pCol_lists [ y ] . p [ i ] . weight ;
p . clamp ( 0.0f , basist : : ASTC_HDR_MAX_VAL ) ;
pDst_block [ x + y * 6 ] = p ;
} // x
} // y
}
}
static void filter_block ( uint32_t grid_x , uint32_t grid_y , const vec3F * pSrc_block , vec3F * pDst_block )
{
vec3F temp_block [ 6 ] [ 6 ] ; // [y][x]
// first filter rows to temp_block
if ( grid_x = = 6 )
{
memcpy ( temp_block , pSrc_block , sizeof ( vec3F ) * 6 * 6 ) ;
}
else
{
Resampler : : Contrib_List * pRow_lists = g_contrib_lists [ grid_x ] ;
for ( uint32_t y = 0 ; y < 6 ; y + + )
{
for ( uint32_t x = 0 ; x < 6 ; x + + )
{
vec3F p ( 0.0f ) ;
for ( uint32_t i = 0 ; i < pRow_lists [ x ] . n ; i + + )
p + = vec3F ( pSrc_block [ y * 6 + pRow_lists [ x ] . p [ i ] . pixel ] ) * pRow_lists [ x ] . p [ i ] . weight ;
temp_block [ y ] [ x ] = p ;
} // x
} // y
}
// filter columns
if ( grid_y = = 6 )
{
2025-10-09 12:28:20 +02:00
memcpy ( ( void * ) pDst_block , temp_block , sizeof ( vec3F ) * 6 * 6 ) ;
2025-03-11 10:51:10 +01:00
}
else
{
Resampler : : Contrib_List * pCol_lists = g_contrib_lists [ grid_y ] ;
for ( uint32_t x = 0 ; x < 6 ; x + + )
{
for ( uint32_t y = 0 ; y < 6 ; y + + )
{
vec3F & p = pDst_block [ x + y * 6 ] ;
p . set ( 0.0f ) ;
for ( uint32_t i = 0 ; i < pCol_lists [ y ] . n ; i + + )
p + = temp_block [ pCol_lists [ y ] . p [ i ] . pixel ] [ x ] * pCol_lists [ y ] . p [ i ] . weight ;
} // x
} // y
}
}
static float diff_blocks ( const vec4F * pA , const vec4F * pB )
{
const uint32_t BLOCK_T = 36 ;
float diff = 0.0f ;
for ( uint32_t i = 0 ; i < BLOCK_T ; i + + )
diff + = square ( pA [ i ] [ 0 ] - pB [ i ] [ 0 ] ) + square ( pA [ i ] [ 1 ] - pB [ i ] [ 1 ] ) + square ( pA [ i ] [ 2 ] - pB [ i ] [ 2 ] ) ;
return diff * ( 1.0f / ( float ) BLOCK_T ) ;
}
static float sub_and_compute_std_dev ( const vec3F * pA , const vec3F * pB )
{
const uint32_t BLOCK_T = 36 ;
vec3F mean ( 0.0f ) ;
for ( uint32_t i = 0 ; i < BLOCK_T ; i + + )
{
vec3F diff ( pA [ i ] - pB [ i ] ) ;
mean + = diff ;
}
mean * = ( 1.0f / ( float ) BLOCK_T ) ;
vec3F diff_sum ( 0.0f ) ;
for ( uint32_t i = 0 ; i < BLOCK_T ; i + + )
{
vec3F diff ( pA [ i ] - pB [ i ] ) ;
diff - = mean ;
diff_sum + = vec3F : : component_mul ( diff , diff ) ;
}
vec3F var ( diff_sum * ( 1.0f / ( float ) BLOCK_T ) ) ;
vec3F std_dev ( sqrtf ( var [ 0 ] ) , sqrtf ( var [ 1 ] ) , sqrtf ( var [ 2 ] ) ) ;
return maximum ( std_dev [ 0 ] , std_dev [ 1 ] , std_dev [ 2 ] ) ;
}
// Builds a per-pixel MSE scale map from local image activity.
//
// For every pixel, the maximum per-channel (R, G, B) standard deviation is measured
// over three neighborhoods of increasing size. Each measurement is normalized by the
// corresponding params.m_max_*_std_dev threshold to a 0..1 "activity" value, which is
// then used to blend towards the corresponding params.m_*_max_mse_scale in low-activity
// areas:
//   1. 3x3 window (offsets -1..1), square-root shaped, blends m_smooth_max_mse_scale -> 1.0
//   2. 6x6 window (offsets -3..2), linear, blends m_med_smooth_max_mse_scale -> result of 1
//   3. 10x10 window (offsets -5..4), squared, blends m_ultra_smooth_max_mse_scale -> result of 2
//
// smooth_block_mse_scales: resized to the image dimensions and filled, indexed (x, y).
//     If params.m_no_mse_scaling is set it is NOT resized; its existing elements are
//     set to 1.0 and the function returns immediately (pUltra_smooth_img is untouched).
// orig_img: source image; out-of-range taps use get_clamped().
// params: thresholds/scales; if m_debug_images is set, visualization PNGs are written.
// pUltra_smooth_img: optional; receives the stage-3 activity visualization
//     (0 = smooth, 255 = active).
//
// Fix vs. the previous version: the ultra-smooth visualization is now generated whenever
// pUltra_smooth_img is supplied. Previously it was only populated when m_debug_images was
// set, so callers with debug images disabled received an empty image.
static void create_smooth_maps2(
    vector2D<float>& smooth_block_mse_scales,
    const image& orig_img,
    smooth_map_params& params, image* pUltra_smooth_img = nullptr)
{
    const uint32_t width = orig_img.get_width();
    const uint32_t height = orig_img.get_height();

    if (params.m_no_mse_scaling)
    {
        smooth_block_mse_scales.set_all(1.0f);
        return;
    }

    smooth_block_mse_scales.resize(width, height);

    const bool debug_images = params.m_debug_images;
    const bool want_ultra_vis = debug_images || (pUltra_smooth_img != nullptr);

    image smooth_vis, med_smooth_vis, ultra_smooth_vis;

    if (debug_images)
    {
        smooth_vis.resize(width, height);
        med_smooth_vis.resize(width, height);
    }

    if (want_ultra_vis)
        ultra_smooth_vis.resize(width, height);

    // Returns the largest per-channel (R, G, B) std dev over the window
    // [first_ofs, last_ofs] x [first_ofs, last_ofs] around (cx, cy), with edge clamping.
    auto local_max_std_dev = [&orig_img](uint32_t cx, uint32_t cy, int first_ofs, int last_ofs) -> float
    {
        const uint32_t num_comps = 3;

        tracked_stat_dbl comp_stats[num_comps];

        for (int yd = first_ofs; yd <= last_ofs; yd++)
        {
            for (int xd = first_ofs; xd <= last_ofs; xd++)
            {
                const color_rgba& p = orig_img.get_clamped((int)cx + xd, (int)cy + yd);

                comp_stats[0].update((float)p[0]);
                comp_stats[1].update((float)p[1]);
                comp_stats[2].update((float)p[2]);
            }
        }

        float max_std_dev = 0.0f;
        for (uint32_t i = 0; i < num_comps; i++)
            max_std_dev = basisu::maximum(max_std_dev, (float)comp_stats[i].get_std_dev());

        return max_std_dev;
    };

    for (uint32_t y = 0; y < height; y++)
    {
        for (uint32_t x = 0; x < width; x++)
        {
            float scale;

            // Stage 1: 3x3 neighborhood.
            {
                float yl = clampf(local_max_std_dev(x, y, -1, 1) / params.m_max_smooth_std_dev, 0.0f, 1.0f);

                //yl = powf(yl, 2.0f);
                yl = powf(yl, 1.0f / 2.0f); // substantially less bits

                scale = lerp(params.m_smooth_max_mse_scale, 1.0f, yl);

                if (debug_images)
                {
                    // white=high local activity (edges/detail)
                    // black=low local activity (smooth - error is amplified)
                    smooth_vis(x, y).set(clamp((int)((yl * 255.0f) + .5f), 0, 255));
                }
            }

            // Stage 2: offsets -3..2 (the original loop bound is exclusive at +3).
            {
                const float yl = clampf(local_max_std_dev(x, y, -3, 2) / params.m_max_med_smooth_std_dev, 0.0f, 1.0f);

                scale = lerp(params.m_med_smooth_max_mse_scale, scale, yl);

                if (debug_images)
                    med_smooth_vis(x, y).set((int)std::round(yl * 255.0f));
            }

            // Stage 3: offsets -5..4 (exclusive at +5).
            {
                float yl = clampf(local_max_std_dev(x, y, -5, 4) / params.m_max_ultra_smooth_std_dev, 0.0f, 1.0f);

                yl = powf(yl, 2.0f);

                scale = lerp(params.m_ultra_smooth_max_mse_scale, scale, yl);

                if (want_ultra_vis)
                    ultra_smooth_vis(x, y).set((int)std::round(yl * 255.0f));
            }

            smooth_block_mse_scales(x, y) = scale;
        }
    }

    if (debug_images)
    {
        save_png(" dbg_smooth_vis.png ", smooth_vis);
        save_png(" dbg_med_smooth_vis.png ", med_smooth_vis);
        save_png(" dbg_ultra_smooth_vis.png ", ultra_smooth_vis);

        // Visualize the final scale map, normalized by its maximum.
        image vis_img(width, height);

        float max_scale = 0.0f;
        for (uint32_t y = 0; y < height; y++)
            for (uint32_t x = 0; x < width; x++)
                max_scale = basisu::maximumf(max_scale, smooth_block_mse_scales(x, y));

        for (uint32_t y = 0; y < height; y++)
            for (uint32_t x = 0; x < width; x++)
                vis_img(x, y).set((int)std::round(smooth_block_mse_scales(x, y) * 255.0f / max_scale));

        save_png(" scale_vis.png ", vis_img);
    }

    if (pUltra_smooth_img)
        *pUltra_smooth_img = ultra_smooth_vis;
}
// Tuning constants for the optional "dark adjustment" applied by the ITP error metrics below.
// Upper edge of the smoothstep on the original pixel's I component (ITP[0]); at or above this no boost is applied.
const float REALLY_DARK_I_THRESHOLD = 0.0625f ;
// Multiplier applied to squared ITP error when the original I is at the dark end of the smoothstep (see compute_pixel_mse_itp).
const float REALLY_DARK_MSE_ERR_SCALE = 128.0f ;
// Multiplier applied to delta ITP error when the original I is at the dark end of the smoothstep (see compute_pixel_delta_itp).
const float REALLY_DARK_DELTA_ITP_JND_SCALE = 5.0f ;
static float compute_pixel_mse_itp ( const vec3F & orig_pixel_itp , const vec3F & comp_pixel_itp , bool delta_itp_dark_adjustment )
{
float delta_i = orig_pixel_itp [ 0 ] - comp_pixel_itp [ 0 ] ;
float delta_t = orig_pixel_itp [ 1 ] - comp_pixel_itp [ 1 ] ;
float delta_p = orig_pixel_itp [ 2 ] - comp_pixel_itp [ 2 ] ;
float err = ( delta_i * delta_i ) + ( delta_t * delta_t ) + ( delta_p * delta_p ) ;
if ( delta_itp_dark_adjustment )
{
// We have to process a large range of inputs, including extremely dark inputs.
// Artifically amplify MSE on very dark pixels - otherwise they'll be overly compressed at higher lambdas.
// This is to better handle very dark signals which could be explictly overexposed.
float s = bu_math : : smoothstep ( 0.0f , REALLY_DARK_I_THRESHOLD , orig_pixel_itp [ 0 ] ) ;
s = lerp ( REALLY_DARK_MSE_ERR_SCALE , 1.0f , s ) ;
err * = s ;
}
return err ;
}
// Mean of compute_pixel_mse_itp() over a block_w x block_h block.
// Both pixel arrays are in raster order with a row pitch of block_w.
static float compute_block_mse_itp(uint32_t block_w, uint32_t block_h, const vec3F* pOrig_pixels_itp, const vec3F* pComp_pixels_itp, bool delta_itp_dark_adjustment)
{
    const uint32_t num_pixels = block_w * block_h;

    // The pixels are contiguous, so a single raster-order walk visits them in the
    // same order as a nested y/x loop.
    float err_sum = 0.0f;
    for (uint32_t i = 0; i < num_pixels; i++)
        err_sum += compute_pixel_mse_itp(pOrig_pixels_itp[i], pComp_pixels_itp[i], delta_itp_dark_adjustment);

    return err_sum * (1.0f / (float)(block_w * block_h));
}
static float compute_block_ssim_itp ( uint32_t block_w , uint32_t block_h , const vec3F * pOrig_pixels_itp , const vec3F * pComp_pixels_itp )
{
const uint32_t n = block_w * block_h ;
assert ( n < = 36 ) ;
stats < float > x_stats [ 3 ] , y_stats [ 3 ] ;
comparative_stats < float > xy_cov [ 3 ] ;
for ( uint32_t c = 0 ; c < 3 ; c + + )
{
x_stats [ c ] . calc_simplified ( n , & pOrig_pixels_itp [ 0 ] [ c ] , 3 ) ;
y_stats [ c ] . calc_simplified ( n , & pComp_pixels_itp [ 0 ] [ c ] , 3 ) ;
}
for ( uint32_t c = 0 ; c < 3 ; c + + )
xy_cov [ c ] . calc_cov ( n , & pOrig_pixels_itp [ 0 ] [ c ] , & pComp_pixels_itp [ 0 ] [ c ] , 3 , 3 , & x_stats [ c ] , & y_stats [ c ] ) ;
float ssim [ 3 ] ;
const double d = 1.0f , k1 = .01f , k2 = .03f ;
// weight mean error more highly to reduce blocking
float ap = 1.5f , bp = 1.0f , cp = 1.0f ;
const double s_c1 = square ( k1 * d ) , s_c2 = square ( k2 * d ) ;
const double s_c3 ( s_c2 * .5f ) ;
for ( uint32_t c = 0 ; c < 3 ; c + + )
{
float lum = ( float ) ( ( 2.0f * x_stats [ c ] . m_avg * y_stats [ c ] . m_avg + s_c1 ) / ( square ( x_stats [ c ] . m_avg ) + square ( y_stats [ c ] . m_avg ) + s_c1 ) ) ;
lum = saturate ( lum ) ;
float con = ( float ) ( ( 2.0f * x_stats [ c ] . m_std_dev * y_stats [ c ] . m_std_dev + s_c2 ) / ( x_stats [ c ] . m_var + y_stats [ c ] . m_var + s_c2 ) ) ;
con = saturate ( con ) ;
float str = ( float ) ( ( xy_cov [ c ] . m_cov + s_c3 ) / ( x_stats [ c ] . m_std_dev * y_stats [ c ] . m_std_dev + s_c3 ) ) ;
str = saturate ( str ) ;
ssim [ c ] = powf ( lum , ap ) * powf ( con , bp ) * powf ( str , cp ) ;
}
#if 0
float final_ssim = ( ssim [ 0 ] * .4f + ssim [ 1 ] * .3f + ssim [ 2 ] * .3f ) ;
# elif 1
float final_ssim = ssim [ 0 ] * ssim [ 1 ] * ssim [ 2 ] ;
# else
const float LP = .75f ;
float final_ssim = ssim [ 0 ] * powf ( ( ssim [ 1 ] + ssim [ 2 ] ) * .5f , LP ) ;
# endif
return final_ssim ;
}
// delta ITP, 1.0 is JND (Rec. ITU-R BT.2124), modified for higher error at low light
static float compute_pixel_delta_itp ( const vec3F & a , const vec3F & b , const vec3F & orig , bool delta_itp_dark_adjustment )
{
float delta_i = a [ 0 ] - b [ 0 ] ;
float delta_t = a [ 1 ] - b [ 1 ] ;
float delta_p = a [ 2 ] - b [ 2 ] ;
float err = 720.0f * sqrtf ( ( delta_i * delta_i ) + ( delta_t * delta_t ) + ( delta_p * delta_p ) ) ;
float s = bu_math : : smoothstep ( 0.0f , REALLY_DARK_I_THRESHOLD , orig [ 0 ] ) ;
if ( delta_itp_dark_adjustment )
{
// This is to better handle very dark signals which could be explictly overexposed.
s = lerp ( REALLY_DARK_DELTA_ITP_JND_SCALE , 1.0f , s ) ;
err * = s ;
}
return err ;
}
struct candidate_encoding
{
encoding_type m_encoding_type ;
basist : : half_float m_solid_color [ 3 ] ;
uint32_t m_run_len ;
vec3F m_comp_pixels [ MAX_BLOCK_H ] [ MAX_BLOCK_W ] ; // [y][x]
vec3F m_comp_pixels_itp [ MAX_BLOCK_H ] [ MAX_BLOCK_W ] ; // [y][x]
endpoint_mode m_endpoint_mode ;
block_mode m_block_mode ;
bitwise_coder m_coder ;
// The block to code, which may not be valid ASTC. This may have to be transcoded (by requantizing the weights/endpoints) before it's valid ASTC.
// Note the endpoints may be coded endpoints OR transcoded endpoints, depending on the encoding type.
astc_helpers : : log_astc_block m_coded_log_blk ;
// The block the decoder outputs.
astc_helpers : : log_astc_block m_decomp_log_blk ;
int m_reuse_delta_index ;
float m_t , m_d , m_bits ;
candidate_encoding ( )
{
clear ( ) ;
}
candidate_encoding ( const candidate_encoding & other )
{
* this = other ;
}
candidate_encoding ( candidate_encoding & & other )
{
* this = std : : move ( other ) ;
}
candidate_encoding & operator = ( const candidate_encoding & rhs )
{
if ( this = = & rhs )
return * this ;
m_encoding_type = rhs . m_encoding_type ;
memcpy ( m_solid_color , rhs . m_solid_color , sizeof ( m_solid_color ) ) ;
m_run_len = rhs . m_run_len ;
memcpy ( m_comp_pixels , rhs . m_comp_pixels , sizeof ( m_comp_pixels ) ) ;
m_endpoint_mode = rhs . m_endpoint_mode ;
m_block_mode = rhs . m_block_mode ;
m_coder = rhs . m_coder ;
m_coded_log_blk = rhs . m_coded_log_blk ;
m_decomp_log_blk = rhs . m_decomp_log_blk ;
m_reuse_delta_index = rhs . m_reuse_delta_index ;
return * this ;
}
candidate_encoding & operator = ( candidate_encoding & & rhs )
{
if ( this = = & rhs )
return * this ;
m_encoding_type = rhs . m_encoding_type ;
memcpy ( m_solid_color , rhs . m_solid_color , sizeof ( m_solid_color ) ) ;
m_run_len = rhs . m_run_len ;
memcpy ( m_comp_pixels , rhs . m_comp_pixels , sizeof ( m_comp_pixels ) ) ;
m_endpoint_mode = rhs . m_endpoint_mode ;
m_block_mode = rhs . m_block_mode ;
m_coder = std : : move ( rhs . m_coder ) ;
m_coded_log_blk = rhs . m_coded_log_blk ;
m_decomp_log_blk = rhs . m_decomp_log_blk ;
m_reuse_delta_index = rhs . m_reuse_delta_index ;
return * this ;
}
void clear ( )
{
m_encoding_type = encoding_type : : cInvalid ;
clear_obj ( m_solid_color ) ;
m_run_len = 0 ;
clear_obj ( m_comp_pixels ) ;
m_endpoint_mode = endpoint_mode : : cInvalid ;
m_block_mode = block_mode : : cInvalid ;
m_coder . restart ( ) ;
m_coded_log_blk . clear ( ) ;
m_decomp_log_blk . clear ( ) ;
m_t = 0 ;
m_d = 0 ;
m_bits = 0 ;
m_reuse_delta_index = 0 ;
}
} ;
bool decode_astc_block ( uint32_t block_w , uint32_t block_h , astc_helpers : : log_astc_block & log_blk , vec3F * pPixels )
{
assert ( ( block_w < = 6 ) & & ( block_h < = 6 ) ) ;
half_vec4 decoded_pixels_half4 [ 6 * 6 ] ; // [y][x]
bool status = astc_helpers : : decode_block ( log_blk , decoded_pixels_half4 , block_w , block_h , astc_helpers : : cDecodeModeHDR16 ) ;
assert ( status ) ;
if ( ! status )
return false ;
for ( uint32_t y = 0 ; y < block_h ; y + + )
{
for ( uint32_t x = 0 ; x < block_w ; x + + )
{
pPixels [ x + y * block_w ] . set (
basist : : half_to_float ( decoded_pixels_half4 [ x + y * block_w ] [ 0 ] ) ,
basist : : half_to_float ( decoded_pixels_half4 [ x + y * block_w ] [ 1 ] ) ,
basist : : half_to_float ( decoded_pixels_half4 [ x + y * block_w ] [ 2 ] ) ) ;
} // x
} //y
return true ;
}
static inline bool validate_log_blk ( const astc_helpers : : log_astc_block & decomp_blk )
{
astc_helpers : : astc_block phys_blk ;
return astc_helpers : : pack_astc_block ( phys_blk , decomp_blk ) ;
}
// Debug aid: when non-zero, decode_file() expects a 16-bit 0xDEAD marker before every coded symbol.
# define SYNC_MARKERS (0)
// Decodes a compressed 6x6 HDR stream into physical ASTC blocks.
//
// Stream layout as read here: 16-bit magic 0xABCD, 16-bit width, 16-bit height, then a
// sequence of coded symbols (one per block, or one per run of blocks) in raster order,
// then a 16-bit end marker 0xA742.
//
// comp_data: the compressed stream.
// decoded_blocks: resized to the block grid and filled with packed ASTC blocks.
// width, height: receive the image dimensions from the header (zeroed first).
// Returns false on any malformed, truncated or inconsistent input.
static bool decode_file ( const uint8_vec & comp_data , vector2D < astc_helpers : : astc_block > & decoded_blocks , uint32_t & width , uint32_t & height )
{
interval_timer tm ;
tm . start ( ) ;
const uint32_t BLOCK_W = 6 , BLOCK_H = 6 ;
width = 0 ;
height = 0 ;
// The stream must be larger than the 3 x 16-bit header.
if ( comp_data . size ( ) < = 2 * 3 )
return false ;
basist : : bitwise_decoder decoder ;
if ( ! decoder . init ( comp_data . data ( ) , comp_data . size_u32 ( ) ) )
return false ;
// Header: magic, then dimensions.
if ( decoder . get_bits ( 16 ) ! = 0xABCD )
return false ;
width = decoder . get_bits ( 16 ) ;
height = decoder . get_bits ( 16 ) ;
if ( ! width | | ! height | | ( width > MAX_ASTC_HDR_6X6_DIM ) | | ( height > MAX_ASTC_HDR_6X6_DIM ) )
return false ;
const uint32_t num_blocks_x = ( width + BLOCK_W - 1 ) / BLOCK_W ;
const uint32_t num_blocks_y = ( height + BLOCK_H - 1 ) / BLOCK_H ;
const uint32_t total_blocks = num_blocks_x * num_blocks_y ;
decoded_blocks . resize ( num_blocks_x , num_blocks_y ) ;
//memset(decoded_blocks.get_ptr(), 0, decoded_blocks.size_in_bytes());
// Logical blocks as coded in the stream (before transcoding to valid ASTC). They are kept
// alongside the physical blocks because later blocks may reference earlier ones.
vector2D < astc_helpers : : log_astc_block > decoded_log_blocks ( num_blocks_x , num_blocks_y ) ;
//memset(decoded_log_blocks.get_ptr(), 0, decoded_log_blocks.size_in_bytes());
// Current block position; advanced in raster order by every case below.
uint32_t cur_bx = 0 , cur_by = 0 ;
uint32_t step_counter = 0 ;
BASISU_NOTE_UNUSED ( step_counter ) ;
while ( cur_by < num_blocks_y )
{
step_counter + + ;
//if ((cur_bx == 9) && (cur_by == 13))
// printf("!");
# if SYNC_MARKERS
uint32_t mk = decoder . get_bits ( 16 ) ;
if ( mk ! = 0xDEAD )
{
printf ( " ! " ) ;
assert ( 0 ) ;
return false ;
}
# endif
if ( decoder . get_bits_remaining ( ) < 1 )
return false ;
// Symbol type prefix code (bits in read order): 1 = cBlock, 01 = cReuse, 001 = cSolid, 000 = cRun.
encoding_type et = encoding_type : : cBlock ;
uint32_t b0 = decoder . get_bits ( 1 ) ;
if ( ! b0 )
{
uint32_t b1 = decoder . get_bits ( 1 ) ;
if ( b1 )
et = encoding_type : : cReuse ;
else
{
uint32_t b2 = decoder . get_bits ( 1 ) ;
if ( b2 )
et = encoding_type : : cSolid ;
else
et = encoding_type : : cRun ;
}
}
switch ( et )
{
// Run: repeat the previous block (in raster order) run_len times.
case encoding_type : : cRun :
{
// There is no previous block at the very first position.
if ( ! cur_bx & & ! cur_by )
return false ;
const uint32_t run_len = decoder . decode_vlc ( 5 ) + 1 ;
uint32_t num_blocks_remaining = total_blocks - ( cur_bx + cur_by * num_blocks_x ) ;
if ( run_len > num_blocks_remaining )
return false ;
// Step back one block, wrapping to the end of the previous row if needed.
uint32_t prev_bx = cur_bx , prev_by = cur_by ;
if ( cur_bx )
prev_bx - - ;
else
{
prev_bx = num_blocks_x - 1 ;
prev_by - - ;
}
const astc_helpers : : log_astc_block & prev_log_blk = decoded_log_blocks ( prev_bx , prev_by ) ;
const astc_helpers : : astc_block & prev_phys_blk = decoded_blocks ( prev_bx , prev_by ) ;
for ( uint32_t i = 0 ; i < run_len ; i + + )
{
decoded_log_blocks ( cur_bx , cur_by ) = prev_log_blk ;
decoded_blocks ( cur_bx , cur_by ) = prev_phys_blk ;
cur_bx + + ;
if ( cur_bx = = num_blocks_x )
{
cur_bx = 0 ;
cur_by + + ;
}
}
break ;
}
// Solid: three 15-bit values stored as half-float R, G, B; alpha is set to half(1.0).
case encoding_type : : cSolid :
{
const basist : : half_float rh = ( basist : : half_float ) decoder . get_bits ( 15 ) ;
const basist : : half_float gh = ( basist : : half_float ) decoder . get_bits ( 15 ) ;
const basist : : half_float bh = ( basist : : half_float ) decoder . get_bits ( 15 ) ;
astc_helpers : : log_astc_block & log_blk = decoded_log_blocks ( cur_bx , cur_by ) ;
log_blk . clear ( ) ;
log_blk . m_solid_color_flag_hdr = true ;
log_blk . m_solid_color [ 0 ] = rh ;
log_blk . m_solid_color [ 1 ] = gh ;
log_blk . m_solid_color [ 2 ] = bh ;
log_blk . m_solid_color [ 3 ] = basist : : float_to_half ( 1.0f ) ;
bool status = astc_helpers : : pack_astc_block ( decoded_blocks ( cur_bx , cur_by ) , log_blk ) ;
if ( ! status )
return false ;
cur_bx + + ;
if ( cur_bx = = num_blocks_x )
{
cur_bx = 0 ;
cur_by + + ;
}
break ;
}
// Reuse: take the configuration and endpoints of an earlier block (selected by an XY delta
// table index) and decode only a new set of weights.
case encoding_type : : cReuse :
{
if ( ! cur_bx & & ! cur_by )
return false ;
const uint32_t reuse_delta_index = decoder . get_bits ( REUSE_XY_DELTA_BITS ) ;
const int reuse_delta_x = g_reuse_xy_deltas [ reuse_delta_index ] . m_x ;
const int reuse_delta_y = g_reuse_xy_deltas [ reuse_delta_index ] . m_y ;
const int prev_bx = cur_bx + reuse_delta_x , prev_by = cur_by + reuse_delta_y ;
// The referenced block must lie inside the row horizontally and not above the first row.
if ( ( prev_bx < 0 ) | | ( prev_bx > = ( int ) num_blocks_x ) )
return false ;
if ( prev_by < 0 )
return false ;
const astc_helpers : : log_astc_block & prev_log_blk = decoded_log_blocks ( prev_bx , prev_by ) ;
const astc_helpers : : astc_block & prev_phys_blk = decoded_blocks ( prev_bx , prev_by ) ;
// Solid blocks have no weight grid to replace.
if ( prev_log_blk . m_solid_color_flag_hdr )
return false ;
astc_helpers : : log_astc_block & log_blk = decoded_log_blocks ( cur_bx , cur_by ) ;
astc_helpers : : astc_block & phys_blk = decoded_blocks ( cur_bx , cur_by ) ;
log_blk = prev_log_blk ;
// New weights are coded at the referenced block's (coded) weight ISE range.
const uint32_t total_grid_weights = log_blk . m_grid_width * log_blk . m_grid_height * ( log_blk . m_dual_plane ? 2 : 1 ) ;
bool status = basist : : astc_6x6_hdr : : decode_values ( decoder , total_grid_weights , log_blk . m_weight_ise_range , log_blk . m_weights ) ;
if ( ! status )
return false ;
// Build the output block from the referenced block's physical form, swapping in the new
// weights requantized to that block's weight ISE range.
astc_helpers : : log_astc_block decomp_blk ;
status = astc_helpers : : unpack_block ( & prev_phys_blk , decomp_blk , BLOCK_W , BLOCK_H ) ;
if ( ! status )
return false ;
uint8_t transcode_weights [ MAX_BLOCK_W * MAX_BLOCK_H * 2 ] ;
basist : : astc_6x6_hdr : : requantize_astc_weights ( total_grid_weights , log_blk . m_weights , log_blk . m_weight_ise_range , transcode_weights , decomp_blk . m_weight_ise_range ) ;
copy_weight_grid ( log_blk . m_dual_plane , log_blk . m_grid_width , log_blk . m_grid_height , transcode_weights , decomp_blk ) ;
status = astc_helpers : : pack_astc_block ( phys_blk , decomp_blk ) ;
if ( ! status )
return false ;
cur_bx + + ;
if ( cur_bx = = num_blocks_x )
{
cur_bx = 0 ;
cur_by + + ;
}
break ;
}
// Block: a block mode index plus an endpoint mode that says where the endpoints come from.
case encoding_type : : cBlock :
{
const block_mode bm = ( block_mode ) decoder . decode_truncated_binary ( ( uint32_t ) block_mode : : cBMTotalModes ) ;
const endpoint_mode em = ( endpoint_mode ) decoder . decode_truncated_binary ( ( uint32_t ) endpoint_mode : : cTotal ) ;
switch ( em )
{
// Endpoints are copied unchanged from the left or upper neighbor; only weights are coded.
case endpoint_mode : : cUseLeft :
case endpoint_mode : : cUseUpper :
{
int neighbor_bx = cur_bx , neighbor_by = cur_by ;
if ( em = = endpoint_mode : : cUseLeft )
neighbor_bx - - ;
else
neighbor_by - - ;
if ( ( neighbor_bx < 0 ) | | ( neighbor_by < 0 ) )
return false ;
const astc_helpers : : log_astc_block & neighbor_blk = decoded_log_blocks ( neighbor_bx , neighbor_by ) ;
// A zero CEM is rejected (presumably this excludes solid neighbors, whose log block is cleared - confirm).
if ( ! neighbor_blk . m_color_endpoint_modes [ 0 ] )
return false ;
const block_mode_desc & bmd = g_block_mode_descs [ ( uint32_t ) bm ] ;
const uint32_t num_endpoint_values = get_num_endpoint_vals ( bmd . m_cem ) ;
// The neighbor's color endpoint mode must match this block mode's.
if ( bmd . m_cem ! = neighbor_blk . m_color_endpoint_modes [ 0 ] )
return false ;
astc_helpers : : log_astc_block & log_blk = decoded_log_blocks ( cur_bx , cur_by ) ;
astc_helpers : : astc_block & phys_blk = decoded_blocks ( cur_bx , cur_by ) ;
log_blk . clear ( ) ;
log_blk . m_num_partitions = 1 ;
log_blk . m_color_endpoint_modes [ 0 ] = ( uint8_t ) bmd . m_cem ;
// Endpoints stay in the neighbor's ISE range since they are copied verbatim.
log_blk . m_endpoint_ise_range = neighbor_blk . m_endpoint_ise_range ;
log_blk . m_weight_ise_range = ( uint8_t ) bmd . m_weight_ise_range ;
log_blk . m_grid_width = ( uint8_t ) bmd . m_grid_x ;
log_blk . m_grid_height = ( uint8_t ) bmd . m_grid_y ;
log_blk . m_dual_plane = ( uint8_t ) bmd . m_dp ;
log_blk . m_color_component_selector = ( uint8_t ) bmd . m_dp_channel ;
memcpy ( log_blk . m_endpoints , neighbor_blk . m_endpoints , num_endpoint_values ) ;
const uint32_t total_grid_weights = bmd . m_grid_x * bmd . m_grid_y * ( bmd . m_dp ? 2 : 1 ) ;
bool status = decode_values ( decoder , total_grid_weights , bmd . m_weight_ise_range , log_blk . m_weights ) ;
if ( ! status )
return false ;
// Transcode: requantize endpoints and weights to the block mode's transcode ranges and pack.
astc_helpers : : log_astc_block decomp_blk ;
decomp_blk . clear ( ) ;
decomp_blk . m_num_partitions = 1 ;
decomp_blk . m_color_endpoint_modes [ 0 ] = ( uint8_t ) bmd . m_cem ;
decomp_blk . m_endpoint_ise_range = ( uint8_t ) bmd . m_transcode_endpoint_ise_range ;
decomp_blk . m_weight_ise_range = ( uint8_t ) bmd . m_transcode_weight_ise_range ;
decomp_blk . m_dual_plane = bmd . m_dp ;
decomp_blk . m_color_component_selector = ( uint8_t ) bmd . m_dp_channel ;
basist : : astc_6x6_hdr : : requantize_ise_endpoints ( bmd . m_cem , log_blk . m_endpoint_ise_range , log_blk . m_endpoints , bmd . m_transcode_endpoint_ise_range , decomp_blk . m_endpoints ) ;
uint8_t transcode_weights [ MAX_BLOCK_W * MAX_BLOCK_H * 2 ] ;
basist : : astc_6x6_hdr : : requantize_astc_weights ( total_grid_weights , log_blk . m_weights , bmd . m_weight_ise_range , transcode_weights , bmd . m_transcode_weight_ise_range ) ;
copy_weight_grid ( bmd . m_dp , bmd . m_grid_x , bmd . m_grid_y , transcode_weights , decomp_blk ) ;
status = astc_helpers : : pack_astc_block ( phys_blk , decomp_blk ) ;
if ( ! status )
return false ;
cur_bx + + ;
if ( cur_bx = = num_blocks_x )
{
cur_bx = 0 ;
cur_by + + ;
}
break ;
}
// Endpoints are the left or upper neighbor's endpoints plus a small coded delta per value.
case endpoint_mode : : cUseLeftDelta :
case endpoint_mode : : cUseUpperDelta :
{
int neighbor_bx = cur_bx , neighbor_by = cur_by ;
if ( em = = endpoint_mode : : cUseLeftDelta )
neighbor_bx - - ;
else
neighbor_by - - ;
if ( ( neighbor_bx < 0 ) | | ( neighbor_by < 0 ) )
return false ;
const astc_helpers : : log_astc_block & neighbor_blk = decoded_log_blocks ( neighbor_bx , neighbor_by ) ;
if ( ! neighbor_blk . m_color_endpoint_modes [ 0 ] )
return false ;
const block_mode_desc & bmd = g_block_mode_descs [ ( uint32_t ) bm ] ;
const uint32_t num_endpoint_values = get_num_endpoint_vals ( bmd . m_cem ) ;
if ( bmd . m_cem ! = neighbor_blk . m_color_endpoint_modes [ 0 ] )
return false ;
astc_helpers : : log_astc_block & log_blk = decoded_log_blocks ( cur_bx , cur_by ) ;
astc_helpers : : astc_block & phys_blk = decoded_blocks ( cur_bx , cur_by ) ;
log_blk . clear ( ) ;
log_blk . m_num_partitions = 1 ;
log_blk . m_color_endpoint_modes [ 0 ] = ( uint8_t ) bmd . m_cem ;
log_blk . m_dual_plane = bmd . m_dp ;
log_blk . m_color_component_selector = ( uint8_t ) bmd . m_dp_channel ;
log_blk . m_endpoint_ise_range = ( uint8_t ) bmd . m_endpoint_ise_range ;
// Bring the neighbor's endpoints into this block mode's endpoint ISE range before applying deltas.
basist : : astc_6x6_hdr : : requantize_ise_endpoints ( bmd . m_cem , neighbor_blk . m_endpoint_ise_range , neighbor_blk . m_endpoints , bmd . m_endpoint_ise_range , log_blk . m_endpoints ) ;
// Deltas are stored biased: the coded value plus low_delta_limit gives the signed delta.
const int total_endpoint_delta_vals = 1 < < NUM_ENDPOINT_DELTA_BITS ;
const int low_delta_limit = - ( total_endpoint_delta_vals / 2 ) ; // high_delta_limit = (total_endpoint_delta_vals / 2) - 1;
const auto & ise_to_rank = astc_helpers : : g_dequant_tables . get_endpoint_tab ( log_blk . m_endpoint_ise_range ) . m_ISE_to_rank ;
const auto & rank_to_ise = astc_helpers : : g_dequant_tables . get_endpoint_tab ( log_blk . m_endpoint_ise_range ) . m_rank_to_ISE ;
const int total_endpoint_levels = astc_helpers : : get_ise_levels ( log_blk . m_endpoint_ise_range ) ;
// The delta is applied in rank space (ISE -> rank, add, rank -> ISE); out-of-range results are an error.
for ( uint32_t i = 0 ; i < num_endpoint_values ; i + + )
{
int cur_val = ise_to_rank [ log_blk . m_endpoints [ i ] ] ;
int delta = ( int ) decoder . get_bits ( NUM_ENDPOINT_DELTA_BITS ) + low_delta_limit ;
cur_val + = delta ;
if ( ( cur_val < 0 ) | | ( cur_val > = total_endpoint_levels ) )
return false ;
log_blk . m_endpoints [ i ] = rank_to_ise [ cur_val ] ;
}
log_blk . m_weight_ise_range = ( uint8_t ) bmd . m_weight_ise_range ;
log_blk . m_grid_width = ( uint8_t ) bmd . m_grid_x ;
log_blk . m_grid_height = ( uint8_t ) bmd . m_grid_y ;
const uint32_t total_grid_weights = bmd . m_grid_x * bmd . m_grid_y * ( bmd . m_dp ? 2 : 1 ) ;
bool status = decode_values ( decoder , total_grid_weights , bmd . m_weight_ise_range , log_blk . m_weights ) ;
if ( ! status )
return false ;
// Transcode to the block mode's transcode ranges and pack.
astc_helpers : : log_astc_block decomp_blk ;
decomp_blk . clear ( ) ;
decomp_blk . m_num_partitions = 1 ;
decomp_blk . m_color_endpoint_modes [ 0 ] = ( uint8_t ) bmd . m_cem ;
decomp_blk . m_endpoint_ise_range = ( uint8_t ) bmd . m_transcode_endpoint_ise_range ;
decomp_blk . m_weight_ise_range = ( uint8_t ) bmd . m_transcode_weight_ise_range ;
decomp_blk . m_dual_plane = ( uint8_t ) bmd . m_dp ;
decomp_blk . m_color_component_selector = ( uint8_t ) bmd . m_dp_channel ;
basist : : astc_6x6_hdr : : requantize_ise_endpoints ( bmd . m_cem , log_blk . m_endpoint_ise_range , log_blk . m_endpoints , bmd . m_transcode_endpoint_ise_range , decomp_blk . m_endpoints ) ;
uint8_t transcode_weights [ MAX_BLOCK_W * MAX_BLOCK_H * 2 ] ;
basist : : astc_6x6_hdr : : requantize_astc_weights ( total_grid_weights , log_blk . m_weights , bmd . m_weight_ise_range , transcode_weights , bmd . m_transcode_weight_ise_range ) ;
copy_weight_grid ( bmd . m_dp , bmd . m_grid_x , bmd . m_grid_y , transcode_weights , decomp_blk ) ;
status = astc_helpers : : pack_astc_block ( phys_blk , decomp_blk ) ;
if ( ! status )
return false ;
cur_bx + + ;
if ( cur_bx = = num_blocks_x )
{
cur_bx = 0 ;
cur_by + + ;
}
break ;
}
// Raw: partition index (for 2/3 partition modes), endpoints and weights are all coded explicitly.
case endpoint_mode : : cRaw :
{
const block_mode_desc & bmd = g_block_mode_descs [ ( uint32_t ) bm ] ;
const uint32_t num_endpoint_values = get_num_endpoint_vals ( bmd . m_cem ) ;
astc_helpers : : log_astc_block & log_blk = decoded_log_blocks ( cur_bx , cur_by ) ;
astc_helpers : : astc_block & phys_blk = decoded_blocks ( cur_bx , cur_by ) ;
log_blk . clear ( ) ;
log_blk . m_num_partitions = ( uint8_t ) bmd . m_num_partitions ;
for ( uint32_t p = 0 ; p < bmd . m_num_partitions ; p + + )
log_blk . m_color_endpoint_modes [ p ] = ( uint8_t ) bmd . m_cem ;
log_blk . m_endpoint_ise_range = ( uint8_t ) bmd . m_endpoint_ise_range ;
log_blk . m_weight_ise_range = ( uint8_t ) bmd . m_weight_ise_range ;
log_blk . m_grid_width = ( uint8_t ) bmd . m_grid_x ;
log_blk . m_grid_height = ( uint8_t ) bmd . m_grid_y ;
log_blk . m_dual_plane = ( uint8_t ) bmd . m_dp ;
log_blk . m_color_component_selector = ( uint8_t ) bmd . m_dp_channel ;
// The stream stores an index into the table of unique partition patterns; map it to the partition seed.
if ( bmd . m_num_partitions = = 2 )
{
const uint32_t unique_partition_index = decoder . decode_truncated_binary ( NUM_UNIQUE_PARTITIONS2 ) ;
log_blk . m_partition_id = ( uint16_t ) g_part2_unique_index_to_seed [ unique_partition_index ] ;
}
else if ( bmd . m_num_partitions = = 3 )
{
const uint32_t unique_partition_index = decoder . decode_truncated_binary ( NUM_UNIQUE_PARTITIONS3 ) ;
log_blk . m_partition_id = ( uint16_t ) g_part3_unique_index_to_seed [ unique_partition_index ] ;
}
// Endpoints for all partitions first, then the weight grid.
bool status = decode_values ( decoder , num_endpoint_values * bmd . m_num_partitions , bmd . m_endpoint_ise_range , log_blk . m_endpoints ) ;
if ( ! status )
return false ;
const uint32_t total_grid_weights = bmd . m_grid_x * bmd . m_grid_y * ( bmd . m_dp ? 2 : 1 ) ;
status = decode_values ( decoder , total_grid_weights , bmd . m_weight_ise_range , log_blk . m_weights ) ;
if ( ! status )
return false ;
// Transcode to the block mode's transcode ranges (endpoints per partition) and pack.
astc_helpers : : log_astc_block decomp_blk ;
decomp_blk . clear ( ) ;
decomp_blk . m_dual_plane = bmd . m_dp ;
decomp_blk . m_color_component_selector = ( uint8_t ) bmd . m_dp_channel ;
decomp_blk . m_partition_id = log_blk . m_partition_id ;
decomp_blk . m_num_partitions = ( uint8_t ) bmd . m_num_partitions ;
for ( uint32_t p = 0 ; p < bmd . m_num_partitions ; p + + )
decomp_blk . m_color_endpoint_modes [ p ] = ( uint8_t ) bmd . m_cem ;
decomp_blk . m_endpoint_ise_range = ( uint8_t ) bmd . m_transcode_endpoint_ise_range ;
decomp_blk . m_weight_ise_range = ( uint8_t ) bmd . m_transcode_weight_ise_range ;
for ( uint32_t p = 0 ; p < bmd . m_num_partitions ; p + + )
basist : : astc_6x6_hdr : : requantize_ise_endpoints ( bmd . m_cem , bmd . m_endpoint_ise_range , log_blk . m_endpoints + num_endpoint_values * p , bmd . m_transcode_endpoint_ise_range , decomp_blk . m_endpoints + num_endpoint_values * p ) ;
uint8_t transcode_weights [ MAX_BLOCK_W * MAX_BLOCK_H * 2 ] ;
basist : : astc_6x6_hdr : : requantize_astc_weights ( total_grid_weights , log_blk . m_weights , bmd . m_weight_ise_range , transcode_weights , bmd . m_transcode_weight_ise_range ) ;
copy_weight_grid ( bmd . m_dp , bmd . m_grid_x , bmd . m_grid_y , transcode_weights , decomp_blk ) ;
status = astc_helpers : : pack_astc_block ( phys_blk , decomp_blk ) ;
if ( ! status )
return false ;
cur_bx + + ;
if ( cur_bx = = num_blocks_x )
{
cur_bx = 0 ;
cur_by + + ;
}
break ;
}
default :
{
assert ( 0 ) ;
return false ;
}
}
break ;
}
default :
{
assert ( 0 ) ;
return false ;
}
}
}
// All blocks decoded; the stream must finish with the end marker.
if ( decoder . get_bits ( 16 ) ! = 0xA742 )
{
fmt_error_printf ( " End marker not found! \n " ) ;
return false ;
}
//fmt_printf("Total decode_file() time: {} secs\n", tm.get_elapsed_secs());
return true ;
}
static bool unpack_physical_astc_block ( const void * pBlock , uint32_t block_width , uint32_t block_height , vec4F * pPixels )
{
astc_helpers : : log_astc_block log_blk ;
if ( ! astc_helpers : : unpack_block ( pBlock , log_blk , block_width , block_height ) )
return false ;
basist : : half_float half_block [ MAX_BLOCK_W * MAX_BLOCK_H ] [ 4 ] ;
if ( ! astc_helpers : : decode_block ( log_blk , half_block , block_width , block_height , astc_helpers : : cDecodeModeHDR16 ) )
return false ;
const uint32_t total_block_pixels = block_width * block_height ;
for ( uint32_t p = 0 ; p < total_block_pixels ; p + + )
{
pPixels [ p ] [ 0 ] = basist : : half_to_float ( half_block [ p ] [ 0 ] ) ;
pPixels [ p ] [ 1 ] = basist : : half_to_float ( half_block [ p ] [ 1 ] ) ;
pPixels [ p ] [ 2 ] = basist : : half_to_float ( half_block [ p ] [ 2 ] ) ;
pPixels [ p ] [ 3 ] = basist : : half_to_float ( half_block [ p ] [ 3 ] ) ;
}
return true ;
}
static bool unpack_physical_astc_block_google ( const void * pBlock , uint32_t block_width , uint32_t block_height , vec4F * pPixels )
{
return basisu_astc : : astc : : decompress_hdr ( ( float * ) pPixels , ( uint8_t * ) pBlock , block_width , block_height ) ;
}
static bool pack_bc6h_image ( const imagef & src_img , vector2D < basist : : bc6h_block > & bc6h_blocks , imagef * pPacked_bc6h_img , const fast_bc6h_params & enc_params )
{
const uint32_t width = src_img . get_width ( ) ;
const uint32_t height = src_img . get_height ( ) ;
if ( pPacked_bc6h_img )
pPacked_bc6h_img - > resize ( width , height ) ;
interval_timer tm ;
double total_enc_time = 0.0f ;
2025-10-09 12:28:20 +02:00
BASISU_NOTE_UNUSED ( total_enc_time ) ;
2025-03-11 10:51:10 +01:00
const uint32_t num_blocks_x = src_img . get_block_width ( 4 ) ;
const uint32_t num_blocks_y = src_img . get_block_height ( 4 ) ;
bc6h_blocks . resize ( num_blocks_x , num_blocks_y ) ;
for ( uint32_t by = 0 ; by < num_blocks_y ; by + + )
{
for ( uint32_t bx = 0 ; bx < num_blocks_x ; bx + + )
{
// Extract source image block
vec4F block_pixels [ 4 ] [ 4 ] ; // [y][x]
src_img . extract_block_clamped ( & block_pixels [ 0 ] [ 0 ] , bx * 4 , by * 4 , 4 , 4 ) ;
basist : : half_float half_pixels [ 16 * 3 ] ; // [y][x]
for ( uint32_t y = 0 ; y < 4 ; y + + )
{
for ( uint32_t x = 0 ; x < 4 ; x + + )
{
for ( uint32_t c = 0 ; c < 3 ; c + + )
{
float v = block_pixels [ y ] [ x ] [ c ] ;
basist : : half_float h = basist : : float_to_half ( v ) ;
half_pixels [ ( x + y * 4 ) * 3 + c ] = h ;
} // c
} // x
} // y
basist : : bc6h_block & bc6h_blk = bc6h_blocks ( bx , by ) ;
tm . start ( ) ;
basist : : astc_6x6_hdr : : fast_encode_bc6h ( half_pixels , & bc6h_blk , enc_params ) ;
total_enc_time + = tm . get_elapsed_secs ( ) ;
if ( pPacked_bc6h_img )
{
basist : : half_float unpacked_blk [ 16 * 3 ] ;
bool status = unpack_bc6h ( & bc6h_blk , unpacked_blk , false ) ;
assert ( status ) ;
if ( ! status )
{
fmt_error_printf ( " unpack_bc6h() failed \n " ) ;
return false ;
}
for ( uint32_t y = 0 ; y < 4 ; y + + )
{
for ( uint32_t x = 0 ; x < 4 ; x + + )
{
vec4F p ;
for ( uint32_t c = 0 ; c < 3 ; c + + )
{
float v = basist : : half_to_float ( unpacked_blk [ ( x + y * 4 ) * 3 + c ] ) ;
p [ c ] = v ;
} // c
p [ 3 ] = 1.0f ;
pPacked_bc6h_img - > set_clipped ( bx * 4 + x , by * 4 + y , p ) ;
} // x
} // y
}
} // bx
} // by
//fmt_printf("Total BC6H encode time: {}\n", total_enc_time);
return true ;
}
static float dist_to_line_squared ( const vec3F & p , const vec3F & line_org , const vec3F & line_dir )
{
vec3F q ( p - line_org ) ;
vec3F v ( q - q . dot ( line_dir ) * line_dir ) ;
return v . dot ( v ) ;
}
static void estimate_partitions_mode7_and_11 (
uint32_t num_parts , // 2 or 3 partitions
uint32_t num_unique_pats , const partition_pattern_vec * pUnique_pats , // list of all unique, canonicalized patterns
uint32_t num_pats_to_examine , const uint32_t * pUnique_pat_indices_to_examine , // indices of pats to examine
const vec3F * pHalf_pixels_as_floats , // block's half pixel values casted to floats
const astc_hdr_codec_base_options & coptions , // options
uint32_t num_desired_pats ,
int * pDesired_pat_indices_mode11 , int * pDesired_pat_indices_mode7 ) // output indices
{
BASISU_NOTE_UNUSED ( coptions ) ;
BASISU_NOTE_UNUSED ( num_unique_pats ) ;
const uint32_t BLOCK_W = 6 , BLOCK_H = 6 , MAX_PARTS = 3 ; // BLOCK_T = 6 * 6
assert ( num_parts < = MAX_PARTS ) ;
struct candidate_res
{
float m_total_sq_dist ;
uint32_t m_index ;
bool operator < ( const candidate_res & rhs ) const { return m_total_sq_dist < rhs . m_total_sq_dist ; }
} ;
const uint32_t MAX_CANDIDATES = 1024 ;
assert ( num_desired_pats & & ( num_desired_pats < = MAX_CANDIDATES ) ) ;
candidate_res mode11_candidates [ MAX_CANDIDATES ] ;
candidate_res mode7_candidates [ MAX_CANDIDATES ] ;
const vec3F grayscale_axis ( 0.5773502691f ) ;
for ( uint32_t examine_iter = 0 ; examine_iter < num_pats_to_examine ; examine_iter + + )
{
const uint32_t unique_part_index = pUnique_pat_indices_to_examine [ examine_iter ] ;
assert ( unique_part_index < num_unique_pats ) ;
const partition_pattern_vec * pPat = & pUnique_pats [ unique_part_index ] ;
vec3F part_means [ MAX_PARTS ] ;
uint32_t part_total_texels [ MAX_PARTS ] = { 0 } ;
for ( uint32_t i = 0 ; i < num_parts ; i + + )
part_means [ i ] . clear ( ) ;
for ( uint32_t y = 0 ; y < BLOCK_H ; y + + )
{
for ( uint32_t x = 0 ; x < BLOCK_W ; x + + )
{
const uint32_t part_index = ( * pPat ) ( x , y ) ;
assert ( part_index < num_parts ) ;
part_means [ part_index ] + = pHalf_pixels_as_floats [ x + y * BLOCK_W ] ;
part_total_texels [ part_index ] + + ;
} // x
} // y
for ( uint32_t i = 0 ; i < num_parts ; i + + )
{
assert ( part_total_texels [ i ] ) ;
part_means [ i ] / = ( float ) part_total_texels [ i ] ;
}
float part_cov [ MAX_PARTS ] [ 6 ] ;
memset ( part_cov , 0 , sizeof ( part_cov ) ) ;
for ( uint32_t y = 0 ; y < BLOCK_H ; y + + )
{
for ( uint32_t x = 0 ; x < BLOCK_W ; x + + )
{
const uint32_t part_index = ( * pPat ) ( x , y ) ;
assert ( part_index < num_parts ) ;
const vec3F p ( pHalf_pixels_as_floats [ x + y * BLOCK_W ] - part_means [ part_index ] ) ;
const float r = p [ 0 ] , g = p [ 1 ] , b = p [ 2 ] ;
part_cov [ part_index ] [ 0 ] + = r * r ;
part_cov [ part_index ] [ 1 ] + = r * g ;
part_cov [ part_index ] [ 2 ] + = r * b ;
part_cov [ part_index ] [ 3 ] + = g * g ;
part_cov [ part_index ] [ 4 ] + = g * b ;
part_cov [ part_index ] [ 5 ] + = b * b ;
} // x
} // y
// For each partition compute the total variance of all channels.
float total_variance [ MAX_PARTS ] ;
for ( uint32_t part_index = 0 ; part_index < num_parts ; part_index + + )
total_variance [ part_index ] = part_cov [ part_index ] [ 0 ] + part_cov [ part_index ] [ 3 ] + part_cov [ part_index ] [ 5 ] ;
vec3F part_axis [ MAX_PARTS ] ;
float mode11_eigenvalue_est [ MAX_PARTS ] ; // For each partition, compute the variance along the principle axis
float mode7_eigenvalue_est [ MAX_PARTS ] ; // For each partition, compute the variance along the principle axis
for ( uint32_t part_index = 0 ; part_index < num_parts ; part_index + + )
{
float * pCov = & part_cov [ part_index ] [ 0 ] ;
float xr = .9f , xg = 1.0f , xb = .7f ;
const uint32_t NUM_POWER_ITERS = 4 ;
for ( uint32_t iter = 0 ; iter < NUM_POWER_ITERS ; iter + + )
{
float r = xr * pCov [ 0 ] + xg * pCov [ 1 ] + xb * pCov [ 2 ] ;
float g = xr * pCov [ 1 ] + xg * pCov [ 3 ] + xb * pCov [ 4 ] ;
float b = xr * pCov [ 2 ] + xg * pCov [ 4 ] + xb * pCov [ 5 ] ;
float m = maximumf ( maximumf ( fabsf ( r ) , fabsf ( g ) ) , fabsf ( b ) ) ;
if ( m > = 1e-10 f )
{
m = 1.0f / m ;
r * = m ;
g * = m ;
b * = m ;
}
xr = r ;
xg = g ;
xb = b ;
}
float len_sq = xr * xr + xg * xg + xb * xb ;
if ( len_sq < 1e-10 f )
{
xr = grayscale_axis [ 0 ] ;
xg = grayscale_axis [ 0 ] ;
xb = grayscale_axis [ 0 ] ;
}
else
{
len_sq = 1.0f / sqrtf ( len_sq ) ;
xr * = len_sq ;
xg * = len_sq ;
xb * = len_sq ;
}
{
// Transform the principle axis by the covariance matrix, which will scale the vector by its eigenvalue (the variance of the dataset projected onto the principle axis).
float r = xr * pCov [ 0 ] + xg * pCov [ 1 ] + xb * pCov [ 2 ] ;
float g = xr * pCov [ 1 ] + xg * pCov [ 3 ] + xb * pCov [ 4 ] ;
float b = xr * pCov [ 2 ] + xg * pCov [ 4 ] + xb * pCov [ 5 ] ;
// Estimate the principle eigenvalue by computing the magnitude of the transformed vector.
// The result is the variance along the principle axis.
//float z1 = sqrtf(r * r + g * g + b * b); // this works with the principle axis
//float z2 = r * xr + g * xg + b * xb; // compute length projected along xr,xg,xb
mode11_eigenvalue_est [ part_index ] = r * xr + g * xg + b * xb ;
}
{
const float yrgb = grayscale_axis [ 0 ] ;
// Transform the grayscale axis by the covariance matrix, which will scale the vector by the eigenvalue (which is the variance of the dataset projected onto this vector).
float r = yrgb * pCov [ 0 ] + yrgb * pCov [ 1 ] + yrgb * pCov [ 2 ] ;
float g = yrgb * pCov [ 1 ] + yrgb * pCov [ 3 ] + yrgb * pCov [ 4 ] ;
float b = yrgb * pCov [ 2 ] + yrgb * pCov [ 4 ] + yrgb * pCov [ 5 ] ;
mode7_eigenvalue_est [ part_index ] = r * yrgb + g * yrgb + b * yrgb ;
}
} // part_index
// Compute the total variance (squared error) of the other 2 axes by subtracting the total variance of all channels by the variance of the principle axis.
// TODO: Could also compute the ratio of the principle axis's variance vs. the total variance.
float mode11_total_sq_dist_to_line_alt = 0.0f ;
for ( uint32_t part_index = 0 ; part_index < num_parts ; part_index + + )
{
float d = maximum ( 0.0f , total_variance [ part_index ] - mode11_eigenvalue_est [ part_index ] ) ;
mode11_total_sq_dist_to_line_alt + = d ;
}
{
#if 0
// TODO: This total distance can be computed rapidly. First compute the total variance of each channel (sum the diag entries of the covar matrix),
// then compute the principle eigenvalue, and subtract. The result is the variance of the projection distances.
float total_sq_dist_to_line = 0.0f ;
for ( uint32_t i = 0 ; i < BLOCK_T ; i + + )
{
const uint32_t part_index = ( * pPat ) [ i ] ;
assert ( part_index < num_parts ) ;
total_sq_dist_to_line + = dist_to_line_squared ( pHalf_pixels_as_floats [ i ] , part_means [ part_index ] , part_axis [ part_index ] ) ;
}
mode11_candidates [ examine_iter ] . m_total_sq_dist = total_sq_dist_to_line ;
# else
mode11_candidates [ examine_iter ] . m_total_sq_dist = mode11_total_sq_dist_to_line_alt ;
# endif
mode11_candidates [ examine_iter ] . m_index = unique_part_index ;
}
{
float mode7_total_sq_dist_to_line_alt = 0.0f ;
for ( uint32_t part_index = 0 ; part_index < num_parts ; part_index + + )
{
float d = maximum ( 0.0f , total_variance [ part_index ] - mode7_eigenvalue_est [ part_index ] ) ;
mode7_total_sq_dist_to_line_alt + = d ;
}
mode7_candidates [ examine_iter ] . m_total_sq_dist = mode7_total_sq_dist_to_line_alt ;
mode7_candidates [ examine_iter ] . m_index = unique_part_index ;
}
} // examine_iter
std : : sort ( & mode11_candidates [ 0 ] , & mode11_candidates [ num_pats_to_examine ] ) ;
std : : sort ( & mode7_candidates [ 0 ] , & mode7_candidates [ num_pats_to_examine ] ) ;
for ( uint32_t i = 0 ; i < num_desired_pats ; i + + )
pDesired_pat_indices_mode11 [ i ] = mode11_candidates [ i ] . m_index ;
for ( uint32_t i = 0 ; i < num_desired_pats ; i + + )
pDesired_pat_indices_mode7 [ i ] = mode7_candidates [ i ] . m_index ;
}
// Ranks candidate partition patterns for one 6x6 block for "mode 7" and returns the best
// num_desired_pats pattern indices.
//
// Each examined pattern is scored by the summed squared distance of every texel to a line
// through its partition's mean color along the grayscale axis (1,1,1)/sqrt(3). Lower is better.
//
// num_parts:                       number of partitions in each pattern (2 or 3).
// pUnique_pats / num_unique_pats:  table of canonicalized patterns (count only used by asserts).
// pUnique_pat_indices_to_examine:  num_pats_to_examine indices into pUnique_pats.
// pHalf_pixels_as_floats:          the block's 36 texels in row-major order (x + y * 6).
// coptions:                        unused.
// pDesired_pat_indices:            receives num_desired_pats pattern indices, best first.
//
// NOTE(review): callers are expected to pass num_desired_pats <= num_pats_to_examine; entries
// past num_pats_to_examine would be read from uninitialized candidates. Confirm at call sites.
static void estimate_partitions_mode7(
	uint32_t num_parts, // 2 or 3 partitions
	uint32_t num_unique_pats, const partition_pattern_vec* pUnique_pats, // list of all unique, canonicalized patterns
	uint32_t num_pats_to_examine, const uint32_t* pUnique_pat_indices_to_examine, // indices of pats to examine
	const vec3F* pHalf_pixels_as_floats, // block's half pixel values casted to floats
	const astc_hdr_codec_base_options& coptions, // options
	uint32_t num_desired_pats, uint32_t* pDesired_pat_indices) // output indices
{
	BASISU_NOTE_UNUSED(coptions);
	BASISU_NOTE_UNUSED(num_unique_pats);

	const uint32_t BLOCK_W = 6, BLOCK_H = 6, BLOCK_T = 6 * 6, MAX_PARTS = 3;
	assert(num_parts <= MAX_PARTS);

	struct candidate_res
	{
		float m_total_sq_dist;
		uint32_t m_index;
		bool operator< (const candidate_res& rhs) const { return m_total_sq_dist < rhs.m_total_sq_dist; }
	};

	const uint32_t MAX_CANDIDATES = 1024;
	assert(num_desired_pats && (num_desired_pats <= MAX_CANDIDATES));

	// The candidate array below lives on the stack and is indexed by examine_iter, so the
	// number of examined patterns must fit too. Clamp in release builds rather than overrun.
	assert(num_pats_to_examine <= MAX_CANDIDATES);
	if (num_pats_to_examine > MAX_CANDIDATES)
		num_pats_to_examine = MAX_CANDIDATES;

	candidate_res candidates[MAX_CANDIDATES];

	// Every partition uses the same fixed line direction (unit-length grayscale axis), so
	// build it once.
	const vec3F part_axis(0.5773502691f);

	for (uint32_t examine_iter = 0; examine_iter < num_pats_to_examine; examine_iter++)
	{
		const uint32_t unique_part_index = pUnique_pat_indices_to_examine[examine_iter];
		assert(unique_part_index < num_unique_pats);

		const partition_pattern_vec* pPat = &pUnique_pats[unique_part_index];

		// Per-partition mean color.
		vec3F part_means[MAX_PARTS];
		uint32_t part_total_texels[MAX_PARTS] = { 0 };

		for (uint32_t i = 0; i < num_parts; i++)
			part_means[i].clear();

		for (uint32_t y = 0; y < BLOCK_H; y++)
		{
			for (uint32_t x = 0; x < BLOCK_W; x++)
			{
				const uint32_t part_index = (*pPat)(x, y);
				assert(part_index < num_parts);

				part_means[part_index] += pHalf_pixels_as_floats[x + y * BLOCK_W];
				part_total_texels[part_index]++;
			} // x
		} // y

		for (uint32_t i = 0; i < num_parts; i++)
		{
			assert(part_total_texels[i]);
			part_means[i] /= (float)part_total_texels[i];
		}

		// TODO: This total distance can be computed rapidly. First compute the total variance of each channel (sum the diag entries of the covar matrix),
		// then compute the principle eigenvalue, and subtract. The result is the variance of the projection distances.
		float total_sq_dist_to_line = 0.0f;
		for (uint32_t i = 0; i < BLOCK_T; i++)
		{
			const uint32_t part_index = (*pPat)[i];
			assert(part_index < num_parts);

			total_sq_dist_to_line += dist_to_line_squared(pHalf_pixels_as_floats[i], part_means[part_index], part_axis);
		}

		candidates[examine_iter].m_total_sq_dist = total_sq_dist_to_line;
		candidates[examine_iter].m_index = unique_part_index;

	} // examine_iter

	// Lowest total squared distance first.
	std::sort(&candidates[0], &candidates[num_pats_to_examine]);

	for (uint32_t i = 0; i < num_desired_pats; i++)
		pDesired_pat_indices[i] = candidates[i].m_index;
}
static float calc_deblocking_penalty_itp (
uint32_t bx , uint32_t by , uint32_t width , uint32_t height ,
const imagef & pass_src_img_itp , const candidate_encoding & candidate )
{
float total_deblock_penalty = 0.0f ;
float total_orig_mse = 0.0f , total_comp_mse = 0.0f ;
uint32_t total_c = 0 ;
for ( uint32_t b = 0 ; b < 4 ; b + + )
{
for ( uint32_t i = 0 ; i < 6 ; i + + )
{
int ox = 0 , oy = 0 , qx = 0 , qy = 0 ;
switch ( b )
{
case 0 :
ox = bx * 6 + i ; oy = ( by - 1 ) * 6 + 5 ;
qx = bx * 6 + i ; qy = by * 6 ;
break ;
case 1 :
ox = bx * 6 + i ; oy = ( by + 1 ) * 6 ;
qx = bx * 6 + i ; qy = by * 6 + 5 ;
break ;
case 2 :
ox = ( bx - 1 ) * 6 + 5 ; oy = by * 6 + i ;
qx = bx * 6 ; qy = by * 6 + i ;
break ;
case 3 :
ox = ( bx + 1 ) * 6 ; oy = by * 6 + i ;
qx = bx * 6 + 5 ; qy = by * 6 + i ;
break ;
}
if ( ( ox < 0 ) | | ( oy < 0 ) | | ( ox > = ( int ) width ) | | ( oy > = ( int ) height ) )
continue ;
const vec3F & o_pixel_itp = pass_src_img_itp ( ox , oy ) ;
const vec3F & q_pixel_itp = pass_src_img_itp ( qx , qy ) ;
const vec3F & d_pixel_itp = candidate . m_comp_pixels_itp [ qy - by * 6 ] [ qx - bx * 6 ] ; // compressed block
vec3F orig_delta_v ( o_pixel_itp - q_pixel_itp ) ;
total_orig_mse + = square ( orig_delta_v [ 0 ] ) + square ( orig_delta_v [ 1 ] ) + square ( orig_delta_v [ 2 ] ) ;
vec3F d_delta_v ( o_pixel_itp - d_pixel_itp ) ;
total_comp_mse + = square ( d_delta_v [ 0 ] ) + square ( d_delta_v [ 1 ] ) + square ( d_delta_v [ 2 ] ) ;
total_c + + ;
}
}
if ( total_c )
{
total_orig_mse / = ( float ) total_c ;
total_comp_mse / = ( float ) total_c ;
if ( total_orig_mse )
{
total_deblock_penalty = fabsf ( ( total_comp_mse - total_orig_mse ) / total_orig_mse ) ;
}
}
return total_deblock_penalty ;
}
// Decides how many horizontal strips of ASTC 6x6 block rows the image is split into, and how
// many block rows each strip holds.
//
// lambda:             when exactly 0, one strip per thread is requested; otherwise the count is
//                     derived from the image height (about 12 block rows per strip, at least 8
//                     strips once there are 16+ block rows, never more than 32).
// num_blocks_y:       total number of block rows. Zero is rejected.
// total_threads:      thread count used for the lambda == 0 case.
// force_one_strip:    request a single strip covering the whole image.
// res_total_strips:   receives the final strip count.
// res_rows_per_strip: receives the rows per strip; even unless a single strip covers
//                     everything. The final strip takes whatever rows remain.
// global_cfg:         only m_debug_output is read, to enable diagnostic prints.
//
// Returns false (after printing an error) if num_blocks_y is 0 or the computed strips do not
// add up to exactly num_blocks_y rows. The outputs are written only on success.
static bool calc_strip_size(
	float lambda,
	uint32_t num_blocks_y, uint32_t total_threads, bool force_one_strip,
	uint32_t& res_total_strips, uint32_t& res_rows_per_strip, astc_hdr_6x6_global_config& global_cfg)
{
	// With no block rows, rows_per_strip would be 0 and the strip count below would divide by zero.
	if (!num_blocks_y)
	{
		fmt_error_printf(" Strip calc failed \n ");
		return false;
	}

	uint32_t total_strips = 1;

	if (lambda == 0.0f)
	{
		if (!force_one_strip)
		{
			total_strips = total_threads;
		}
	}
	else
	{
		const uint32_t MIN_DESIRED_STRIPS = 8;
		const uint32_t MAX_TARGET_STRIPS = 32;
		const uint32_t TARGET_ASTC_6X6_ROWS_PER_STRIP = 12;

		if (!force_one_strip)
		{
			total_strips = maximum<uint32_t>(1, num_blocks_y / TARGET_ASTC_6X6_ROWS_PER_STRIP);

			if (num_blocks_y >= MIN_DESIRED_STRIPS * 2)
				total_strips = maximum(total_strips, MIN_DESIRED_STRIPS);
		}

		total_strips = minimum(total_strips, MAX_TARGET_STRIPS);
	}

	uint32_t rows_per_strip = 0;
	if (total_strips <= 1)
	{
		rows_per_strip = num_blocks_y;
	}
	else
	{
		// Round down to an even number of rows, but never below 2.
		rows_per_strip = (num_blocks_y / total_strips) & ~1;

		if (rows_per_strip < 2)
			rows_per_strip = 2;// num_blocks_y;
	}

	assert((rows_per_strip == num_blocks_y) || ((rows_per_strip & 1) == 0));

	// Recompute the strip count from the (possibly rounded) rows per strip.
	total_strips = (num_blocks_y + rows_per_strip - 1) / rows_per_strip;

	if (global_cfg.m_debug_output)
	{
		fmt_printf(" num_blocks_y: {}, total_threads : {}, Total strips : {} \n ", num_blocks_y, total_threads, total_strips);
		fmt_printf(" ASTC 6x6 block rows per strip: {} \n ", rows_per_strip);
		fmt_printf(" ASTC 6x6 block rows on final strip: {} \n ", num_blocks_y - (total_strips - 1) * rows_per_strip);
	}

	// Sanity check: walk the strips and make sure they cover every block row exactly once.
	uint32_t total_rows = 0;
	for (uint32_t strip_index = 0; strip_index < total_strips; strip_index++)
	{
		uint32_t strip_first_by = strip_index * rows_per_strip;

		// Last valid row index is num_blocks_y - 1.
		uint32_t strip_last_by = minimum<uint32_t>(strip_first_by + rows_per_strip - 1, num_blocks_y - 1);

		if (strip_index == (total_strips - 1))
			strip_last_by = num_blocks_y - 1;

		uint32_t num_strip_block_rows = (strip_last_by - strip_first_by) + 1;
		total_rows += num_strip_block_rows;

		if (global_cfg.m_debug_output)
			fmt_printf(" Strip row: {}, total block rows: {} \n ", strip_index, num_strip_block_rows);
	}

	if (total_rows != num_blocks_y)
	{
		fmt_error_printf(" Strip calc failed \n ");
		return false;
	}

	res_total_strips = total_strips;
	res_rows_per_strip = rows_per_strip;

	return true;
}
static void convet_rgb_image_to_itp ( const imagef & src_img , imagef & dst_img , const astc_hdr_6x6_global_config & cfg )
{
const uint32_t width = src_img . get_width ( ) , height = src_img . get_height ( ) ;
dst_img . resize ( width , height ) ;
for ( uint32_t y = 0 ; y < height ; y + + )
{
for ( uint32_t x = 0 ; x < width ; x + + )
{
vec3F src_rgb ( src_img ( x , y ) ) ;
vec3F src_itp ;
linear_rgb_to_itp ( src_rgb , src_itp , cfg ) ;
dst_img ( x , y ) = src_itp ;
}
}
}
// Block dimensions in texels, and the resulting texel count per block.
constexpr uint32_t BLOCK_W = 6;
constexpr uint32_t BLOCK_H = 6;
constexpr uint32_t NUM_BLOCK_PIXELS = BLOCK_W * BLOCK_H;

// Encoder tuning constants. Their use sites are outside this section of the file;
// NOTE(review): the descriptions below are inferred from the names only - confirm before relying on them.

// Presumably per-encoding-type cost penalties (solid / reuse / run).
constexpr float SOLID_PENALTY = 4.0f;
constexpr float REUSE_PENALTY = 1.0f;
constexpr float RUN_PENALTY = 10.0f;

// Presumably the relative weights of the MSE and SSIM error terms.
constexpr float MSE_WEIGHT = 300000.0f;
constexpr float SSIM_WEIGHT = 200.0f;

constexpr float TWO_LEVEL_PENALTY = 1.425f;

// Presumably the D-SSIM thresholds for switching to the Gaussian filtered sources.
constexpr float SWITCH_TO_GAUSSIAN_FILTERED_THRESH1_D_SSIM = .04f;
constexpr float SWITCH_TO_GAUSSIAN_FILTERED_THRESH2_D_SSIM = .04f;

// Presumably MSE penalties applied to small weight grids on complex blocks.
constexpr float COMPLEX_BLOCK_WEIGHT_GRID_2X2_MSE_PENALTY = 1.5f;
constexpr float COMPLEX_BLOCK_WEIGHT_GRID_3X3_MSE_PENALTY = 1.25f;
constexpr float COMPLEX_BLOCK_WEIGHT_GRID_4X4_MSE_PENALTY = 1.15f;
struct uastc_hdr_6x6_debug_state
{
uint32_t m_encoding_type_hist [ ( uint32_t ) encoding_type : : cTotal ] = { 0 } ;
uint32_t m_endpoint_mode_hist [ ( uint32_t ) endpoint_mode : : cTotal ] = { 0 } ;
uint32_t m_block_mode_hist [ ( uint32_t ) block_mode : : cBMTotalModes ] = { 0 } ;
uint64_t m_block_mode_total_bits [ ( uint32_t ) block_mode : : cBMTotalModes ] = { 0 } ;
basisu : : vector < basisu : : stats < float > > m_block_mode_comp_stats [ ( uint32_t ) block_mode : : cBMTotalModes ] [ 3 ] ;
basisu : : vector < basisu : : comparative_stats < float > > m_block_mode_comparative_stats [ ( uint32_t ) block_mode : : cBMTotalModes ] [ 3 ] ;
std : : atomic < uint32_t > m_total_gaussian1_blocks ;
std : : atomic < uint32_t > m_total_gaussian2_blocks ;
std : : atomic < uint32_t > m_total_filter_horizontal ;
std : : atomic < uint32_t > m_detail_stats [ 5 ] ;
std : : atomic < uint32_t > m_total_mode7_skips ;
std : : atomic < uint32_t > m_total_blocks_compressed ;
std : : atomic < uint32_t > m_total_candidates_considered ;
std : : atomic < uint32_t > m_max_candidates_considered ;
std : : atomic < uint32_t > m_total_part2_stats [ 4 ] ;
std : : atomic < uint32_t > m_dp_stats [ 5 ] ;
std : : atomic < uint32_t > m_reuse_num_parts [ 4 ] ;
std : : atomic < uint32_t > m_reuse_total_dp ;
imagef m_stat_vis ;
std : : mutex m_stat_vis_mutex ;
image m_part_vis ;
image m_mode_vis ;
image m_mode_vis2 ;
image m_grid_vis ;
image m_enc_vis ;
std : : mutex m_vis_image_mutex ;
std : : atomic < uint32_t > m_comp_level_hist [ ASTC_HDR_6X6_MAX_COMP_LEVEL + 1 ] ;
std : : atomic < uint32_t > m_total_jnd_replacements ;
std : : mutex m_stats_mutex ;
uastc_hdr_6x6_debug_state ( )
{
for ( uint32_t i = 0 ; i < ( uint32_t ) block_mode : : cBMTotalModes ; i + + )
{
for ( uint32_t j = 0 ; j < 3 ; j + + )
{
m_block_mode_comp_stats [ i ] [ j ] . reserve ( 512 ) ;
m_block_mode_comparative_stats [ i ] [ j ] . reserve ( 512 ) ;
}
}
}
void init ( uint32_t width , uint32_t height )
{
m_stat_vis . resize ( width , height ) ;
m_part_vis . resize ( width , height ) ;
m_mode_vis . resize ( width , height ) ;
m_mode_vis2 . resize ( width , height ) ;
m_grid_vis . resize ( width , height ) ;
m_enc_vis . resize ( width , height ) ;
basisu : : clear_obj ( m_encoding_type_hist ) ;
basisu : : clear_obj ( m_endpoint_mode_hist ) ;
basisu : : clear_obj ( m_block_mode_hist ) ;
basisu : : clear_obj ( m_block_mode_total_bits ) ;
for ( uint32_t i = 0 ; i < ( uint32_t ) block_mode : : cBMTotalModes ; i + + )
{
for ( uint32_t j = 0 ; j < 3 ; j + + )
{
m_block_mode_comp_stats [ i ] [ j ] . clear ( ) ;
m_block_mode_comparative_stats [ i ] [ j ] . clear ( ) ;
}
}
m_total_gaussian1_blocks . store ( 0 ) ;
m_total_gaussian2_blocks . store ( 0 ) ;
m_total_filter_horizontal . store ( 0 ) ;
for ( uint32_t i = 0 ; i < std : : size ( m_detail_stats ) ; i + + )
m_detail_stats [ i ] . store ( 0 ) ;
m_total_mode7_skips . store ( 0 ) ;
for ( uint32_t i = 0 ; i < std : : size ( m_comp_level_hist ) ; i + + )
m_comp_level_hist [ i ] . store ( 0 ) ;
m_total_blocks_compressed . store ( 0 ) ;
m_total_candidates_considered . store ( 0 ) ;
m_max_candidates_considered . store ( 0 ) ;
for ( uint32_t i = 0 ; i < std : : size ( m_total_part2_stats ) ; i + + )
m_total_part2_stats [ i ] . store ( 0 ) ;
for ( uint32_t i = 0 ; i < std : : size ( m_dp_stats ) ; i + + )
m_dp_stats [ i ] . store ( 0 ) ;
for ( uint32_t i = 0 ; i < std : : size ( m_reuse_num_parts ) ; i + + )
m_reuse_num_parts [ i ] . store ( 0 ) ;
m_reuse_total_dp . store ( 0 ) ;
m_total_jnd_replacements . store ( 0 ) ;
}
void print ( uint32_t total_blocks ) const
{
fmt_printf ( " Total blocks: {} \n " , total_blocks ) ;
fmt_printf ( " Total JND replacements: {} {3.2}% \n " , m_total_jnd_replacements , ( float ) m_total_jnd_replacements * 100.0f / ( float ) total_blocks ) ;
fmt_printf ( " Comp level histogram: {} {} {} {} {} \n " , m_comp_level_hist [ 0 ] , m_comp_level_hist [ 1 ] , m_comp_level_hist [ 2 ] , m_comp_level_hist [ 3 ] , m_comp_level_hist [ 4 ] ) ;
fmt_printf ( " Total gaussian 1 blocks: {} {3.2}% \n " , m_total_gaussian1_blocks , ( float ) m_total_gaussian1_blocks * 100.0f / ( float ) total_blocks ) ;
fmt_printf ( " Total gaussian 2 blocks: {} {3.2}% \n " , m_total_gaussian2_blocks , ( float ) m_total_gaussian2_blocks * 100.0f / ( float ) total_blocks ) ;
fmt_printf ( " Total filter horizontal: {} {3.2}% \n " , m_total_filter_horizontal , ( float ) m_total_filter_horizontal * 100.0f / ( float ) total_blocks ) ;
fmt_printf ( " Detail stats: Detailed block low grid skip: {}, Blurry block skip: {}, Very blurry block skip: {}, NH:{} H:{} \n " , m_detail_stats [ 0 ] , m_detail_stats [ 1 ] , m_detail_stats [ 2 ] , m_detail_stats [ 3 ] , m_detail_stats [ 4 ] ) ;
fmt_printf ( " Total mode7 skips: {} \n " , m_total_mode7_skips ) ;
fmt_printf ( " Total candidates: {}, {} avg per block \n " , m_total_candidates_considered , ( float ) m_total_candidates_considered / ( float ) total_blocks ) ;
fmt_printf ( " Max ever candidates: {} \n " , m_max_candidates_considered ) ;
fmt_printf ( " Part2/3 stats: {} {} {} {} \n " , m_total_part2_stats [ 0 ] , m_total_part2_stats [ 1 ] , m_total_part2_stats [ 2 ] , m_total_part2_stats [ 3 ] ) ;
fmt_printf ( " Dual plane stats: {} {} {} {} {} \n " , m_dp_stats [ 0 ] , m_dp_stats [ 1 ] , m_dp_stats [ 2 ] , m_dp_stats [ 3 ] , m_dp_stats [ 4 ] ) ;
fmt_printf ( " Reuse total dual plane: {} \n " , m_reuse_total_dp ) ;
fmt_printf ( " Reuse part stats: {} {} {} \n " , m_reuse_num_parts [ 1 ] , m_reuse_num_parts [ 2 ] , m_reuse_num_parts [ 3 ] ) ;
fmt_printf ( " \n Encoding type histogram: \n " ) ;
for ( uint32_t i = 0 ; i < std : : size ( m_encoding_type_hist ) ; i + + )
fmt_printf ( " {}: {} \n " , i , m_encoding_type_hist [ i ] ) ;
fmt_printf ( " \n Endpoint mode histogram: \n " ) ;
for ( uint32_t i = 0 ; i < std : : size ( m_endpoint_mode_hist ) ; i + + )
fmt_printf ( " {}: {} \n " , i , m_endpoint_mode_hist [ i ] ) ;
fmt_printf ( " \n Block mode histogram: \n " ) ;
uint32_t total_dp = 0 , total_sp = 0 ;
uint32_t total_mode11 = 0 , total_mode7 = 0 ;
uint32_t part_hist [ 3 ] = { 0 } ;
uint32_t part2_mode7_total = 0 , part2_mode11_total = 0 ;
uint32_t total_used_modes = 0 ;
for ( uint32_t i = 0 ; i < std : : size ( m_block_mode_hist ) ; i + + )
{
const auto & bm_desc = g_block_mode_descs [ i ] ;
const uint32_t total_uses = m_block_mode_hist [ i ] ;
if ( bm_desc . m_dp )
total_dp + = total_uses ;
else
total_sp + = total_uses ;
if ( bm_desc . m_cem = = 7 )
total_mode7 + = total_uses ;
else
total_mode11 + = total_uses ;
part_hist [ bm_desc . m_num_partitions - 1 ] + = total_uses ;
if ( bm_desc . m_num_partitions = = 2 )
{
if ( bm_desc . m_cem = = 7 )
part2_mode7_total + = total_uses ;
else
{
assert ( bm_desc . m_cem = = 11 ) ;
part2_mode11_total + = total_uses ;
}
}
float avg_std_dev = 0.0f ;
float avg_cross_correlations [ 3 ] = { 0 } ;
if ( m_block_mode_comp_stats [ i ] [ 0 ] . size ( ) )
{
const uint32_t num_uses = m_block_mode_comp_stats [ i ] [ 0 ] . size_u32 ( ) ;
for ( uint32_t j = 0 ; j < num_uses ; j + + )
avg_std_dev + = ( float ) maximum ( m_block_mode_comp_stats [ i ] [ 0 ] [ j ] . m_std_dev , m_block_mode_comp_stats [ i ] [ 1 ] [ j ] . m_std_dev , m_block_mode_comp_stats [ i ] [ 2 ] [ j ] . m_std_dev ) ;
avg_std_dev / = ( float ) num_uses ;
for ( uint32_t j = 0 ; j < num_uses ; j + + )
{
avg_cross_correlations [ 0 ] + = fabsf ( ( float ) m_block_mode_comparative_stats [ i ] [ 0 ] [ j ] . m_pearson ) ;
avg_cross_correlations [ 1 ] + = fabsf ( ( float ) m_block_mode_comparative_stats [ i ] [ 1 ] [ j ] . m_pearson ) ;
avg_cross_correlations [ 2 ] + = fabsf ( ( float ) m_block_mode_comparative_stats [ i ] [ 2 ] [ j ] . m_pearson ) ;
}
avg_cross_correlations [ 0 ] / = ( float ) num_uses ;
avg_cross_correlations [ 1 ] / = ( float ) num_uses ;
avg_cross_correlations [ 2 ] / = ( float ) num_uses ;
}
fmt_printf ( " { 2}: uses: { 6}, cem: {}, dp: {} chan: {}, parts: {}, grid: {}x{}, endpoint levels: {}, weight levels: {}, Avg bits: {}, Avg Max Std Dev: {}, RG: {} RB: {} GB: {} \n " , i , total_uses ,
bm_desc . m_cem ,
bm_desc . m_dp , bm_desc . m_dp_channel ,
bm_desc . m_num_partitions ,
bm_desc . m_grid_x , bm_desc . m_grid_y ,
astc_helpers : : get_ise_levels ( bm_desc . m_endpoint_ise_range ) ,
astc_helpers : : get_ise_levels ( bm_desc . m_weight_ise_range ) ,
total_uses ? ( ( double ) m_block_mode_total_bits [ i ] / total_uses ) : 0.0f ,
avg_std_dev , avg_cross_correlations [ 0 ] , avg_cross_correlations [ 1 ] , avg_cross_correlations [ 2 ] ) ;
if ( total_uses )
total_used_modes + + ;
}
fmt_printf ( " Total used modes: {} \n " , total_used_modes ) ;
fmt_printf ( " Total single plane: {}, total dual plane: {} \n " , total_sp , total_dp ) ;
fmt_printf ( " Total mode 11: {}, mode 7: {} \n " , total_mode11 , total_mode7 ) ;
fmt_printf ( " Partition histogram: {} {} {} \n " , part_hist [ 0 ] , part_hist [ 1 ] , part_hist [ 2 ] ) ;
fmt_printf ( " 2 subset mode 7 uses: {}, mode 11 uses: {} \n " , part2_mode7_total , part2_mode11_total ) ;
}
} ;
// Shared state for one UASTC HDR 6x6 encode. compress_strip_task() receives this by reference
// for every strip it processes.
struct uastc_hdr_6x6_encode_state
{
// Base codec options; compress_strip_task() copies these into a per-block options object.
astc_hdr_codec_base_options master_coptions ;
// Source image plus two filtered versions. compress_strip_task() picks one per outer pass
// (pass 0: src_img, pass 1: src_img_filtered1, pass 2: src_img_filtered2); its comments
// describe the filtered images as Gaussian filtered.
imagef src_img ;
imagef src_img_filtered1 ;
imagef src_img_filtered2 ;
// ITP-space counterparts of the three images above, selected by the same outer pass index.
imagef src_img_itp ;
imagef src_img_filtered1_itp ;
imagef src_img_filtered2_itp ;
// NOTE(review): presumably one MSE scale factor per block for smooth regions - not used in this section, confirm.
vector2D < float > smooth_block_mse_scales ;
// NOTE(review): presumably the decoded result image - not used in this section, confirm.
imagef packed_img ;
// One bit stream per strip; compress_strip_task() writes to strip_bits[strip_index].
basisu : : vector < bitwise_coder > strip_bits ;
// NOTE(review): presumably the final ASTC block and the chosen encoding for each block - not used in this section, confirm.
basisu : : vector2D < astc_helpers : : astc_block > final_astc_blocks ;
vector2D < candidate_encoding > coded_blocks ;
} ;
static bool compress_strip_task (
uint32_t strip_index , uint32_t total_strips , uint32_t strip_first_by , uint32_t strip_last_by ,
uint32_t num_blocks_x , uint32_t num_blocks_y , uint32_t total_blocks , uint32_t width , uint32_t height ,
astc_hdr_6x6_global_config & global_cfg , uastc_hdr_6x6_debug_state & debug_state , uastc_hdr_6x6_encode_state & enc_state )
{
BASISU_NOTE_UNUSED ( num_blocks_y ) ;
BASISU_NOTE_UNUSED ( total_strips ) ;
vec3F prev_comp_pixels [ BLOCK_H ] [ BLOCK_W ] ; // [y][x]
basisu : : clear_obj ( prev_comp_pixels ) ;
uint32_t prev_run_len = 0 ;
bitwise_coder prev_encoding ;
candidate_encoding prev_candidate_encoding ; // the previous candidate written, which may have been a run extension
candidate_encoding prev_non_run_candidate_encoding ; // the previous *non-run* candidate written
bitwise_coder & strip_coded_bits = enc_state . strip_bits [ strip_index ] ;
const uint32_t CANDIDATES_TO_RESERVE = 1536 ;
basisu : : vector < candidate_encoding > candidates ;
candidates . reserve ( CANDIDATES_TO_RESERVE ) ;
for ( uint32_t by = strip_first_by ; by < = strip_last_by ; by + + )
{
const bool has_upper_neighbor = by > strip_first_by ;
for ( uint32_t bx = 0 ; bx < num_blocks_x ; bx + + )
{
//if ((bx == 1) && (by == 2))
// basisu::fmt_printf("!");
for ( uint32_t outer_pass = 0 ; outer_pass < 3 ; outer_pass + + )
{
const bool has_left_neighbor = bx > 0 ;
//const bool has_prev = has_left_neighbor || has_upper_neighbor;
// Select either the original source image, or the Gaussian filtered version.
// From here the encoder *must* use these 2 sources.
const imagef & pass_src_img = ( outer_pass = = 2 ) ? enc_state . src_img_filtered2 :
( ( outer_pass = = 1 ) ? enc_state . src_img_filtered1 : enc_state . src_img ) ;
const imagef & pass_src_img_itp = ( outer_pass = = 2 ) ? enc_state . src_img_filtered2_itp :
( ( outer_pass = = 1 ) ? enc_state . src_img_filtered1_itp : enc_state . src_img_itp ) ;
// Extract source image block
vec4F block_pixels [ BLOCK_H ] [ BLOCK_W ] ; // [y][x]
pass_src_img . extract_block_clamped ( & block_pixels [ 0 ] [ 0 ] , bx * BLOCK_W , by * BLOCK_H , BLOCK_W , BLOCK_H ) ;
vec4F block_pixels_itp [ BLOCK_H ] [ BLOCK_W ] ; // [y][x]
pass_src_img_itp . extract_block_clamped ( & block_pixels_itp [ 0 ] [ 0 ] , bx * BLOCK_W , by * BLOCK_H , BLOCK_W , BLOCK_H ) ;
half_vec3 half_pixels [ BLOCK_H ] [ BLOCK_W ] ; // [y][x] half-float values
vec3F half_pixels_as_floats [ BLOCK_H ] [ BLOCK_W ] ; // [y][x] half float values, integer bits as floats
vec4F block_pixels_q16 [ BLOCK_H ] [ BLOCK_W ] ; // [y][x], q16 space for low-level ASTC encoding
vec3F block_pixels_as_itp [ BLOCK_H ] [ BLOCK_W ] ; // [y][x] input converted to itp space, for faster error calculations
bool is_grayscale = true ;
candidates . resize ( 0 ) ;
float block_ly = BIG_FLOAT_VAL , block_hy = 0.0f , block_avg_y = 0.0f ;
for ( uint32_t y = 0 ; y < BLOCK_H ; y + + )
{
for ( uint32_t x = 0 ; x < BLOCK_W ; x + + )
{
vec3F rgb_input ;
for ( uint32_t c = 0 ; c < 3 ; c + + )
{
float v = block_pixels [ y ] [ x ] [ c ] ;
rgb_input [ c ] = v ;
const basist : : half_float h = basisu : : fast_float_to_half_no_clamp_neg_nan_or_inf ( v ) ;
assert ( h = = basist : : float_to_half ( v ) ) ;
half_pixels [ y ] [ x ] [ c ] = h ;
block_pixels_q16 [ y ] [ x ] [ c ] = ( float ) half_to_qlog16 ( h ) ;
half_pixels_as_floats [ y ] [ x ] [ c ] = ( float ) h ;
} // c
float py = rgb_input . dot ( vec3F ( REC_709_R , REC_709_G , REC_709_B ) ) ;
if ( py < block_ly )
block_ly = py ;
if ( py > block_hy )
block_hy = py ;
block_avg_y + = py ;
//linear_rgb_to_itp(rgb_input, block_pixels_as_itp[y][x]);
block_pixels_as_itp [ y ] [ x ] = block_pixels_itp [ y ] [ x ] ;
block_pixels_q16 [ y ] [ x ] [ 3 ] = 0.0f ;
if ( ( half_pixels [ y ] [ x ] [ 0 ] ! = half_pixels [ y ] [ x ] [ 1 ] ) | | ( half_pixels [ y ] [ x ] [ 0 ] ! = half_pixels [ y ] [ x ] [ 2 ] ) )
is_grayscale = false ;
} // x
} // y
block_avg_y * = ( 1.0f / ( float ) NUM_BLOCK_PIXELS ) ;
encode_astc_block_stats enc_block_stats ;
enc_block_stats . init ( NUM_BLOCK_PIXELS , & block_pixels_q16 [ 0 ] [ 0 ] ) ;
vec4F x_filtered [ 6 ] [ 6 ] , y_filtered [ 6 ] [ 6 ] ;
filter_block ( 3 , 6 , ( vec4F * ) block_pixels , ( vec4F * ) x_filtered ) ; // filter rows (horizontal)
filter_block ( 6 , 3 , ( vec4F * ) block_pixels , ( vec4F * ) y_filtered ) ; // filter cols (vertically)
const float filtered_x_err = diff_blocks ( ( vec4F * ) block_pixels , ( vec4F * ) x_filtered ) ;
const float filtered_y_err = diff_blocks ( ( vec4F * ) block_pixels , ( vec4F * ) y_filtered ) ;
const bool filter_horizontally = filtered_x_err < filtered_y_err ;
//const float block_mag_gradient_mag = block_max_gradient_mag(bx, by);
if ( filter_horizontally )
debug_state . m_total_filter_horizontal . fetch_add ( 1 , std : : memory_order_relaxed ) ;
vec3F lowpass_filtered [ 6 ] [ 6 ] ;
filter_block ( 3 , 3 , & half_pixels_as_floats [ 0 ] [ 0 ] , & lowpass_filtered [ 0 ] [ 0 ] ) ;
float lowpass_std_dev = sub_and_compute_std_dev ( & lowpass_filtered [ 0 ] [ 0 ] , & half_pixels_as_floats [ 0 ] [ 0 ] ) ;
const bool very_detailed_block = lowpass_std_dev > 350.0f ;
const bool very_blurry_block = lowpass_std_dev < 30.0f ;
const bool super_blurry_block = lowpass_std_dev < 15.0f ;
basisu : : stats < float > half_comp_stats [ 3 ] ;
for ( uint32_t c = 0 ; c < 3 ; c + + )
half_comp_stats [ c ] . calc ( NUM_BLOCK_PIXELS , & half_pixels_as_floats [ 0 ] [ 0 ] [ c ] , 3 ) ;
const float SINGLE_PART_HALF_THRESH = 256.0f ;
const float COMPLEX_HALF_THRESH = 1024.0f ;
// HACK HACK
const float VERY_COMPLEX_HALF_THRESH = 1400.0f ; // 1536.0f;
const float max_std_dev = ( float ) maximum ( half_comp_stats [ 0 ] . m_std_dev , half_comp_stats [ 1 ] . m_std_dev , half_comp_stats [ 2 ] . m_std_dev ) ;
const bool very_simple_block = ( max_std_dev < SINGLE_PART_HALF_THRESH ) ;
const bool complex_block = ( max_std_dev > COMPLEX_HALF_THRESH ) ;
const bool very_complex_block = ( max_std_dev > VERY_COMPLEX_HALF_THRESH ) ;
// Dynamically choose a comp_level for this block.
astc_hdr_codec_base_options coptions ( enc_state . master_coptions ) ;
uint32_t comp_level = global_cfg . m_master_comp_level ;
if ( very_complex_block )
comp_level = global_cfg . m_highest_comp_level ;
else if ( complex_block )
comp_level = ( global_cfg . m_master_comp_level + global_cfg . m_highest_comp_level + 1 ) / 2 ;
debug_state . m_comp_level_hist [ comp_level ] . fetch_add ( 1 , std : : memory_order_relaxed ) ;
bool any_2subset_enabled = false , any_2subset_mode11_enabled = false , any_2subset_mode7_enabled = false , any_3subset_enabled = false ;
BASISU_NOTE_UNUSED ( any_2subset_mode11_enabled ) ;
for ( uint32_t i = 0 ; i < ( uint32_t ) block_mode : : cBMTotalModes ; i + + )
{
if ( comp_level = = 0 )
{
if ( ( g_block_mode_descs [ i ] . m_flags & BASIST_HDR_6X6_LEVEL0 ) = = 0 )
continue ;
}
else if ( comp_level = = 1 )
{
if ( ( g_block_mode_descs [ i ] . m_flags & BASIST_HDR_6X6_LEVEL1 ) = = 0 )
continue ;
}
else if ( comp_level = = 2 )
{
if ( ( g_block_mode_descs [ i ] . m_flags & BASIST_HDR_6X6_LEVEL2 ) = = 0 )
continue ;
}
if ( g_block_mode_descs [ i ] . m_num_partitions = = 2 )
{
any_2subset_enabled = true ;
if ( g_block_mode_descs [ i ] . m_cem = = 7 )
{
any_2subset_mode7_enabled = true ;
}
else
{
assert ( g_block_mode_descs [ i ] . m_cem = = 11 ) ;
any_2subset_mode11_enabled = true ;
}
}
else if ( g_block_mode_descs [ i ] . m_num_partitions = = 3 )
any_3subset_enabled = true ;
}
coptions . m_mode7_full_s_optimization = ( comp_level > = 2 ) ;
const bool uber_mode_flag = ( comp_level > = 3 ) ;
coptions . m_allow_uber_mode = uber_mode_flag ;
coptions . m_ultra_quant = ( comp_level > = 4 ) ;
coptions . m_take_first_non_clamping_mode11_submode = ( comp_level < = 2 ) ;
coptions . m_take_first_non_clamping_mode7_submode = ( comp_level < = 2 ) ;
coptions . m_disable_weight_plane_optimization = ( comp_level > = 2 ) ;
// -------------------
uint32_t total_used_block_chans = 0 ;
for ( uint32_t i = 0 ; i < 3 ; i + + )
total_used_block_chans + = ( half_comp_stats [ i ] . m_range > 0.0f ) ;
const bool is_solid_block = ( total_used_block_chans = = 0 ) ;
basisu : : comparative_stats < float > half_cross_chan_stats [ 3 ] ;
// R vs. G
half_cross_chan_stats [ 0 ] . calc_pearson ( NUM_BLOCK_PIXELS ,
& half_pixels_as_floats [ 0 ] [ 0 ] [ 0 ] , & half_pixels_as_floats [ 0 ] [ 0 ] [ 1 ] ,
3 , 3 ,
& half_comp_stats [ 0 ] , & half_comp_stats [ 1 ] ) ;
// R vs. B
half_cross_chan_stats [ 1 ] . calc_pearson ( NUM_BLOCK_PIXELS ,
& half_pixels_as_floats [ 0 ] [ 0 ] [ 0 ] , & half_pixels_as_floats [ 0 ] [ 0 ] [ 2 ] ,
3 , 3 ,
& half_comp_stats [ 0 ] , & half_comp_stats [ 2 ] ) ;
// G vs. B
half_cross_chan_stats [ 2 ] . calc_pearson ( NUM_BLOCK_PIXELS ,
& half_pixels_as_floats [ 0 ] [ 0 ] [ 1 ] , & half_pixels_as_floats [ 0 ] [ 0 ] [ 2 ] ,
3 , 3 ,
& half_comp_stats [ 1 ] , & half_comp_stats [ 2 ] ) ;
const float rg_corr = fabsf ( ( float ) half_cross_chan_stats [ 0 ] . m_pearson ) ;
const float rb_corr = fabsf ( ( float ) half_cross_chan_stats [ 1 ] . m_pearson ) ;
const float gb_corr = fabsf ( ( float ) half_cross_chan_stats [ 2 ] . m_pearson ) ;
float min_corr = BIG_FLOAT_VAL , max_corr = - BIG_FLOAT_VAL ;
for ( uint32_t i = 0 ; i < 3 ; i + + )
{
2025-10-09 12:28:20 +02:00
#if 0
// 9/5/2025, wrong metric, we're iterating channel pairs here, not individual channels.
// On 3 active channel blocks this causes no difference.
if ( half_comp_stats [ i ] . m_range > 0.0f )
# else
static const uint8_t s_chan_pairs [ 3 ] [ 2 ] = { { 0 , 1 } , { 0 , 2 } , { 1 , 2 } } ;
const uint32_t chanA = s_chan_pairs [ i ] [ 0 ] ;
const uint32_t chanB = s_chan_pairs [ i ] [ 1 ] ;
if ( ( half_comp_stats [ chanA ] . m_range > 0.0f ) & & ( half_comp_stats [ chanB ] . m_range > 0.0f ) )
# endif
2025-03-11 10:51:10 +01:00
{
const float c = fabsf ( ( float ) half_cross_chan_stats [ i ] . m_pearson ) ;
min_corr = minimum ( min_corr , c ) ;
max_corr = maximum ( max_corr , c ) ;
}
}
bool use_single_subset_mode7 = true ;
if ( comp_level < = 1 )
{
// TODO: could also compute angle between principal axis and the grayscale axis.
// TODO: Transform grayscale axis by covar matrix, compute variance vs. total variance
const float MODE7_MIN_CHAN_CORR = .5f ;
const float MODE7_PCA_ANGLE_THRESH = .9f ;
2025-10-09 12:28:20 +02:00
use_single_subset_mode7 = is_grayscale | | is_solid_block | | ( ( total_used_block_chans = = 1 ) | | ( min_corr > = MODE7_MIN_CHAN_CORR ) ) ;
2025-03-11 10:51:10 +01:00
if ( use_single_subset_mode7 )
{
float cos_ang = fabsf ( enc_block_stats . m_axis_q16 . dot ( vec3F ( 0.5773502691f ) ) ) ;
if ( cos_ang < MODE7_PCA_ANGLE_THRESH )
use_single_subset_mode7 = false ;
}
}
const float STRONG_CORR_THRESH = ( comp_level < = 1 ) ? .5f : ( ( comp_level < = 3 ) ? .75f : .9f ) ;
int desired_dp_chan = - 1 ;
if ( total_used_block_chans < = 1 )
{
// no need for dual plane (except possibly 2x2 weight grids for RDO)
}
else
{
if ( min_corr > = STRONG_CORR_THRESH )
{
// all channel pairs strongly correlated, no need for dual plane
debug_state . m_dp_stats [ 0 ] . fetch_add ( 1 , std : : memory_order_relaxed ) ;
}
else
{
if ( total_used_block_chans = = 2 )
{
if ( half_comp_stats [ 0 ] . m_range = = 0.0f )
{
// r unused, check for strong gb correlation
if ( gb_corr < STRONG_CORR_THRESH )
desired_dp_chan = 1 ;
}
else if ( half_comp_stats [ 1 ] . m_range = = 0.0f )
{
// g unused, check for strong rb correlation
if ( rb_corr < STRONG_CORR_THRESH )
desired_dp_chan = 0 ;
}
else
{
// b unused, check for strong rg correlation
if ( rg_corr < STRONG_CORR_THRESH )
desired_dp_chan = 0 ;
}
}
else
{
assert ( total_used_block_chans = = 3 ) ;
// see if rg/rb is weakly correlated vs. gb
if ( ( rg_corr < gb_corr ) & & ( rb_corr < gb_corr ) )
desired_dp_chan = 0 ;
// see if gr/gb is weakly correlated vs. rb
else if ( ( rg_corr < rb_corr ) & & ( gb_corr < rb_corr ) )
desired_dp_chan = 1 ;
// assume b is weakest
else
desired_dp_chan = 2 ;
}
if ( desired_dp_chan = = - 1 )
debug_state . m_dp_stats [ 1 ] . fetch_add ( 1 , std : : memory_order_relaxed ) ;
else
debug_state . m_dp_stats [ 2 + desired_dp_chan ] . fetch_add ( 1 , std : : memory_order_relaxed ) ;
}
}
// 2x2 is special for RDO at higher lambdas - always pick a preferred channel.
int desired_dp_chan_2x2 = 0 ;
if ( total_used_block_chans = = 2 )
{
if ( half_comp_stats [ 0 ] . m_range = = 0.0f )
desired_dp_chan_2x2 = 1 ;
}
else if ( total_used_block_chans = = 3 )
{
// see if rg/rb is weakly correlated vs. gb
if ( ( rg_corr < gb_corr ) & & ( rb_corr < gb_corr ) )
desired_dp_chan_2x2 = 0 ;
// see if gr/gb is weakly correlated vs. rb
else if ( ( rg_corr < rb_corr ) & & ( gb_corr < rb_corr ) )
desired_dp_chan_2x2 = 1 ;
// assume b is weakest
else
desired_dp_chan_2x2 = 2 ;
}
// Gather all candidate encodings
bool status = false ;
// ---- Run candidate
if ( ( global_cfg . m_use_runs ) & & ( has_left_neighbor | | has_upper_neighbor ) )
{
candidate_encoding candidate ;
candidate . m_coder . reserve ( 24 ) ;
candidate . m_encoding_type = encoding_type : : cRun ;
candidate . m_decomp_log_blk = prev_non_run_candidate_encoding . m_decomp_log_blk ;
candidate . m_coded_log_blk = prev_non_run_candidate_encoding . m_coded_log_blk ;
memcpy ( candidate . m_comp_pixels , prev_comp_pixels , sizeof ( prev_comp_pixels ) ) ;
if ( ! prev_run_len )
{
candidate . m_coder . put_bits ( RUN_CODE , RUN_CODE_LEN ) ;
candidate . m_coder . put_vlc ( 0 , 5 ) ;
}
else
{
// extend current run - compute the # of new bits needed for the extension.
uint32_t prev_run_bits = prev_encoding . get_total_bits_u32 ( ) ;
assert ( prev_run_bits > 0 ) ;
// We're not actually going to code this, because the previously emitted run code will be extended.
bitwise_coder temp_coder ;
temp_coder . put_bits ( RUN_CODE , RUN_CODE_LEN ) ;
temp_coder . put_vlc ( ( prev_run_len + 1 ) - 1 , 5 ) ;
uint32_t cur_run_bits = temp_coder . get_total_bits_u32 ( ) ;
assert ( cur_run_bits > = prev_run_bits ) ;
uint32_t total_new_bits = cur_run_bits - prev_run_bits ;
if ( total_new_bits > 0 )
candidate . m_coder . put_bits ( 0 , total_new_bits ) ; // dummy bits
}
candidate . m_run_len = prev_run_len + 1 ;
candidates . emplace_back ( std : : move ( candidate ) ) ;
}
// ---- Reuse candidate
if ( ( ! is_solid_block ) & & ( global_cfg . m_lambda > 0.0f ) )
{
for ( uint32_t reuse_delta_index = 0 ; reuse_delta_index < global_cfg . m_num_reuse_xy_deltas ; reuse_delta_index + + )
{
const int reuse_delta_x = g_reuse_xy_deltas [ reuse_delta_index ] . m_x ;
const int reuse_delta_y = g_reuse_xy_deltas [ reuse_delta_index ] . m_y ;
const int reuse_bx = bx + reuse_delta_x , reuse_by = by + reuse_delta_y ;
if ( ( reuse_bx < 0 ) | | ( reuse_bx > = ( int ) num_blocks_x ) )
continue ;
if ( reuse_by < ( int ) strip_first_by )
break ;
const candidate_encoding & prev_candidate = enc_state . coded_blocks ( reuse_bx , reuse_by ) ;
// TODO - support this.
if ( prev_candidate . m_encoding_type = = encoding_type : : cSolid )
continue ;
assert ( ( prev_candidate . m_encoding_type = = encoding_type : : cBlock ) | | ( prev_candidate . m_encoding_type = = encoding_type : : cReuse ) ) ;
candidate_encoding candidate ;
candidate . m_coder . reserve ( 24 ) ;
astc_helpers : : log_astc_block & coded_log_blk = candidate . m_coded_log_blk ;
astc_helpers : : log_astc_block & decomp_log_blk = candidate . m_decomp_log_blk ;
const astc_helpers : : log_astc_block & prev_coded_log_blk = prev_candidate . m_coded_log_blk ;
const uint32_t grid_x = prev_coded_log_blk . m_grid_width , grid_y = prev_coded_log_blk . m_grid_height ;
const bool dual_plane = prev_candidate . m_coded_log_blk . m_dual_plane ;
const uint32_t num_grid_samples = grid_x * grid_y ;
const uint32_t num_endpoint_vals = get_num_endpoint_vals ( prev_coded_log_blk . m_color_endpoint_modes [ 0 ] ) ;
coded_log_blk = prev_candidate . m_coded_log_blk ;
decomp_log_blk = prev_candidate . m_decomp_log_blk ;
if ( prev_coded_log_blk . m_num_partitions = = 1 )
{
// Now encode the block using the transcoded endpoints
basist : : half_float decoded_half [ MAX_SUPPORTED_WEIGHT_LEVELS ] [ 3 ] ;
if ( prev_coded_log_blk . m_color_endpoint_modes [ 0 ] = = 7 )
{
status = get_astc_hdr_mode_7_block_colors ( coded_log_blk . m_endpoints , & decoded_half [ 0 ] [ 0 ] , nullptr ,
astc_helpers : : get_ise_levels ( coded_log_blk . m_weight_ise_range ) , coded_log_blk . m_weight_ise_range , coded_log_blk . m_endpoint_ise_range ) ;
}
else
{
status = get_astc_hdr_mode_11_block_colors ( coded_log_blk . m_endpoints , & decoded_half [ 0 ] [ 0 ] , nullptr ,
astc_helpers : : get_ise_levels ( coded_log_blk . m_weight_ise_range ) , coded_log_blk . m_weight_ise_range , coded_log_blk . m_endpoint_ise_range ) ;
}
assert ( status ) ;
uint8_t trial_weights0 [ BLOCK_W * BLOCK_H ] , trial_weights1 [ BLOCK_W * BLOCK_H ] ;
uint8_t transcode_weights [ MAX_BLOCK_W * MAX_BLOCK_H * 2 ] ;
if ( dual_plane )
{
eval_selectors_dual_plane ( prev_candidate . m_coded_log_blk . m_color_component_selector ,
BLOCK_W * BLOCK_H , trial_weights0 , trial_weights1 , ( basist : : half_float * ) & half_pixels [ 0 ] [ 0 ] [ 0 ] , astc_helpers : : get_ise_levels ( coded_log_blk . m_weight_ise_range ) , & decoded_half [ 0 ] [ 0 ] , coptions , UINT32_MAX ) ;
downsample_ise_weights_dual_plane (
coded_log_blk . m_weight_ise_range , coded_log_blk . m_weight_ise_range ,
BLOCK_W , BLOCK_H ,
grid_x , grid_y ,
trial_weights0 , trial_weights1 , coded_log_blk . m_weights ) ;
basist : : astc_6x6_hdr : : requantize_astc_weights ( num_grid_samples * 2 , coded_log_blk . m_weights , coded_log_blk . m_weight_ise_range , transcode_weights , decomp_log_blk . m_weight_ise_range ) ;
}
else
{
eval_selectors ( BLOCK_W * BLOCK_H , trial_weights0 , coded_log_blk . m_weight_ise_range , ( basist : : half_float * ) & half_pixels [ 0 ] [ 0 ] [ 0 ] , astc_helpers : : get_ise_levels ( coded_log_blk . m_weight_ise_range ) , & decoded_half [ 0 ] [ 0 ] , coptions , UINT32_MAX ) ;
downsample_ise_weights (
coded_log_blk . m_weight_ise_range , coded_log_blk . m_weight_ise_range ,
BLOCK_W , BLOCK_H ,
grid_x , grid_y ,
trial_weights0 , coded_log_blk . m_weights ) ;
basist : : astc_6x6_hdr : : requantize_astc_weights ( num_grid_samples , coded_log_blk . m_weights , coded_log_blk . m_weight_ise_range , transcode_weights , decomp_log_blk . m_weight_ise_range ) ;
}
// Create the block the decoder would transcode into.
copy_weight_grid ( dual_plane , grid_x , grid_y , transcode_weights , decomp_log_blk ) ;
}
else if ( prev_coded_log_blk . m_num_partitions = = 2 )
{
assert ( ! dual_plane ) ;
const int unique_pat_index = g_part2_seed_to_unique_index [ coded_log_blk . m_partition_id ] ;
assert ( ( unique_pat_index > = 0 ) & & ( unique_pat_index < ( int ) NUM_UNIQUE_PARTITIONS2 ) ) ;
const partition_pattern_vec & pat_vec = g_partitions2 [ unique_pat_index ] ;
vec4F part_pixels_q16 [ 2 ] [ 64 ] ;
half_vec3 part_half_pixels [ 2 ] [ 64 ] ;
uint32_t part_total_pixels [ 2 ] = { 0 } ;
for ( uint32_t y = 0 ; y < BLOCK_H ; y + + )
{
for ( uint32_t x = 0 ; x < BLOCK_W ; x + + )
{
const uint32_t part_index = pat_vec [ x + y * 6 ] ;
uint32_t l = part_total_pixels [ part_index ] ;
part_pixels_q16 [ part_index ] [ l ] = block_pixels_q16 [ y ] [ x ] ;
part_half_pixels [ part_index ] [ l ] = half_pixels [ y ] [ x ] ;
part_total_pixels [ part_index ] = l + 1 ;
} // x
} // y
uint8_t blk_weights [ 2 ] [ BLOCK_W * BLOCK_H ] ;
for ( uint32_t part_index = 0 ; part_index < 2 ; part_index + + )
{
basist : : half_float decoded_half [ MAX_SUPPORTED_WEIGHT_LEVELS ] [ 3 ] ;
if ( prev_coded_log_blk . m_color_endpoint_modes [ 0 ] = = 7 )
{
status = get_astc_hdr_mode_7_block_colors ( coded_log_blk . m_endpoints + num_endpoint_vals * part_index , & decoded_half [ 0 ] [ 0 ] , nullptr ,
astc_helpers : : get_ise_levels ( coded_log_blk . m_weight_ise_range ) , coded_log_blk . m_weight_ise_range , coded_log_blk . m_endpoint_ise_range ) ;
}
else
{
status = get_astc_hdr_mode_11_block_colors ( coded_log_blk . m_endpoints + num_endpoint_vals * part_index , & decoded_half [ 0 ] [ 0 ] , nullptr ,
astc_helpers : : get_ise_levels ( coded_log_blk . m_weight_ise_range ) , coded_log_blk . m_weight_ise_range , coded_log_blk . m_endpoint_ise_range ) ;
}
assert ( status ) ;
eval_selectors ( part_total_pixels [ part_index ] , blk_weights [ part_index ] , coded_log_blk . m_weight_ise_range ,
( basist : : half_float * ) & part_half_pixels [ part_index ] [ 0 ] [ 0 ] , astc_helpers : : get_ise_levels ( coded_log_blk . m_weight_ise_range ) , & decoded_half [ 0 ] [ 0 ] , coptions , UINT32_MAX ) ;
} // part_index
uint8_t ise_weights [ BLOCK_W * BLOCK_H ] ;
uint32_t src_pixel_index [ 2 ] = { 0 , 0 } ;
for ( uint32_t y = 0 ; y < BLOCK_H ; y + + )
{
for ( uint32_t x = 0 ; x < BLOCK_W ; x + + )
{
const uint32_t part_index = pat_vec [ x + y * 6 ] ;
ise_weights [ x + y * BLOCK_W ] = blk_weights [ part_index ] [ src_pixel_index [ part_index ] ] ;
src_pixel_index [ part_index ] + + ;
} // x
} // y
downsample_ise_weights (
coded_log_blk . m_weight_ise_range , coded_log_blk . m_weight_ise_range ,
BLOCK_W , BLOCK_H ,
grid_x , grid_y ,
ise_weights , coded_log_blk . m_weights ) ;
// Transcode these codable weights to ASTC weights.
uint8_t transcode_weights [ MAX_BLOCK_W * MAX_BLOCK_H ] ;
basist : : astc_6x6_hdr : : requantize_astc_weights ( num_grid_samples , coded_log_blk . m_weights , coded_log_blk . m_weight_ise_range , transcode_weights , decomp_log_blk . m_weight_ise_range ) ;
// Create the block the decoder would transcode into.
copy_weight_grid ( dual_plane , grid_x , grid_y , transcode_weights , decomp_log_blk ) ;
}
else if ( prev_coded_log_blk . m_num_partitions = = 3 )
{
assert ( ! dual_plane ) ;
const int unique_pat_index = g_part3_seed_to_unique_index [ coded_log_blk . m_partition_id ] ;
assert ( ( unique_pat_index > = 0 ) & & ( unique_pat_index < ( int ) NUM_UNIQUE_PARTITIONS3 ) ) ;
const partition_pattern_vec & pat = g_partitions3 [ unique_pat_index ] ;
vec4F part_pixels_q16 [ 3 ] [ 64 ] ;
half_vec3 part_half_pixels [ 3 ] [ 64 ] ;
uint32_t part_total_pixels [ 3 ] = { 0 } ;
for ( uint32_t y = 0 ; y < BLOCK_H ; y + + )
{
for ( uint32_t x = 0 ; x < BLOCK_W ; x + + )
{
const uint32_t part_index = pat . m_parts [ x + y * BLOCK_W ] ;
uint32_t l = part_total_pixels [ part_index ] ;
part_pixels_q16 [ part_index ] [ l ] = block_pixels_q16 [ y ] [ x ] ;
part_half_pixels [ part_index ] [ l ] = half_pixels [ y ] [ x ] ;
part_total_pixels [ part_index ] = l + 1 ;
} // x
} // y
uint8_t blk_weights [ 3 ] [ BLOCK_W * BLOCK_H ] ;
for ( uint32_t part_index = 0 ; part_index < 3 ; part_index + + )
{
basist : : half_float decoded_half [ MAX_SUPPORTED_WEIGHT_LEVELS ] [ 3 ] ;
status = get_astc_hdr_mode_7_block_colors ( coded_log_blk . m_endpoints + num_endpoint_vals * part_index , & decoded_half [ 0 ] [ 0 ] , nullptr ,
astc_helpers : : get_ise_levels ( coded_log_blk . m_weight_ise_range ) , coded_log_blk . m_weight_ise_range , coded_log_blk . m_endpoint_ise_range ) ;
assert ( status ) ;
eval_selectors ( part_total_pixels [ part_index ] , blk_weights [ part_index ] , coded_log_blk . m_weight_ise_range ,
( basist : : half_float * ) & part_half_pixels [ part_index ] [ 0 ] [ 0 ] , astc_helpers : : get_ise_levels ( coded_log_blk . m_weight_ise_range ) , & decoded_half [ 0 ] [ 0 ] , coptions , UINT32_MAX ) ;
} // part_index
uint8_t ise_weights [ BLOCK_W * BLOCK_H ] ;
uint32_t src_pixel_index [ 3 ] = { 0 } ;
for ( uint32_t y = 0 ; y < BLOCK_H ; y + + )
{
for ( uint32_t x = 0 ; x < BLOCK_W ; x + + )
{
const uint32_t part_index = pat . m_parts [ x + y * BLOCK_W ] ;
ise_weights [ x + y * BLOCK_W ] = blk_weights [ part_index ] [ src_pixel_index [ part_index ] ] ;
src_pixel_index [ part_index ] + + ;
} // x
} // y
downsample_ise_weights (
coded_log_blk . m_weight_ise_range , coded_log_blk . m_weight_ise_range ,
BLOCK_W , BLOCK_H ,
grid_x , grid_y ,
ise_weights , coded_log_blk . m_weights ) ;
// Transcode these codable weights to ASTC weights.
uint8_t transcode_weights [ MAX_BLOCK_W * MAX_BLOCK_H ] ;
basist : : astc_6x6_hdr : : requantize_astc_weights ( num_grid_samples , coded_log_blk . m_weights , coded_log_blk . m_weight_ise_range , transcode_weights , decomp_log_blk . m_weight_ise_range ) ;
// Create the block the decoder would transcode into.
copy_weight_grid ( dual_plane , grid_x , grid_y , transcode_weights , decomp_log_blk ) ;
}
if ( ! validate_log_blk ( decomp_log_blk ) )
{
fmt_error_printf ( " pack_astc_block() failed \n " ) ;
return false ;
}
status = decode_astc_block ( BLOCK_W , BLOCK_H , decomp_log_blk , & candidate . m_comp_pixels [ 0 ] [ 0 ] ) ;
if ( ! status )
{
fmt_error_printf ( " decode_astc_block() failed \n " ) ;
return false ;
}
candidate . m_coder . put_bits ( REUSE_CODE , REUSE_CODE_LEN ) ;
candidate . m_coder . put_bits ( reuse_delta_index , REUSE_XY_DELTA_BITS ) ;
encode_values ( candidate . m_coder , num_grid_samples * ( dual_plane ? 2 : 1 ) , coded_log_blk . m_weights , coded_log_blk . m_weight_ise_range ) ;
candidate . m_encoding_type = encoding_type : : cReuse ;
candidate . m_block_mode = prev_candidate . m_block_mode ;
candidate . m_endpoint_mode = prev_candidate . m_endpoint_mode ;
candidate . m_reuse_delta_index = reuse_delta_index ;
candidates . emplace_back ( std : : move ( candidate ) ) ;
} // reuse_delta_index
}
// ---- Solid candidate
if ( global_cfg . m_use_solid_blocks )
{
candidate_encoding candidate ;
candidate . m_coder . reserve ( 24 ) ;
// solid
candidate . m_encoding_type = encoding_type : : cSolid ;
float r = 0.0f , g = 0.0f , b = 0.0f ;
const float LOG_BIAS = .125f ;
bool solid_block = true ;
for ( uint32_t y = 0 ; y < BLOCK_H ; y + + )
{
for ( uint32_t x = 0 ; x < BLOCK_W ; x + + )
{
if ( ( block_pixels [ 0 ] [ 0 ] [ 0 ] ! = block_pixels [ y ] [ x ] [ 0 ] ) | |
( block_pixels [ 0 ] [ 0 ] [ 1 ] ! = block_pixels [ y ] [ x ] [ 1 ] ) | |
( block_pixels [ 0 ] [ 0 ] [ 2 ] ! = block_pixels [ y ] [ x ] [ 2 ] ) )
{
solid_block = false ;
}
r + = log2f ( block_pixels [ y ] [ x ] [ 0 ] + LOG_BIAS ) ;
g + = log2f ( block_pixels [ y ] [ x ] [ 1 ] + LOG_BIAS ) ;
b + = log2f ( block_pixels [ y ] [ x ] [ 2 ] + LOG_BIAS ) ;
}
}
if ( solid_block )
{
r = block_pixels [ 0 ] [ 0 ] [ 0 ] ;
g = block_pixels [ 0 ] [ 0 ] [ 1 ] ;
b = block_pixels [ 0 ] [ 0 ] [ 2 ] ;
}
else
{
r = maximum < float > ( 0.0f , powf ( 2.0f , r * ( 1.0f / ( float ) NUM_BLOCK_PIXELS ) ) - LOG_BIAS ) ;
g = maximum < float > ( 0.0f , powf ( 2.0f , g * ( 1.0f / ( float ) NUM_BLOCK_PIXELS ) ) - LOG_BIAS ) ;
b = maximum < float > ( 0.0f , powf ( 2.0f , b * ( 1.0f / ( float ) NUM_BLOCK_PIXELS ) ) - LOG_BIAS ) ;
r = minimum < float > ( r , basist : : MAX_HALF_FLOAT ) ;
g = minimum < float > ( g , basist : : MAX_HALF_FLOAT ) ;
b = minimum < float > ( b , basist : : MAX_HALF_FLOAT ) ;
}
basist : : half_float rh = float_to_half_non_neg_no_nan_inf ( r ) , gh = float_to_half_non_neg_no_nan_inf ( g ) , bh = float_to_half_non_neg_no_nan_inf ( b ) ;
candidate . m_solid_color [ 0 ] = rh ;
candidate . m_solid_color [ 1 ] = gh ;
candidate . m_solid_color [ 2 ] = bh ;
candidate . m_coder . put_bits ( SOLID_CODE , SOLID_CODE_LEN ) ;
candidate . m_coder . put_bits ( rh , 15 ) ;
candidate . m_coder . put_bits ( gh , 15 ) ;
candidate . m_coder . put_bits ( bh , 15 ) ;
vec3F cp ( basist : : half_to_float ( rh ) , basist : : half_to_float ( gh ) , basist : : half_to_float ( bh ) ) ;
for ( uint32_t y = 0 ; y < BLOCK_H ; y + + )
for ( uint32_t x = 0 ; x < BLOCK_W ; x + + )
candidate . m_comp_pixels [ y ] [ x ] = cp ;
astc_helpers : : log_astc_block & log_blk = candidate . m_coded_log_blk ;
log_blk . clear ( ) ;
log_blk . m_solid_color_flag_hdr = true ;
log_blk . m_solid_color [ 0 ] = rh ;
log_blk . m_solid_color [ 1 ] = gh ;
log_blk . m_solid_color [ 2 ] = bh ;
log_blk . m_solid_color [ 3 ] = basist : : float_to_half ( 1.0f ) ;
candidate . m_decomp_log_blk = log_blk ;
candidates . emplace_back ( std : : move ( candidate ) ) ;
}
if ( ( ! is_solid_block ) | | ( ! global_cfg . m_use_solid_blocks ) )
{
static uint8_t s_parts2_normal [ 5 ] = { 0 , 2 , 4 , 6 , 8 } ;
static uint8_t s_parts3_normal [ 5 ] = { 0 , 0 , 4 , 6 , 8 } ;
static uint8_t s_parts2_complex [ 5 ] = { 0 , 4 , 8 , 10 , 16 } ;
static uint8_t s_parts3_complex [ 5 ] = { 0 , 0 , 8 , 10 , 16 } ;
static uint8_t s_parts2_very_complex [ 5 ] = { 0 , 8 , 12 , 14 , 20 } ;
static uint8_t s_parts3_very_complex [ 5 ] = { 0 , 0 , 12 , 14 , 20 } ;
uint32_t total_parts2 = 0 , total_parts3 = 0 ;
assert ( comp_level < 5 ) ;
if ( ( very_simple_block ) & & ( comp_level < = 3 ) )
{
// Block's std dev is so low that 2-3 subsets are unlikely to help much
total_parts2 = 0 ;
total_parts3 = 0 ;
debug_state . m_total_part2_stats [ 0 ] . fetch_add ( 1 , std : : memory_order_relaxed ) ;
}
else if ( very_complex_block )
{
total_parts2 = s_parts2_very_complex [ comp_level ] ;
total_parts3 = s_parts3_very_complex [ comp_level ] ;
if ( global_cfg . m_extra_patterns_flag )
{
total_parts2 + = ( comp_level = = 4 ) ? 30 : 20 ;
total_parts3 + = ( comp_level = = 4 ) ? 30 : 20 ;
}
debug_state . m_total_part2_stats [ 2 ] . fetch_add ( 1 , std : : memory_order_relaxed ) ;
}
else if ( complex_block )
{
total_parts2 = s_parts2_complex [ comp_level ] ;
total_parts3 = s_parts3_complex [ comp_level ] ;
if ( global_cfg . m_extra_patterns_flag )
{
total_parts2 + = ( comp_level = = 4 ) ? 15 : 10 ;
total_parts3 + = ( comp_level = = 4 ) ? 15 : 10 ;
}
debug_state . m_total_part2_stats [ 3 ] . fetch_add ( 1 , std : : memory_order_relaxed ) ;
}
else
{
// moderate complexity - use defaults
total_parts2 = s_parts2_normal [ comp_level ] ;
total_parts3 = s_parts3_normal [ comp_level ] ;
if ( global_cfg . m_extra_patterns_flag )
{
total_parts2 + = 5 ;
total_parts3 + = 5 ;
}
debug_state . m_total_part2_stats [ 1 ] . fetch_add ( 1 , std : : memory_order_relaxed ) ;
}
if ( ! any_2subset_enabled )
total_parts2 = 0 ;
if ( ! any_3subset_enabled )
total_parts3 = 0 ;
int best_parts2_mode11 [ NUM_UNIQUE_PARTITIONS2 ] , best_parts2_mode7 [ NUM_UNIQUE_PARTITIONS2 ] ;
bool has_estimated_parts2 = false ;
if ( total_parts2 )
{
if ( global_cfg . m_brute_force_partition_matching )
{
int candidate_pats2 [ NUM_UNIQUE_PARTITIONS2 ] ;
for ( uint32_t i = 0 ; i < NUM_UNIQUE_PARTITIONS2 ; i + + )
candidate_pats2 [ i ] = i ;
if ( any_2subset_enabled )
{
estimate_partitions_mode7_and_11 (
2 ,
NUM_UNIQUE_PARTITIONS2 , g_partitions2 ,
NUM_UNIQUE_PARTITIONS2 , ( uint32_t * ) candidate_pats2 ,
& half_pixels_as_floats [ 0 ] [ 0 ] ,
coptions ,
total_parts2 , best_parts2_mode11 , best_parts2_mode7 ) ;
}
has_estimated_parts2 = true ;
}
else
{
if ( comp_level > = 1 )
{
const uint32_t MAX_CANDIDATES2 = 48 ;
int candidate_pats2 [ MAX_CANDIDATES2 * 2 ] ;
uint32_t num_candidate_pats2 = maximum ( ( total_parts2 * 3 ) / 2 , very_complex_block ? MAX_CANDIDATES2 : ( MAX_CANDIDATES2 / 2 ) ) ;
num_candidate_pats2 = minimum < uint32_t > ( num_candidate_pats2 , ( uint32_t ) std : : size ( candidate_pats2 ) ) ;
has_estimated_parts2 = estimate_partition2_6x6 ( ( basist : : half_float ( * ) [ 3 ] ) half_pixels , candidate_pats2 , num_candidate_pats2 ) ;
if ( has_estimated_parts2 )
{
estimate_partitions_mode7_and_11 (
2 ,
NUM_UNIQUE_PARTITIONS2 , g_partitions2 ,
num_candidate_pats2 , ( uint32_t * ) candidate_pats2 ,
& half_pixels_as_floats [ 0 ] [ 0 ] ,
coptions ,
total_parts2 , best_parts2_mode11 , best_parts2_mode7 ) ;
}
}
else
{
has_estimated_parts2 = estimate_partition2_6x6 ( ( basist : : half_float ( * ) [ 3 ] ) half_pixels , best_parts2_mode11 , total_parts2 ) ;
if ( ( has_estimated_parts2 ) & & ( any_2subset_mode7_enabled ) )
memcpy ( best_parts2_mode7 , best_parts2_mode11 , total_parts2 * sizeof ( best_parts2_mode7 [ 0 ] ) ) ;
}
}
}
int best_parts3 [ NUM_UNIQUE_PARTITIONS3 ] ;
bool has_estimated_parts3 = false ;
if ( total_parts3 )
{
#if 0
has_estimated_parts3 = estimate_partition3_6x6 ( ( basist : : half_float ( * ) [ 3 ] ) half_pixels , best_parts3 , total_parts3 ) ;
# elif 1
if ( global_cfg . m_brute_force_partition_matching )
{
int candidate_pats3 [ NUM_UNIQUE_PARTITIONS3 ] ;
for ( uint32_t i = 0 ; i < NUM_UNIQUE_PARTITIONS3 ; i + + )
candidate_pats3 [ i ] = i ;
estimate_partitions_mode7 (
3 ,
NUM_UNIQUE_PARTITIONS3 , g_partitions3 ,
NUM_UNIQUE_PARTITIONS3 , ( uint32_t * ) candidate_pats3 ,
& half_pixels_as_floats [ 0 ] [ 0 ] ,
coptions ,
total_parts3 , ( uint32_t * ) best_parts3 ) ;
has_estimated_parts3 = true ;
}
else
{
const uint32_t MAX_CANDIDATES3 = 48 ;
int candidate_pats3 [ MAX_CANDIDATES3 * 2 ] ;
uint32_t num_candidate_pats3 = maximum ( ( total_parts3 * 3 ) / 2 , very_complex_block ? MAX_CANDIDATES3 : ( MAX_CANDIDATES3 / 2 ) ) ;
num_candidate_pats3 = minimum < uint32_t > ( num_candidate_pats3 , ( uint32_t ) std : : size ( candidate_pats3 ) ) ;
has_estimated_parts3 = estimate_partition3_6x6 ( ( basist : : half_float ( * ) [ 3 ] ) half_pixels , candidate_pats3 , num_candidate_pats3 ) ;
if ( has_estimated_parts3 )
{
estimate_partitions_mode7 (
3 ,
NUM_UNIQUE_PARTITIONS3 , g_partitions3 ,
num_candidate_pats3 , ( uint32_t * ) candidate_pats3 ,
& half_pixels_as_floats [ 0 ] [ 0 ] ,
coptions ,
total_parts3 , ( uint32_t * ) best_parts3 ) ;
}
}
# endif
}
const opt_mode_t mode11_opt_mode = complex_block ? cWeightedLeastSquares : cOrdinaryLeastSquares ;
// ---- Encoded block candidate
for ( uint32_t block_mode_iter = 0 ; block_mode_iter < ( uint32_t ) block_mode : : cBMTotalModes ; block_mode_iter + + )
{
const block_mode bm = ( block_mode ) block_mode_iter ;
if ( comp_level = = 0 )
{
if ( ( g_block_mode_descs [ block_mode_iter ] . m_flags & BASIST_HDR_6X6_LEVEL0 ) = = 0 )
continue ;
}
else if ( comp_level = = 1 )
{
if ( ( g_block_mode_descs [ block_mode_iter ] . m_flags & BASIST_HDR_6X6_LEVEL1 ) = = 0 )
continue ;
}
else if ( comp_level = = 2 )
{
if ( ( g_block_mode_descs [ block_mode_iter ] . m_flags & BASIST_HDR_6X6_LEVEL2 ) = = 0 )
continue ;
}
if ( global_cfg . m_block_stat_optimizations_flag )
{
if ( ( comp_level < = 3 ) & & ( g_block_mode_descs [ block_mode_iter ] . m_dp ) )
{
if ( ( global_cfg . m_lambda > 0.0f ) & & ( ! complex_block ) & & ( g_block_mode_descs [ block_mode_iter ] . m_grid_x = = 2 ) & & ( g_block_mode_descs [ block_mode_iter ] . m_grid_y = = 2 ) )
{
if ( g_block_mode_descs [ block_mode_iter ] . m_dp_channel ! = desired_dp_chan_2x2 )
continue ;
}
else
{
if ( g_block_mode_descs [ block_mode_iter ] . m_dp_channel ! = desired_dp_chan )
continue ;
}
}
if ( comp_level < = 3 )
{
const uint32_t grid_x = g_block_mode_descs [ block_mode_iter ] . m_grid_x ;
const uint32_t grid_y = g_block_mode_descs [ block_mode_iter ] . m_grid_y ;
if ( ! g_block_mode_descs [ block_mode_iter ] . m_dp )
{
// Minor gain (.5-1% fewer candidates)
if ( very_detailed_block )
{
if ( grid_x * grid_y < = 12 )
{
debug_state . m_detail_stats [ 0 ] . fetch_add ( 1 , std : : memory_order_relaxed ) ;
continue ;
}
}
// Major gains (10-25% fewer candidates)
if ( very_blurry_block )
{
if ( ( grid_x > 4 ) | | ( grid_y > 4 ) | | ( g_block_mode_descs [ block_mode_iter ] . m_num_partitions > 1 ) )
{
debug_state . m_detail_stats [ 1 ] . fetch_add ( 1 , std : : memory_order_relaxed ) ;
continue ;
}
}
if ( super_blurry_block )
{
if ( ( grid_x > 3 ) | | ( grid_y > 3 ) | | ( g_block_mode_descs [ block_mode_iter ] . m_num_partitions > 1 ) )
{
debug_state . m_detail_stats [ 2 ] . fetch_add ( 1 , std : : memory_order_relaxed ) ;
continue ;
}
}
}
if ( grid_x ! = grid_y )
{
if ( grid_x < grid_y )
{
if ( ! filter_horizontally )
{
debug_state . m_detail_stats [ 3 ] . fetch_add ( 1 , std : : memory_order_relaxed ) ;
continue ;
}
}
else
{
if ( filter_horizontally )
{
debug_state . m_detail_stats [ 4 ] . fetch_add ( 1 , std : : memory_order_relaxed ) ;
continue ;
}
}
}
}
if ( global_cfg . m_lambda = = 0.0f )
{
// Rarely useful if lambda=0
if ( ( g_block_mode_descs [ block_mode_iter ] . m_grid_x = = 2 ) & & ( g_block_mode_descs [ block_mode_iter ] . m_grid_y = = 2 ) )
continue ;
}
} // block_stat_optimizations_flag
if ( ( ! use_single_subset_mode7 ) & &
( g_block_mode_descs [ block_mode_iter ] . m_cem = = 7 ) & &
( g_block_mode_descs [ block_mode_iter ] . m_num_partitions = = 1 ) )
{
debug_state . m_total_mode7_skips . fetch_add ( 1 , std : : memory_order_relaxed ) ;
continue ;
}
for ( uint32_t endpoint_mode_iter = 0 ; endpoint_mode_iter < ( uint32_t ) endpoint_mode : : cTotal ; endpoint_mode_iter + + )
{
if ( global_cfg . m_lambda = = 0.0f )
{
// No use trying anything else
if ( endpoint_mode_iter ! = ( uint32_t ) endpoint_mode : : cRaw )
continue ;
}
if ( global_cfg . m_disable_delta_endpoint_usage )
{
if ( ( endpoint_mode_iter = = ( uint32_t ) endpoint_mode : : cUseUpperDelta ) | | ( endpoint_mode_iter = = ( uint32_t ) endpoint_mode : : cUseLeftDelta ) )
continue ;
}
if ( ! global_cfg . m_favor_higher_compression )
{
if ( comp_level = = 0 )
{
if ( endpoint_mode_iter = = ( uint32_t ) endpoint_mode : : cUseUpperDelta )
continue ;
}
if ( comp_level < = 1 )
{
if ( ( endpoint_mode_iter = = ( uint32_t ) endpoint_mode : : cUseLeft ) | | ( endpoint_mode_iter = = ( uint32_t ) endpoint_mode : : cUseUpper ) )
continue ;
}
}
const endpoint_mode em = ( endpoint_mode ) endpoint_mode_iter ;
switch ( em )
{
case endpoint_mode : : cUseLeft :
case endpoint_mode : : cUseUpper :
{
const block_mode_desc & local_md = g_block_mode_descs [ block_mode_iter ] ;
const uint32_t cem = local_md . m_cem ;
if ( local_md . m_num_partitions > 1 )
break ;
if ( ( em = = endpoint_mode : : cUseLeft ) & & ( ! has_left_neighbor ) )
break ;
else if ( ( em = = endpoint_mode : : cUseUpper ) & & ( ! has_upper_neighbor ) )
break ;
candidate_encoding candidate ;
candidate . m_coder . reserve ( 24 ) ;
astc_helpers : : log_astc_block & coded_log_blk = candidate . m_coded_log_blk ;
int nx = bx , ny = by ;
if ( em = = endpoint_mode : : cUseLeft )
nx - - ;
else
ny - - ;
const candidate_encoding & neighbor_blk = enc_state . coded_blocks ( nx , ny ) ;
if ( neighbor_blk . m_encoding_type = = encoding_type : : cSolid )
break ;
assert ( ( neighbor_blk . m_encoding_type = = encoding_type : : cBlock ) | | ( neighbor_blk . m_encoding_type = = encoding_type : : cReuse ) ) ;
const block_mode_desc & neighbor_md = g_block_mode_descs [ ( uint32_t ) neighbor_blk . m_block_mode ] ;
if ( neighbor_md . m_cem ! = cem )
break ;
assert ( neighbor_blk . m_coded_log_blk . m_color_endpoint_modes [ 0 ] = = cem ) ;
const uint32_t grid_x = local_md . m_grid_x , grid_y = local_md . m_grid_y ;
const bool dual_plane = local_md . m_dp ;
const uint32_t num_grid_samples = grid_x * grid_y ;
const uint32_t num_endpoint_vals = get_num_endpoint_vals ( local_md . m_cem ) ;
coded_log_blk . m_grid_width = ( uint8_t ) grid_x ;
coded_log_blk . m_grid_height = ( uint8_t ) grid_y ;
coded_log_blk . m_dual_plane = ( uint8_t ) dual_plane ;
coded_log_blk . m_color_component_selector = ( uint8_t ) local_md . m_dp_channel ;
coded_log_blk . m_num_partitions = 1 ;
coded_log_blk . m_color_endpoint_modes [ 0 ] = ( uint8_t ) neighbor_md . m_cem ;
coded_log_blk . m_weight_ise_range = ( uint8_t ) local_md . m_weight_ise_range ;
// We're not explicitly writing any endpoints, just reusing existing ones. So copy the neighbor's endpoints unchanged (so no loss).
coded_log_blk . m_endpoint_ise_range = neighbor_blk . m_coded_log_blk . m_endpoint_ise_range ;
memcpy ( coded_log_blk . m_endpoints , neighbor_blk . m_coded_log_blk . m_endpoints , num_endpoint_vals ) ;
uint8_t transcode_endpoints [ basist : : NUM_MODE11_ENDPOINTS ] ;
// Requantize the neighbor's endpoints to whatever we'll have to transcode into to make a valid ASTC encoding.
basist : : astc_6x6_hdr : : requantize_ise_endpoints ( neighbor_md . m_cem ,
neighbor_blk . m_coded_log_blk . m_endpoint_ise_range , neighbor_blk . m_coded_log_blk . m_endpoints ,
local_md . m_transcode_endpoint_ise_range , transcode_endpoints ) ;
// Now encode the block using the transcoded endpoints
basist : : half_float decoded_half [ MAX_SUPPORTED_WEIGHT_LEVELS ] [ 3 ] ;
if ( cem = = 7 )
{
status = get_astc_hdr_mode_7_block_colors ( transcode_endpoints , & decoded_half [ 0 ] [ 0 ] , nullptr ,
astc_helpers : : get_ise_levels ( local_md . m_weight_ise_range ) , local_md . m_weight_ise_range , local_md . m_transcode_endpoint_ise_range ) ;
}
else
{
status = get_astc_hdr_mode_11_block_colors ( transcode_endpoints , & decoded_half [ 0 ] [ 0 ] , nullptr ,
astc_helpers : : get_ise_levels ( local_md . m_weight_ise_range ) , local_md . m_weight_ise_range , local_md . m_transcode_endpoint_ise_range ) ;
}
if ( ! status )
break ;
uint8_t trial_weights0 [ BLOCK_W * BLOCK_H ] , trial_weights1 [ BLOCK_W * BLOCK_H ] ;
if ( dual_plane )
{
eval_selectors_dual_plane ( local_md . m_dp_channel , BLOCK_W * BLOCK_H , trial_weights0 , trial_weights1 , ( basist : : half_float * ) & half_pixels [ 0 ] [ 0 ] [ 0 ] , astc_helpers : : get_ise_levels ( local_md . m_weight_ise_range ) , & decoded_half [ 0 ] [ 0 ] , coptions , UINT32_MAX ) ;
downsample_ise_weights_dual_plane (
local_md . m_weight_ise_range , local_md . m_weight_ise_range ,
BLOCK_W , BLOCK_H ,
grid_x , grid_y ,
trial_weights0 , trial_weights1 , coded_log_blk . m_weights ) ;
}
else
{
eval_selectors ( BLOCK_W * BLOCK_H , trial_weights0 , local_md . m_weight_ise_range , ( basist : : half_float * ) & half_pixels [ 0 ] [ 0 ] [ 0 ] , astc_helpers : : get_ise_levels ( local_md . m_weight_ise_range ) , & decoded_half [ 0 ] [ 0 ] , coptions , UINT32_MAX ) ;
downsample_ise_weights (
local_md . m_weight_ise_range , local_md . m_weight_ise_range ,
BLOCK_W , BLOCK_H ,
grid_x , grid_y ,
trial_weights0 , coded_log_blk . m_weights ) ;
}
// Transcode these codable weights to ASTC weights.
uint8_t transcode_weights [ MAX_BLOCK_W * MAX_BLOCK_H * 2 ] ;
basist : : astc_6x6_hdr : : requantize_astc_weights ( num_grid_samples * ( dual_plane ? 2 : 1 ) , coded_log_blk . m_weights , local_md . m_weight_ise_range , transcode_weights , local_md . m_transcode_weight_ise_range ) ;
// Create the block the decoder would transcode into.
astc_helpers : : log_astc_block & decomp_blk = candidate . m_decomp_log_blk ;
decomp_blk . clear ( ) ;
decomp_blk . m_color_endpoint_modes [ 0 ] = ( uint8_t ) local_md . m_cem ;
decomp_blk . m_dual_plane = local_md . m_dp ;
decomp_blk . m_color_component_selector = ( uint8_t ) local_md . m_dp_channel ;
decomp_blk . m_num_partitions = 1 ;
decomp_blk . m_endpoint_ise_range = ( uint8_t ) local_md . m_transcode_endpoint_ise_range ;
decomp_blk . m_weight_ise_range = ( uint8_t ) local_md . m_transcode_weight_ise_range ;
memcpy ( decomp_blk . m_endpoints , transcode_endpoints , num_endpoint_vals ) ;
copy_weight_grid ( dual_plane , grid_x , grid_y , transcode_weights , decomp_blk ) ;
if ( ! validate_log_blk ( decomp_blk ) )
{
fmt_error_printf ( " pack_astc_block() failed \n " ) ;
return false ;
}
status = decode_astc_block ( BLOCK_W , BLOCK_H , decomp_blk , & candidate . m_comp_pixels [ 0 ] [ 0 ] ) ;
if ( ! status )
{
fmt_error_printf ( " decode_astc_block() failed \n " ) ;
return false ;
}
candidate . m_coder . put_bits ( BLOCK_CODE , BLOCK_CODE_LEN ) ;
code_block ( candidate . m_coder , candidate . m_coded_log_blk , ( block_mode ) block_mode_iter , em , nullptr ) ;
candidate . m_encoding_type = encoding_type : : cBlock ;
candidate . m_endpoint_mode = em ;
candidate . m_block_mode = bm ;
candidates . emplace_back ( std : : move ( candidate ) ) ;
break ;
}
case endpoint_mode : : cUseLeftDelta :
case endpoint_mode : : cUseUpperDelta :
{
const block_mode_desc & local_md = g_block_mode_descs [ block_mode_iter ] ;
const uint32_t cem = local_md . m_cem ;
if ( local_md . m_num_partitions > 1 )
break ;
if ( ( em = = endpoint_mode : : cUseLeftDelta ) & & ( ! has_left_neighbor ) )
break ;
else if ( ( em = = endpoint_mode : : cUseUpperDelta ) & & ( ! has_upper_neighbor ) )
break ;
candidate_encoding candidate ;
candidate . m_coder . reserve ( 24 ) ;
astc_helpers : : log_astc_block & coded_log_blk = candidate . m_coded_log_blk ;
int nx = bx , ny = by ;
if ( em = = endpoint_mode : : cUseLeftDelta )
nx - - ;
else
ny - - ;
const candidate_encoding & neighbor_blk = enc_state . coded_blocks ( nx , ny ) ;
if ( neighbor_blk . m_encoding_type = = encoding_type : : cSolid )
break ;
assert ( ( neighbor_blk . m_encoding_type = = encoding_type : : cBlock ) | | ( neighbor_blk . m_encoding_type = = encoding_type : : cReuse ) ) ;
const block_mode_desc & neighbor_md = g_block_mode_descs [ ( uint32_t ) neighbor_blk . m_block_mode ] ;
if ( neighbor_md . m_cem ! = cem )
break ;
assert ( neighbor_md . m_cem = = local_md . m_cem ) ;
const uint32_t grid_x = local_md . m_grid_x , grid_y = local_md . m_grid_y ;
const bool dual_plane = local_md . m_dp ;
const uint32_t num_grid_samples = grid_x * grid_y ;
const uint32_t num_endpoint_vals = get_num_endpoint_vals ( local_md . m_cem ) ;
// Dequantize neighbor's endpoints to ISE 20
uint8_t neighbor_endpoints_ise20 [ basist : : NUM_MODE11_ENDPOINTS ] ;
basist : : astc_6x6_hdr : : requantize_ise_endpoints ( neighbor_md . m_cem ,
neighbor_blk . m_coded_log_blk . m_endpoint_ise_range , neighbor_blk . m_coded_log_blk . m_endpoints ,
astc_helpers : : BISE_256_LEVELS , neighbor_endpoints_ise20 ) ;
// Requantize neighbor's endpoints to our local desired coding ISE range
uint8_t neighbor_endpoints_coding_ise_local [ basist : : NUM_MODE11_ENDPOINTS ] ;
basist : : astc_6x6_hdr : : requantize_ise_endpoints ( neighbor_md . m_cem , astc_helpers : : BISE_256_LEVELS , neighbor_endpoints_ise20 , local_md . m_endpoint_ise_range , neighbor_endpoints_coding_ise_local ) ;
uint8_t blk_endpoints [ basist : : NUM_MODE11_ENDPOINTS ] ;
uint8_t blk_weights0 [ NUM_BLOCK_PIXELS ] , blk_weights1 [ NUM_BLOCK_PIXELS ] ;
// Now try to encode the current block using the neighbor's endpoints submode.
double err = 0.0f ;
uint32_t best_submode = 0 ;
if ( cem = = 7 )
{
int maj_index , submode_index ;
decode_cem_7_config ( neighbor_endpoints_ise20 , submode_index , maj_index ) ;
int first_submode = submode_index , last_submode = submode_index ;
err = encode_astc_hdr_block_mode_7 (
NUM_BLOCK_PIXELS ,
( basist : : half_float ( * ) [ 3 ] ) half_pixels , ( vec4F * ) block_pixels_q16 ,
local_md . m_weight_ise_range ,
best_submode ,
BIG_FLOAT_VAL ,
blk_endpoints , blk_weights0 ,
coptions ,
local_md . m_endpoint_ise_range ,
first_submode , last_submode ,
& enc_block_stats ) ;
}
else
{
int maj_index , submode_index ;
decode_cem_11_config ( neighbor_endpoints_ise20 , submode_index , maj_index ) ;
int first_submode = - 1 , last_submode = - 1 ;
if ( maj_index = = 3 )
{
// direct
}
else
{
first_submode = submode_index ;
last_submode = submode_index ;
}
if ( dual_plane )
{
err = encode_astc_hdr_block_mode_11_dual_plane (
NUM_BLOCK_PIXELS ,
( basist : : half_float ( * ) [ 3 ] ) half_pixels , ( vec4F * ) block_pixels_q16 ,
local_md . m_dp_channel ,
local_md . m_weight_ise_range ,
best_submode ,
BIG_FLOAT_VAL ,
blk_endpoints , blk_weights0 , blk_weights1 ,
coptions ,
false ,
local_md . m_endpoint_ise_range ,
false , //uber_mode_flag,
false ,
first_submode , last_submode , true ) ;
}
else
{
err = encode_astc_hdr_block_mode_11 (
NUM_BLOCK_PIXELS ,
( basist : : half_float ( * ) [ 3 ] ) half_pixels , ( vec4F * ) block_pixels_q16 ,
local_md . m_weight_ise_range ,
best_submode ,
BIG_FLOAT_VAL ,
blk_endpoints , blk_weights0 ,
coptions ,
false ,
local_md . m_endpoint_ise_range ,
false , //uber_mode_flag,
false ,
first_submode , last_submode , true ,
mode11_opt_mode ,
& enc_block_stats ) ;
}
}
if ( err = = BIG_FLOAT_VAL )
break ;
uint8_t endpoint_deltas [ basist : : NUM_MODE11_ENDPOINTS ] ;
// TODO: For now, just try 5 bits for each endpoint. Can tune later.
// This isn't right, it's computing the deltas in ISE space.
//const uint32_t NUM_ENDPOINT_DELTA_BITS = 5;
const int total_endpoint_delta_vals = 1 < < NUM_ENDPOINT_DELTA_BITS ;
const int low_delta_limit = - ( total_endpoint_delta_vals / 2 ) , high_delta_limit = ( total_endpoint_delta_vals / 2 ) - 1 ;
const auto & ise_to_rank = astc_helpers : : g_dequant_tables . get_endpoint_tab ( local_md . m_endpoint_ise_range ) . m_ISE_to_rank ;
bool all_deltas_in_limits = true ;
for ( uint32_t i = 0 ; i < num_endpoint_vals ; i + + )
{
int endpoint_delta = ( int ) ise_to_rank [ blk_endpoints [ i ] ] - ( int ) ise_to_rank [ neighbor_endpoints_coding_ise_local [ i ] ] ;
if ( ( endpoint_delta < low_delta_limit ) | | ( endpoint_delta > high_delta_limit ) )
all_deltas_in_limits = false ;
endpoint_deltas [ i ] = ( uint8_t ) ( endpoint_delta + - low_delta_limit ) ;
}
if ( all_deltas_in_limits )
{
coded_log_blk . m_grid_width = ( uint8_t ) grid_x ;
coded_log_blk . m_grid_height = ( uint8_t ) grid_y ;
coded_log_blk . m_dual_plane = ( uint8_t ) dual_plane ;
coded_log_blk . m_color_component_selector = ( uint8_t ) local_md . m_dp_channel ;
coded_log_blk . m_num_partitions = 1 ;
coded_log_blk . m_color_endpoint_modes [ 0 ] = ( uint8_t ) local_md . m_cem ;
coded_log_blk . m_weight_ise_range = ( uint8_t ) local_md . m_weight_ise_range ;
coded_log_blk . m_endpoint_ise_range = ( uint8_t ) local_md . m_endpoint_ise_range ;
memcpy ( coded_log_blk . m_endpoints , blk_endpoints , num_endpoint_vals ) ;
uint8_t transcode_endpoints [ basist : : NUM_MODE11_ENDPOINTS ] ;
uint8_t transcode_weights [ MAX_BLOCK_W * MAX_BLOCK_H * 2 ] ;
basist : : astc_6x6_hdr : : requantize_ise_endpoints ( local_md . m_cem , local_md . m_endpoint_ise_range , blk_endpoints , local_md . m_transcode_endpoint_ise_range , transcode_endpoints ) ;
if ( dual_plane )
{
downsample_ise_weights_dual_plane (
local_md . m_weight_ise_range , local_md . m_weight_ise_range ,
BLOCK_W , BLOCK_H ,
grid_x , grid_y ,
blk_weights0 , blk_weights1 ,
coded_log_blk . m_weights ) ;
}
else
{
downsample_ise_weights (
local_md . m_weight_ise_range , local_md . m_weight_ise_range ,
BLOCK_W , BLOCK_H ,
grid_x , grid_y ,
blk_weights0 , coded_log_blk . m_weights ) ;
}
basist : : astc_6x6_hdr : : requantize_astc_weights ( num_grid_samples * ( dual_plane ? 2 : 1 ) , coded_log_blk . m_weights , local_md . m_weight_ise_range , transcode_weights , local_md . m_transcode_weight_ise_range ) ;
// Create the block the decoder would transcode into.
astc_helpers : : log_astc_block & decomp_blk = candidate . m_decomp_log_blk ;
decomp_blk . clear ( ) ;
decomp_blk . m_color_endpoint_modes [ 0 ] = ( uint8_t ) local_md . m_cem ;
decomp_blk . m_dual_plane = local_md . m_dp ;
decomp_blk . m_color_component_selector = ( uint8_t ) local_md . m_dp_channel ;
decomp_blk . m_num_partitions = 1 ;
decomp_blk . m_endpoint_ise_range = ( uint8_t ) local_md . m_transcode_endpoint_ise_range ;
decomp_blk . m_weight_ise_range = ( uint8_t ) local_md . m_transcode_weight_ise_range ;
memcpy ( decomp_blk . m_endpoints , transcode_endpoints , num_endpoint_vals ) ;
copy_weight_grid ( dual_plane , grid_x , grid_y , transcode_weights , decomp_blk ) ;
if ( ! validate_log_blk ( decomp_blk ) )
{
fmt_error_printf ( " pack_astc_block() failed \n " ) ;
return false ;
}
status = decode_astc_block ( BLOCK_W , BLOCK_H , decomp_blk , & candidate . m_comp_pixels [ 0 ] [ 0 ] ) ;
if ( ! status )
{
fmt_error_printf ( " decode_astc_block() failed \n " ) ;
return false ;
}
candidate . m_coder . put_bits ( BLOCK_CODE , BLOCK_CODE_LEN ) ;
code_block ( candidate . m_coder , candidate . m_coded_log_blk , bm , em , endpoint_deltas ) ;
candidate . m_encoding_type = encoding_type : : cBlock ;
candidate . m_endpoint_mode = em ;
candidate . m_block_mode = bm ;
candidates . emplace_back ( std : : move ( candidate ) ) ;
}
break ;
}
case endpoint_mode : : cRaw :
{
//if (candidates.size() == 339)
// fmt_printf("!");
const auto & mode_desc = g_block_mode_descs [ ( uint32_t ) bm ] ;
const uint32_t cem = mode_desc . m_cem ;
//const uint32_t num_endpoint_vals = get_num_endpoint_vals(cem);
const bool dual_plane = mode_desc . m_dp ;
if ( ( global_cfg . m_disable_twothree_subsets ) & & ( mode_desc . m_num_partitions > = 2 ) )
break ;
if ( mode_desc . m_num_partitions = = 3 )
{
assert ( ! dual_plane ) ;
if ( ! has_estimated_parts3 )
break ;
assert ( mode_desc . m_weight_ise_range = = mode_desc . m_transcode_weight_ise_range ) ;
assert ( mode_desc . m_endpoint_ise_range = = mode_desc . m_transcode_endpoint_ise_range ) ;
trial_result res ;
status = encode_block_3_subsets (
res ,
cem ,
mode_desc . m_grid_x , mode_desc . m_grid_y ,
mode_desc . m_weight_ise_range , mode_desc . m_endpoint_ise_range ,
& half_pixels [ 0 ] [ 0 ] , ( vec4F * ) block_pixels_q16 ,
coptions ,
uber_mode_flag ,
best_parts3 , total_parts3 , comp_level , mode11_opt_mode ) ;
if ( ! status )
break ;
assert ( res . m_valid ) ;
candidate_encoding candidate ;
candidate . m_coder . reserve ( 24 ) ;
astc_helpers : : log_astc_block & coded_log_blk = candidate . m_coded_log_blk ;
coded_log_blk = res . m_log_blk ;
astc_helpers : : log_astc_block & decomp_blk = candidate . m_decomp_log_blk ;
decomp_blk = res . m_log_blk ;
if ( ! validate_log_blk ( decomp_blk ) )
{
fmt_error_printf ( " pack_astc_block() failed \n " ) ;
return false ;
}
status = decode_astc_block ( BLOCK_W , BLOCK_H , decomp_blk , & candidate . m_comp_pixels [ 0 ] [ 0 ] ) ;
if ( ! status )
{
fmt_error_printf ( " decode_astc_block() failed \n " ) ;
return false ;
}
candidate . m_coder . put_bits ( BLOCK_CODE , BLOCK_CODE_LEN ) ;
code_block ( candidate . m_coder , candidate . m_coded_log_blk , bm , em , nullptr ) ;
candidate . m_encoding_type = encoding_type : : cBlock ;
candidate . m_endpoint_mode = em ;
candidate . m_block_mode = bm ;
candidates . emplace_back ( std : : move ( candidate ) ) ;
}
else if ( mode_desc . m_num_partitions = = 2 )
{
assert ( ! dual_plane ) ;
if ( ! has_estimated_parts2 )
break ;
assert ( mode_desc . m_weight_ise_range = = mode_desc . m_transcode_weight_ise_range ) ;
assert ( mode_desc . m_endpoint_ise_range = = mode_desc . m_transcode_endpoint_ise_range ) ;
for ( uint32_t est_part_iter = 0 ; est_part_iter < total_parts2 ; est_part_iter + + )
{
trial_result results [ 2 ] ;
assert ( ( ( cem = = 11 ) & & any_2subset_mode11_enabled ) | | ( ( cem = = 7 ) & & any_2subset_mode7_enabled ) ) ;
status = encode_block_2_subsets (
results ,
mode_desc . m_grid_x , mode_desc . m_grid_y ,
mode_desc . m_cem ,
mode_desc . m_weight_ise_range , mode_desc . m_endpoint_ise_range ,
& half_pixels [ 0 ] [ 0 ] , ( vec4F * ) block_pixels_q16 ,
coptions ,
uber_mode_flag ,
( cem = = 11 ) ? best_parts2_mode11 [ est_part_iter ] : best_parts2_mode7 [ est_part_iter ] ,
comp_level ,
mode11_opt_mode ,
true ) ;
if ( ! status )
continue ;
for ( uint32_t r_iter = 0 ; r_iter < 2 ; r_iter + + )
{
const trial_result & res = results [ r_iter ] ;
if ( ! res . m_valid )
continue ;
candidate_encoding candidate ;
candidate . m_coder . reserve ( 24 ) ;
astc_helpers : : log_astc_block & coded_log_blk = candidate . m_coded_log_blk ;
coded_log_blk = res . m_log_blk ;
astc_helpers : : log_astc_block & decomp_blk = candidate . m_decomp_log_blk ;
decomp_blk = res . m_log_blk ;
if ( ! validate_log_blk ( decomp_blk ) )
{
fmt_error_printf ( " pack_astc_block() failed \n " ) ;
return false ;
}
status = decode_astc_block ( BLOCK_W , BLOCK_H , decomp_blk , & candidate . m_comp_pixels [ 0 ] [ 0 ] ) ;
if ( ! status )
{
fmt_error_printf ( " decode_astc_block() failed \n " ) ;
return false ;
}
candidate . m_coder . put_bits ( BLOCK_CODE , BLOCK_CODE_LEN ) ;
code_block ( candidate . m_coder , candidate . m_coded_log_blk , bm , em , nullptr ) ;
candidate . m_encoding_type = encoding_type : : cBlock ;
candidate . m_endpoint_mode = em ;
candidate . m_block_mode = bm ;
candidates . emplace_back ( std : : move ( candidate ) ) ;
} // r_iter
}
}
else
{
// 1 subset
uint8_t blk_weights0 [ BLOCK_W * BLOCK_H ] , blk_weights1 [ BLOCK_W * BLOCK_H ] ;
uint32_t best_submode = 0 ;
candidate_encoding candidate ;
candidate . m_coder . reserve ( 24 ) ;
astc_helpers : : log_astc_block & coded_log_blk = candidate . m_coded_log_blk ;
const uint32_t grid_x = mode_desc . m_grid_x , grid_y = mode_desc . m_grid_y ;
const uint32_t num_grid_samples = grid_x * grid_y ;
const half_vec3 * pBlock_pixels_half = & half_pixels [ 0 ] [ 0 ] ;
const vec4F * pBlock_pixels_q16 = & block_pixels_q16 [ 0 ] [ 0 ] ;
const uint32_t num_grid_samples_dp = num_grid_samples * ( dual_plane ? 2 : 1 ) ;
uint8_t transcode_weights [ MAX_BLOCK_W * MAX_BLOCK_H * 2 ] ;
coded_log_blk . m_grid_width = ( uint8_t ) grid_x ;
coded_log_blk . m_grid_height = ( uint8_t ) grid_y ;
coded_log_blk . m_dual_plane = ( uint8_t ) dual_plane ;
coded_log_blk . m_color_component_selector = ( uint8_t ) mode_desc . m_dp_channel ;
coded_log_blk . m_num_partitions = 1 ;
coded_log_blk . m_color_endpoint_modes [ 0 ] = ( uint8_t ) mode_desc . m_cem ;
coded_log_blk . m_weight_ise_range = ( uint8_t ) mode_desc . m_weight_ise_range ;
coded_log_blk . m_endpoint_ise_range = ( uint8_t ) mode_desc . m_endpoint_ise_range ;
if ( ( cem = = 11 ) & & ( ! dual_plane ) & & ( ( grid_x < BLOCK_W ) | | ( grid_y < BLOCK_H ) ) )
{
double e = encode_astc_hdr_block_downsampled_mode_11 (
BLOCK_W , BLOCK_H , grid_x , grid_y ,
mode_desc . m_weight_ise_range , mode_desc . m_endpoint_ise_range ,
NUM_BLOCK_PIXELS , ( basist : : half_float ( * ) [ 3 ] ) pBlock_pixels_half , pBlock_pixels_q16 ,
BIG_FLOAT_VAL ,
FIRST_MODE11_SUBMODE_INDEX , MAX_MODE11_SUBMODE_INDEX , false , mode11_opt_mode ,
coded_log_blk . m_endpoints , coded_log_blk . m_weights , best_submode ,
coptions ,
& enc_block_stats ) ;
if ( e = = BIG_FLOAT_VAL )
break ;
}
else
{
if ( cem = = 7 )
{
assert ( ! dual_plane ) ;
double e = encode_astc_hdr_block_mode_7 (
NUM_BLOCK_PIXELS ,
( basist : : half_float ( * ) [ 3 ] ) pBlock_pixels_half , pBlock_pixels_q16 ,
mode_desc . m_weight_ise_range ,
best_submode ,
BIG_FLOAT_VAL ,
coded_log_blk . m_endpoints ,
blk_weights0 ,
coptions ,
mode_desc . m_endpoint_ise_range ,
0 , MAX_MODE7_SUBMODE_INDEX ,
& enc_block_stats ) ;
BASISU_NOTE_UNUSED ( e ) ;
}
else
{
double e ;
if ( dual_plane )
{
e = encode_astc_hdr_block_mode_11_dual_plane (
NUM_BLOCK_PIXELS ,
( basist : : half_float ( * ) [ 3 ] ) pBlock_pixels_half , pBlock_pixels_q16 ,
mode_desc . m_dp_channel ,
mode_desc . m_weight_ise_range ,
best_submode ,
BIG_FLOAT_VAL ,
coded_log_blk . m_endpoints ,
blk_weights0 , blk_weights1 ,
coptions ,
false ,
mode_desc . m_endpoint_ise_range , uber_mode_flag , false , - 1 , 7 , false ) ;
}
else
{
e = encode_astc_hdr_block_mode_11 (
NUM_BLOCK_PIXELS ,
( basist : : half_float ( * ) [ 3 ] ) pBlock_pixels_half , pBlock_pixels_q16 ,
mode_desc . m_weight_ise_range ,
best_submode ,
BIG_FLOAT_VAL ,
coded_log_blk . m_endpoints ,
blk_weights0 ,
coptions ,
false ,
mode_desc . m_endpoint_ise_range , uber_mode_flag , false , - 1 , 7 , false ,
mode11_opt_mode ,
& enc_block_stats ) ;
}
if ( e = = BIG_FLOAT_VAL )
break ;
}
if ( dual_plane )
{
downsample_ise_weights_dual_plane (
mode_desc . m_weight_ise_range , mode_desc . m_weight_ise_range ,
BLOCK_W , BLOCK_H ,
grid_x , grid_y ,
blk_weights0 , blk_weights1 ,
coded_log_blk . m_weights ) ;
}
else
{
downsample_ise_weights (
mode_desc . m_weight_ise_range , mode_desc . m_weight_ise_range ,
BLOCK_W , BLOCK_H ,
grid_x , grid_y ,
blk_weights0 , coded_log_blk . m_weights ) ;
if ( ( comp_level > = MIN_REFINE_LEVEL ) & & ( ( grid_x < BLOCK_W ) | | ( grid_y < BLOCK_H ) ) )
{
bool refine_status = refine_endpoints ( cem ,
mode_desc . m_endpoint_ise_range , coded_log_blk . m_endpoints ,
6 , 6 , mode_desc . m_grid_x , mode_desc . m_grid_y ,
coded_log_blk . m_weights , mode_desc . m_weight_ise_range ,
BLOCK_W * BLOCK_H ,
( basist : : half_float ( * ) [ 3 ] ) pBlock_pixels_half , ( vec4F * ) pBlock_pixels_q16 ,
nullptr ,
coptions , mode11_opt_mode ) ;
BASISU_NOTE_UNUSED ( refine_status ) ;
}
}
}
basist : : astc_6x6_hdr : : requantize_astc_weights ( num_grid_samples_dp , coded_log_blk . m_weights , mode_desc . m_weight_ise_range , transcode_weights , mode_desc . m_transcode_weight_ise_range ) ;
// Create the block the decoder would transcode into.
astc_helpers : : log_astc_block & decomp_blk = candidate . m_decomp_log_blk ;
decomp_blk . clear ( ) ;
decomp_blk . m_color_endpoint_modes [ 0 ] = ( uint8_t ) mode_desc . m_cem ;
decomp_blk . m_dual_plane = mode_desc . m_dp ;
decomp_blk . m_color_component_selector = ( uint8_t ) mode_desc . m_dp_channel ;
decomp_blk . m_num_partitions = 1 ;
decomp_blk . m_endpoint_ise_range = ( uint8_t ) mode_desc . m_transcode_endpoint_ise_range ;
decomp_blk . m_weight_ise_range = ( uint8_t ) mode_desc . m_transcode_weight_ise_range ;
basist : : astc_6x6_hdr : : requantize_ise_endpoints ( mode_desc . m_cem , mode_desc . m_endpoint_ise_range , coded_log_blk . m_endpoints , mode_desc . m_transcode_endpoint_ise_range , decomp_blk . m_endpoints ) ;
copy_weight_grid ( dual_plane , grid_x , grid_y , transcode_weights , decomp_blk ) ;
if ( ! validate_log_blk ( decomp_blk ) )
{
fmt_error_printf ( " pack_astc_block() failed \n " ) ;
return false ;
}
status = decode_astc_block ( BLOCK_W , BLOCK_H , decomp_blk , & candidate . m_comp_pixels [ 0 ] [ 0 ] ) ;
if ( ! status )
{
fmt_error_printf ( " decode_astc_block() failed \n " ) ;
return false ;
}
candidate . m_coder . put_bits ( BLOCK_CODE , BLOCK_CODE_LEN ) ;
code_block ( candidate . m_coder , candidate . m_coded_log_blk , bm , em , nullptr ) ;
candidate . m_encoding_type = encoding_type : : cBlock ;
candidate . m_endpoint_mode = em ;
candidate . m_block_mode = bm ;
candidates . emplace_back ( std : : move ( candidate ) ) ;
}
break ;
}
default :
assert ( 0 ) ;
fmt_debug_printf ( " Invalid endpoint mode \n " ) ;
return false ;
} // switch (em)
} // endpoint_mode_iter
} // block_mode_iter
} // is_solid_block
//------------------------------------------------
debug_state . m_total_candidates_considered . fetch_add ( candidates . size_u32 ( ) , std : : memory_order_relaxed ) ;
atomic_max ( debug_state . m_max_candidates_considered , candidates . size_u32 ( ) ) ;
for ( uint32_t candidate_iter = 0 ; candidate_iter < candidates . size_u32 ( ) ; candidate_iter + + )
{
auto & candidate = candidates [ candidate_iter ] ;
for ( uint32_t y = 0 ; y < BLOCK_H ; y + + )
for ( uint32_t x = 0 ; x < BLOCK_W ; x + + )
linear_rgb_to_itp ( candidate . m_comp_pixels [ y ] [ x ] , candidate . m_comp_pixels_itp [ y ] [ x ] , global_cfg ) ;
}
// Find best overall candidate
double best_t = BIG_FLOAT_VAL ;
int best_candidate_index = - 1 ;
float best_d_ssim = BIG_FLOAT_VAL ;
if ( global_cfg . m_lambda = = 0.0f )
{
for ( uint32_t candidate_iter = 0 ; candidate_iter < candidates . size_u32 ( ) ; candidate_iter + + )
{
const auto & candidate = candidates [ candidate_iter ] ;
float candidate_d_ssim = 1.0f - compute_block_ssim_itp ( BLOCK_W , BLOCK_H , & block_pixels_as_itp [ 0 ] [ 0 ] , & candidate . m_comp_pixels_itp [ 0 ] [ 0 ] ) ;
if ( candidate_d_ssim < best_d_ssim )
best_d_ssim = candidate_d_ssim ;
candidate_d_ssim * = SSIM_WEIGHT ;
float candidate_mse = MSE_WEIGHT * compute_block_mse_itp ( BLOCK_W , BLOCK_H , & block_pixels_as_itp [ 0 ] [ 0 ] , & candidate . m_comp_pixels_itp [ 0 ] [ 0 ] , global_cfg . m_delta_itp_dark_adjustment ) ;
candidate_mse + = candidate_d_ssim ;
float total_deblock_penalty = 0.0f ;
if ( global_cfg . m_deblocking_flag )
{
total_deblock_penalty = calc_deblocking_penalty_itp ( bx , by , width , height , pass_src_img_itp , candidate ) * global_cfg . m_deblock_penalty_weight ;
}
candidate_mse + = total_deblock_penalty * SSIM_WEIGHT ;
if ( ( candidate . m_encoding_type = = encoding_type : : cBlock ) | | ( candidate . m_encoding_type = = encoding_type : : cReuse ) )
{
// Bias the encoder away from 2 level blocks on complex blocks
// TODO: Perhaps only do this on large or non-interpolated grids
if ( complex_block )
{
if ( candidate . m_coded_log_blk . m_weight_ise_range = = astc_helpers : : BISE_2_LEVELS )
{
candidate_mse * = TWO_LEVEL_PENALTY ;
}
}
// Bias the encoder away from smaller weight grids if the block is very complex
// TODO: Use the DCT to compute an approximation of the block energy/variance retained vs. lost by downsampling.
if ( complex_block )
{
if ( ( candidate . m_coded_log_blk . m_grid_width = = 2 ) & & ( candidate . m_coded_log_blk . m_grid_height = = 2 ) )
candidate_mse * = COMPLEX_BLOCK_WEIGHT_GRID_2X2_MSE_PENALTY ;
else if ( minimum ( candidate . m_coded_log_blk . m_grid_width , candidate . m_coded_log_blk . m_grid_height ) < = 3 )
candidate_mse * = COMPLEX_BLOCK_WEIGHT_GRID_3X3_MSE_PENALTY ;
else if ( minimum ( candidate . m_coded_log_blk . m_grid_width , candidate . m_coded_log_blk . m_grid_height ) < = 4 )
candidate_mse * = COMPLEX_BLOCK_WEIGHT_GRID_4X4_MSE_PENALTY ;
}
}
float candidate_t = candidate_mse ;
if ( candidate_t < best_t )
{
best_t = candidate_t ;
best_candidate_index = candidate_iter ;
}
} // candidate_iter
if ( global_cfg . m_gaussian1_fallback & & ( outer_pass = = 0 ) & & ( very_complex_block ) & & ( best_d_ssim > SWITCH_TO_GAUSSIAN_FILTERED_THRESH1_D_SSIM ) )
{
debug_state . m_total_gaussian1_blocks . fetch_add ( 1 , std : : memory_order_relaxed ) ;
continue ;
}
const float block_y_contrast_ratio = block_hy / ( block_ly + .00000125f ) ;
if ( global_cfg . m_gaussian2_fallback & & ( comp_level > = 1 ) & & ( outer_pass = = 1 ) & & ( very_complex_block ) & & ( best_d_ssim > SWITCH_TO_GAUSSIAN_FILTERED_THRESH2_D_SSIM ) & &
( block_hy > = 18.0f ) & & ( block_y_contrast_ratio > 150.0f ) & &
( block_avg_y > = 1.5f ) )
{
debug_state . m_total_gaussian2_blocks . fetch_add ( 1 , std : : memory_order_relaxed ) ;
continue ;
}
}
else
{
assert ( enc_state . smooth_block_mse_scales . get_width ( ) > 0 ) ;
// Compute block's perceptual weighting
float perceptual_scale = 0.0f ;
for ( uint32_t y = 0 ; y < BLOCK_H ; y + + )
for ( uint32_t x = 0 ; x < BLOCK_W ; x + + )
perceptual_scale = basisu : : maximumf ( perceptual_scale , enc_state . smooth_block_mse_scales . at_clamped ( bx * BLOCK_W + x , by * BLOCK_H + y ) ) ;
// Very roughly normalize the computed distortion vs. bits.
perceptual_scale * = 10.0f ;
for ( uint32_t candidate_iter = 0 ; candidate_iter < candidates . size_u32 ( ) ; candidate_iter + + )
{
auto & candidate = candidates [ candidate_iter ] ;
float d_ssim = 1.0f - compute_block_ssim_itp ( BLOCK_W , BLOCK_H , & block_pixels_as_itp [ 0 ] [ 0 ] , & candidate . m_comp_pixels_itp [ 0 ] [ 0 ] ) ;
if ( d_ssim < best_d_ssim )
best_d_ssim = ( float ) d_ssim ;
d_ssim * = SSIM_WEIGHT ;
float candidate_mse = MSE_WEIGHT * compute_block_mse_itp ( BLOCK_W , BLOCK_H , & block_pixels_as_itp [ 0 ] [ 0 ] , & candidate . m_comp_pixels_itp [ 0 ] [ 0 ] , global_cfg . m_delta_itp_dark_adjustment ) ;
candidate_mse + = d_ssim ;
float total_deblock_penalty = 0.0f ;
if ( global_cfg . m_deblocking_flag )
{
total_deblock_penalty = calc_deblocking_penalty_itp ( bx , by , width , height , pass_src_img_itp , candidate ) * global_cfg . m_deblock_penalty_weight ;
}
candidate_mse + = total_deblock_penalty * SSIM_WEIGHT ;
if ( ( candidate . m_encoding_type = = encoding_type : : cBlock ) | | ( candidate . m_encoding_type = = encoding_type : : cReuse ) )
{
// Bias the encoder away from 2 level blocks on complex blocks
if ( complex_block )
{
if ( candidate . m_coded_log_blk . m_weight_ise_range = = astc_helpers : : BISE_2_LEVELS )
{
candidate_mse * = TWO_LEVEL_PENALTY ;
}
}
// Bias the encoder away from smaller weight grids if the block is very complex
if ( complex_block )
{
if ( ( candidate . m_coded_log_blk . m_grid_width = = 2 ) & & ( candidate . m_coded_log_blk . m_grid_height = = 2 ) )
candidate_mse * = COMPLEX_BLOCK_WEIGHT_GRID_2X2_MSE_PENALTY ;
else if ( minimum ( candidate . m_coded_log_blk . m_grid_width , candidate . m_coded_log_blk . m_grid_height ) < = 3 )
candidate_mse * = COMPLEX_BLOCK_WEIGHT_GRID_3X3_MSE_PENALTY ;
else if ( minimum ( candidate . m_coded_log_blk . m_grid_width , candidate . m_coded_log_blk . m_grid_height ) < = 4 )
candidate_mse * = COMPLEX_BLOCK_WEIGHT_GRID_4X4_MSE_PENALTY ;
}
}
float mode_penalty = 1.0f ;
if ( candidate . m_encoding_type = = encoding_type : : cSolid )
mode_penalty * = SOLID_PENALTY ;
else if ( candidate . m_encoding_type = = encoding_type : : cReuse )
mode_penalty * = REUSE_PENALTY ;
else if ( candidate . m_encoding_type = = encoding_type : : cRun )
mode_penalty * = ( complex_block ? RUN_PENALTY * 2.0f : RUN_PENALTY ) ;
float candidate_bits = ( float ) candidate . m_coder . get_total_bits ( ) ;
float candidate_d = candidate_mse * mode_penalty ;
const float D_POWER = 2.0f ;
float candidate_t = perceptual_scale * powf ( candidate_d , D_POWER ) + candidate_bits * ( global_cfg . m_lambda * 1000.0f ) ;
candidate . m_t = candidate_t ;
candidate . m_d = candidate_d ;
candidate . m_bits = candidate_bits ;
if ( candidate_t < best_t )
{
best_t = candidate_t ;
best_candidate_index = candidate_iter ;
}
} // candidate_iter
if ( global_cfg . m_gaussian1_fallback & & ( outer_pass = = 0 ) & & ( very_complex_block ) & & ( best_d_ssim > SWITCH_TO_GAUSSIAN_FILTERED_THRESH1_D_SSIM ) )
{
debug_state . m_total_gaussian1_blocks . fetch_add ( 1 , std : : memory_order_relaxed ) ;
continue ;
}
const float block_y_contrast_ratio = block_hy / ( block_ly + .00000125f ) ;
if ( global_cfg . m_gaussian2_fallback & & ( comp_level > = 1 ) & & ( outer_pass = = 1 ) & & ( very_complex_block ) & & ( best_d_ssim > SWITCH_TO_GAUSSIAN_FILTERED_THRESH2_D_SSIM ) & &
( block_hy > = 18.0f ) & & ( block_y_contrast_ratio > 150.0f ) & &
( block_avg_y > = 1.5f ) )
{
debug_state . m_total_gaussian2_blocks . fetch_add ( 1 , std : : memory_order_relaxed ) ;
continue ;
}
if ( global_cfg . m_rdo_candidate_diversity_boost )
{
// candidate diversity boosting - consider candidates along/near the Pareto front
const candidate_encoding & comp_candidate = candidates [ best_candidate_index ] ;
float best_d = BIG_FLOAT_VAL ;
for ( uint32_t candidate_iter = 0 ; candidate_iter < candidates . size_u32 ( ) ; candidate_iter + + )
{
const auto & candidate = candidates [ candidate_iter ] ;
if ( candidate . m_bits < = comp_candidate . m_bits * global_cfg . m_rdo_candidate_diversity_boost_bit_window_weight )
{
if ( candidate . m_d < best_d )
{
best_d = candidate . m_d ;
best_candidate_index = candidate_iter ;
}
}
}
}
// candidate JND optimization - if there's a cheaper to code candidate that is nearly equivalent visually to the best candidate chose, choose that
if ( global_cfg . m_jnd_optimization )
{
const candidate_encoding & cur_comp_candidate = candidates [ best_candidate_index ] ;
float new_best_candidate_bits = BIG_FLOAT_VAL ;
int new_best_candidate_index = - 1 ;
for ( uint32_t candidate_iter = 0 ; candidate_iter < candidates . size_u32 ( ) ; candidate_iter + + )
{
if ( ( int ) candidate_iter = = best_candidate_index )
continue ;
const auto & candidate = candidates [ candidate_iter ] ;
if ( candidate . m_bits > = cur_comp_candidate . m_bits )
continue ;
float max_delta_itp = 0.0f ;
for ( uint32_t y = 0 ; y < BLOCK_H ; y + + )
{
for ( uint32_t x = 0 ; x < BLOCK_W ; x + + )
{
float delta_itp = compute_pixel_delta_itp ( cur_comp_candidate . m_comp_pixels_itp [ y ] [ x ] , candidate . m_comp_pixels_itp [ y ] [ x ] , block_pixels_as_itp [ y ] [ x ] , global_cfg . m_delta_itp_dark_adjustment ) ;
max_delta_itp = maximum ( max_delta_itp , delta_itp ) ;
if ( max_delta_itp > = global_cfg . m_jnd_delta_itp_thresh )
goto skip ;
}
}
skip :
if ( max_delta_itp > = global_cfg . m_jnd_delta_itp_thresh )
continue ;
if ( candidate . m_bits < new_best_candidate_bits )
{
new_best_candidate_bits = candidate . m_bits ;
new_best_candidate_index = candidate_iter ;
}
}
if ( new_best_candidate_index ! = - 1 )
{
best_candidate_index = new_best_candidate_index ;
debug_state . m_total_jnd_replacements . fetch_add ( 1 , std : : memory_order_relaxed ) ;
}
}
} // if (lambda == 0.0f)
if ( global_cfg . m_debug_images )
{
std : : lock_guard < std : : mutex > lck ( debug_state . m_stat_vis_mutex ) ;
debug_state . m_stat_vis . fill_box ( bx * 6 , by * 6 , 6 , 6 , vec4F ( best_d_ssim , max_std_dev , lowpass_std_dev , 1.0f ) ) ;
}
if ( best_candidate_index < 0 )
{
assert ( best_candidate_index > = 0 ) ;
fmt_error_printf ( " No candidates! \n " ) ;
return false ;
}
const auto & best_candidate = candidates [ best_candidate_index ] ;
assert ( best_candidate . m_encoding_type ! = encoding_type : : cInvalid ) ;
if ( best_candidate . m_encoding_type = = encoding_type : : cRun )
{
if ( ! prev_run_len )
{
if ( prev_encoding . get_total_bits ( ) )
{
# if SYNC_MARKERS
strip_coded_bits . put_bits ( 0xDEAD , 16 ) ;
# endif
strip_coded_bits . append ( prev_encoding ) ;
}
assert ( best_candidate . m_coder . get_total_bits ( ) ) ;
prev_encoding = best_candidate . m_coder ;
prev_run_len = 1 ;
}
else
{
prev_run_len + + ;
const uint32_t prev_run_bits = prev_encoding . get_total_bits_u32 ( ) ;
assert ( prev_run_bits ) ;
BASISU_NOTE_UNUSED ( prev_run_bits ) ;
const uint32_t num_dummy_bits = best_candidate . m_coder . get_total_bits_u32 ( ) ;
BASISU_NOTE_UNUSED ( num_dummy_bits ) ;
// Rewrite the previous encoding to extend the run length.
prev_encoding . restart ( ) ;
prev_encoding . put_bits ( RUN_CODE , RUN_CODE_LEN ) ;
prev_encoding . put_vlc ( prev_run_len - 1 , 5 ) ;
assert ( prev_encoding . get_total_bits ( ) = = prev_run_bits + num_dummy_bits ) ;
}
}
else
{
if ( prev_encoding . get_total_bits ( ) )
{
# if SYNC_MARKERS
strip_coded_bits . put_bits ( 0xDEAD , 16 ) ;
# endif
strip_coded_bits . append ( prev_encoding ) ;
}
prev_encoding = best_candidate . m_coder ;
prev_run_len = 0 ;
}
memcpy ( prev_comp_pixels , best_candidate . m_comp_pixels , sizeof ( vec3F ) * BLOCK_W * BLOCK_H ) ;
prev_candidate_encoding = best_candidate ;
if ( best_candidate . m_encoding_type ! = encoding_type : : cRun )
prev_non_run_candidate_encoding = best_candidate ;
{
std : : lock_guard < std : : mutex > lck ( debug_state . m_stats_mutex ) ;
debug_state . m_encoding_type_hist [ ( uint32_t ) best_candidate . m_encoding_type ] + + ;
if ( best_candidate . m_encoding_type = = encoding_type : : cBlock )
{
debug_state . m_endpoint_mode_hist [ ( uint32_t ) best_candidate . m_endpoint_mode ] + + ;
}
if ( ( best_candidate . m_encoding_type = = encoding_type : : cReuse ) | | ( best_candidate . m_encoding_type = = encoding_type : : cBlock ) )
{
const uint32_t bm_index = ( uint32_t ) best_candidate . m_block_mode ;
assert ( bm_index < ( uint32_t ) block_mode : : cBMTotalModes ) ;
debug_state . m_block_mode_hist [ bm_index ] + + ;
debug_state . m_block_mode_total_bits [ bm_index ] + = best_candidate . m_coder . get_total_bits ( ) ;
for ( uint32_t i = 0 ; i < 3 ; i + + )
{
debug_state . m_block_mode_comp_stats [ bm_index ] [ i ] . push_back ( half_comp_stats [ i ] ) ;
debug_state . m_block_mode_comparative_stats [ bm_index ] [ i ] . push_back ( half_cross_chan_stats [ i ] ) ;
}
}
if ( best_candidate . m_encoding_type = = encoding_type : : cReuse )
{
debug_state . m_reuse_num_parts [ best_candidate . m_coded_log_blk . m_num_partitions ] . fetch_add ( 1 , std : : memory_order_relaxed ) ;
if ( best_candidate . m_coded_log_blk . m_dual_plane )
debug_state . m_reuse_total_dp . fetch_add ( 1 , std : : memory_order_relaxed ) ;
}
}
enc_state . coded_blocks ( bx , by ) = prev_non_run_candidate_encoding ;
// Update decoded image
vec4F decoded_float_pixels [ BLOCK_H ] [ BLOCK_W ] ;
for ( uint32_t y = 0 ; y < BLOCK_H ; y + + )
for ( uint32_t x = 0 ; x < BLOCK_W ; x + + )
decoded_float_pixels [ y ] [ x ] = best_candidate . m_comp_pixels [ y ] [ x ] ;
enc_state . packed_img . set_block_clipped ( ( vec4F * ) decoded_float_pixels , bx * BLOCK_W , by * BLOCK_H , BLOCK_W , BLOCK_H ) ;
status = astc_helpers : : pack_astc_block ( enc_state . final_astc_blocks ( bx , by ) , best_candidate . m_decomp_log_blk , nullptr , nullptr ) ;
if ( ! status )
{
fmt_error_printf ( " Failed packing block \n " ) ;
return false ;
}
const uint32_t r = debug_state . m_total_blocks_compressed . fetch_add ( 1 , std : : memory_order_relaxed ) ;
if ( ( r & 2047 ) = = 2047 )
{
if ( global_cfg . m_status_output )
{
basisu : : fmt_printf ( " {} of {} total blocks compressed, {3.2}% \n " , r , total_blocks , ( r * 100.0f ) / total_blocks ) ;
}
}
if ( ( global_cfg . m_debug_images ) & &
( ( best_candidate . m_encoding_type ! = encoding_type : : cRun ) & & ( best_candidate . m_encoding_type ! = encoding_type : : cSolid ) ) )
{
std : : lock_guard < std : : mutex > lck ( debug_state . m_vis_image_mutex ) ;
if ( best_candidate . m_decomp_log_blk . m_num_partitions = = 2 )
{
const int part2_unique_index = g_part2_seed_to_unique_index [ best_candidate . m_decomp_log_blk . m_partition_id ] ;
assert ( ( part2_unique_index > = 0 ) & & ( part2_unique_index < ( int ) NUM_UNIQUE_PARTITIONS2 ) ) ;
const partition_pattern_vec & pat = g_partitions2 [ part2_unique_index ] ;
for ( uint32_t y = 0 ; y < 6 ; y + + )
{
for ( uint32_t x = 0 ; x < 6 ; x + + )
{
const uint32_t p = pat [ x + y * 6 ] ;
debug_state . m_part_vis . set_clipped ( bx * 6 + x , by * 6 + y , color_rgba ( p ? 100 : 0 , 128 , p ? 100 : 0 , 255 ) ) ;
} // x
} // y
}
else if ( best_candidate . m_decomp_log_blk . m_num_partitions = = 3 )
{
//part_vis.fill_box(bx * 6, by * 6, 6, 6, color_rgba(0, 0, 255, 255));
const int part3_unique_index = g_part3_seed_to_unique_index [ best_candidate . m_decomp_log_blk . m_partition_id ] ;
assert ( ( part3_unique_index > = 0 ) & & ( part3_unique_index < ( int ) NUM_UNIQUE_PARTITIONS3 ) ) ;
const partition_pattern_vec & pat = g_partitions3 [ part3_unique_index ] ;
for ( uint32_t y = 0 ; y < 6 ; y + + )
{
for ( uint32_t x = 0 ; x < 6 ; x + + )
{
const uint32_t p = pat [ x + y * 6 ] ;
color_rgba c ( 0 , 0 , 150 , 255 ) ;
if ( p = = 1 )
c . set ( 100 , 0 , 150 , 255 ) ;
else if ( p = = 2 )
c . set ( 0 , 100 , 150 , 255 ) ;
debug_state . m_part_vis . set_clipped ( bx * 6 + x , by * 6 + y , c ) ;
} // x
} // y
}
else if ( best_candidate . m_decomp_log_blk . m_dual_plane )
{
debug_state . m_part_vis . fill_box ( bx * 6 , by * 6 , 6 , 6 , color_rgba ( 255 , 0 , 255 , 255 ) ) ;
}
else
{
debug_state . m_part_vis . fill_box ( bx * 6 , by * 6 , 6 , 6 , color_rgba ( 255 , 0 , 0 , 255 ) ) ;
}
color_rgba c ;
c . set ( ( best_candidate . m_coded_log_blk . m_grid_width * best_candidate . m_coded_log_blk . m_grid_height * 255 + 18 ) / 36 ) ;
debug_state . m_grid_vis . fill_box ( bx * 6 , by * 6 , 6 , 6 , c ) ;
c . set ( 0 , 0 , 0 , 255 ) ;
if ( complex_block )
c [ 0 ] = 255 ;
if ( very_complex_block )
c [ 1 ] = 255 ;
if ( outer_pass = = 2 )
c [ 2 ] = 255 ;
else if ( outer_pass = = 1 )
c [ 2 ] = 128 ;
debug_state . m_mode_vis . fill_box ( bx * 6 , by * 6 , 6 , 6 , c ) ;
c . set ( 0 , 255 , 0 , 255 ) ;
if ( best_candidate . m_coded_log_blk . m_color_endpoint_modes [ 0 ] = = 7 )
c . set ( 255 , 0 , 0 , 255 ) ;
debug_state . m_mode_vis2 . fill_box ( bx * 6 , by * 6 , 6 , 6 , c ) ;
switch ( best_candidate . m_encoding_type )
{
case encoding_type : : cRun :
c . set ( 0 , 0 , 0 , 255 ) ;
break ;
case encoding_type : : cSolid :
c . set ( 128 , 128 , 128 , 255 ) ; // dark grey
break ;
case encoding_type : : cReuse :
c . set ( 255 , 255 , 0 , 255 ) ; // yellow
break ;
case encoding_type : : cBlock :
{
switch ( best_candidate . m_endpoint_mode )
{
case endpoint_mode : : cRaw :
c . set ( 255 , 0 , 0 , 255 ) ; // red
break ;
case endpoint_mode : : cUseLeft :
c . set ( 0 , 0 , 255 , 255 ) ; // blue
break ;
case endpoint_mode : : cUseUpper :
c . set ( 0 , 0 , 192 , 255 ) ; // darker blue
break ;
case endpoint_mode : : cUseLeftDelta :
c . set ( 0 , 255 , 0 , 255 ) ; // green
break ;
case endpoint_mode : : cUseUpperDelta :
c . set ( 0 , 192 , 0 , 255 ) ; // darker green
break ;
default :
break ;
}
break ;
}
default :
break ;
}
if ( filtered_x_err < filtered_y_err )
c [ 3 ] = 0 ;
else
c [ 3 ] = 255 ;
debug_state . m_enc_vis . fill_box ( bx * 6 , by * 6 , 6 , 6 , c ) ;
}
break ;
} // outer_pass
} // bx
} // by
if ( prev_encoding . get_total_bits ( ) )
{
# if SYNC_MARKERS
strip_coded_bits . put_bits ( 0xDEAD , 16 ) ;
# endif
strip_coded_bits . append ( prev_encoding ) ;
}
return true ;
}
// Set to true at the end of global_init() once the encoder's global tables have been built.
// compress_photo() asserts on this flag and returns false while it is still false.
// NOTE(review): this is a plain bool with no synchronization - presumably global_init() is
// expected to be called once before any concurrent use; confirm against callers.
bool g_initialized = false ;
void global_init ( )
{
if ( g_initialized )
return ;
interval_timer tm ;
tm . start ( ) ;
init_pq_tables ( ) ;
init_partitions2_6x6 ( ) ;
init_partitions3_6x6 ( ) ;
init_contrib_lists ( ) ;
g_initialized = true ;
//fmt_printf("astc_6x6_hdr::global_init() total time: {}\n", tm.get_elapsed_secs());
}
bool compress_photo ( const basisu : : imagef & orig_src_img , const astc_hdr_6x6_global_config & orig_global_cfg , job_pool * pJob_pool ,
basisu : : uint8_vec & intermediate_tex_data , basisu : : uint8_vec & astc_tex_data , result_metrics & metrics )
{
assert ( g_initialized ) ;
if ( ! g_initialized )
return false ;
assert ( pJob_pool ) ;
if ( orig_global_cfg . m_debug_output )
{
fmt_debug_printf ( " ------ astc_6x6_hdr::compress_photo: \n " ) ;
fmt_debug_printf ( " Source image dimensions: {}x{} \n " , orig_src_img . get_width ( ) , orig_src_img . get_height ( ) ) ;
fmt_debug_printf ( " Job pool total threads: {} \n " , ( uint64_t ) pJob_pool - > get_total_threads ( ) ) ;
orig_global_cfg . print ( ) ;
}
if ( ! orig_src_img . get_width ( ) | | ! orig_src_img . get_height ( ) )
{
assert ( false ) ;
fmt_error_printf ( " compress_photo: Invalid source image \n " ) ;
return false ;
}
astc_hdr_6x6_global_config global_cfg ( orig_global_cfg ) ;
uastc_hdr_6x6_encode_state enc_state ;
enc_state . master_coptions . m_q_log_bias = Q_LOG_BIAS_6x6 ;
enc_state . src_img = orig_src_img ;
//src_img.crop(256, 256);
const uint32_t width = enc_state . src_img . get_width ( ) ;
const uint32_t height = enc_state . src_img . get_height ( ) ;
const uint32_t num_blocks_x = enc_state . src_img . get_block_width ( BLOCK_W ) ;
const uint32_t num_blocks_y = enc_state . src_img . get_block_height ( BLOCK_H ) ;
const uint32_t total_blocks = num_blocks_x * num_blocks_y ;
for ( uint32_t y = 0 ; y < height ; y + + )
{
for ( uint32_t x = 0 ; x < width ; x + + )
{
for ( uint32_t c = 0 ; c < 3 ; c + + )
{
float f = enc_state . src_img ( x , y ) [ c ] ;
if ( std : : isinf ( f ) | | std : : isnan ( f ) | | ( f < 0.0f ) )
f = 0 ;
else if ( f > basist : : ASTC_HDR_MAX_VAL )
f = basist : : ASTC_HDR_MAX_VAL ;
enc_state . src_img ( x , y ) [ c ] = f ;
} // c
} // x
} // y
if ( global_cfg . m_debug_images )
{
write_exr ( ( global_cfg . m_debug_image_prefix + " orig.exr " ) . c_str ( ) , enc_state . src_img , 3 , 0 ) ;
}
image src_img_compressed ;
tonemap_image_compressive2 ( src_img_compressed , enc_state . src_img ) ;
if ( global_cfg . m_debug_images )
{
save_png ( global_cfg . m_debug_image_prefix + " compressive_tone_map.png " , src_img_compressed ) ;
}
smooth_map_params rp ;
rp . m_debug_images = global_cfg . m_debug_images ;
if ( global_cfg . m_lambda ! = 0.0f )
{
if ( global_cfg . m_status_output )
fmt_printf ( " Creating RDO perceptual weighting maps \n " ) ;
create_smooth_maps2 ( enc_state . smooth_block_mse_scales , src_img_compressed , rp ) ;
}
if ( global_cfg . m_status_output )
fmt_printf ( " Blurring image \n " ) ;
enc_state . src_img_filtered1 . resize ( width , height ) ;
image_resample ( enc_state . src_img , enc_state . src_img_filtered1 , " gaussian " , global_cfg . m_gaussian1_strength ) ; //1.45f);
enc_state . src_img_filtered2 . resize ( width , height ) ;
image_resample ( enc_state . src_img , enc_state . src_img_filtered2 , " gaussian " , global_cfg . m_gaussian2_strength ) ; //1.83f);
if ( global_cfg . m_debug_images )
{
write_exr ( ( global_cfg . m_debug_image_prefix + " blurred1.exr " ) . c_str ( ) , enc_state . src_img_filtered1 , 3 , 0 ) ;
write_exr ( ( global_cfg . m_debug_image_prefix + " blurred2.exr " ) . c_str ( ) , enc_state . src_img_filtered2 , 3 , 0 ) ;
}
if ( global_cfg . m_status_output )
fmt_printf ( " Transforming to ITP \n " ) ;
enc_state . src_img_itp . resize ( width , height ) ;
convet_rgb_image_to_itp ( enc_state . src_img , enc_state . src_img_itp , global_cfg ) ;
enc_state . src_img_filtered1_itp . resize ( width , height ) ;
convet_rgb_image_to_itp ( enc_state . src_img_filtered1 , enc_state . src_img_filtered1_itp , global_cfg ) ;
enc_state . src_img_filtered2_itp . resize ( width , height ) ;
convet_rgb_image_to_itp ( enc_state . src_img_filtered2 , enc_state . src_img_filtered2_itp , global_cfg ) ;
if ( global_cfg . m_lambda = = 0.0f )
global_cfg . m_favor_higher_compression = false ;
uint32_t total_strips = 0 , rows_per_strip = 0 ;
if ( ! calc_strip_size ( global_cfg . m_lambda , num_blocks_y , ( uint32_t ) pJob_pool - > get_total_threads ( ) , global_cfg . m_force_one_strip , total_strips , rows_per_strip , global_cfg ) )
{
fmt_error_printf ( " compress_photo: Failed computing strip sizes \n " ) ;
return false ;
}
if ( global_cfg . m_debug_output )
fmt_printf ( " lambda: {}, comp_level: {}, highest_comp_level: {}, extra patterns: {} \n " , global_cfg . m_lambda , global_cfg . m_master_comp_level , global_cfg . m_highest_comp_level , global_cfg . m_extra_patterns_flag ) ;
enc_state . coded_blocks . resize ( num_blocks_x , num_blocks_y ) ;
bitwise_coder coded_bits ;
coded_bits . put_bits ( 0xABCD , 16 ) ;
coded_bits . put_bits ( width , 16 ) ;
coded_bits . put_bits ( height , 16 ) ;
enc_state . packed_img . resize ( width , height ) ;
enc_state . strip_bits . resize ( total_strips ) ;
enc_state . final_astc_blocks . resize ( num_blocks_x , num_blocks_y ) ;
uastc_hdr_6x6_debug_state debug_state ;
if ( global_cfg . m_debug_images )
debug_state . init ( width , height ) ;
else
debug_state . init ( 0 , 0 ) ;
interval_timer tm ;
tm . start ( ) ;
std : : atomic_bool any_failed_flag ;
any_failed_flag . store ( false ) ;
for ( uint32_t strip_index = 0 ; strip_index < total_strips ; strip_index + + )
{
const uint32_t strip_first_by = strip_index * rows_per_strip ;
uint32_t strip_last_by = minimum < uint32_t > ( strip_first_by + rows_per_strip - 1 , num_blocks_y ) ;
if ( strip_index = = ( total_strips - 1 ) )
strip_last_by = num_blocks_y - 1 ;
pJob_pool - > add_job ( [ & any_failed_flag , & global_cfg , & debug_state , & enc_state ,
strip_index , total_strips , strip_first_by , strip_last_by ,
num_blocks_x , num_blocks_y , total_blocks , width , height ]
{
if ( ! any_failed_flag )
{
bool status = compress_strip_task (
strip_index , total_strips , strip_first_by , strip_last_by ,
num_blocks_x , num_blocks_y , total_blocks , width , height ,
global_cfg , debug_state , enc_state ) ;
if ( ! status )
{
fmt_error_printf ( " compress_photo: compress_strip_task() failed \n " ) ;
any_failed_flag . store ( true , std : : memory_order_relaxed ) ;
}
}
} ) ;
if ( any_failed_flag )
break ;
} // strip_index
pJob_pool - > wait_for_all ( ) ;
if ( any_failed_flag )
{
fmt_error_printf ( " One or more strips failed during compression \n " ) ;
return false ;
}
if ( global_cfg . m_debug_output )
fmt_printf ( " Encoding time: {} secs \n " , tm . get_elapsed_secs ( ) ) ;
if ( global_cfg . m_debug_output )
debug_state . print ( total_blocks ) ;
if ( global_cfg . m_debug_images )
{
save_png ( global_cfg . m_debug_image_prefix + " part_vis.png " , debug_state . m_part_vis ) ;
save_png ( global_cfg . m_debug_image_prefix + " grid_vis.png " , debug_state . m_grid_vis ) ;
save_png ( global_cfg . m_debug_image_prefix + " mode_vis.png " , debug_state . m_mode_vis ) ;
save_png ( global_cfg . m_debug_image_prefix + " mode_vis2.png " , debug_state . m_mode_vis2 ) ;
save_png ( global_cfg . m_debug_image_prefix + " enc_vis.png " , debug_state . m_enc_vis ) ;
write_exr ( ( global_cfg . m_debug_image_prefix + " stat_vis.exr " ) . c_str ( ) , debug_state . m_stat_vis , 3 , 0 ) ;
}
for ( uint32_t i = 0 ; i < total_strips ; i + + )
coded_bits . append ( enc_state . strip_bits [ i ] ) ;
coded_bits . put_bits ( 0xA742 , 16 ) ;
coded_bits . flush ( ) ;
if ( global_cfg . m_output_images )
{
write_exr ( ( global_cfg . m_output_image_prefix + " comp.exr " ) . c_str ( ) , enc_state . packed_img , 3 , 0 ) ;
}
if ( global_cfg . m_debug_output )
fmt_printf ( " \n Total intermediate output bits/pixel: {3.4} \n " , ( float ) coded_bits . get_total_bits ( ) / ( float ) ( width * height ) ) ;
vector2D < astc_helpers : : astc_block > decoded_blocks1 ;
vector2D < astc_helpers : : astc_block > decoded_blocks2 ;
if ( global_cfg . m_debug_output )
fmt_printf ( " decode_file \n " ) ;
uint32_t unpacked_width = 0 , unpacked_height = 0 ;
bool status = decode_file ( coded_bits . get_bytes ( ) , decoded_blocks1 , unpacked_width , unpacked_height ) ;
if ( ! status )
{
fmt_error_printf ( " decode_file() failed \n " ) ;
return false ;
}
if ( global_cfg . m_debug_output )
fmt_printf ( " decode_6x6_hdr \n " ) ;
status = decode_6x6_hdr ( coded_bits . get_bytes ( ) . get_ptr ( ) , coded_bits . get_bytes ( ) . size_in_bytes_u32 ( ) , decoded_blocks2 , unpacked_width , unpacked_height ) ;
if ( ! status )
{
fmt_error_printf ( " decode_6x6_hdr_file() failed \n " ) ;
return false ;
}
if ( ( enc_state . final_astc_blocks . get_width ( ) ! = decoded_blocks1 . get_width ( ) ) | |
( enc_state . final_astc_blocks . get_height ( ) ! = decoded_blocks1 . get_height ( ) ) )
{
fmt_error_printf ( " Decode size mismatch with decode_file \n " ) ;
return false ;
}
if ( ( enc_state . final_astc_blocks . get_width ( ) ! = decoded_blocks2 . get_width ( ) ) | |
( enc_state . final_astc_blocks . get_height ( ) ! = decoded_blocks2 . get_height ( ) ) )
{
fmt_error_printf ( " Decode size mismatch with decode_6x6_hdr_file \n " ) ;
return false ;
}
if ( memcmp ( decoded_blocks1 . get_ptr ( ) , enc_state . final_astc_blocks . get_ptr ( ) , decoded_blocks1 . size_in_bytes ( ) ) ! = 0 )
{
fmt_error_printf ( " Decoded ASTC blocks verification failed \n " ) ;
return false ;
}
if ( memcmp ( decoded_blocks2 . get_ptr ( ) , enc_state . final_astc_blocks . get_ptr ( ) , decoded_blocks2 . size_in_bytes ( ) ) ! = 0 )
{
fmt_error_printf ( " Decoded ASTC blocks verification failed \n " ) ;
return false ;
}
if ( global_cfg . m_debug_output )
basisu : : fmt_printf ( " Decoded ASTC verification checks succeeded \n " ) ;
if ( global_cfg . m_output_images )
{
if ( write_astc_file ( ( global_cfg . m_output_image_prefix + " decoded.astc " ) . c_str ( ) , decoded_blocks1 . get_ptr ( ) , BLOCK_W , BLOCK_H , width , height ) )
{
basisu : : platform_sleep ( 20 ) ;
uint8_vec astc_file_data ;
if ( read_file_to_vec ( ( global_cfg . m_output_image_prefix + " decoded.astc " ) . c_str ( ) , astc_file_data ) )
{
if ( astc_file_data . size ( ) > 16 )
{
astc_file_data . erase ( 0 , 16 ) ;
size_t comp_size = 0 ;
void * pComp_data = tdefl_compress_mem_to_heap ( & astc_file_data [ 0 ] , astc_file_data . size ( ) , & comp_size , TDEFL_MAX_PROBES_MASK ) ;
mz_free ( pComp_data ) ;
if ( global_cfg . m_debug_output )
{
fmt_printf ( " .ASTC file size (less header): {}, bits/pixel: {}, Deflate bits/pixel: {} \n " ,
( uint64_t ) astc_file_data . size ( ) ,
( float ) astc_file_data . size ( ) * 8.0f / ( float ) ( width * height ) ,
( float ) comp_size * 8.0f / ( float ) ( width * height ) ) ;
}
}
}
}
}
// Must decode all the blocks (even padded rows/cols) to match what the transcoder does.
imagef unpacked_astc_img ( num_blocks_x * 6 , num_blocks_y * 6 ) ;
imagef unpacked_astc_google_img ( num_blocks_x * 6 , num_blocks_y * 6 ) ;
for ( uint32_t y = 0 ; y < decoded_blocks1 . get_height ( ) ; y + + )
{
for ( uint32_t x = 0 ; x < decoded_blocks1 . get_width ( ) ; x + + )
{
const auto & phys_blk = decoded_blocks1 ( x , y ) ;
vec4F pixels [ MAX_BLOCK_W * MAX_BLOCK_H ] ;
status = unpack_physical_astc_block ( & phys_blk , BLOCK_W , BLOCK_H , pixels ) ;
if ( ! status )
{
fmt_error_printf ( " unpack_physical_astc_block() failed \n " ) ;
return false ;
}
unpacked_astc_img . set_block_clipped ( pixels , x * BLOCK_W , y * BLOCK_H , BLOCK_W , BLOCK_H ) ;
vec4F pixels_google [ MAX_BLOCK_W * MAX_BLOCK_H ] ;
status = unpack_physical_astc_block_google ( & phys_blk , BLOCK_W , BLOCK_H , pixels_google ) ;
if ( ! status )
{
fmt_error_printf ( " unpack_physical_astc_block_google() failed \n " ) ;
return false ;
}
unpacked_astc_google_img . set_block_clipped ( pixels_google , x * BLOCK_W , y * BLOCK_H , BLOCK_W , BLOCK_H ) ;
for ( uint32_t i = 0 ; i < 36 ; i + + )
{
if ( pixels [ i ] ! = pixels_google [ i ] )
{
fmt_error_printf ( " pixel unpack mismatch \n " ) ;
return false ;
}
}
}
}
if ( global_cfg . m_debug_output )
fmt_printf ( " \n Unpack succeeded \n " ) ;
imagef unpacked_bc6h_img ;
{
vector2D < basist : : bc6h_block > bc6h_blocks ;
fast_bc6h_params enc_params ;
bool pack_status = pack_bc6h_image ( unpacked_astc_img , bc6h_blocks , & unpacked_bc6h_img , enc_params ) ;
if ( ! pack_status )
{
fmt_error_printf ( " pack_bc6h_image() failed! " ) ;
return false ;
}
unpacked_bc6h_img . crop ( width , height ) ;
if ( global_cfg . m_output_images )
{
write_exr ( ( global_cfg . m_output_image_prefix + " unpacked_bc6h.exr " ) . c_str ( ) , unpacked_bc6h_img , 3 , 0 ) ;
}
}
unpacked_astc_img . crop ( width , height ) ;
unpacked_astc_google_img . crop ( width , height ) ;
if ( global_cfg . m_output_images )
{
write_exr ( ( global_cfg . m_output_image_prefix + " unpacked_astc.exr " ) . c_str ( ) , unpacked_astc_img , 3 , 0 ) ;
write_exr ( ( global_cfg . m_output_image_prefix + " unpacked_google_astc.exr " ) . c_str ( ) , unpacked_astc_google_img , 3 , 0 ) ;
}
// ASTC metrics
if ( global_cfg . m_image_stats )
{
image_metrics im ;
if ( global_cfg . m_debug_output )
printf ( " \n ASTC log2 float error metrics: \n " ) ;
for ( uint32_t i = 0 ; i < 3 ; i + + )
{
im . calc ( enc_state . src_img , unpacked_astc_img , i , 1 , true , true ) ;
if ( global_cfg . m_debug_output )
{
printf ( " %c: " , " RGBA " [ i ] ) ;
im . print_hp ( ) ;
}
}
metrics . m_im_astc_log2 . calc ( enc_state . src_img , unpacked_astc_img , 0 , 3 , true , true ) ;
if ( global_cfg . m_debug_output )
{
printf ( " RGB: " ) ;
metrics . m_im_astc_log2 . print_hp ( ) ;
printf ( " \n " ) ;
}
}
if ( global_cfg . m_image_stats )
{
image_metrics im ;
if ( global_cfg . m_debug_output )
printf ( " ASTC half float space error metrics (a piecewise linear approximation of log2 error): \n " ) ;
for ( uint32_t i = 0 ; i < 3 ; i + + )
{
im . calc_half ( enc_state . src_img , unpacked_astc_img , i , 1 , true ) ;
if ( global_cfg . m_debug_output )
{
printf ( " %c: " , " RGBA " [ i ] ) ;
im . print_hp ( ) ;
}
}
metrics . m_im_astc_half . calc_half ( enc_state . src_img , unpacked_astc_img , 0 , 3 , true ) ;
if ( global_cfg . m_debug_output )
{
printf ( " RGB: " ) ;
metrics . m_im_astc_half . print_hp ( ) ;
}
}
// BC6H metrics
if ( global_cfg . m_image_stats )
{
image_metrics im ;
if ( global_cfg . m_debug_output )
printf ( " \n BC6H log2 float error metrics: \n " ) ;
for ( uint32_t i = 0 ; i < 3 ; i + + )
{
im . calc ( enc_state . src_img , unpacked_bc6h_img , i , 1 , true , true ) ;
if ( global_cfg . m_debug_output )
{
printf ( " %c: " , " RGBA " [ i ] ) ;
im . print_hp ( ) ;
}
}
metrics . m_im_bc6h_log2 . calc ( enc_state . src_img , unpacked_bc6h_img , 0 , 3 , true , true ) ;
if ( global_cfg . m_debug_output )
{
printf ( " RGB: " ) ;
metrics . m_im_bc6h_log2 . print_hp ( ) ;
printf ( " \n " ) ;
}
}
if ( global_cfg . m_image_stats )
{
image_metrics im ;
if ( global_cfg . m_debug_output )
printf ( " BC6H half float space error metrics (a piecewise linear approximation of log2 error): \n " ) ;
for ( uint32_t i = 0 ; i < 3 ; i + + )
{
im . calc_half ( enc_state . src_img , unpacked_bc6h_img , i , 1 , true ) ;
if ( global_cfg . m_debug_output )
{
printf ( " %c: " , " RGBA " [ i ] ) ;
im . print_hp ( ) ;
}
}
metrics . m_im_bc6h_half . calc_half ( enc_state . src_img , unpacked_bc6h_img , 0 , 3 , true ) ;
if ( global_cfg . m_debug_output )
{
printf ( " RGB: " ) ;
metrics . m_im_bc6h_half . print_hp ( ) ;
printf ( " \n " ) ;
}
}
intermediate_tex_data . swap ( coded_bits . get_bytes ( ) ) ;
astc_tex_data . resize ( decoded_blocks1 . size_in_bytes ( ) ) ;
memcpy ( astc_tex_data . data ( ) , decoded_blocks1 . get_ptr ( ) , decoded_blocks1 . size_in_bytes ( ) ) ;
return true ;
}
} // namespace astc_6x6_hdr