Merge pull request #103968 from Chubercik/basis_universal-1.60

basis_universal: Update to 1.60
2025-12-08 06:09:55 +00:00 · 2025-04-27 19:21:17 -05:00 · 2025-04-27 19:21:17 -05:00 · be994d59c9
commit be994d59c9
parent 69f0eb5de5 246b062bd6
43 changed files with 29563 additions and 6572 deletions
--- a/modules/basis_universal/SCsub
+++ b/modules/basis_universal/SCsub
@ -20,22 +20,24 @@ basisu_encoder = env.editor_build
 if basisu_encoder:
    encoder_sources = [
        "3rdparty/android_astc_decomp.cpp",
-        "basisu_astc_hdr_enc.cpp",
+        "basisu_astc_hdr_6x6_enc.cpp",
+        "basisu_astc_hdr_common.cpp",
        "basisu_backend.cpp",
        "basisu_basis_file.cpp",
        "basisu_bc7enc.cpp",
-        "basisu_opencl.cpp",
        "basisu_comp.cpp",
        "basisu_enc.cpp",
        "basisu_etc.cpp",
        "basisu_frontend.cpp",
        "basisu_gpu_texture.cpp",
        "basisu_kernels_sse.cpp",
+        "basisu_opencl.cpp",
        "basisu_pvrtc1_4.cpp",
-        "basisu_resampler.cpp",
        "basisu_resample_filters.cpp",
+        "basisu_resampler.cpp",
        "basisu_ssim.cpp",
        "basisu_uastc_enc.cpp",
+        "basisu_uastc_hdr_4x4_enc.cpp",
        "pvpngreader.cpp",
    ]
    encoder_sources = [thirdparty_dir + "encoder/" + file for file in encoder_sources]
--- a/modules/basis_universal/image_compress_basisu.cpp
+++ b/modules/basis_universal/image_compress_basisu.cpp
@ -101,13 +101,13 @@ Vector<uint8_t> basis_universal_packer(const Ref<Image> &p_image, Image::UsedCha
 	basisu::basis_compressor_params params;

 	params.m_uastc = true;
-	params.m_quality_level = basisu::BASISU_QUALITY_MIN;
-	params.m_pack_uastc_flags &= ~basisu::cPackUASTCLevelMask;
-	params.m_pack_uastc_flags |= basisu::cPackUASTCLevelFastest;
+	params.m_etc1s_quality_level = basisu::BASISU_QUALITY_MIN;
+	params.m_pack_uastc_ldr_4x4_flags &= ~basisu::cPackUASTCLevelMask;
+	params.m_pack_uastc_ldr_4x4_flags |= basisu::cPackUASTCLevelFastest;

-	params.m_rdo_uastc = 0.0f;
-	params.m_rdo_uastc_quality_scalar = 0.0f;
-	params.m_rdo_uastc_dict_size = 1024;
+	params.m_rdo_uastc_ldr_4x4 = 0.0f;
+	params.m_rdo_uastc_ldr_4x4_quality_scalar = 0.0f;
+	params.m_rdo_uastc_ldr_4x4_dict_size = 1024;

 	params.m_mip_fast = true;
 	params.m_multithreading = true;
@ -127,7 +127,7 @@ Vector<uint8_t> basis_universal_packer(const Ref<Image> &p_image, Image::UsedCha
 	if (is_hdr) {
 		decompress_format = BASIS_DECOMPRESS_HDR_RGB;
 		params.m_hdr = true;
-		params.m_uastc_hdr_options.set_quality_level(0);
+		params.m_uastc_hdr_4x4_options.set_quality_level(0);

 	} else {
 		switch (p_channels) {
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@ -74,7 +74,7 @@ Files extracted from upstream source:
 ## basis_universal

 - Upstream: https://github.com/BinomialLLC/basis_universal
- Version: 1.50.0 (051ad6d8a64bb95a79e8601c317055fd1782ad3e, 2024)
+- Version: 1.60 (323239a6a5ffa57d6570cfc403be99156e33a8b0, 2025)
 - License: Apache 2.0

 Files extracted from upstream source:
@ -89,6 +89,8 @@ Patches:
 - `0002-external-jpgd.patch` (GH-88508)
 - `0003-external-tinyexr.patch` (GH-97582)
 - `0004-remove-tinydds-qoi.patch` (GH-97582)
+- `0005-windows-illegal-character.patch` (GH-103968)
+- `0006-ambiguous-calls.patch` (GH-103968)


 ## brotli
@ -523,6 +525,7 @@ Patches:

 - `0001-external-basisu.patch` (GH-76572)
 - `0002-disable-astc-block-ext.patch` (GH-76572)
+- `0003-basisu-1.60.patch` (GH-103968)


 ## libogg
--- a/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.cpp
+++ b/thirdparty/basis_universal/encoder/3rdparty/android_astc_decomp.cpp
@ -836,9 +836,11 @@ void decodeISETritBlock (ISEDecodedResult* dst, int numValues, BitAccessStream&
    m[4]            = data.getNext(numBits);
    deUint32 T7     = data.getNext(1);

+#ifndef __EMSCRIPTEN__
 #ifdef __GNUC__
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wimplicit-fallthrough="            
+#endif  
 #endif
    switch (numValues)
    {
@ -851,8 +853,10 @@ void decodeISETritBlock (ISEDecodedResult* dst, int numValues, BitAccessStream&
        default:
            DE_ASSERT(false);
    }
+#ifndef __EMSCRIPTEN__
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
+#endif 
 #endif

    const deUint32 T = (T7 << 7) | (T56 << 5) | (T4 << 4) | (T23 << 2) | (T01 << 0);
@ -898,9 +902,11 @@ void decodeISEQuintBlock (ISEDecodedResult* dst, int numValues, BitAccessStream&
    m[2]            = data.getNext(numBits);
    deUint32 Q56    = data.getNext(2);

+#ifndef __EMSCRIPTEN__
 #ifdef __GNUC__
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Wimplicit-fallthrough="            
+#endif  
 #endif
    switch (numValues)
    {
@ -911,8 +917,10 @@ void decodeISEQuintBlock (ISEDecodedResult* dst, int numValues, BitAccessStream&
        default:
            DE_ASSERT(false);
    }
+#ifndef __EMSCRIPTEN__
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
+#endif 
 #endif

    const deUint32 Q = (Q56 << 5) | (Q34 << 3) | (Q012 << 0);
--- a/thirdparty/basis_universal/encoder/basisu_astc_hdr_6x6_enc.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_astc_hdr_6x6_enc.cpp
--- a/thirdparty/basis_universal/encoder/basisu_astc_hdr_6x6_enc.h
+++ b/thirdparty/basis_universal/encoder/basisu_astc_hdr_6x6_enc.h
@ -0,0 +1,129 @@
+// File: basisu_astc_hdr_6x6_enc.h
+#pragma once
+#include "basisu_enc.h"
+#include "../transcoder/basisu_astc_hdr_core.h"
+
+namespace astc_6x6_hdr
+{
+	const uint32_t ASTC_HDR_6X6_MAX_USER_COMP_LEVEL = 12;
+
+	const uint32_t ASTC_HDR_6X6_MAX_COMP_LEVEL = 4;
+	
+	const float LDR_BLACK_BIAS = 0.0f;// .49f;
+		
+	// Note: This struct is copied several times, so do not place any heavyweight objects in here.
+	struct astc_hdr_6x6_global_config
+	{
+		// Important: The Delta ITP colorspace error metric we use internally makes several assumptions about the nature of the HDR RGB inputs supplied to the encoder.
+		// This encoder computes colorspace error in the ICtCp (or more accurately the delta ITP, where CT is scaled by .5 vs. ICtCp to become T) colorspace, so getting this correct is important.
+		// By default the encoder assumes the input is in absolute luminance (in nits or candela per square meter, cd/m^2), specified as positive-only linear light RGB, using the REC 709 colorspace gamut (but NOT the sRGB transfer function, i.e. linear light).
+		// If the m_rec2020_bt2100_color_gamut flag is true, the input colorspace is treated as REC 2020/BT.2100 (which is wider than 709).
+		// For SDR/LDR->HDR upconversion, the REC 709 sRGB input should be converted to linear light (sRGB->linear) and the resulting normalized linear RGB values scaled by either 80 or 100 nits (the luminance of a typical SDR monitor). 
+		// SDR upconversion to normalized [0,1] (i.e. non-absolute) luminances may work but is not supported because ITP errors will not be predicted correctly.
+		bool m_rec2020_bt2100_color_gamut = false; 
+
+		// levels 0-3 normal levels, 4=exhaustive
+		uint32_t m_master_comp_level = 0;
+		uint32_t m_highest_comp_level = 1;
+
+		float m_lambda = 0.0f;
+
+		bool m_extra_patterns_flag = false; // def to false, works in comp levels [1,4]
+		bool m_brute_force_partition_matching = false; // def to false
+
+		bool m_jnd_optimization = false; // defaults to false for HDR inputs, on SDR upconverted images this can default to enabled
+		float m_jnd_delta_itp_thresh = .75f;
+
+		bool m_force_one_strip = false;
+				
+		bool m_gaussian1_fallback = true; // def to true, if this is disabled m_gaussian2_fallback should be disabled too
+		float m_gaussian1_strength = 1.45f;
+
+		bool m_gaussian2_fallback = true; // def to true, hopefully rarely kicks in
+		float m_gaussian2_strength = 1.83f;
+				
+		// m_disable_delta_endpoint_usage may give a slight increase in RDO ASTC encoding efficiency. It's also faster.
+		bool m_disable_delta_endpoint_usage = false;
+
+		// Scale up Delta ITP errors for very dark pixels, assuming they will be brightly exposed > 1.0x.
+		// We don't know if the output will be exposed, or not. If heavily exposed, our JND calculations will not be conservative enough.
+		bool m_delta_itp_dark_adjustment = true;
+
+		bool m_debug_images = false;
+		std::string m_debug_image_prefix = "dbg_astc_hdr_6x6_devel_";
+
+		bool m_output_images = false;
+		std::string m_output_image_prefix = "dbg_astc_hdr_6x6_output_";
+
+		bool m_debug_output = false;
+		bool m_image_stats = false;
+		bool m_status_output = false;
+
+		//-------------------------------------------------------------------------------------
+		// Very low level/devel parameters - intended for development. Best not to change them.
+		//-------------------------------------------------------------------------------------
+		bool m_deblocking_flag = true;
+		float m_deblock_penalty_weight = .03f;
+		bool m_disable_twothree_subsets = false; // def to false
+		bool m_use_solid_blocks = true; // def to true
+		bool m_use_runs = true; // def to true
+		bool m_block_stat_optimizations_flag = true; // def to true	
+
+		bool m_rdo_candidate_diversity_boost = true; // def to true
+		float m_rdo_candidate_diversity_boost_bit_window_weight = 1.2f;
+
+		bool m_favor_higher_compression = true; // utilize all modes
+		uint32_t m_num_reuse_xy_deltas = basist::astc_6x6_hdr::NUM_REUSE_XY_DELTAS;
+
+		void print() const
+		{
+			basisu::fmt_debug_printf("m_master_comp_level: {}, m_highest_comp_level: {}\n", m_master_comp_level, m_highest_comp_level);
+			basisu::fmt_debug_printf("m_lambda: {}\n", m_lambda);
+			basisu::fmt_debug_printf("m_rec2020_bt2100_color_gamut: {}\n", m_rec2020_bt2100_color_gamut);
+			basisu::fmt_debug_printf("m_extra_patterns_flag: {}, m_brute_force_partition_matching: {}\n", m_extra_patterns_flag, m_brute_force_partition_matching);
+			basisu::fmt_debug_printf("m_jnd_optimization: {}, m_jnd_delta_itp_thresh: {}\n", m_jnd_optimization, m_jnd_delta_itp_thresh);
+			basisu::fmt_debug_printf("m_force_one_strip: {}\n", m_force_one_strip);
+			basisu::fmt_debug_printf("m_gaussian1_fallback: {}, m_gaussian1_strength: {}\n", m_gaussian1_fallback, m_gaussian1_strength);
+			basisu::fmt_debug_printf("m_gaussian2_fallback: {}, m_gaussian2_strength: {}\n", m_gaussian2_fallback, m_gaussian2_strength);
+			basisu::fmt_debug_printf("m_disable_delta_endpoint_usage: {}\n", m_disable_delta_endpoint_usage);
+			basisu::fmt_debug_printf("m_delta_itp_dark_adjustment: {}\n", m_delta_itp_dark_adjustment);
+			basisu::fmt_debug_printf("m_debug_images: {}, m_debug_image_prefix: {}\n", m_debug_images, m_debug_image_prefix);
+			basisu::fmt_debug_printf("m_output_images: {}, m_output_image_prefix: {}\n", m_output_images, m_output_image_prefix);
+			basisu::fmt_debug_printf("m_image_stats: {}, m_status_output: {}\n", m_image_stats, m_status_output);
+			basisu::fmt_debug_printf("m_deblocking_flag: {}, m_deblock_penalty_weight: {}\n", m_deblocking_flag, m_deblock_penalty_weight);
+			basisu::fmt_debug_printf("m_disable_twothree_subsets: {}, m_use_solid_blocks: {}\n", m_disable_twothree_subsets, m_use_solid_blocks);
+			basisu::fmt_debug_printf("m_use_runs: {}, m_block_stat_optimizations_flag: {}\n", m_use_runs, m_block_stat_optimizations_flag);
+			basisu::fmt_debug_printf("m_rdo_candidate_diversity_boost: {}, m_rdo_candidate_diversity_boost_bit_window_weight: {}\n", m_rdo_candidate_diversity_boost, m_rdo_candidate_diversity_boost_bit_window_weight);
+			basisu::fmt_debug_printf("m_favor_higher_compression: {}, m_num_reuse_xy_deltas: {}\n", m_favor_higher_compression, m_num_reuse_xy_deltas);
+		}
+				
+		astc_hdr_6x6_global_config()
+		{
+		}
+
+		void clear()
+		{
+			astc_hdr_6x6_global_config def;
+			std::swap(*this, def);
+		}
+
+		// Max level is ASTC_HDR_6X6_MAX_USER_COMP_LEVEL
+		void set_user_level(int level);
+	};
+
+	void global_init();
+
+	struct result_metrics
+	{
+		basisu::image_metrics m_im_astc_log2;
+		basisu::image_metrics m_im_astc_half;
+
+		basisu::image_metrics m_im_bc6h_log2;
+		basisu::image_metrics m_im_bc6h_half;
+	};
+	
+	// The input image should NOT be padded to 6x6 block boundaries, i.e. pass the original unexpanded image.
+	bool compress_photo(const basisu::imagef& orig_src_img, const astc_hdr_6x6_global_config& global_cfg, basisu::job_pool* pJob_pool,
+		basisu::uint8_vec& intermediate_tex_data, basisu::uint8_vec& astc_tex_data, result_metrics& metrics);
+
+} // namespace astc_6x6_hdr
--- a/thirdparty/basis_universal/encoder/basisu_astc_hdr_common.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_astc_hdr_common.cpp
--- a/thirdparty/basis_universal/encoder/basisu_astc_hdr_common.h
+++ b/thirdparty/basis_universal/encoder/basisu_astc_hdr_common.h
@ -0,0 +1,423 @@
+// File: basisu_astc_hdr_common.h
+#pragma once
+#include "basisu_enc.h"
+#include "basisu_gpu_texture.h"
+#include "../transcoder/basisu_astc_helpers.h"
+#include "../transcoder/basisu_astc_hdr_core.h"
+
+namespace basisu
+{
+	const uint32_t MAX_ASTC_HDR_BLOCK_W = 6, MAX_ASTC_HDR_BLOCK_H = 6;
+	const uint32_t MAX_ASTC_HDR_ENC_BLOCK_PIXELS = 6 * 6;
+
+	const uint32_t MODE11_TOTAL_SUBMODES = 8; // plus one extra hidden submode for direct (directly encoded) endpoints, so really 9 (see tables 99/100 of the ASTC spec)
+	const uint32_t MODE7_TOTAL_SUBMODES = 6;
+		
+	// [ise_range][0] = # levels
+	// [ise_range][1...] = lerp value [0,64]
+	// in ASTC order
+	// Supported ISE weight ranges: 0 to 11, 12 total
+	const uint32_t MIN_SUPPORTED_ISE_WEIGHT_INDEX = astc_helpers::BISE_2_LEVELS; // ISE 0=2 levels
+	const uint32_t MAX_SUPPORTED_ISE_WEIGHT_INDEX = astc_helpers::BISE_32_LEVELS; // ISE 11=32 levels
+	const uint32_t MIN_SUPPORTED_WEIGHT_LEVELS = 2;
+	const uint32_t MAX_SUPPORTED_WEIGHT_LEVELS = 32;
+
+	extern const uint8_t g_ise_weight_lerps[MAX_SUPPORTED_ISE_WEIGHT_INDEX + 1][33];
+
+	const float Q_LOG_BIAS_4x4 = .125f; // the original UASTC HDR 4x4 log bias
+	const float Q_LOG_BIAS_6x6 = 1.0f; // the log bias both encoders use now
+
+	const float LDR_TO_HDR_NITS = 100.0f;
+
+	struct astc_hdr_codec_base_options
+	{
+		float m_r_err_scale, m_g_err_scale;
+		float m_q_log_bias;
+		
+		bool m_ultra_quant;
+		
+		// If true, the ASTC HDR compressor is allowed to more aggressively vary weight indices for slightly higher compression in non-fastest mode. This will hurt BC6H quality, however.
+		bool m_allow_uber_mode;
+
+		bool m_mode7_full_s_optimization;
+
+		bool m_take_first_non_clamping_mode11_submode;
+		bool m_take_first_non_clamping_mode7_submode;
+
+		bool m_disable_weight_plane_optimization;
+		
+		astc_hdr_codec_base_options() { init(); }
+
+		void init();
+	};
+
+	inline int get_bit( // Extracts bit src_bit of src_val; the result is always 0 or 1.
+		int src_val, int src_bit)
+	{
+		assert(src_bit >= 0 && src_bit <= 31); // only bit positions [0,31] are accepted
+		int bit = (src_val >> src_bit) & 1; // move the wanted bit down to position 0, then mask off everything else
+		return bit;
+	}
+
+	inline void pack_bit( // ORs bit src_bit of src_val (bit 0 by default) into bit dst_bit of dst.
+		int& dst, int dst_bit,
+		int src_val, int src_bit = 0)
+	{
+		assert(dst_bit >= 0 && dst_bit <= 31); // only destination bit positions [0,31] are accepted
+		int bit = get_bit(src_val, src_bit);
+		dst |= (bit << dst_bit); // OR only: a destination bit that is already set is never cleared
+	}
+
+	inline uint32_t get_max_qlog(uint32_t bits) // Maps a qlog bit width to the matching basist::MAX_QLOG* constant.
+	{
+		switch (bits)
+		{
+		case 7: return basist::MAX_QLOG7;
+		case 8: return basist::MAX_QLOG8;
+		case 9: return basist::MAX_QLOG9;
+		case 10: return basist::MAX_QLOG10;
+		case 11: return basist::MAX_QLOG11;
+		case 12: return basist::MAX_QLOG12;
+		case 16: return basist::MAX_QLOG16;
+		default: assert(0); break; // only widths 7-12 and 16 are handled; anything else trips the assert
+		}
+		return 0; // reached only for an unhandled width when assert() is compiled out
+	}
+
+#if 0
+	inline float get_max_qlog_val(uint32_t bits)
+	{
+		switch (bits)
+		{
+		case 7: return MAX_QLOG7_VAL;
+		case 8: return MAX_QLOG8_VAL;
+		case 9: return MAX_QLOG9_VAL;
+		case 10: return MAX_QLOG10_VAL;
+		case 11: return MAX_QLOG11_VAL;
+		case 12: return MAX_QLOG12_VAL;
+		case 16: return MAX_QLOG16_VAL;
+		default: assert(0); break;
+		}
+		return 0;
+	}
+#endif
+
+#if 0
+	// Input is the low 11 bits of the qlog
+	// Returns the 10-bit mantissa of the half float value
+	int qlog11_to_half_float_mantissa(int M)
+	{
+		assert(M <= 0x7FF);
+		int Mt;
+		if (M < 512)
+			Mt = 3 * M;
+		else if (M >= 1536)
+			Mt = 5 * M - 2048;
+		else
+			Mt = 4 * M - 512;
+		return (Mt >> 3);
+	}
+#endif
+
+	// Input is the 10-bit mantissa of the half float value
+	// Output is the 11-bit qlog value
+	// Inverse of qlog11_to_half_float_mantissa(), which is piecewise linear in three segments (Mt = 3*M, 4*M - 512, 5*M - 2048, then Mt >> 3).
+	inline int half_float_mantissa_to_qlog11(int hf)
+	{
+		int q0 = (hf * 8 + 2) / 3; // candidate from inverting the M < 512 segment (Mt = 3 * M)
+		int q1 = (hf * 8 + 2048 + 4) / 5; // candidate from inverting the M >= 1536 segment (Mt = 5 * M - 2048)
+
+		if (q0 < 512)
+			return q0;
+		else if (q1 >= 1536)
+			return q1;
+
+		int q2 = (hf * 8 + 512 + 2) / 4; // neither outer segment applies: invert the middle segment (Mt = 4 * M - 512)
+		return q2;
+	}
+
+	inline int half_to_qlog16(int hf) // Converts a half float (passed as its bit pattern) to qlog16; input must be unsigned and not Inf/NaN.
+	{
+		assert(!basist::half_is_signed((basist::half_float)hf) && !basist::is_half_inf_or_nan((basist::half_float)hf));
+
+		// extract 5 bits exponent, which is carried through to qlog16 unchanged
+		const int exp = (hf >> 10) & 0x1F;
+
+		// extract and invert the 10 bit mantissa to nearest qlog11 (should be lossless)
+		const int mantissa = half_float_mantissa_to_qlog11(hf & 0x3FF);
+		assert(mantissa <= 0x7FF);
+
+		// Now combine to qlog16, which is what ASTC HDR interpolates using the [0-64] weights.
+		uint32_t qlog16 = (exp << 11) | mantissa; // exponent in bits [15:11], qlog11 mantissa in bits [10:0]
+
+		// should be a lossless operation
+		assert(astc_helpers::qlog16_to_half(qlog16) == hf);
+
+		return qlog16;
+	}
+
+	void interpolate_qlog12_colors(
+		const int e[2][3],
+		basist::half_float* pDecoded_half,
+		vec3F* pDecoded_float,
+		uint32_t n, uint32_t ise_weight_range);
+
+	bool get_astc_hdr_mode_11_block_colors(
+		const uint8_t* pEndpoints,
+		basist::half_float* pDecoded_half,
+		vec3F* pDecoded_float,
+		uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range);
+
+	bool get_astc_hdr_mode_7_block_colors(
+		const uint8_t* pEndpoints,
+		basist::half_float* pDecoded_half,
+		vec3F* pDecoded_float,
+		uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range);
+			
+	// Fast high precision piecewise linear approximation of log2(bias+x).
+	// Half may be zero, positive or denormal. No NaN/Inf/negative.
+	BASISU_FORCE_INLINE double q(basist::half_float x, float log_bias)
+	{
+		union { float f; int32_t i; uint32_t u; } fi; // lets the float's raw 32-bit pattern be read back as an integer
+
+		fi.f = fast_half_to_float_pos_not_inf_or_nan(x);
+
+		assert(fi.f >= 0.0f);
+						
+		fi.f += log_bias; // apply the bias before the bit pattern is reinterpreted
+
+		return (double)fi.u; // approx log2f(fi.f), need to return double for the precision
+	}
+
+	BASISU_FORCE_INLINE uint32_t q2(basist::half_float x, float log_bias) // Same steps as q(), but returns the raw 32-bit pattern as an integer instead of a double.
+	{
+		union { float f; int32_t i; uint32_t u; } fi; // lets the float's raw 32-bit pattern be read back as an integer
+
+		fi.f = fast_half_to_float_pos_not_inf_or_nan(x);
+
+		assert(fi.f >= 0.0f);
+		
+		fi.f += log_bias; // apply the bias before the bit pattern is reinterpreted
+
+		return fi.u;
+	}
+
+	double eval_selectors(
+		uint32_t num_pixels,
+		uint8_t* pWeights,
+		uint32_t ise_weight_range,
+		const basist::half_float* pBlock_pixels_half,
+		uint32_t num_weight_levels,
+		const basist::half_float* pDecoded_half,
+		const astc_hdr_codec_base_options& coptions,
+		uint32_t usable_selector_bitmask = UINT32_MAX);
+
+	double eval_selectors_dual_plane(
+		uint32_t channel_index,
+		uint32_t num_pixels,
+		uint8_t* pWeights0, uint8_t* pWeights1,
+		const basist::half_float* pBlock_pixels_half,
+		uint32_t num_weight_levels,
+		const basist::half_float* pDecoded_half,
+		const astc_hdr_codec_base_options& coptions,
+		uint32_t usable_selector_bitmask = UINT32_MAX);
+
+	double compute_block_error(uint32_t num_pixels, const basist::half_float* pOrig_block, const basist::half_float* pPacked_block, const astc_hdr_codec_base_options& coptions);
+
+	const uint32_t FIRST_MODE7_SUBMODE_INDEX = 0;
+	const uint32_t MAX_MODE7_SUBMODE_INDEX = 5;
+
+	bool pack_mode7(
+		const vec3F& high_color_q16, const float s_q16,
+		uint32_t ise_endpoint_range, uint8_t* pEndpoints,
+		uint32_t ise_weight_range, // only used for determining biasing during CEM 7 packing
+		const astc_hdr_codec_base_options& coptions,
+		int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used);
+
+	bool try_mode7(
+		uint32_t num_pixels,
+		uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used,
+		const vec3F& high_color_q16, const float s_q16,
+		const basist::half_float block_pixels_half[][3],
+		uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions,
+		uint32_t ise_endpoint_range,
+		int32_t first_submode = 0, int32_t last_submode = MAX_MODE7_SUBMODE_INDEX);
+
+	bool pack_mode11(
+		const vec3F& low_color_q16, const vec3F& high_color_q16,
+		uint32_t ise_endpoint_range, uint8_t* pEndpoints,
+		const astc_hdr_codec_base_options& coptions,
+		bool direct_only, int32_t first_submode, int32_t last_submode, bool ignore_clamping, uint32_t& submode_used);
+
+	bool try_mode11(uint32_t num_pixels,
+		uint8_t* pEndpoints, uint8_t* pWeights, double& cur_block_error, uint32_t& submode_used,
+		const vec3F& low_color_q16, const vec3F& high_color_q16,
+		const basist::half_float block_pixels_half[][3],
+		uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range,
+		bool constrain_ise_weight_selectors,
+		int32_t first_submode, int32_t last_submode, bool ignore_clamping);
+
+	bool try_mode11_dual_plane(uint32_t channel_index, uint32_t num_pixels,
+		uint8_t* pEndpoints, uint8_t* pWeights0, uint8_t* pWeights1, double& cur_block_error, uint32_t& submode_used,
+		const vec3F& low_color_q16, const vec3F& high_color_q16,
+		const basist::half_float block_pixels_half[][3],
+		uint32_t num_weight_levels, uint32_t ise_weight_range, const astc_hdr_codec_base_options& coptions, bool direct_only, uint32_t ise_endpoint_range,
+		bool constrain_ise_weight_selectors,
+		int32_t first_submode, int32_t last_submode, bool ignore_clamping);
+
+	const int FIRST_MODE11_SUBMODE_INDEX = -1;
+	const int MAX_MODE11_SUBMODE_INDEX = 7;
+
+	enum opt_mode_t
+	{
+		cNoOpt,
+		cOrdinaryLeastSquares,
+		cWeightedLeastSquares,
+		cWeightedLeastSquaresHeavy,
+		cWeightedAverage
+	};
+
+	struct encode_astc_block_stats
+	{
+		uint32_t m_num_pixels;
+		vec3F m_mean_q16;
+		vec3F m_axis_q16;
+
+		void init(uint32_t num_pixels, const vec4F pBlock_pixels_q16[]);
+	};
+
+	double encode_astc_hdr_block_mode_11(
+		uint32_t num_pixels,
+		const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
+		uint32_t ise_weight_range,
+		uint32_t& best_submode,
+		double cur_block_error,
+		uint8_t* blk_endpoints, uint8_t* blk_weights,
+		const astc_hdr_codec_base_options& coptions,
+		bool direct_only,
+		uint32_t ise_endpoint_range,
+		bool uber_mode,
+		bool constrain_ise_weight_selectors,
+		int32_t first_submode, int32_t last_submode, bool ignore_clamping, 
+		opt_mode_t opt_mode, 
+		const encode_astc_block_stats *pBlock_stats = nullptr);
+
+	double encode_astc_hdr_block_downsampled_mode_11(
+		uint32_t block_x, uint32_t block_y, uint32_t grid_x, uint32_t grid_y,
+		uint32_t ise_weight_range, uint32_t ise_endpoint_range,
+		uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
+		double cur_block_error,
+		int32_t first_submode, int32_t last_submode, bool ignore_clamping, opt_mode_t opt_mode,
+		uint8_t* pBlk_endpoints, uint8_t* pBlk_weights, uint32_t& best_submode,
+		const astc_hdr_codec_base_options& coptions,
+		const encode_astc_block_stats* pBlock_stats = nullptr);
+
+	double encode_astc_hdr_block_mode_11_dual_plane(
+		uint32_t num_pixels,
+		const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
+		uint32_t channel_index,		// 0-2
+		uint32_t ise_weight_range,
+		uint32_t& best_submode,
+		double cur_block_error,
+		uint8_t* blk_endpoints, uint8_t* blk_weights0, uint8_t* blk_weights1,
+		const astc_hdr_codec_base_options& coptions,
+		bool direct_only,
+		uint32_t ise_endpoint_range,
+		bool uber_mode,
+		bool constrain_ise_weight_selectors,
+		int32_t first_submode, int32_t last_submode, 
+		bool ignore_clamping);
+
+	double encode_astc_hdr_block_mode_7(
+		uint32_t num_pixels,
+		const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
+		uint32_t ise_weight_range,
+		uint32_t& best_submode,
+		double cur_block_error,
+		uint8_t* blk_endpoints,  //[4]
+		uint8_t* blk_weights, // [num_pixels]
+		const astc_hdr_codec_base_options& coptions,
+		uint32_t ise_endpoint_range, 
+		int first_submode = 0, int last_submode = MAX_MODE7_SUBMODE_INDEX, 
+		const encode_astc_block_stats *pBlock_stats = nullptr);
+
+	//--------------------------------------------------------------------------------------------------------------------------
+
+	struct mode11_log_desc
+	{
+		int32_t m_submode;
+		int32_t m_maj_comp;
+
+		// Or R0, G0, B0 if maj_comp==3 (direct)
+		int32_t m_a;  // positive
+		int32_t m_c;  // positive
+		int32_t m_b0; // positive
+
+		// Or R1, G1, B1 if maj_comp==3 (direct)
+		int32_t m_b1; // positive
+		int32_t m_d0; // if not direct, is signed
+		int32_t m_d1; // if not direct, is signed
+
+		// limits if not direct
+		int32_t m_a_bits, m_c_bits, m_b_bits, m_d_bits;
+		int32_t m_max_a_val, m_max_c_val, m_max_b_val, m_min_d_val, m_max_d_val;
+
+		void clear() { clear_obj(*this); }
+
+		bool is_direct() const { return m_maj_comp == 3; }
+	};
+
+	//--------------------------------------------------------------------------------------------------------------------------
+	bool pack_astc_mode7_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& rgb_q16, float s_q16, int& max_clamp_mag, uint32_t ise_weight_range, bool early_out_if_clamped, int max_clamp_mag_accept_thresh);
+
+	bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, int val_q[2][3], int& max_clamp_mag, bool early_out_if_clamped = false, int max_clamp_mag_accept_thresh = 0);
+	bool pack_astc_mode11_submode(uint32_t submode, uint8_t* pEndpoints, const vec3F& low_q16, const vec3F& high_q16, int& max_clamp_mag, bool early_out_if_clamped = false, int max_clamp_mag_accept_thresh = 0);
+	void pack_astc_mode11_direct(uint8_t* pEndpoints, vec3F l_q16, vec3F h_q16);
+	
+	bool pack_mode11(mode11_log_desc& desc, uint8_t* pEndpoints);
+	void unpack_mode11(const uint8_t* pEndpoints, mode11_log_desc& desc);
+
+	void decode_cem_11_config(const uint8_t* pEndpoints, int& submode_index, int& maj_index);
+	void decode_cem_7_config(const uint8_t* pEndpoints, int& submode_index, int& maj_index);
+		
+	void dequantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_raw_weights);
+
+	const float* get_6x6_downsample_matrix(uint32_t grid_width, uint32_t grid_height);
+	
+	void downsample_weight_grid(
+		const float* pMatrix_weights,
+		uint32_t bx, uint32_t by,		// source/from dimension (block size)
+		uint32_t wx, uint32_t wy,		// dest/to dimension (grid size)
+		const uint8_t* pSrc_weights,	// these are dequantized weights, NOT ISE symbols, [by][bx]
+		uint8_t* pDst_weights);			// [wy][wx]
+
+	void downsample_ise_weights(
+		uint32_t weight_ise_range, uint32_t quant_weight_ise_range,
+		uint32_t block_w, uint32_t block_h,
+		uint32_t grid_w, uint32_t grid_h,
+		const uint8_t* pSrc_weights, uint8_t* pDst_weights);
+
+	void downsample_ise_weights_dual_plane(
+		uint32_t dequant_weight_ise_range, uint32_t quant_weight_ise_range,
+		uint32_t block_w, uint32_t block_h,
+		uint32_t grid_w, uint32_t grid_h,
+		const uint8_t* pSrc_weights0, const uint8_t* pSrc_weights1,
+		uint8_t* pDst_weights);
+
+	bool refine_endpoints(
+		uint32_t cem,
+		uint32_t endpoint_ise_range,
+		uint8_t* pEndpoint_vals, // the endpoints to optimize
+		uint32_t block_w, uint32_t block_h, // block dimensions
+		uint32_t grid_w, uint32_t grid_h, const uint8_t* pWeights, uint32_t weight_ise_range, // weight grid
+		uint32_t num_pixels, const basist::half_float pBlock_pixels_half[][3], const vec4F pBlock_pixels_q16[],
+		const uint8_t* pPixel_block_ofs, // maps this subset's pixels to block offsets
+		astc_hdr_codec_base_options& coptions, opt_mode_t opt_mode);
+	
+	extern bool g_astc_hdr_enc_initialized;
+
+	// This MUST be called before encoding any blocks.
+	void astc_hdr_enc_init();
+
+} // namespace basisu
+
--- a/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_astc_hdr_enc.cpp
--- a/thirdparty/basis_universal/encoder/basisu_comp.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_comp.cpp
--- a/thirdparty/basis_universal/encoder/basisu_comp.h
+++ b/thirdparty/basis_universal/encoder/basisu_comp.h
@ -18,10 +18,11 @@
 #include "basisu_basis_file.h"
 #include "../transcoder/basisu_transcoder.h"
 #include "basisu_uastc_enc.h"
-#include "basisu_astc_hdr_enc.h"
+#include "basisu_uastc_hdr_4x4_enc.h"
+#include "basisu_astc_hdr_6x6_enc.h"

-#define BASISU_LIB_VERSION 150
-#define BASISU_LIB_VERSION_STRING "1.50"
+#define BASISU_LIB_VERSION 160
+#define BASISU_LIB_VERSION_STRING "1.60"

 #ifndef BASISD_SUPPORT_KTX2
 	#error BASISD_SUPPORT_KTX2 is undefined
@ -76,6 +77,8 @@ namespace basisu
 			m_height = 0;
 						
 			m_basis_rgb_avg_psnr = 0.0f;
+			m_basis_rgb_avg_log2_psnr = 0.0f;
+
 			m_basis_rgba_avg_psnr = 0.0f;
 			m_basis_a_avg_psnr = 0.0f;
 			m_basis_luma_709_psnr = 0.0f;
@ -83,6 +86,7 @@ namespace basisu
 			m_basis_luma_709_ssim = 0.0f;

 			m_basis_rgb_avg_bc6h_psnr = 0.0f;
+			m_basis_rgb_avg_bc6h_log2_psnr = 0.0f;

 			m_bc7_rgb_avg_psnr = 0.0f;
 			m_bc7_rgba_avg_psnr = 0.0f;
@ -105,6 +109,8 @@ namespace basisu

 		// .basis/.ktx2 compressed (LDR: ETC1S or UASTC statistics, HDR: transcoded BC6H statistics)
 		float m_basis_rgb_avg_psnr;
+		float m_basis_rgb_avg_log2_psnr;
+
 		float m_basis_rgba_avg_psnr;
 		float m_basis_a_avg_psnr;
 		float m_basis_luma_709_psnr;
@ -113,6 +119,7 @@ namespace basisu

 		// UASTC HDR only.
 		float m_basis_rgb_avg_bc6h_psnr;
+		float m_basis_rgb_avg_bc6h_log2_psnr;

 		// LDR: BC7 statistics
 		float m_bc7_rgb_avg_psnr;
@ -131,6 +138,17 @@ namespace basisu
 		bool m_opencl_failed;
 	};

+	enum class hdr_modes
+	{
+		// standard but constrained ASTC HDR 4x4 tex data that can be rapidly transcoded to BC6H
+		cUASTC_HDR_4X4, 
+		// standard RDO optimized or non-RDO (highest quality) ASTC HDR 6x6 tex data that can be rapidly re-encoded to BC6H
+		cASTC_HDR_6X6,
+		// a custom intermediate format based off ASTC HDR that can be rapidly decoded straight to ASTC HDR or re-encoded to BC6H
+		cASTC_HDR_6X6_INTERMEDIATE,
+		cTotal
+	};
+
 	template<bool def>
 	struct bool_param
 	{
@ -220,21 +238,23 @@ namespace basisu
 			m_endpoint_rdo_thresh(BASISU_DEFAULT_ENDPOINT_RDO_THRESH, 0.0f, 1e+10f),
 			m_mip_scale(1.0f, .000125f, 4.0f),
 			m_mip_smallest_dimension(1, 1, 16384),
-			m_max_endpoint_clusters(512),
-			m_max_selector_clusters(512),
-			m_quality_level(-1),
-			m_pack_uastc_flags(cPackUASTCLevelDefault),
-			m_rdo_uastc_quality_scalar(1.0f, 0.001f, 50.0f),
-			m_rdo_uastc_dict_size(BASISU_RDO_UASTC_DICT_SIZE_DEFAULT, BASISU_RDO_UASTC_DICT_SIZE_MIN, BASISU_RDO_UASTC_DICT_SIZE_MAX),
-			m_rdo_uastc_max_smooth_block_error_scale(UASTC_RDO_DEFAULT_SMOOTH_BLOCK_MAX_ERROR_SCALE, 1.0f, 300.0f),
-			m_rdo_uastc_smooth_block_max_std_dev(UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV, .01f, 65536.0f),
-			m_rdo_uastc_max_allowed_rms_increase_ratio(UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO, .01f, 100.0f),
-			m_rdo_uastc_skip_block_rms_thresh(UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH, .01f, 100.0f),
+			m_etc1s_max_endpoint_clusters(512),
+			m_etc1s_max_selector_clusters(512),
+			m_etc1s_quality_level(-1),
+			m_pack_uastc_ldr_4x4_flags(cPackUASTCLevelDefault),
+			m_rdo_uastc_ldr_4x4_quality_scalar(1.0f, 0.001f, 50.0f),
+			m_rdo_uastc_ldr_4x4_dict_size(BASISU_RDO_UASTC_DICT_SIZE_DEFAULT, BASISU_RDO_UASTC_DICT_SIZE_MIN, BASISU_RDO_UASTC_DICT_SIZE_MAX),
+			m_rdo_uastc_ldr_4x4_max_smooth_block_error_scale(UASTC_RDO_DEFAULT_SMOOTH_BLOCK_MAX_ERROR_SCALE, 1.0f, 300.0f),
+			m_rdo_uastc_ldr_4x4_smooth_block_max_std_dev(UASTC_RDO_DEFAULT_MAX_SMOOTH_BLOCK_STD_DEV, .01f, 65536.0f),
+			m_rdo_uastc_ldr_4x4_max_allowed_rms_increase_ratio(UASTC_RDO_DEFAULT_MAX_ALLOWED_RMS_INCREASE_RATIO, .01f, 100.0f),
+			m_rdo_uastc_ldr_4x4_skip_block_rms_thresh(UASTC_RDO_DEFAULT_SKIP_BLOCK_RMS_THRESH, .01f, 100.0f),
 			m_resample_width(0, 1, 16384),
 			m_resample_height(0, 1, 16384),
 			m_resample_factor(0.0f, .00125f, 100.0f),
 			m_ktx2_uastc_supercompression(basist::KTX2_SS_NONE),
 			m_ktx2_zstd_supercompression_level(6, INT_MIN, INT_MAX),
+			m_ldr_hdr_upconversion_nit_multiplier(0.0f, 0.0f, basist::MAX_HALF_FLOAT),
+			m_ldr_hdr_upconversion_black_bias(0.0f, 0.0f, 1.0f),
 			m_pJob_pool(nullptr)
 		{
 			clear();
@ -243,6 +263,9 @@ namespace basisu
 		void clear()
 		{
 			m_uastc.clear();
+			m_hdr.clear();
+			m_hdr_mode = hdr_modes::cUASTC_HDR_4X4;
+
 			m_use_opencl.clear();
 			m_status_output.clear();

@ -290,24 +313,24 @@ namespace basisu
 			m_mip_fast.clear();
 			m_mip_smallest_dimension.clear();

-			m_max_endpoint_clusters = 0;
-			m_max_selector_clusters = 0;
-			m_quality_level = -1;
+			m_etc1s_max_endpoint_clusters = 0;
+			m_etc1s_max_selector_clusters = 0;
+			m_etc1s_quality_level = -1;

 			m_tex_type = basist::cBASISTexType2D;
 			m_userdata0 = 0;
 			m_userdata1 = 0;
 			m_us_per_frame = 0;

-			m_pack_uastc_flags = cPackUASTCLevelDefault;
-			m_rdo_uastc.clear();
-			m_rdo_uastc_quality_scalar.clear();
-			m_rdo_uastc_max_smooth_block_error_scale.clear();
-			m_rdo_uastc_smooth_block_max_std_dev.clear();
-			m_rdo_uastc_max_allowed_rms_increase_ratio.clear();
-			m_rdo_uastc_skip_block_rms_thresh.clear();
-			m_rdo_uastc_favor_simpler_modes_in_rdo_mode.clear();
-			m_rdo_uastc_multithreading.clear();
+			m_pack_uastc_ldr_4x4_flags = cPackUASTCLevelDefault;
+			m_rdo_uastc_ldr_4x4.clear();
+			m_rdo_uastc_ldr_4x4_quality_scalar.clear();
+			m_rdo_uastc_ldr_4x4_max_smooth_block_error_scale.clear();
+			m_rdo_uastc_ldr_4x4_smooth_block_max_std_dev.clear();
+			m_rdo_uastc_ldr_4x4_max_allowed_rms_increase_ratio.clear();
+			m_rdo_uastc_ldr_4x4_skip_block_rms_thresh.clear();
+			m_rdo_uastc_ldr_4x4_favor_simpler_modes_in_rdo_mode.clear();
+			m_rdo_uastc_ldr_4x4_multithreading.clear();

 			m_resample_width.clear();
 			m_resample_height.clear();
@ -323,19 +346,80 @@ namespace basisu

 			m_validate_output_data.clear();

-			m_hdr_ldr_srgb_to_linear_conversion.clear();
+			m_ldr_hdr_upconversion_srgb_to_linear.clear();

 			m_hdr_favor_astc.clear();
 			
+			m_uastc_hdr_4x4_options.init();
+			m_astc_hdr_6x6_options.clear();
+
+			m_ldr_hdr_upconversion_nit_multiplier.clear();
+			m_ldr_hdr_upconversion_black_bias.clear();
+			
 			m_pJob_pool = nullptr;
 		}

+		// Selects the output format with a single enum value by translating it into the legacy
+		// m_hdr / m_uastc / m_hdr_mode parameters (which were preserved for backwards compatibility with old code).
+		// An unrecognized format asserts and leaves the current settings untouched.
+		void set_format_mode(basist::basis_tex_format m)
+		{
+			// LDR formats: only m_uastc tells them apart. m_hdr_mode doesn't matter here, so it's just reset to 4x4.
+			const bool wants_etc1s = (m == basist::basis_tex_format::cETC1S);
+			const bool wants_uastc_ldr = (m == basist::basis_tex_format::cUASTC4x4);
+			if (wants_etc1s || wants_uastc_ldr)
+			{
+				m_hdr = false;
+				m_uastc = wants_uastc_ldr;
+				m_hdr_mode = hdr_modes::cUASTC_HDR_4X4;
+				return;
+			}
+
+			// HDR formats: every one of them sets both m_hdr and m_uastc, they only differ in the HDR mode.
+			hdr_modes selected_mode;
+			if (m == basist::basis_tex_format::cUASTC_HDR_4x4)
+			{
+				selected_mode = hdr_modes::cUASTC_HDR_4X4;
+			}
+			else if (m == basist::basis_tex_format::cASTC_HDR_6x6)
+			{
+				selected_mode = hdr_modes::cASTC_HDR_6X6;
+			}
+			else if (m == basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE)
+			{
+				selected_mode = hdr_modes::cASTC_HDR_6X6_INTERMEDIATE;
+			}
+			else
+			{
+				assert(0);
+				return;
+			}
+
+			m_hdr = true;
+			m_uastc = true;
+			m_hdr_mode = selected_mode;
+		}
+
+		// By default we generate LDR ETC1S data. 
+		// if m_uastc is true but m_hdr is not true, we generate UASTC 4x4 LDR data (8bpp with or without RDO).
+		// if m_uastc is true and m_hdr is true, we generate 4x4 or 6x6 HDR data (either standard ASTC, constrained ASTC, RDO ASTC, or intermediate), controlled by m_hdr_mode.
+		
 		// True to generate UASTC .basis/.KTX2 file data, otherwise ETC1S.
+		// Should be true for any non-ETC1S format (UASTC 4x4 LDR, UASTC 4x4 HDR, RDO ASTC 6x6 HDR, and ASTC 6x6 HDR intermediate).
 		bool_param<false> m_uastc;

-		// Set m_hdr to true to switch to UASTC HDR mode.
+		// Set m_hdr to true to switch to UASTC HDR mode.
+		// m_hdr_mode then controls which format is output (4x4, 6x6, or 6x6 intermediate).
 		bool_param<false> m_hdr;

+		// If m_hdr is true, this specifies which mode we operate in (currently UASTC 4x4 HDR or ASTC 6x6 HDR). Defaults to UASTC 4x4 HDR for backwards compatibility.
+		hdr_modes m_hdr_mode;
+				
 		bool_param<false> m_use_opencl;

 		// If m_read_source_images is true, m_source_filenames (and optionally m_source_alpha_filenames) contains the filenames of PNG etc. images to read. 
@ -426,30 +510,31 @@ namespace basisu
 		bool_param<true> m_mip_fast;
 		param<int> m_mip_smallest_dimension;
 						
-		// Codebook size (quality) control. 
-		// If m_quality_level != -1, it controls the quality level. It ranges from [1,255] or [BASISU_QUALITY_MIN, BASISU_QUALITY_MAX].
+		// ETC1S codebook size (quality) control. 
+		// If m_etc1s_quality_level != -1, it controls the quality level. It ranges from [1,255] or [BASISU_QUALITY_MIN, BASISU_QUALITY_MAX].
 		// Otherwise m_max_endpoint_clusters/m_max_selector_clusters controls the codebook sizes directly.
-		uint32_t m_max_endpoint_clusters;
-		uint32_t m_max_selector_clusters;
-		int m_quality_level;
+		uint32_t m_etc1s_max_endpoint_clusters;
+		uint32_t m_etc1s_max_selector_clusters;
+		int m_etc1s_quality_level;
 		
-		// m_tex_type, m_userdata0, m_userdata1, m_framerate - These fields go directly into the Basis file header.
+		// m_tex_type, m_userdata0, m_userdata1, m_us_per_frame - These fields go directly into the .basis file header.
 		basist::basis_texture_type m_tex_type;
 		uint32_t m_userdata0;
 		uint32_t m_userdata1;
 		uint32_t m_us_per_frame;

+		// UASTC LDR 4x4 parameters
 		// cPackUASTCLevelDefault, etc.
-		uint32_t m_pack_uastc_flags;
-		bool_param<false> m_rdo_uastc;
-		param<float> m_rdo_uastc_quality_scalar;
-		param<int> m_rdo_uastc_dict_size;
-		param<float> m_rdo_uastc_max_smooth_block_error_scale;
-		param<float> m_rdo_uastc_smooth_block_max_std_dev;
-		param<float> m_rdo_uastc_max_allowed_rms_increase_ratio;
-		param<float> m_rdo_uastc_skip_block_rms_thresh;
-		bool_param<true> m_rdo_uastc_favor_simpler_modes_in_rdo_mode;
-		bool_param<true> m_rdo_uastc_multithreading;
+		uint32_t m_pack_uastc_ldr_4x4_flags;
+		bool_param<false> m_rdo_uastc_ldr_4x4;
+		param<float> m_rdo_uastc_ldr_4x4_quality_scalar;
+		param<int> m_rdo_uastc_ldr_4x4_dict_size;
+		param<float> m_rdo_uastc_ldr_4x4_max_smooth_block_error_scale;
+		param<float> m_rdo_uastc_ldr_4x4_smooth_block_max_std_dev;
+		param<float> m_rdo_uastc_ldr_4x4_max_allowed_rms_increase_ratio;
+		param<float> m_rdo_uastc_ldr_4x4_skip_block_rms_thresh;
+		bool_param<true> m_rdo_uastc_ldr_4x4_favor_simpler_modes_in_rdo_mode;
+		bool_param<true> m_rdo_uastc_ldr_4x4_multithreading;

 		param<int> m_resample_width;
 		param<int> m_resample_height;
@ -465,13 +550,26 @@ namespace basisu
 		param<int> m_ktx2_zstd_supercompression_level;
 		bool_param<false> m_ktx2_srgb_transfer_func;

-		astc_hdr_codec_options m_uastc_hdr_options;
+		uastc_hdr_4x4_codec_options m_uastc_hdr_4x4_options;
+		astc_6x6_hdr::astc_hdr_6x6_global_config m_astc_hdr_6x6_options;

 		bool_param<false> m_validate_output_data;

-		// If true, LDR images (such as PNG) will be converted to normalized [0,1] linear light (via a sRGB->Linear conversion) and then processed as HDR. 
-		// Otherwise, LDR images will be processed as HDR as-is.
-		bool_param<true> m_hdr_ldr_srgb_to_linear_conversion;
+		// LDR->HDR upconversion parameters.
+		// 
+		// If true, LDR images (such as PNG) will be converted to normalized [0,1] linear light (via a sRGB->Linear conversion), or absolute luminance (nits or candelas per meter squared), and then processed as HDR. 
+		// Otherwise, LDR images are assumed to already be in linear light (i.e. they don't use the sRGB transfer function).
+		bool_param<true> m_ldr_hdr_upconversion_srgb_to_linear;
+		
+		// m_ldr_hdr_upconversion_nit_multiplier is only used when loading SDR/LDR images and compressing to an HDR output format.
+		// By default m_ldr_hdr_upconversion_nit_multiplier is 0. It's an override for the default.
+		// When loading LDR images, a default multiplier of 1.0 will be used in UASTC 4x4 HDR mode. Partially for backwards compatibility with previous library releases, and also because it doesn't really matter with this encoder what the multiplier is.
+		// With the 6x6 HDR encoder it does matter because it expects inputs in absolute nits, so the LDR upconversion luminance multiplier default will be 100 nits. (Most SDR monitors were/are 80-100 nits or so.)
+		param<float> m_ldr_hdr_upconversion_nit_multiplier;
+
+		// The optional sRGB space bias to use during LDR->HDR upconversion. Should be between [0,.49] or so. Only applied on black (0.0) color components.
+		// Defaults to no bias (0.0f).
+		param<float> m_ldr_hdr_upconversion_black_bias;

 		// If true, ASTC HDR quality is favored more than BC6H quality. Otherwise it's a rough balance.
 		bool_param<false> m_hdr_favor_astc;
@ -529,6 +627,8 @@ namespace basisu
 				
 		opencl_context_ptr m_pOpenCL_context;

+		basist::basis_tex_format m_fmt_mode;
+		
 		basisu::vector<image> m_slice_images;
 		basisu::vector<imagef> m_slice_images_hdr;

@ -543,6 +643,7 @@ namespace basisu
 		
 		basisu_frontend m_frontend;

+		// These are 4x4 blocks.
 		pixel_block_vec m_source_blocks;
 		pixel_block_hdr_vec m_source_blocks_hdr;

@ -572,6 +673,15 @@ namespace basisu
 		basisu::vector<gpu_image> m_uastc_slice_textures;
 		basisu_backend_output m_uastc_backend_output;

+		// The amount the HDR input has to be scaled up in case it had to be rescaled to fit into half floats.
+		float m_hdr_image_scale; 
+		
+		// The upconversion multiplier used to load LDR images in HDR mode.
+		float m_ldr_to_hdr_upconversion_nit_multiplier;
+		
+		// True if any loaded source images were LDR and upconverted to HDR.
+		bool m_upconverted_any_ldr_images;
+
 		bool m_any_source_image_has_alpha;

 		bool m_opencl_failed;
@ -588,14 +698,48 @@ namespace basisu
 		bool create_basis_file_and_transcode();
 		bool write_hdr_debug_images(const char* pBasename, const imagef& img, uint32_t width, uint32_t height);
 		bool write_output_files_and_compute_stats();
-		error_code encode_slices_to_uastc_hdr();
-		error_code encode_slices_to_uastc();
+		error_code encode_slices_to_astc_6x6_hdr();
+		error_code encode_slices_to_uastc_4x4_hdr();
+		error_code encode_slices_to_uastc_4x4_ldr();
 		bool generate_mipmaps(const imagef& img, basisu::vector<imagef>& mips, bool has_alpha);
 		bool generate_mipmaps(const image &img, basisu::vector<image> &mips, bool has_alpha);
 		bool validate_texture_type_constraints();
 		bool validate_ktx2_constraints();
-		void get_dfd(uint8_vec& dfd, const basist::ktx2_header& hdr);
+		bool get_dfd(uint8_vec& dfd, const basist::ktx2_header& hdr);
 		bool create_ktx2_file();
+		void pick_format_mode();
+
+		// Block width in pixels for the current parameters: 6 when HDR is enabled and the HDR mode is
+		// one of the ASTC HDR 6x6 modes, otherwise 4.
+		uint32_t get_block_width() const
+		{
+			const bool is_6x6_hdr = m_params.m_hdr &&
+				((m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6) || (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE));
+
+			return is_6x6_hdr ? 6U : 4U;
+		}
+
+		// Block height in pixels for the current parameters: 6 when HDR is enabled and the HDR mode is
+		// one of the ASTC HDR 6x6 modes, otherwise 4.
+		uint32_t get_block_height() const
+		{
+			const bool is_6x6_hdr = m_params.m_hdr &&
+				((m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6) || (m_params.m_hdr_mode == hdr_modes::cASTC_HDR_6X6_INTERMEDIATE));
+
+			return is_6x6_hdr ? 6U : 4U;
+		}
 	};
 				
 	// Alternative simple C-style wrapper API around the basis_compressor class. 
@ -633,16 +777,14 @@ namespace basisu
 		
 		cFlagYFlip = 1 << 16,			// flip source image on Y axis before compression
 		
-		cFlagUASTC = 1 << 17,		// use UASTC compression vs. ETC1S
-		cFlagUASTCRDO = 1 << 18,		// use RDO postprocessing when generating UASTC files (must set uastc_rdo_quality to the quality scalar)
+		cFlagUASTCRDO = 1 << 17,		// use RDO postprocessing when generating UASTC files (must set uastc_rdo_quality to the quality scalar)
 		
-		cFlagPrintStats = 1 << 19,	// print image stats to stdout
-		cFlagPrintStatus = 1 << 20,	// print status to stdout
+		cFlagPrintStats = 1 << 18,		// print image stats to stdout
+		cFlagPrintStatus = 1 << 19,		// print status to stdout
 		
-		cFlagHDR = 1 << 21,			// Force encoder into HDR mode, even if source image is LDR.
-		cFlagHDRLDRImageSRGBToLinearConversion = 1 << 22, // In HDR mode, convert LDR source images to linear before encoding.
+		cFlagDebugImages = 1 << 20,		// enable status output

-		cFlagDebugImages = 1 << 23	// enable status output
+		cFlagREC2020 = 1 << 21			// ASTC 6x6 modes: treat input as REC 2020 vs. the default 709
 	};

 	// This function accepts an array of source images. 
@ -652,6 +794,7 @@ namespace basisu
 	// basisu_encoder_init() MUST be called first!
 	// LDR version. To compress the LDR source image as HDR: Use the cFlagHDR flag.
 	void* basis_compress(
+		basist::basis_tex_format mode,
 		const basisu::vector<image> &source_images,
 		uint32_t flags_and_quality, float uastc_rdo_quality,
 		size_t* pSize,
@ -660,14 +803,16 @@ namespace basisu
 	// HDR-only version.
 	// Important: The returned block MUST be manually freed using basis_free_data().
 	void* basis_compress(
+		basist::basis_tex_format mode,
 		const basisu::vector<imagef>& source_images_hdr,
-		uint32_t flags_and_quality, 
+		uint32_t flags_and_quality, float lambda,
 		size_t* pSize,
 		image_stats* pStats = nullptr);

 	// This function only accepts a single LDR source image. It's just a wrapper for basis_compress() above.
 	// Important: The returned block MUST be manually freed using basis_free_data().
 	void* basis_compress(
+		basist::basis_tex_format mode,
 		const uint8_t* pImageRGBA, uint32_t width, uint32_t height, uint32_t pitch_in_pixels,
 		uint32_t flags_and_quality, float uastc_rdo_quality,
 		size_t* pSize,
--- a/thirdparty/basis_universal/encoder/basisu_enc.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp
@ -21,7 +21,9 @@
 #include "jpgd.h"
 #include "pvpngreader.h"
 #include "basisu_opencl.h"
-#include "basisu_astc_hdr_enc.h"
+#include "basisu_uastc_hdr_4x4_enc.h"
+#include "basisu_astc_hdr_6x6_enc.h"
+
+#include <chrono>
+#include <thread>
 #include <vector>

 #ifndef TINYEXR_USE_ZFP
@ -47,10 +49,13 @@ namespace basisu
 {
 	uint64_t interval_timer::g_init_ticks, interval_timer::g_freq;
 	double interval_timer::g_timer_freq;
+
 #if BASISU_SUPPORT_SSE
 	bool g_cpu_supports_sse41;
 #endif

+	fast_linear_to_srgb g_fast_linear_to_srgb;
+
 	uint8_t g_hamming_dist[256] =
 	{
 		0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
@ -201,6 +206,7 @@ namespace basisu

 		astc_hdr_enc_init();
 		basist::bc6h_enc_init();
+		astc_6x6_hdr::global_init();

 		g_library_initialized = true;
 		return true;
@ -215,16 +221,41 @@ namespace basisu

+	// Formats pFmt/args printf-style and writes the result to stderr, prefixed with "ERROR: ".
+	// The message is first formatted into a small stack buffer; if it doesn't fit, it's formatted again
+	// into a heap buffer sized from vsnprintf()'s return value, so long messages are not truncated.
+	// If vsnprintf() reports an error (negative return), this asserts and prints nothing.
 	void error_vprintf(const char* pFmt, va_list args)
 	{
-		char buf[8192];
+		const uint32_t BUF_SIZE = 256;
+		char buf[BUF_SIZE];

-#ifdef _WIN32		
-		vsprintf_s(buf, sizeof(buf), pFmt, args);
-#else
-		vsnprintf(buf, sizeof(buf), pFmt, args);
-#endif
+		// Format using a copy of args, so the caller's va_list is still usable for the retry below.
+		va_list args_copy;
+		va_copy(args_copy, args);
+		int total_chars = vsnprintf(buf, sizeof(buf), pFmt, args_copy);
+		va_end(args_copy);

+		if (total_chars < 0)
+		{
+			assert(0);
+			return;
+		}
+
+		// total_chars excludes the null terminator, so a value >= BUF_SIZE means the stack buffer was too small.
+		if (total_chars >= (int)BUF_SIZE)
+		{
+			basisu::vector<char> var_buf(total_chars + 1);
+			
+			va_copy(args_copy, args);
+			int total_chars_retry = vsnprintf(var_buf.data(), var_buf.size(), pFmt, args_copy);
+			va_end(args_copy);
+
+			if (total_chars_retry < 0)
+			{
+				assert(0);
+				return;
+			}
+
+			fprintf(stderr, "ERROR: %s", var_buf.data());
+		}
+		else
+		{
 			fprintf(stderr, "ERROR: %s", buf);
 		}
+	}

 	void error_printf(const char *pFmt, ...)
 	{
@ -234,6 +265,18 @@ namespace basisu
 		va_end(args);
 	}

+#if defined(_WIN32)
+	// Suspends the calling thread for (at least) ms milliseconds using the Win32 Sleep() call.
+	void platform_sleep(uint32_t ms)
+	{
+		Sleep(ms);
+	}
+#else
+	// Suspends the calling thread for (at least) ms milliseconds.
+	// Uses the standard library so every non-Windows platform actually sleeps, instead of returning
+	// immediately (which would turn any caller's polling loop into a busy-wait).
+	void platform_sleep(uint32_t ms)
+	{
+		std::this_thread::sleep_for(std::chrono::milliseconds(ms));
+	}
+#endif
+
 #if defined(_WIN32)
 	inline void query_counter(timer_ticks* pTicks)
 	{
@ -331,6 +374,8 @@ namespace basisu
 		return ticks * g_timer_freq;
 	}

+	// Note this is linear<->sRGB, NOT REC709 which uses slightly different equations/transfer functions. 
+	// However the gamuts/white points of REC709 and sRGB are the same.
 	float linear_to_srgb(float l)
 	{
 		assert(l >= 0.0f && l <= 1.0f);
@ -418,7 +463,8 @@ namespace basisu

 		uint32_t width = 0, height = 0, num_chans = 0;
 		void* pImage = pv_png::load_png(pBuf, buf_size, 4, width, height, num_chans);
-		if (!pBuf)
+
+		if (!pImage)
 		{
 			error_printf("pv_png::load_png failed while loading image \"%s\"\n", pFilename);
 			return false;
@ -457,6 +503,26 @@ namespace basisu
 		return true;
 	}

+	// Decodes a JPEG held in memory into img as 4-channel data.
+	// Returns false if the buffer is too large for the decoder's int size parameter, or if decoding fails.
+	bool load_jpg(const uint8_t* pBuf, size_t buf_size, image& img)
+	{
+		// The decoder takes the buffer size as an int, so larger buffers can't be passed through.
+		const bool size_fits_in_int = (buf_size <= INT_MAX);
+		if (!size_fits_in_int)
+		{
+			assert(0);
+			return false;
+		}
+
+		int w = 0, h = 0, src_comps = 0;
+		uint8_t* pPixels = jpgd::decompress_jpeg_image_from_memory(pBuf, (int)buf_size, &w, &h, &src_comps, 4, jpgd::jpeg_decoder::cFlagBoxChromaFiltering);
+
+		const bool decoded = (pPixels != nullptr);
+		if (decoded)
+		{
+			// img.init() is handed the decoded pixels, after which the decoder's buffer is released.
+			img.init(pPixels, w, h, 4);
+
+			free(pPixels);
+		}
+
+		return decoded;
+	}
+
 	bool load_image(const char* pFilename, image& img)
 	{
 		std::string ext(string_get_extension(std::string(pFilename)));
@ -478,7 +544,7 @@ namespace basisu
 		return false;
 	}

-	static void convert_ldr_to_hdr_image(imagef &img, const image &ldr_img, bool ldr_srgb_to_linear)
+	static void convert_ldr_to_hdr_image(imagef &img, const image &ldr_img, bool ldr_srgb_to_linear, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f)
 	{
 		img.resize(ldr_img.get_width(), ldr_img.get_height());

@ -491,23 +557,41 @@ namespace basisu
 				vec4F& d = img(x, y);
 				if (ldr_srgb_to_linear)
 				{
-					// TODO: Multiply by 100-200 nits?
-					d[0] = srgb_to_linear(c[0] * (1.0f / 255.0f));
-					d[1] = srgb_to_linear(c[1] * (1.0f / 255.0f));
-					d[2] = srgb_to_linear(c[2] * (1.0f / 255.0f));
+					float r = (float)c[0];
+					float g = (float)c[1];
+					float b = (float)c[2];
+
+					if (ldr_black_bias > 0.0f)
+					{
+						// ASTC HDR is noticeably weaker dealing with blocks containing some pixels with components set to 0.
+						// Add a very slight bias less than .5 to avoid this difficulty. When the HDR image is mapped to SDR sRGB and rounded back to 8-bits, this bias will still result in zero.
+						// (FWIW, in reality, a physical monitor would be unlikely to have a perfectly zero black level.)
+						// This is purely optional and on most images it doesn't matter visually.
+						if (r == 0.0f)
+							r = ldr_black_bias;
+						if (g == 0.0f)
+							g = ldr_black_bias;
+						if (b == 0.0f)
+							b = ldr_black_bias;
+					}
+
+					// Compute how much linear light would be emitted by a SDR 80-100 nit monitor.
+					d[0] = srgb_to_linear(r * (1.0f / 255.0f)) * linear_nit_multiplier;
+					d[1] = srgb_to_linear(g * (1.0f / 255.0f)) * linear_nit_multiplier;
+					d[2] = srgb_to_linear(b * (1.0f / 255.0f)) * linear_nit_multiplier;
 				}
 				else
 				{
-					d[0] = c[0] * (1.0f / 255.0f);
-					d[1] = c[1] * (1.0f / 255.0f);
-					d[2] = c[2] * (1.0f / 255.0f);
+					d[0] = c[0] * (1.0f / 255.0f) * linear_nit_multiplier;
+					d[1] = c[1] * (1.0f / 255.0f) * linear_nit_multiplier;
+					d[2] = c[2] * (1.0f / 255.0f) * linear_nit_multiplier;
 				}
 				d[3] = c[3] * (1.0f / 255.0f);
 			}
 		}
 	}

-	bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear)
+	bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear, float linear_nit_multiplier, float ldr_black_bias)
 	{
 		if ((!pMem) || (!mem_size))
 		{
@ -571,13 +655,22 @@ namespace basisu

 			break;
 		}
+		case hdr_image_type::cHITJPGImage:
+		{
+			image ldr_img;
+			if (!load_jpg(static_cast<const uint8_t*>(pMem), mem_size, ldr_img))
+				return false;
+
+			convert_ldr_to_hdr_image(img, ldr_img, ldr_srgb_to_linear, linear_nit_multiplier, ldr_black_bias);
+			break;
+		}
 		case hdr_image_type::cHITPNGImage:
 		{
 			image ldr_img;
 			if (!load_png(static_cast<const uint8_t *>(pMem), mem_size, ldr_img))
 				return false;

-			convert_ldr_to_hdr_image(img, ldr_img, ldr_srgb_to_linear);
+			convert_ldr_to_hdr_image(img, ldr_img, ldr_srgb_to_linear, linear_nit_multiplier, ldr_black_bias);
 			break;
 		}
 		case hdr_image_type::cHITEXRImage:
@ -606,7 +699,20 @@ namespace basisu
 		return true;
 	}

-	bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear)
+	// Returns true if pFilename's extension is "hdr" or "exr" (compared case insensitively).
+	// Filenames without an extension return false.
+	bool is_image_filename_hdr(const char *pFilename)
+	{
+		const std::string file_ext(string_get_extension(std::string(pFilename)));
+		if (file_ext.empty())
+			return false;
+
+		const bool is_radiance_hdr = (strcasecmp(file_ext.c_str(), "hdr") == 0);
+		const bool is_openexr = (strcasecmp(file_ext.c_str(), "exr") == 0);
+
+		return is_radiance_hdr || is_openexr;
+	}
+	
+	// TODO: move parameters to struct, add a HDR clean flag to eliminate NaN's/Inf's
+	bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear, float linear_nit_multiplier, float ldr_black_bias)
 	{
 		std::string ext(string_get_extension(std::string(pFilename)));

@ -637,7 +743,7 @@ namespace basisu
 			if (!load_image(pFilename, ldr_img))
 				return false;

-			convert_ldr_to_hdr_image(img, ldr_img, ldr_srgb_to_linear);
+			convert_ldr_to_hdr_image(img, ldr_img, ldr_srgb_to_linear, linear_nit_multiplier, ldr_black_bias);
 		}

 		return true;
@ -1002,7 +1108,7 @@ namespace basisu
 			return false;
 		}

-		if ((src_w == dst_w) && (src_h == dst_h))
+		if ((src_w == dst_w) && (src_h == dst_h) && (filter_scale == 1.0f))
 		{
 			dst = src;
 			return true;
@ -1652,7 +1758,7 @@ namespace basisu

 		uint32_t a = max_index / num_syms, b = max_index % num_syms;

-		const uint32_t ofs = m_entries_picked.size();
+		const size_t ofs = m_entries_picked.size();

 		m_entries_picked.push_back(a);
 		m_entries_picked.push_back(b);
@ -2002,6 +2108,34 @@ namespace basisu
 		m_psnr = m_rms ? (float)clamp<double>(log10(255.0 / m_rms) * 20.0f, 0.0f, 100.0f) : 100.0f;
 	}

+	// Computes and prints a series of error metrics between images a and b, one image_metrics::calc()/print() pair per line of output.
+	// NOTE(review): the two integer arguments look like (first channel, number of channels), with a channel count
+	// of 0 selecting a luma comparison - this is inferred from the printed labels, confirm against image_metrics::calc().
+	void print_image_metrics(const image& a, const image& b)
+	{
+		image_metrics im;
+		im.calc(a, b, 0, 3);
+		im.print("RGB    ");
+
+		im.calc(a, b, 0, 4);
+		im.print("RGBA   ");
+
+		// Individual channels.
+		im.calc(a, b, 0, 1);
+		im.print("R      ");
+
+		im.calc(a, b, 1, 1);
+		im.print("G      ");
+
+		im.calc(a, b, 2, 1);
+		im.print("B      ");
+
+		im.calc(a, b, 3, 1);
+		im.print("A      ");
+
+		im.calc(a, b, 0, 0);
+		im.print("Y 709  ");
+
+		// NOTE(review): the two extra flags presumably switch the luma weights to 601 (per the label) - confirm.
+		im.calc(a, b, 0, 0, true, true);
+		im.print("Y 601  ");
+	}
+
 	void fill_buffer_with_random_bytes(void *pBuf, size_t size, uint32_t seed)
 	{
 		rand r(seed);
@ -2079,9 +2213,11 @@ namespace basisu
 	}

 	job_pool::job_pool(uint32_t num_threads) : 
-		m_num_active_jobs(0),
-		m_kill_flag(false)
+		m_num_active_jobs(0)
 	{
+		m_kill_flag.store(false);
+		m_num_active_workers.store(0);
+
 		assert(num_threads >= 1U);

 		debug_printf("job_pool::job_pool: %u total threads\n", num_threads);
@ -2100,11 +2236,23 @@ namespace basisu
 		debug_printf("job_pool::~job_pool\n");
 		
 		// Notify all workers that they need to die right now.
-		m_kill_flag = true;
+		m_kill_flag.store(true);
 		
 		m_has_work.notify_all();

-		// Wait for all workers to die.
+#ifdef __EMSCRIPTEN__
+		for ( ; ; )
+		{
+			if (m_num_active_workers.load() <= 0)
+				break;
+			std::this_thread::sleep_for(std::chrono::milliseconds(50));
+		}
+		
+		// At this point all worker threads should be exiting or exited.
+		// We could call detach(), but this seems to just call join() anyway.
+#endif
+
+		// Wait for all worker threads to exit.
 		for (uint32_t i = 0; i < m_threads.size(); i++)
 			m_threads[i].join();
 	}
@ -2157,7 +2305,18 @@ namespace basisu
 		}

 		// The queue is empty, now wait for all active jobs to finish up.
+#ifndef __EMSCRIPTEN__
 		m_no_more_jobs.wait(lock, [this]{ return !m_num_active_jobs; } );
+#else
+		// Avoid infinite blocking
+		for (; ; )
+		{
+			if (m_no_more_jobs.wait_for(lock, std::chrono::milliseconds(50), [this] { return !m_num_active_jobs; }))
+			{
+				break;
+			}
+		}
+#endif
 	}

 	void job_pool::job_thread(uint32_t index)
@ -2165,6 +2324,8 @@ namespace basisu
 		BASISU_NOTE_UNUSED(index);
 		//debug_printf("job_pool::job_thread: starting %u\n", index);

+		m_num_active_workers.fetch_add(1);
+		
 		while (true)
 		{
 			std::unique_lock<std::mutex> lock(m_mutex);
@ -2199,6 +2360,8 @@ namespace basisu
 				m_no_more_jobs.notify_all();
 		}

+		m_num_active_workers.fetch_add(-1);
+
 		//debug_printf("job_pool::job_thread: exiting\n");
 	}

@ -3314,7 +3477,7 @@ namespace basisu
 		return true;
 	}

-	bool write_exr(const char* pFilename, imagef& img, uint32_t n_chans, uint32_t flags)
+	bool write_exr(const char* pFilename, const imagef& img, uint32_t n_chans, uint32_t flags)
 	{
 		assert((n_chans == 1) || (n_chans == 3) || (n_chans == 4));

@ -3483,18 +3646,23 @@ namespace basisu
 	
 	// Very basic global Reinhard tone mapping, output converted to sRGB with no dithering, alpha is carried through unchanged. 
 	// Only used for debugging/development.
-	void tonemap_image_reinhard(image &ldr_img, const imagef &hdr_img, float exposure)
+	void tonemap_image_reinhard(image &ldr_img, const imagef &hdr_img, float exposure, bool add_noise, bool per_component, bool luma_scaling)
 	{
 		uint32_t width = hdr_img.get_width(), height = hdr_img.get_height();

 		ldr_img.resize(width, height);

+		rand r;
+		r.seed(128);
+				
 		for (uint32_t y = 0; y < height; y++)
 		{
 			for (uint32_t x = 0; x < width; x++)
 			{
 				vec4F c(hdr_img(x, y));

+				if (per_component)
+				{
 					for (uint32_t t = 0; t < 3; t++)
 					{
 						if (c[t] <= 0.0f)
@ -3507,20 +3675,70 @@ namespace basisu
 							c[t] = c[t] / (1.0f + c[t]);
 						}
 					}
+				}
+				else
+				{
+					c[0] *= exposure;
+					c[1] *= exposure;
+					c[2] *= exposure;
+
+					const float L = 0.2126f * c[0] + 0.7152f * c[1] + 0.0722f * c[2];
+
+					float Lmapped = 0.0f;
+					if (L > 0.0f)
+					{
+						//Lmapped = L / (1.0f + L);
+						//Lmapped /= L;
+						
+						Lmapped = 1.0f / (1.0f + L);
+					}
+
+					c[0] = c[0] * Lmapped;
+					c[1] = c[1] * Lmapped;
+					c[2] = c[2] * Lmapped;
+
+					if (luma_scaling)
+					{
+						// Keeps the ratio of r/g/b intact
+						float m = maximum(c[0], c[1], c[2]);
+						if (m > 1.0f)
+						{
+							c /= m;
+						}
+					}
+				}

 				c.clamp(0.0f, 1.0f);

-				c[0] = linear_to_srgb(c[0]) * 255.0f;
-				c[1] = linear_to_srgb(c[1]) * 255.0f;
-				c[2] = linear_to_srgb(c[2]) * 255.0f;
 				c[3] = c[3] * 255.0f;

 				color_rgba& o = ldr_img(x, y);

-				o[0] = (uint8_t)std::round(c[0]);
-				o[1] = (uint8_t)std::round(c[1]);
-				o[2] = (uint8_t)std::round(c[2]);
-				o[3] = (uint8_t)std::round(c[3]);
+				if (add_noise)
+				{
+					c[0] = linear_to_srgb(c[0]) * 255.0f;
+					c[1] = linear_to_srgb(c[1]) * 255.0f;
+					c[2] = linear_to_srgb(c[2]) * 255.0f;
+
+					const float NOISE_AMP = .5f;
+					c[0] += r.frand(-NOISE_AMP, NOISE_AMP);
+					c[1] += r.frand(-NOISE_AMP, NOISE_AMP);
+					c[2] += r.frand(-NOISE_AMP, NOISE_AMP);
+
+					c.clamp(0.0f, 255.0f);
+
+					o[0] = (uint8_t)fast_roundf_int(c[0]);
+					o[1] = (uint8_t)fast_roundf_int(c[1]);
+					o[2] = (uint8_t)fast_roundf_int(c[2]);
+					o[3] = (uint8_t)fast_roundf_int(c[3]);
+				}
+				else
+				{
+					o[0] = g_fast_linear_to_srgb.convert(c[0]);
+					o[1] = g_fast_linear_to_srgb.convert(c[1]);
+					o[2] = g_fast_linear_to_srgb.convert(c[2]);
+					o[3] = (uint8_t)fast_roundf_int(c[3]);
+				}
 			}
 		}
 	}
@ -3682,4 +3900,68 @@ namespace basisu
 		return true;
 	}

+	// Simple range-compressing tonemapper. Each RGB component is converted to its half float encoding
+	// (as returned by basist::float_to_half()), then those encoded values are linearly remapped from
+	// [lowest seen, highest seen] to [0,255]. Alpha is always written as 255.
+	// NaN/Inf/negative components are treated as 0, and components above basist::MAX_HALF_FLOAT are clamped to it.
+	// Returns false, leaving dst_img opaque black, if every component converted to the same half value.
+	bool tonemap_image_compressive2(image& dst_img, const imagef& hdr_test_img)
+	{
+		const uint32_t width = hdr_test_img.get_width();
+		const uint32_t height = hdr_test_img.get_height();
+
+		dst_img.resize(width, height);
+		dst_img.set_all(color_rgba(0, 0, 0, 255));
+
+		const uint32_t NUM_COMPS = 3;
+		basisu::vector<basist::half_float> half_vals(width * NUM_COMPS * height);
+
+		// Pass 1: sanitize and convert each component, tracking the range of half values encountered.
+		uint32_t min_half = UINT32_MAX, max_half = 0;
+
+		uint32_t ofs = 0;
+		for (uint32_t y = 0; y < height; y++)
+		{
+			for (uint32_t x = 0; x < width; x++)
+			{
+				const vec4F& src = hdr_test_img(x, y);
+
+				for (uint32_t c = 0; c < NUM_COMPS; c++, ofs++)
+				{
+					float v = src[c];
+
+					const bool is_invalid = std::isnan(v) || std::isinf(v) || (v < 0.0f);
+					if (is_invalid)
+						v = 0.0f;
+					else if (v > basist::MAX_HALF_FLOAT)
+						v = basist::MAX_HALF_FLOAT;
+
+					const uint32_t half_val = basist::float_to_half(v);
+
+					if (half_val < min_half)
+						min_half = half_val;
+					if (half_val > max_half)
+						max_half = half_val;
+
+					half_vals[ofs] = (basist::half_float)half_val;
+				}
+			}
+		}
+
+		// A zero-width range can't be remapped.
+		if (min_half == max_half)
+			return false;
+
+		// Pass 2: remap each stored half value to 8 bits.
+		const float half_range = (float)(max_half - min_half);
+
+		ofs = 0;
+		for (uint32_t y = 0; y < height; y++)
+		{
+			for (uint32_t x = 0; x < width; x++)
+			{
+				color_rgba& dst = dst_img(x, y);
+
+				for (uint32_t c = 0; c < NUM_COMPS; c++, ofs++)
+				{
+					const basist::half_float half_val = half_vals[ofs];
+
+					const float t = (float)(half_val - min_half) / half_range;
+
+					dst[c] = (uint8_t)basisu::clamp<int>((int)std::round(t * 255.0f), 0, 255);
+				}
+			}
+		}
+
+		return true;
+	}
+							
 } // namespace basisu
--- a/thirdparty/basis_universal/encoder/basisu_enc.h
+++ b/thirdparty/basis_universal/encoder/basisu_enc.h
@ -64,6 +64,17 @@ namespace basisu
 	void error_vprintf(const char* pFmt, va_list args);
 	void error_printf(const char *pFmt, ...);
 	
+	// Formats the arguments with fmt_variants() and forwards the resulting string to error_printf().
+	// Nothing is printed if fmt_variants() reports failure.
+	template <typename... Args>
+	inline void fmt_error_printf(const char* pFmt, Args&&... args)
+	{
+		std::string formatted;
+		const bool succeeded = fmt_variants(formatted, pFmt, fmt_variant_vec{ fmt_variant(std::forward<Args>(args))... });
+		if (succeeded)
+			error_printf("%s", formatted.c_str());
+	}
+
+	void platform_sleep(uint32_t ms);
+	
 	// Helpers
 		
 	inline uint8_t clamp255(int32_t i)
@ -98,6 +109,17 @@ namespace basisu
 		return (uint8_t)((v + (v >> 8)) >> 8);
 	}

+	// Rounds x to an int by moving it 0.5 away from zero and then truncating.
+	// The shifted value must fit in an int.
+	inline int fast_roundf_int(float x)
+	{
+		if (x >= 0.0f)
+			return (int)(x + 0.5f);
+
+		return (int)(x - 0.5f);
+	}
+
+	// Returns the largest int that is <= x. x must be within the range of an int.
+	inline int fast_floorf_int(float x)
+	{
+		// The cast truncates towards zero, which is one too high for negative values with a fractional part.
+		const int truncated = (int)x;
+		const bool has_negative_fraction = (x < 0.0f) && (x != (float)truncated);
+
+		return has_negative_fraction ? (truncated - 1) : truncated;
+	}
+
 	inline uint64_t read_bits(const uint8_t* pBuf, uint32_t& bit_offset, uint32_t codesize)
 	{
 		assert(codesize <= 64);
@ -169,6 +191,15 @@ namespace basisu

 	bool string_begins_with(const std::string& str, const char* pPhrase);

+	// Case sensitive search for the first occurrence of pPhrase in str.
+	// Returns its character offset, or -1 if pPhrase doesn't occur.
+	inline int string_find_first(const std::string& str, const char* pPhrase)
+	{
+		const size_t found_ofs = str.find(pPhrase, 0);
+
+		return (found_ofs != std::string::npos) ? (int)found_ofs : -1;
+	}
+				
 	// Hashing
 	
 	inline uint32_t bitmix32c(uint32_t v) 
@ -209,12 +240,23 @@ namespace basisu
+	// Hash functor which hashes the raw bytes of a key object (all sizeof(Key) bytes of it) with hash_hsieh().
+	// NOTE(review): since every byte of the object is hashed, this presumably expects Key types whose equal
+	// values always have identical bytes (no padding, no pointers to external data) - confirm for new key types.
 	template <typename Key>
 	struct bit_hasher
 	{
-		std::size_t operator()(const Key& k) const
+		inline std::size_t operator()(const Key& k) const
 		{
 			return hash_hsieh(reinterpret_cast<const uint8_t *>(&k), sizeof(k));
 		}
 	};

+	// Hash functor for std::string keys: hashes the string's characters with hash_hsieh().
+	// Empty strings always hash to 0, without calling hash_hsieh().
+	struct string_hasher
+	{
+		inline std::size_t operator()(const std::string& k) const
+		{
+			if (k.empty())
+				return 0;
+
+			const size_t num_bytes = k.size();
+			return hash_hsieh(reinterpret_cast<const uint8_t*>(k.c_str()), num_bytes);
+		}
+	};
+
 	class running_stat
 	{
 	public:
@ -318,7 +360,7 @@ namespace basisu
 		inline vec(const vec &other) { for (uint32_t i = 0; i < N; i++) m_v[i] = other.m_v[i]; }
 		template <uint32_t OtherN, typename OtherT> inline vec(const vec<OtherN, OtherT> &other) { set(other); }

-		inline T operator[](uint32_t i) const { assert(i < N); return m_v[i]; }
+		inline const T& operator[](uint32_t i) const { assert(i < N); return m_v[i]; }
 		inline T &operator[](uint32_t i) { assert(i < N); return m_v[i]; }

 		inline T getX() const { return m_v[0]; }
@ -327,6 +369,7 @@ namespace basisu
 		inline T getW() const { static_assert(N >= 4, "N too small"); return m_v[3]; }

 		inline bool operator==(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) if (m_v[i] != rhs.m_v[i]) return false;	return true; }
+		inline bool operator!=(const vec& rhs) const { return !(*this == rhs); }
 		inline bool operator<(const vec &rhs) const { for (uint32_t i = 0; i < N; i++) { if (m_v[i] < rhs.m_v[i]) return true; else if (m_v[i] != rhs.m_v[i]) return false; } return false; }

 		inline void set_zero() { for (uint32_t i = 0; i < N; i++) m_v[i] = 0; }
@ -433,6 +476,8 @@ namespace basisu

 		inline vec &normalize_in_place() { T len = length(); if (len != 0.0f) *this *= (1.0f / len); return *this; }

+		inline vec get_normalized() const { vec res(*this); res.normalize_in_place(); return res; }
+
 		inline vec &clamp(T l, T h)
 		{
 			for (uint32_t i = 0; i < N; i++)
@ -440,6 +485,14 @@ namespace basisu
 			return *this;
 		}

+		static vec component_mul(const vec& a, const vec& b)
+		{
+			vec res;
+			for (uint32_t i = 0; i < N; i++)
+				res[i] = a[i] * b[i];
+			return res;
+		}
+
 		static vec component_min(const vec& a, const vec& b)
 		{
 			vec res;
@ -455,6 +508,14 @@ namespace basisu
 				res[i] = maximum(a[i], b[i]);
 			return res;
 		}
+
+		static vec lerp(const vec& a, const vec& b, float s)
+		{
+			vec res;
+			for (uint32_t i = 0; i < N; i++)
+				res[i] = basisu::lerp(a[i], b[i], s);
+			return res;
+		}
 	};

 	typedef vec<4, double> vec4D;
@ -462,6 +523,8 @@ namespace basisu
 	typedef vec<2, double> vec2D;
 	typedef vec<1, double> vec1D;

+	typedef vec<6, float> vec6F;
+	typedef vec<5, float> vec5F;
 	typedef vec<4, float> vec4F;
 	typedef vec<3, float> vec3F;
 	typedef vec<2, float> vec2F;
@ -469,6 +532,9 @@ namespace basisu

 	typedef vec<16, float> vec16F;

+	template<uint32_t N, typename T> struct bitwise_copyable< vec<N, T> > { enum { cFlag = true }; };
+	template<uint32_t N, typename T> struct bitwise_movable< vec<N, T> > { enum { cFlag = true }; };
+		
 	template <uint32_t Rows, uint32_t Cols, typename T>
 	class matrix
 	{
@ -514,6 +580,9 @@ namespace basisu
 		}
 	};

+	template<uint32_t R, uint32_t C, typename T> struct bitwise_copyable< matrix<R, C, T> > { enum { cFlag = true }; };
+	template<uint32_t R, uint32_t C, typename T> struct bitwise_movable< matrix<R, C, T> > { enum { cFlag = true }; };
+
 	template<uint32_t N, typename VectorType>
 	inline VectorType compute_pca_from_covar(matrix<N, N, float> &cmatrix)
 	{
@ -759,6 +828,8 @@ namespace basisu
 		
 		std::atomic<bool> m_kill_flag;

+		std::atomic<int> m_num_active_workers;
+
 		void job_thread(uint32_t index);
 	};

@ -962,6 +1033,9 @@ namespace basisu
 		inline int get_709_luma() const { return (13938U * m_comps[0] + 46869U * m_comps[1] + 4729U * m_comps[2] + 32768U) >> 16U; } 
 		inline int get_luma(bool luma_601) const { return luma_601 ? get_601_luma() : get_709_luma(); }

+		inline uint32_t get_bgra_uint32() const { return b | (g << 8) | (r << 16) | (a << 24); }
+		inline uint32_t get_rgba_uint32() const { return r | (g << 8) | (b << 16) | (a << 24); }
+
 		inline basist::color32 get_color32() const
 		{
 			return basist::color32(r, g, b, a);
@ -1136,22 +1210,6 @@ namespace basisu
 		return true;
 	}
 		
-	inline std::string string_format(const char* pFmt, ...)
-	{
-		char buf[2048];
-
-		va_list args;
-		va_start(args, pFmt);
-#ifdef _WIN32		
-		vsprintf_s(buf, sizeof(buf), pFmt, args);
-#else
-		vsnprintf(buf, sizeof(buf), pFmt, args);
-#endif		
-		va_end(args);
-
-		return std::string(buf);
-	}
-
 	inline std::string string_tolower(const std::string& s)
 	{
 		std::string result(s);
@ -1710,7 +1768,7 @@ namespace basisu
 				// This SSE function takes pointers to void types, so do some sanity checks.
 				assert(sizeof(TrainingVectorType) == sizeof(float) * 16);
 				assert(sizeof(training_vec_with_weight) == sizeof(std::pair<vec16F, uint64_t>));
-				update_covar_matrix_16x16_sse41(node.m_training_vecs.size(), m_training_vecs.data(), &node.m_origin, node.m_training_vecs.data(), &cmatrix);
+				update_covar_matrix_16x16_sse41(node.m_training_vecs.size_u32(), m_training_vecs.data(), &node.m_origin, node.m_training_vecs.data(), &cmatrix);
 #endif
 			}

@ -2019,9 +2077,7 @@ namespace basisu

 		for (uint32_t thread_iter = 0; thread_iter < max_threads; thread_iter++)
 		{
-#ifndef __EMSCRIPTEN__
 			pJob_pool->add_job( [thread_iter, &local_clusters, &local_parent_clusters, &success_flags, &quantizers, &initial_codebook, &q, &limit_clusterizers, &max_codebook_size, &max_threads, &max_parent_codebook_size] {
-#endif

 				Quantizer& lq = quantizers[thread_iter];
 				uint_vec& cluster_indices = initial_codebook[thread_iter];
@ -2062,15 +2118,11 @@ namespace basisu
 					}
 				}

-#ifndef __EMSCRIPTEN__
 			} );
-#endif

 		} // thread_iter

-#ifndef __EMSCRIPTEN__
 		pJob_pool->wait_for_all();
-#endif

 		uint32_t total_clusters = 0, total_parent_clusters = 0;

@ -2353,6 +2405,48 @@ namespace basisu
 		{
 		}

+		bitwise_coder(const bitwise_coder& other) :
+			m_bytes(other.m_bytes),
+			m_bit_buffer(other.m_bit_buffer),
+			m_bit_buffer_size(other.m_bit_buffer_size),
+			m_total_bits(other.m_total_bits)			
+		{
+		}
+
+		bitwise_coder(bitwise_coder&& other) :
+			m_bytes(std::move(other.m_bytes)),
+			m_bit_buffer(other.m_bit_buffer),
+			m_bit_buffer_size(other.m_bit_buffer_size),
+			m_total_bits(other.m_total_bits)
+		{
+		}
+
+		bitwise_coder& operator= (const bitwise_coder& rhs)
+		{
+			if (this == &rhs)
+				return *this;
+
+			m_bytes = rhs.m_bytes;
+			m_bit_buffer = rhs.m_bit_buffer;
+			m_bit_buffer_size = rhs.m_bit_buffer_size;
+			m_total_bits = rhs.m_total_bits;
+
+			return *this;
+		}
+
+		bitwise_coder& operator= (bitwise_coder&& rhs)
+		{
+			if (this == &rhs)
+				return *this;
+
+			m_bytes = std::move(rhs.m_bytes);
+			m_bit_buffer = rhs.m_bit_buffer;
+			m_bit_buffer_size = rhs.m_bit_buffer_size;
+			m_total_bits = rhs.m_total_bits;
+
+			return *this;
+		}
+
 		inline void clear()
 		{
 			clear_vector(m_bytes);
@ -2370,8 +2464,12 @@ namespace basisu
 		}

 		inline const uint8_vec &get_bytes() const { return m_bytes; }
+		inline uint8_vec& get_bytes() { return m_bytes; }
+
+		inline void reserve(uint32_t size) { m_bytes.reserve(size); }

 		inline uint64_t get_total_bits() const { return m_total_bits; }
+		inline uint32_t get_total_bits_u32() const { assert(m_total_bits <= UINT32_MAX); return static_cast<uint32_t>(m_total_bits); }
 		inline void clear_total_bits() { m_total_bits = 0; }

 		inline void init(uint32_t reserve_size = 1024)
@ -2496,15 +2594,26 @@ namespace basisu

 		uint32_t emit_huffman_table(const huffman_encoding_table &tab);

+		void append(const bitwise_coder& other)
+		{
+			for (uint32_t i = 0; i < other.m_bytes.size(); i++)
+				put_bits(other.m_bytes[i], 8);
+		
+			if (other.m_bit_buffer_size)
+				put_bits(other.m_bit_buffer, other.m_bit_buffer_size);
+		}
+		
 	private:
 		uint8_vec m_bytes;
 		uint32_t m_bit_buffer, m_bit_buffer_size;
 		uint64_t m_total_bits;

-		void append_byte(uint8_t c)
+		inline void append_byte(uint8_t c)
 		{
-			m_bytes.resize(m_bytes.size() + 1);
-			m_bytes.back() = c;
+			//m_bytes.resize(m_bytes.size() + 1);
+			//m_bytes.back() = c;
+
+			m_bytes.push_back(c);
 		}

 		static void end_nonzero_run(uint16_vec &syms, uint32_t &run_size, uint32_t len);
@ -2672,6 +2781,31 @@ namespace basisu
 			*this = other;
 		}

+		image(image&& other) :
+			m_width(other.m_width), m_height(other.m_height), m_pitch(other.m_pitch),
+			m_pixels(std::move(other.m_pixels))
+		{
+			other.m_width = 0;
+			other.m_height = 0;
+			other.m_pitch = 0;
+		}
+
+		image& operator= (image&& rhs)
+		{
+			if (this != &rhs)
+			{
+				m_width = rhs.m_width;
+				m_height = rhs.m_height;
+				m_pitch = rhs.m_pitch;
+				m_pixels = std::move(rhs.m_pixels);
+
+				rhs.m_width = 0;
+				rhs.m_height = 0;
+				rhs.m_pitch = 0;
+			}
+			return *this;
+		}
+
 		image &swap(image &other)
 		{
 			std::swap(m_width, other.m_width);
@ -2702,6 +2836,12 @@ namespace basisu
 			return *this;
 		}

+		image& match_dimensions(const image& other)
+		{
+			resize(other.get_width(), other.get_height());
+			return *this;
+		}
+
 		image &resize(uint32_t w, uint32_t h, uint32_t p = UINT32_MAX, const color_rgba& background = g_black_color)
 		{
 			return crop(w, h, p, background);
@ -2913,7 +3053,7 @@ namespace basisu
 					const int sx = src_x + x;
 					if (sx < 0)
 						continue;
-					else if (sx >= (int)src.get_height())
+					else if (sx >= (int)src.get_width())
 						break;

 					set_clipped(dst_x + x, dst_y + y, src(sx, sy));
@ -2955,6 +3095,8 @@ namespace basisu
 			return *this;
 		}

+		inline bool is_valid() const { return m_width > 0; }
+
 		inline uint32_t get_width() const { return m_width; }
 		inline uint32_t get_height() const { return m_height; }
 		inline uint32_t get_pitch() const { return m_pitch; }
@ -3038,8 +3180,56 @@ namespace basisu
 			return *this;
 		}

+		void swap_rb()
+		{
+			for (auto& v : m_pixels)
+				std::swap(v.r, v.b);
+		}
+
 		void debug_text(uint32_t x_ofs, uint32_t y_ofs, uint32_t x_scale, uint32_t y_scale, const color_rgba &fg, const color_rgba *pBG, bool alpha_only, const char* p, ...);
 				
+		vec4F get_filtered_vec4F(float x, float y) const
+		{
+			x -= .5f;
+			y -= .5f;
+
+			int ix = (int)floorf(x);
+			int iy = (int)floorf(y);
+			float wx = x - ix;
+			float wy = y - iy;
+
+			color_rgba a(get_clamped(ix, iy));
+			color_rgba b(get_clamped(ix + 1, iy));
+			color_rgba c(get_clamped(ix, iy + 1));
+			color_rgba d(get_clamped(ix + 1, iy + 1));
+
+			vec4F result;
+
+			for (uint32_t i = 0; i < 4; i++)
+			{
+				const float top = lerp<float>((float)a[i], (float)b[i], wx);
+				const float bot = lerp<float>((float)c[i], (float)d[i], wx);
+				const float m = lerp<float>((float)top, (float)bot, wy);
+
+				result[i] = m;
+			}
+
+			return result;
+		}
+
+		// (x,y) - Continuous coordinates, where pixel centers are at (.5,.5), valid image coords are [0,width] and [0,height]. Clamp addressing.
+		color_rgba get_filtered(float x, float y) const
+		{
+			const vec4F fresult(get_filtered_vec4F(x, y));
+
+			color_rgba result;
+
+			for (uint32_t i = 0; i < 4; i++)
+				result[i] = (uint8_t)clamp<int>((int)(fresult[i] + .5f), 0, 255);
+
+			return result;
+		}
+				
 	private:
 		uint32_t m_width, m_height, m_pitch;  // all in pixels
 		color_rgba_vec m_pixels;
@ -3069,6 +3259,31 @@ namespace basisu
 			*this = other;
 		}

+		imagef(imagef&& other) :
+			m_width(other.m_width), m_height(other.m_height), m_pitch(other.m_pitch),
+			m_pixels(std::move(other.m_pixels))
+		{
+			other.m_width = 0;
+			other.m_height = 0;
+			other.m_pitch = 0;
+		}
+
+		imagef& operator= (imagef&& rhs)
+		{
+			if (this != &rhs)
+			{
+				m_width = rhs.m_width;
+				m_height = rhs.m_height;
+				m_pitch = rhs.m_pitch;
+				m_pixels = std::move(rhs.m_pixels);
+
+				rhs.m_width = 0;
+				rhs.m_height = 0;
+				rhs.m_pitch = 0;
+			}
+			return *this;
+		}
+
 		imagef &swap(imagef &other)
 		{
 			std::swap(m_width, other.m_width);
@ -3118,6 +3333,12 @@ namespace basisu
 			return *this;
 		}

+		imagef& match_dimensions(const imagef& other)
+		{
+			resize(other.get_width(), other.get_height());
+			return *this;
+		}
+
 		imagef &resize(const imagef &other, uint32_t p = UINT32_MAX, const vec4F& background = vec4F(0,0,0,1))
 		{
 			return resize(other.get_width(), other.get_height(), p, background);
@ -3248,7 +3469,7 @@ namespace basisu
 					const int sx = src_x + x;
 					if (sx < 0)
 						continue;
-					else if (sx >= (int)src.get_height())
+					else if (sx >= (int)src.get_width())
 						break;

 					set_clipped(dst_x + x, dst_y + y, src(sx, sy));
@ -3274,10 +3495,12 @@ namespace basisu
 			return *this;
 		}

+		inline bool is_valid() const { return m_width > 0; }
+
 		inline uint32_t get_width() const { return m_width; }
 		inline uint32_t get_height() const { return m_height; }
 		inline uint32_t get_pitch() const { return m_pitch; }
-		inline uint32_t get_total_pixels() const { return m_width * m_height; }
+		inline uint64_t get_total_pixels() const { return (uint64_t)m_width * m_height; }

 		inline uint32_t get_block_width(uint32_t w) const { return (m_width + (w - 1)) / w; }
 		inline uint32_t get_block_height(uint32_t h) const { return (m_height + (h - 1)) / h; }
@ -3315,7 +3538,7 @@ namespace basisu
 							{
 								if (!nan_msg)
 								{
-									fprintf(stderr, "One or more pixels was NaN, setting to 0.\n");
+									fprintf(stderr, "One or more input pixels was NaN, setting to 0.\n");
 									nan_msg = true;
 								}
 							}
@ -3324,7 +3547,7 @@ namespace basisu
 							{
 								if (!inf_msg)
 								{
-									fprintf(stderr, "One or more pixels was INF, setting to 0.\n");
+									fprintf(stderr, "One or more input pixels was INF, setting to 0.\n");
 									inf_msg = true;
 								}
 							}
@ -3333,7 +3556,7 @@ namespace basisu
 							{
 								if (!neg_zero_msg)
 								{
-									fprintf(stderr, "One or more pixels was -0, setting them to 0.\n");
+									fprintf(stderr, "One or more input pixels was -0, setting them to 0.\n");
 									neg_zero_msg = true;
 								}
 							}
@ -3350,7 +3573,7 @@ namespace basisu

 								if (!neg_msg)
 								{
-									fprintf(stderr, "One or more pixels was negative -- setting these pixel components to 0 because ASTC HDR doesn't support signed values.\n");
+									fprintf(stderr, "One or more input pixels was negative -- setting these pixel components to 0 because ASTC HDR doesn't support signed values.\n");
 									neg_msg = true;
 								}
 								
@ -3363,7 +3586,7 @@ namespace basisu
 								
 								if (!clamp_msg)
 								{
-									fprintf(stderr, "One or more pixels had to be clamped to %f.\n", highest_mag);
+									fprintf(stderr, "One or more input pixels had to be clamped to %f.\n", highest_mag);
 									clamp_msg = true;
 								}

@ -3386,6 +3609,45 @@ namespace basisu
 			return *this;
 		}

+		bool has_alpha(uint32_t channel = 3) const
+		{
+			for (uint32_t y = 0; y < m_height; ++y)
+				for (uint32_t x = 0; x < m_width; ++x)
+					if ((*this)(x, y)[channel] != 1.0f)
+						return true;
+
+			return false;
+		}
+
+		vec4F get_filtered_vec4F(float x, float y) const
+		{
+			x -= .5f;
+			y -= .5f;
+
+			int ix = (int)floorf(x);
+			int iy = (int)floorf(y);
+			float wx = x - ix;
+			float wy = y - iy;
+
+			vec4F a(get_clamped(ix, iy));
+			vec4F b(get_clamped(ix + 1, iy));
+			vec4F c(get_clamped(ix, iy + 1));
+			vec4F d(get_clamped(ix + 1, iy + 1));
+
+			vec4F result;
+
+			for (uint32_t i = 0; i < 4; i++)
+			{
+				const float top = lerp<float>((float)a[i], (float)b[i], wx);
+				const float bot = lerp<float>((float)c[i], (float)d[i], wx);
+				const float m = lerp<float>((float)top, (float)bot, wy);
+
+				result[i] = m;
+			}
+
+			return result;
+		}
+						
 	private:
 		uint32_t m_width, m_height, m_pitch;  // all in pixels
 		vec4F_vec m_pixels;
@ -3402,6 +3664,52 @@ namespace basisu
 	float linear_to_srgb(float l);
 	float srgb_to_linear(float s);

+	class fast_linear_to_srgb
+	{
+	public:
+		fast_linear_to_srgb()
+		{
+			init();
+		}
+
+		void init()
+		{
+			for (int i = 0; i < LINEAR_TO_SRGB_TABLE_SIZE; ++i)
+			{
+				float l = (float)i * (1.0f / (LINEAR_TO_SRGB_TABLE_SIZE - 1));
+				m_linear_to_srgb_table[i] = (uint8_t)basisu::fast_floorf_int(255.0f * basisu::linear_to_srgb(l));
+			}
+
+			float srgb_to_linear[256];
+			for (int i = 0; i < 256; i++)
+				srgb_to_linear[i] = basisu::srgb_to_linear((float)i / 255.0f);
+
+			for (int i = 0; i < 256; i++)
+				m_srgb_to_linear_thresh[i] = (srgb_to_linear[i] + srgb_to_linear[basisu::minimum<int>(i + 1, 255)]) * .5f;
+		}
+
+		inline uint8_t convert(float l) const
+		{
+			assert((l >= 0.0f) && (l <= 1.0f));
+			int j = basisu::fast_roundf_int((LINEAR_TO_SRGB_TABLE_SIZE - 1) * l);
+
+			assert((j >= 0) && (j < LINEAR_TO_SRGB_TABLE_SIZE));
+			int b = m_linear_to_srgb_table[j];
+
+			b += (l > m_srgb_to_linear_thresh[b]);
+
+			return (uint8_t)b;
+		}
+
+	private:
+		static constexpr int LINEAR_TO_SRGB_TABLE_SIZE = 2048;
+		uint8_t m_linear_to_srgb_table[LINEAR_TO_SRGB_TABLE_SIZE];
+
+		float m_srgb_to_linear_thresh[256];
+	};
+
+	extern fast_linear_to_srgb g_fast_linear_to_srgb;
+		
 	// Image metrics
 		
 	class image_metrics
@ -3438,6 +3746,8 @@ namespace basisu
 		void calc(const image &a, const image &b, uint32_t first_chan = 0, uint32_t total_chans = 0, bool avg_comp_error = true, bool use_601_luma = false);
 	};

+	void print_image_metrics(const image& a, const image& b);
+
 	// Image saving/loading/resampling

 	bool load_png(const uint8_t* pBuf, size_t buf_size, image& img, const char* pFilename = nullptr);
@ -3450,15 +3760,22 @@ namespace basisu
 	bool load_qoi(const char* pFilename, image& img);

 	bool load_jpg(const char *pFilename, image& img);
+	bool load_jpg(const uint8_t* pBuf, size_t buf_size, image& img);
 	inline bool load_jpg(const std::string &filename, image &img) { return load_jpg(filename.c_str(), img); }
 	
 	// Currently loads .PNG, .TGA, or .JPG
 	bool load_image(const char* pFilename, image& img);
 	inline bool load_image(const std::string &filename, image &img) { return load_image(filename.c_str(), img); }

+	bool is_image_filename_hdr(const char* pFilename);
+
 	// Supports .HDR and most (but not all) .EXR's (see TinyEXR).
-	bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear = true);
-	inline bool load_image_hdr(const std::string& filename, imagef& img, bool ldr_srgb_to_linear = true) { return load_image_hdr(filename.c_str(), img, ldr_srgb_to_linear); }
+	bool load_image_hdr(const char* pFilename, imagef& img, bool ldr_srgb_to_linear = true, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f);
+	
+	inline bool load_image_hdr(const std::string& filename, imagef& img, bool ldr_srgb_to_linear = true, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f)
+	{ 
+		return load_image_hdr(filename.c_str(), img, ldr_srgb_to_linear, linear_nit_multiplier, ldr_black_bias);
+	}

 	enum class hdr_image_type
 	{
@ -3466,10 +3783,11 @@ namespace basisu
 		cHITRGBAFloat = 1,
 		cHITPNGImage = 2,
 		cHITEXRImage = 3,
-		cHITHDRImage = 4
+		cHITHDRImage = 4,
+		cHITJPGImage = 5
 	};

-	bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear);
+	bool load_image_hdr(const void* pMem, size_t mem_size, imagef& img, uint32_t width, uint32_t height, hdr_image_type img_type, bool ldr_srgb_to_linear, float linear_nit_multiplier = 1.0f, float ldr_black_bias = 0.0f);

 	uint8_t *read_tga(const uint8_t *pBuf, uint32_t buf_size, int &width, int &height, int &n_chans);
 	uint8_t *read_tga(const char *pFilename, int &width, int &height, int &n_chans);
@ -3512,7 +3830,7 @@ namespace basisu
 	};

 	// Supports 1 (Y), 3 (RGB), or 4 (RGBA) channel images.
-	bool write_exr(const char* pFilename, imagef& img, uint32_t n_chans, uint32_t flags);
+	bool write_exr(const char* pFilename, const imagef& img, uint32_t n_chans, uint32_t flags);
 			
 	enum
 	{
@ -3572,102 +3890,6 @@ namespace basisu

 	inline double get_interval_timer() { return interval_timer::ticks_to_secs(interval_timer::get_ticks()); }

-	// 2D array
-
-	template<typename T>
-	class vector2D
-	{
-		typedef basisu::vector<T> TVec;
-
-		uint32_t m_width, m_height;
-		TVec m_values;
-
-	public:
-		vector2D() :
-			m_width(0),
-			m_height(0)
-		{
-		}
-
-		vector2D(uint32_t w, uint32_t h) :
-			m_width(0),
-			m_height(0)
-		{
-			resize(w, h);
-		}
-
-		vector2D(const vector2D &other)
-		{
-			*this = other;
-		}
-
-		vector2D &operator= (const vector2D &other)
-		{
-			if (this != &other)
-			{
-				m_width = other.m_width;
-				m_height = other.m_height;
-				m_values = other.m_values;
-			}
-			return *this;
-		}
-
-		inline bool operator== (const vector2D &rhs) const
-		{
-			return (m_width == rhs.m_width) && (m_height == rhs.m_height) && (m_values == rhs.m_values);
-		}
-
-		inline uint32_t size_in_bytes() const { return (uint32_t)m_values.size() * sizeof(m_values[0]); }
-
-		inline const T &operator() (uint32_t x, uint32_t y) const { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; }
-		inline T &operator() (uint32_t x, uint32_t y) { assert(x < m_width && y < m_height); return m_values[x + y * m_width]; }
-
-		inline const T &operator[] (uint32_t i) const { return m_values[i]; }
-		inline T &operator[] (uint32_t i) { return m_values[i]; }
-				
-		inline const T &at_clamped(int x, int y) const { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }		
-		inline T &at_clamped(int x, int y) { return (*this)(clamp<int>(x, 0, m_width - 1), clamp<int>(y, 0, m_height - 1)); }
-
-		void clear()
-		{
-			m_width = 0;
-			m_height = 0;
-			m_values.clear();
-		}
-
-		void set_all(const T&val)
-		{
-			vector_set_all(m_values, val);
-		}
-
-		inline const T* get_ptr() const { return &m_values[0]; }
-		inline T* get_ptr() { return &m_values[0]; }
-
-		vector2D &resize(uint32_t new_width, uint32_t new_height)
-		{
-			if ((m_width == new_width) && (m_height == new_height))
-				return *this;
-
-			TVec oldVals(new_width * new_height);
-			oldVals.swap(m_values);
-
-			const uint32_t w = minimum(m_width, new_width);
-			const uint32_t h = minimum(m_height, new_height);
-
-			if ((w) && (h))
-			{
-				for (uint32_t y = 0; y < h; y++)
-					for (uint32_t x = 0; x < w; x++)
-						m_values[x + y * new_width] = oldVals[x + y * m_width];
-			}
-
-			m_width = new_width;
-			m_height = new_height;
-
-			return *this;
-		}
-	};
-
 	inline FILE *fopen_safe(const char *pFilename, const char *pMode)
 	{
 #ifdef _WIN32
@ -3723,12 +3945,14 @@ namespace basisu
 	};
 	typedef basisu::vector<pixel_block_hdr> pixel_block_hdr_vec;

-	void tonemap_image_reinhard(image& ldr_img, const imagef& hdr_img, float exposure);
+	void tonemap_image_reinhard(image& ldr_img, const imagef& hdr_img, float exposure, bool add_noise = false, bool per_component = true, bool luma_scaling = false);
 	bool tonemap_image_compressive(image& dst_img, const imagef& hdr_test_img);
+	bool tonemap_image_compressive2(image& dst_img, const imagef& hdr_test_img);
 	
 	// Intersection
 	enum eClear { cClear = 0 };
 	enum eInitExpand { cInitExpand = 0 };
+	enum eIdentity { cIdentity = 0 };

 	template<typename vector_type>
 	class ray
@ -3845,6 +4069,7 @@ namespace basisu
 	typedef vec_interval<vec3F> vec_interval3F;
 	typedef vec_interval<vec4F> vec_interval4F;

+	typedef vec_interval1F aabb1F;
 	typedef vec_interval2F aabb2F;
 	typedef vec_interval3F aabb3F;

@ -4004,17 +4229,18 @@ namespace basisu
 		return result;
 	}

-	// Supports positive and denormals only. No NaN or Inf.
-	inline float fast_half_to_float_pos_not_inf_or_nan(basist::half_float h)
-	{
-		assert(!basist::half_is_signed(h) && !basist::is_half_inf_or_nan(h));
-
 	union fu32
 	{
 		uint32_t u;
 		float f;
 	};

+	// Supports positive and denormals only. No NaN or Inf.
+	BASISU_FORCE_INLINE float fast_half_to_float_pos_not_inf_or_nan(basist::half_float h)
+	{
+		assert(!basist::half_is_signed(h) && !basist::is_half_inf_or_nan(h));
+				
+		// add 112 to the exponent (112+half float's exp bias of 15=float32's bias of 127)
 		static const fu32 K = { 0x77800000 }; 

 		fu32 o;
@ -4024,6 +4250,61 @@ namespace basisu
 		return o.f;
 	}

+	// Positive, negative, or denormals. No NaN or Inf. Clamped to MAX_HALF_FLOAT.
+	inline basist::half_float fast_float_to_half_trunc_no_nan_or_inf(float f)
+	{
+		assert(!isnan(f) && !isinf(f));
+
+		// Subtract 112 from the exponent, to change the bias from 127 to 15.
+		static const fu32 g_f_to_h{ 0x7800000 };
+				
+		fu32 fu;
+
+		fu.f = minimum<float>((float)basist::MAX_HALF_FLOAT, fabsf(f)) * g_f_to_h.f;
+
+		return (basist::half_float)(((fu.u >> (23 - 10)) & 0x7FFF) | ((f < 0.0f) ? 0x8000 : 0));
+	}
+
+	inline basist::half_float fast_float_to_half_trunc_no_clamp_neg_nan_or_inf(float f)
+	{
+		assert(!isnan(f) && !isinf(f));
+		assert((f >= 0.0f) && (f <= basist::MAX_HALF_FLOAT));
+		
+		// Subtract 112 from the exponent, to change the bias from 127 to 15.
+		static const fu32 g_f_to_h{ 0x7800000 };
+
+		fu32 fu;
+
+		fu.f = f * g_f_to_h.f;
+		
+		return (basist::half_float)((fu.u >> (23 - 10)) & 0x7FFF);
+	}
+		
+	inline basist::half_float fast_float_to_half_no_clamp_neg_nan_or_inf(float f)
+	{
+		assert(!isnan(f) && !isinf(f));
+		assert((f >= 0.0f) && (f <= basist::MAX_HALF_FLOAT));
+
+		// Subtract 112 from the exponent, to change the bias from 127 to 15.
+		static const fu32 g_f_to_h{ 0x7800000 };
+
+		fu32 fu;
+
+		fu.f = f * g_f_to_h.f;
+
+		uint32_t h = (basist::half_float)((fu.u >> (23 - 10)) & 0x7FFF);
+
+		// round to nearest, ties to even
+		uint32_t mant = fu.u & 8191; // examine lowest 13 bits
+		uint32_t inc = (mant > 4096) | ((mant == 4096) & (h & 1));
+		h += inc;
+
+		if (h > basist::MAX_HALF_FLOAT_AS_INT_BITS)
+			h = basist::MAX_HALF_FLOAT_AS_INT_BITS;
+
+		return (basist::half_float)h;
+	}
+								
 } // namespace basisu

-
+#include "basisu_math.h"
--- a/thirdparty/basis_universal/encoder/basisu_frontend.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_frontend.cpp
@ -353,9 +353,7 @@ namespace basisu
 			const uint32_t first_index = block_index_iter;
 			const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);

-#ifndef __EMSCRIPTEN__
 			m_params.m_pJob_pool->add_job([this, first_index, last_index] {
-#endif

 				for (uint32_t block_index = first_index; block_index < last_index; block_index++)
 				{
@ -388,15 +386,11 @@ namespace basisu
 					m_block_selector_cluster_index[block_index] = best_index;
 				}

-#ifndef __EMSCRIPTEN__
 				});
-#endif

 		}

-#ifndef __EMSCRIPTEN__
 		m_params.m_pJob_pool->wait_for_all();
-#endif

 		m_encoded_blocks.resize(m_total_blocks);
 		for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
@ -425,9 +419,7 @@ namespace basisu
 				const uint32_t first_index = block_index_iter;
 				const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);

-#ifndef __EMSCRIPTEN__
 				m_params.m_pJob_pool->add_job([this, first_index, last_index, pass] {
-#endif
 										
 					for (uint32_t block_index = first_index; block_index < last_index; block_index++)
 					{
@ -475,15 +467,11 @@ namespace basisu

 					} // block_index

-#ifndef __EMSCRIPTEN__
 					});
-#endif

 			}

-#ifndef __EMSCRIPTEN__
 			m_params.m_pJob_pool->wait_for_all();
-#endif

 			m_endpoint_clusters.resize(0);
 			m_endpoint_clusters.resize(endpoints.size());
@ -501,9 +489,7 @@ namespace basisu
 				const uint32_t first_index = block_index_iter;
 				const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);

-#ifndef __EMSCRIPTEN__
 				m_params.m_pJob_pool->add_job([this, first_index, last_index] {
-#endif

 					for (uint32_t block_index = first_index; block_index < last_index; block_index++)
 					{
@ -535,15 +521,11 @@ namespace basisu
 						m_block_selector_cluster_index[block_index] = best_index;
 					}

-#ifndef __EMSCRIPTEN__
 					});
-#endif

 			}

-#ifndef __EMSCRIPTEN__
 			m_params.m_pJob_pool->wait_for_all();
-#endif

 			m_encoded_blocks.resize(m_total_blocks);
 			for (uint32_t block_index = 0; block_index < m_total_blocks; block_index++)
@ -573,7 +555,7 @@ namespace basisu
 		debug_printf("introduce_special_selector_clusters\n");

 		uint32_t total_blocks_relocated = 0;
-		const uint32_t initial_selector_clusters = (uint32_t)m_selector_cluster_block_indices.size();
+		const uint32_t initial_selector_clusters = m_selector_cluster_block_indices.size_u32();

 		bool_vec block_relocated_flags(m_total_blocks);

@ -595,7 +577,7 @@ namespace basisu

 			debug_printf("Introducing sel %u\n", sel);

-			const uint32_t new_selector_cluster_index = (uint32_t)m_optimized_cluster_selectors.size();
+			const uint32_t new_selector_cluster_index = m_optimized_cluster_selectors.size_u32();

 			m_optimized_cluster_selectors.push_back(blk);
 			
@ -675,7 +657,7 @@ namespace basisu
 	{
 		debug_printf("optimize_selector_codebook\n");

-		const uint32_t orig_total_selector_clusters = (uint32_t)m_optimized_cluster_selectors.size();
+		const uint32_t orig_total_selector_clusters = m_optimized_cluster_selectors.size_u32();

 		bool_vec selector_cluster_was_used(m_optimized_cluster_selectors.size());
 		for (uint32_t i = 0; i < m_total_blocks; i++)
@ -787,9 +769,7 @@ namespace basisu
 				const uint32_t first_index = block_index_iter;
 				const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);

-#ifndef __EMSCRIPTEN__
 				m_params.m_pJob_pool->add_job([this, first_index, last_index] {
-#endif

 					for (uint32_t block_index = first_index; block_index < last_index; block_index++)
 					{
@ -830,15 +810,11 @@ namespace basisu
 								blk.set_selector(x, y, selectors[x + y * 4]);
 					}

-#ifndef __EMSCRIPTEN__
 					});
-#endif

 			}

-#ifndef __EMSCRIPTEN__
 			m_params.m_pJob_pool->wait_for_all();
-#endif

 		} // use_cpu
 		 
@ -859,9 +835,7 @@ namespace basisu
 			const uint32_t first_index = block_index_iter;
 			const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);

-#ifndef __EMSCRIPTEN__
 			m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] {
-#endif

 				for (uint32_t block_index = first_index; block_index < last_index; block_index++)
 				{			
@ -883,15 +857,11 @@ namespace basisu

 				} // block_index;

-#ifndef __EMSCRIPTEN__
 			} );
-#endif

 		} // block_index_iter

-#ifndef __EMSCRIPTEN__
 		m_params.m_pJob_pool->wait_for_all();
-#endif
 	}

 	void basisu_frontend::generate_endpoint_clusters()
@ -970,7 +940,7 @@ namespace basisu
 		}
 								
 		if (m_params.m_debug_stats)
-			debug_printf("Total endpoint clusters: %u, parent clusters: %u\n", (uint32_t)m_endpoint_clusters.size(), (uint32_t)m_endpoint_parent_clusters.size());
+			debug_printf("Total endpoint clusters: %u, parent clusters: %u\n", m_endpoint_clusters.size_u32(), m_endpoint_parent_clusters.size_u32());
 	}

 	// Iterate through each array of endpoint cluster block indices and set the m_block_endpoint_clusters_indices[][] array to indicaste which cluster index each block uses.
@ -1040,11 +1010,9 @@ namespace basisu
 		for (uint32_t cluster_index_iter = 0; cluster_index_iter < m_endpoint_clusters.size(); cluster_index_iter += N)
 		{
 			const uint32_t first_index = cluster_index_iter;                                    
-			const uint32_t last_index = minimum<uint32_t>((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N);   
+			const uint32_t last_index = minimum<uint32_t>(m_endpoint_clusters.size_u32(), cluster_index_iter + N);   

-#ifndef __EMSCRIPTEN__
 			m_params.m_pJob_pool->add_job( [this, first_index, last_index] {
-#endif

 				for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
 				{
@ -1112,15 +1080,11 @@ namespace basisu
 					}
 				} // cluster_index

-#ifndef __EMSCRIPTEN__
 			} );
-#endif

 		} // cluster_index_iter

-#ifndef __EMSCRIPTEN__
 		m_params.m_pJob_pool->wait_for_all();
-#endif

 		vector_sort(m_subblock_endpoint_quant_err_vec);
 	}
@ -1131,19 +1095,19 @@ namespace basisu

 		generate_block_endpoint_clusters();

-		int num_new_endpoint_clusters = m_params.m_max_endpoint_clusters - (uint32_t)m_endpoint_clusters.size();
+		int num_new_endpoint_clusters = m_params.m_max_endpoint_clusters - m_endpoint_clusters.size_u32();
 		if (num_new_endpoint_clusters <= 0)
 			return;

 		compute_endpoint_subblock_error_vec();

-		const uint32_t num_orig_endpoint_clusters = (uint32_t)m_endpoint_clusters.size();
+		const uint32_t num_orig_endpoint_clusters = m_endpoint_clusters.size_u32();

 		std::unordered_set<uint32_t> training_vector_was_relocated;

 		uint_vec cluster_sizes(num_orig_endpoint_clusters);
 		for (uint32_t i = 0; i < num_orig_endpoint_clusters; i++)
-			cluster_sizes[i] = (uint32_t)m_endpoint_clusters[i].size();
+			cluster_sizes[i] = m_endpoint_clusters[i].size_u32();

 		std::unordered_set<uint32_t> ignore_cluster;

@ -1259,7 +1223,7 @@ namespace basisu
 		// TODO: Get this working when step>0
 		if (m_params.m_pOpenCL_context && !step)
 		{
-			const uint32_t total_clusters = m_endpoint_clusters.size();
+			const uint32_t total_clusters = (uint32_t)m_endpoint_clusters.size();

 			basisu::vector<cl_pixel_cluster> pixel_clusters(total_clusters);
 			
@ -1342,7 +1306,7 @@ namespace basisu

 				} // cluster_indices_iter

-				uint32_t* pSorted = radix_sort(colors.size(), colors.data(), colors2.data(), 0, 3);
+				uint32_t* pSorted = radix_sort((uint32_t)colors.size(), colors.data(), colors2.data(), 0, 3);

 				const uint64_t first_pixel_index = input_pixels.size();

@ -1522,9 +1486,7 @@ namespace basisu
 				const uint32_t first_index = cluster_index_iter;
 				const uint32_t last_index = minimum<uint32_t>((uint32_t)m_endpoint_clusters.size(), cluster_index_iter + N);

-#ifndef __EMSCRIPTEN__
 				m_params.m_pJob_pool->add_job([this, first_index, last_index, step] {
-#endif

 					for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
 					{
@ -1643,15 +1605,11 @@ namespace basisu

 					} // cluster_index

-#ifndef __EMSCRIPTEN__
 					});
-#endif

 			} // cluster_index_iter

-#ifndef __EMSCRIPTEN__
 			m_params.m_pJob_pool->wait_for_all();
-#endif
 		}

 		debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
@ -1726,14 +1684,14 @@ namespace basisu
 		{
 			// For the OpenCL kernel, we order the parent endpoint clusters by smallest to largest for efficiency.
 			// We also prepare an array of block info structs that point into this new parent endpoint cluster array.
-			const uint32_t total_parent_clusters = m_endpoint_clusters_within_each_parent_cluster.size();
+			const uint32_t total_parent_clusters = (uint32_t)m_endpoint_clusters_within_each_parent_cluster.size();

 			basisu::vector<cl_block_info_struct> cl_block_info_structs(m_total_blocks);
 			
 			// the size of each parent cluster, in total clusters
 			uint_vec parent_cluster_sizes(total_parent_clusters);
 			for (uint32_t i = 0; i < total_parent_clusters; i++)
-				parent_cluster_sizes[i] = m_endpoint_clusters_within_each_parent_cluster[i].size();
+				parent_cluster_sizes[i] = (uint32_t)m_endpoint_clusters_within_each_parent_cluster[i].size();

 			uint_vec first_parent_cluster_ofs(total_parent_clusters);
 			uint32_t cur_ofs = 0;
@ -1818,9 +1776,7 @@ namespace basisu
 				const uint32_t first_index = block_index_iter;
 				const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);

-#ifndef __EMSCRIPTEN__
 				m_params.m_pJob_pool->add_job([this, first_index, last_index, &best_cluster_indices, &block_clusters] {
-#endif

 					for (uint32_t block_index = first_index; block_index < last_index; block_index++)
 					{
@ -1951,15 +1907,11 @@ namespace basisu

 					} // block_index

-#ifndef __EMSCRIPTEN__
 					});
-#endif

 			} // block_index_iter

-#ifndef __EMSCRIPTEN__
 			m_params.m_pJob_pool->wait_for_all();
-#endif
 		
 		} // use_cpu
 						
@ -2104,9 +2056,7 @@ namespace basisu
 				const uint32_t first_index = block_index_iter;
 				const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);

-#ifndef __EMSCRIPTEN__
 				m_params.m_pJob_pool->add_job([this, first_index, last_index] {
-#endif

 					for (uint32_t block_index = first_index; block_index < last_index; block_index++)
 					{
@ -2131,15 +2081,11 @@ namespace basisu

 					} // block_index

-#ifndef __EMSCRIPTEN__
 					});
-#endif

 			} // block_index_iter

-#ifndef __EMSCRIPTEN__
 			m_params.m_pJob_pool->wait_for_all();
-#endif

 		} // use_cpu
 				
@ -2204,9 +2150,7 @@ namespace basisu
 			const uint32_t first_index = block_index_iter;
 			const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);

-#ifndef __EMSCRIPTEN__
 			m_params.m_pJob_pool->add_job( [this, first_index, last_index, &training_vecs] {
-#endif

 				for (uint32_t block_index = first_index; block_index < last_index; block_index++)
 				{
@ -2233,15 +2177,11 @@ namespace basisu
 				
 				} // block_index

-#ifndef __EMSCRIPTEN__
 			} );
-#endif

 		} // block_index_iter

-#ifndef __EMSCRIPTEN__
 		m_params.m_pJob_pool->wait_for_all();
-#endif

 		vec16F_clusterizer selector_clusterizer;
 		for (uint32_t i = 0; i < m_total_blocks; i++)
@ -2335,9 +2275,7 @@ namespace basisu
 			const uint32_t first_index = cluster_index_iter;
 			const uint32_t last_index = minimum<uint32_t>((uint32_t)total_selector_clusters, cluster_index_iter + N);

-#ifndef __EMSCRIPTEN__			
 			m_params.m_pJob_pool->add_job([this, first_index, last_index] {
-#endif

 				for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
 				{
@ -2406,15 +2344,11 @@ namespace basisu

 				} // cluster_index

-#ifndef __EMSCRIPTEN__
 				});
-#endif

 		} // cluster_index_iter

-#ifndef __EMSCRIPTEN__
 		m_params.m_pJob_pool->wait_for_all();
-#endif

 		debug_printf("Elapsed time: %3.3f secs\n", tm.get_elapsed_secs());
 				
@ -2506,7 +2440,7 @@ namespace basisu

 		if ((m_params.m_pOpenCL_context) && m_use_hierarchical_selector_codebooks)
 		{
-			const uint32_t num_parent_clusters = m_selector_clusters_within_each_parent_cluster.size();
+			const uint32_t num_parent_clusters = m_selector_clusters_within_each_parent_cluster.size_u32();

 			basisu::vector<fosc_selector_struct> selector_structs;
 			selector_structs.reserve(m_optimized_cluster_selectors.size());
@ -2534,7 +2468,7 @@ namespace basisu
 					selector_cluster_indices.push_back(selector_cluster_index);
 				}

-				cur_ofs += m_selector_clusters_within_each_parent_cluster[parent_index].size();
+				cur_ofs += m_selector_clusters_within_each_parent_cluster[parent_index].size_u32();
 			}

 			const uint32_t total_input_selectors = cur_ofs;
@ -2549,7 +2483,7 @@ namespace basisu

 				block_structs[i].m_etc_color5_inten.a = (uint8_t)blk.get_inten_table(0);
 				block_structs[i].m_first_selector = parent_selector_cluster_offsets[parent_selector_cluster];
-				block_structs[i].m_num_selectors = m_selector_clusters_within_each_parent_cluster[parent_selector_cluster].size();
+				block_structs[i].m_num_selectors = m_selector_clusters_within_each_parent_cluster[parent_selector_cluster].size_u32();
 			}

 			uint_vec output_selector_cluster_indices(m_total_blocks);
@ -2615,9 +2549,7 @@ namespace basisu
 				const uint32_t first_index = block_index_iter;
 				const uint32_t last_index = minimum<uint32_t>(m_total_blocks, first_index + N);

-	#ifndef __EMSCRIPTEN__
 				m_params.m_pJob_pool->add_job( [this, first_index, last_index, &unpacked_optimized_cluster_selectors] {
-	#endif
 	
 				int prev_best_cluster_index = 0;

@ -2756,15 +2688,11 @@ namespace basisu
 					
 				} // block_index

-	#ifndef __EMSCRIPTEN__
 				} );
-	#endif

 			} // block_index_iter

-	#ifndef __EMSCRIPTEN__
 			m_params.m_pJob_pool->wait_for_all();
-	#endif
 						
 			for (uint32_t i = 0; i < m_selector_cluster_block_indices.size(); i++)
 			{
@ -3081,9 +3009,7 @@ namespace basisu
 			const uint32_t first_index = cluster_index_iter;                                    
 			const uint32_t last_index = minimum<uint32_t>((uint32_t)new_endpoint_cluster_block_indices.size(), cluster_index_iter + N);   

-#ifndef __EMSCRIPTEN__
 			m_params.m_pJob_pool->add_job( [this, first_index, last_index, &cluster_improved, &cluster_valid, &new_endpoint_cluster_block_indices, &pBlock_selector_indices ] {
-#endif

 				for (uint32_t cluster_index = first_index; cluster_index < last_index; cluster_index++)
 				{
@ -3173,15 +3099,11 @@ namespace basisu

 				} // cluster_index

-#ifndef __EMSCRIPTEN__
 			} );
-#endif

 		} // cluster_index_iter

-#ifndef __EMSCRIPTEN__
 		m_params.m_pJob_pool->wait_for_all();
-#endif
 				
 		uint32_t total_unused_clusters = 0;
 		uint32_t total_improved_clusters = 0;
--- a/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp
@ -19,6 +19,8 @@
 #include "basisu_bc7enc.h"
 #include "../transcoder/basisu_astc_hdr_core.h"

+#define BASISU_USE_GOOGLE_ASTC_DECODER (1)
+
 namespace basisu
 {
 	//------------------------------------------------------------------------------------------------
@ -1421,6 +1423,7 @@ namespace basisu
 		case texture_format::cBC6HUnsigned:
 		case texture_format::cASTC_HDR_4x4:
 		case texture_format::cUASTC_HDR_4x4:
+		case texture_format::cASTC_HDR_6x6:
 		{
 			// Can't unpack HDR blocks in unpack_block() because it returns 32bpp pixel data.
 			assert(0);
@ -1487,15 +1490,44 @@ namespace basisu
 	{
 		switch (fmt)
 		{
+			case texture_format::cASTC_HDR_6x6: // unpack one 6x6 ASTC HDR block into pPixels as floats
+			{
+#if BASISU_USE_GOOGLE_ASTC_DECODER
+				bool status = basisu_astc::astc::decompress_hdr(&pPixels[0][0], (uint8_t*)pBlock, 6, 6); // Use Google's decoder, as the 4x4 case does
+				assert(status);
+				if (!status)
+					return false; // still reported to the caller when assert() is compiled out
+#else
+				// Use our decoder
+				basist::half_float half_block[6 * 6][4]; // 36 texels, 4 components each
+
+				astc_helpers::log_astc_block log_blk;
+				if (!astc_helpers::unpack_block(pBlock, log_blk, 6, 6))
+					return false;
+				if (!astc_helpers::decode_block(log_blk, half_block, 6, 6, astc_helpers::cDecodeModeHDR16))
+					return false;
+
+				for (uint32_t p = 0; p < (6 * 6); p++) // pass every component through basist::half_to_float()
+				{
+					pPixels[p][0] = basist::half_to_float(half_block[p][0]);
+					pPixels[p][1] = basist::half_to_float(half_block[p][1]);
+					pPixels[p][2] = basist::half_to_float(half_block[p][2]);
+					pPixels[p][3] = basist::half_to_float(half_block[p][3]);
+				}
+#endif
+				return true;
+			}
 			case texture_format::cASTC_HDR_4x4:
 			case texture_format::cUASTC_HDR_4x4:
 			{
-#if 1
+#if BASISU_USE_GOOGLE_ASTC_DECODER
+				// Use Google's decoder
 				bool status = basisu_astc::astc::decompress_hdr(&pPixels[0][0], (uint8_t*)pBlock, 4, 4);
 				assert(status);
 				if (!status)
 					return false;
 #else
+				// Use our decoder
 				basist::half_float half_block[16][4];
 				
 				astc_helpers::log_astc_block log_blk;
@ -1592,10 +1624,8 @@ namespace basisu

 	bool gpu_image::unpack_hdr(imagef& img) const
 	{
-		if ((m_fmt != texture_format::cASTC_HDR_4x4) && 
-			(m_fmt != texture_format::cUASTC_HDR_4x4) &&
-			(m_fmt != texture_format::cBC6HUnsigned) &&
-			(m_fmt != texture_format::cBC6HSigned))
+		if ((m_fmt != texture_format::cASTC_HDR_4x4) && (m_fmt != texture_format::cUASTC_HDR_4x4) && (m_fmt != texture_format::cASTC_HDR_6x6) &&
+			(m_fmt != texture_format::cBC6HUnsigned) &&	(m_fmt != texture_format::cBC6HSigned))
 		{
 			// Can't call on LDR images, at least currently. (Could unpack the LDR data and convert to float.)
 			assert(0);
@ -1643,6 +1673,7 @@ namespace basisu
 		KTX_RG = 0x8227,
 		KTX_RGB = 0x1907,
 		KTX_RGBA = 0x1908,
+
 		KTX_COMPRESSED_RGB_S3TC_DXT1_EXT = 0x83F0,
 		KTX_COMPRESSED_RGBA_S3TC_DXT5_EXT = 0x83F3,
 		KTX_COMPRESSED_RED_RGTC1_EXT = 0x8DBB,
@ -1655,11 +1686,42 @@ namespace basisu
 		KTX_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT = 0x8E8F,
 		KTX_COMPRESSED_RGB_PVRTC_4BPPV1_IMG = 0x8C00,
 		KTX_COMPRESSED_RGBA_PVRTC_4BPPV1_IMG = 0x8C02,
+		
 		KTX_COMPRESSED_RGBA_ASTC_4x4_KHR = 0x93B0,
+		KTX_COMPRESSED_RGBA_ASTC_5x4_KHR = 0x93B1,
+		KTX_COMPRESSED_RGBA_ASTC_5x5_KHR = 0x93B2,
+		KTX_COMPRESSED_RGBA_ASTC_6x5_KHR = 0x93B3,
+		KTX_COMPRESSED_RGBA_ASTC_6x6_KHR = 0x93B4,
+		KTX_COMPRESSED_RGBA_ASTC_8x5_KHR = 0x93B5,
+		KTX_COMPRESSED_RGBA_ASTC_8x6_KHR = 0x93B6,
+		KTX_COMPRESSED_RGBA_ASTC_8x8_KHR = 0x93B7,
+		KTX_COMPRESSED_RGBA_ASTC_10x5_KHR = 0x93B8,
+		KTX_COMPRESSED_RGBA_ASTC_10x6_KHR = 0x93B9,
+		KTX_COMPRESSED_RGBA_ASTC_10x8_KHR = 0x93BA,
+		KTX_COMPRESSED_RGBA_ASTC_10x10_KHR = 0x93BB,
+		KTX_COMPRESSED_RGBA_ASTC_12x10_KHR = 0x93BC,
+		KTX_COMPRESSED_RGBA_ASTC_12x12_KHR = 0x93BD,
+
 		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_4x4_KHR = 0x93D0,
+		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_5x4_KHR = 0x93D1,
+		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_5x5_KHR = 0x93D2,
+		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_6x5_KHR = 0x93D3,
+		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_6x6_KHR = 0x93D4,
+		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x5_KHR = 0x93D5,
+		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x6_KHR = 0x93D6,
+		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_8x8_KHR = 0x93D7,
+		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x5_KHR = 0x93D8,
+		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x6_KHR = 0x93D9,
+		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x8_KHR = 0x93DA,
+		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_10x10_KHR = 0x93DB,
+		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_12x10_KHR = 0x93DC,
+		KTX_COMPRESSED_SRGB8_ALPHA8_ASTC_12x12_KHR = 0x93DD,
+
 		KTX_COMPRESSED_RGBA_UASTC_4x4_KHR = 0x94CC, // TODO - Use proper value!
+
 		KTX_ATC_RGB_AMD = 0x8C92,
 		KTX_ATC_RGBA_INTERPOLATED_ALPHA_AMD = 0x87EE,
+
 		KTX_COMPRESSED_RGB_FXT1_3DFX = 0x86B0,
 		KTX_COMPRESSED_RGBA_FXT1_3DFX = 0x86B1,
 		KTX_COMPRESSED_RGBA_PVRTC_4BPPV2_IMG = 0x9138,
@ -1836,6 +1898,13 @@ namespace basisu
 			base_internal_fmt = KTX_RGBA;
 			break;
 		}
+		case texture_format::cASTC_HDR_6x6:
+		{
+			internal_fmt = KTX_COMPRESSED_RGBA_ASTC_6x6_KHR; // 0x93B4; the enum name carries no HDR/LDR distinction
+			// TODO: should we write RGB? We don't support generating HDR 6x6 with alpha.
+			base_internal_fmt = KTX_RGBA; 
+			break;
+		}
 		// We use different enums for HDR vs. LDR ASTC, but internally they are both just ASTC.
 		case texture_format::cASTC_LDR_4x4:
 		case texture_format::cASTC_HDR_4x4:
--- a/thirdparty/basis_universal/encoder/basisu_math.h
+++ b/thirdparty/basis_universal/encoder/basisu_math.h
--- a/thirdparty/basis_universal/encoder/basisu_opencl.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_opencl.cpp
@ -789,7 +789,7 @@ namespace basisu

 	struct opencl_context
 	{
-		uint32_t m_ocl_total_pixel_blocks;
+		size_t m_ocl_total_pixel_blocks;
 		cl_mem m_ocl_pixel_blocks;

 		cl_command_queue m_command_queue;
@ -907,7 +907,7 @@ namespace basisu
 	};
 #pragma pack(pop)

-	bool opencl_set_pixel_blocks(opencl_context_ptr pContext, uint32_t total_blocks, const cl_pixel_block* pPixel_blocks)
+	bool opencl_set_pixel_blocks(opencl_context_ptr pContext, size_t total_blocks, const cl_pixel_block* pPixel_blocks)
 	{
 		if (!opencl_is_available())
 			return false;
@ -939,8 +939,10 @@ namespace basisu
 		if (!pContext->m_ocl_pixel_blocks)
 			return false;

+		assert(pContext->m_ocl_total_pixel_blocks <= INT_MAX);
+				
 		cl_encode_etc1s_param_struct ps;
-		ps.m_total_blocks = pContext->m_ocl_total_pixel_blocks;
+		ps.m_total_blocks = (int)pContext->m_ocl_total_pixel_blocks;
 		ps.m_perceptual = perceptual;
 		ps.m_total_perms = total_perms;

@ -1063,8 +1065,10 @@ exit:
 		if (!pContext->m_ocl_pixel_blocks)
 			return false;

+		assert(pContext->m_ocl_total_pixel_blocks <= INT_MAX);
+				
 		cl_rec_param_struct ps;
-		ps.m_total_blocks = pContext->m_ocl_total_pixel_blocks;
+		ps.m_total_blocks = (int)pContext->m_ocl_total_pixel_blocks;
 		ps.m_perceptual = perceptual;

 		bool status = false;
@ -1118,8 +1122,10 @@ exit:
 		if (!pContext->m_ocl_pixel_blocks)
 			return false;

+		assert(pContext->m_ocl_total_pixel_blocks <= INT_MAX);
+
 		fosc_param_struct ps;
-		ps.m_total_blocks = pContext->m_ocl_total_pixel_blocks;
+		ps.m_total_blocks = (int)pContext->m_ocl_total_pixel_blocks;
 		ps.m_perceptual = perceptual;
 		
 		bool status = false;
@ -1170,8 +1176,10 @@ exit:
 		if (!pContext->m_ocl_pixel_blocks)
 			return false;

+		assert(pContext->m_ocl_total_pixel_blocks <= INT_MAX);
+
 		ds_param_struct ps;
-		ps.m_total_blocks = pContext->m_ocl_total_pixel_blocks;
+		ps.m_total_blocks = (int)pContext->m_ocl_total_pixel_blocks;
 		ps.m_perceptual = perceptual;

 		bool status = false;
@ -1232,7 +1240,7 @@ namespace basisu
 		BASISU_NOTE_UNUSED(context);
 	}

-	bool opencl_set_pixel_blocks(opencl_context_ptr pContext, uint32_t total_blocks, const cl_pixel_block* pPixel_blocks)
+	bool opencl_set_pixel_blocks(opencl_context_ptr pContext, size_t total_blocks, const cl_pixel_block* pPixel_blocks)
 	{
 		BASISU_NOTE_UNUSED(pContext);
 		BASISU_NOTE_UNUSED(total_blocks);
--- a/thirdparty/basis_universal/encoder/basisu_opencl.h
+++ b/thirdparty/basis_universal/encoder/basisu_opencl.h
@ -43,7 +43,7 @@ namespace basisu
 	// Must match BASISU_ETC1_CLUSTER_FIT_ORDER_TABLE_SIZE
 	const uint32_t OPENCL_ENCODE_ETC1S_MAX_PERMS = 165;

-	bool opencl_set_pixel_blocks(opencl_context_ptr pContext, uint32_t total_blocks, const cl_pixel_block* pPixel_blocks);
+	bool opencl_set_pixel_blocks(opencl_context_ptr pContext, size_t total_blocks, const cl_pixel_block* pPixel_blocks);

 	bool opencl_encode_etc1s_blocks(opencl_context_ptr pContext, etc_block* pOutput_blocks, bool perceptual, uint32_t total_perms);

--- a/thirdparty/basis_universal/encoder/basisu_resample_filters.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_resample_filters.cpp
@ -20,8 +20,7 @@

 namespace basisu
 {
-#define BOX_FILTER_SUPPORT (0.5f)
-	static float box_filter(float t) /* pulse/Fourier window */
+	float box_filter(float t) /* pulse/Fourier window */
 	{
 		// make_clist() calls the filter function with t inverted (pos = left, neg = right)
 		if ((t >= -0.5f) && (t < 0.5f))
@ -30,8 +29,7 @@ namespace basisu
 			return 0.0f;
 	}
 		
-#define TENT_FILTER_SUPPORT (1.0f)
-	static float tent_filter(float t) /* box (*) box, bilinear/triangle */
+	float tent_filter(float t) /* box (*) box, bilinear/triangle */
 	{
 		if (t < 0.0f)
 			t = -t;
@ -42,8 +40,7 @@ namespace basisu
 			return 0.0f;
 	}

-#define BELL_SUPPORT (1.5f)
-	static float bell_filter(float t) /* box (*) box (*) box */
+	float bell_filter(float t) /* box (*) box (*) box */
 	{
 		if (t < 0.0f)
 			t = -t;
@ -201,13 +198,12 @@ namespace basisu
 			return (0.0f);
 	}

-#define GAUSSIAN_SUPPORT (1.25f)
-	static float gaussian_filter(float t) // with blackman window
+	float gaussian_filter(float t) // with blackman window
 	{
 		if (t < 0)
 			t = -t; // fold onto t >= 0 so only the magnitude of t matters below
-		if (t < GAUSSIAN_SUPPORT)
-			return clean(exp(-2.0f * t * t) * sqrt(2.0f / M_PI) * blackman_exact_window(t / GAUSSIAN_SUPPORT));
+		if (t < BASISU_GAUSSIAN_FILTER_SUPPORT)
+			return clean(exp(-2.0f * t * t) * sqrt(2.0f / M_PI) * blackman_exact_window(t / BASISU_GAUSSIAN_FILTER_SUPPORT));
 		else
 			return 0.0f; // zero at and beyond the support bound
 	}
@ -310,9 +306,9 @@ namespace basisu

 	const resample_filter g_resample_filters[] =
 	{
-		{ "box", box_filter, BOX_FILTER_SUPPORT }, 
-		{ "tent", tent_filter, TENT_FILTER_SUPPORT }, 
-		{ "bell", bell_filter, BELL_SUPPORT }, 
+		{ "box", box_filter, BASISU_BOX_FILTER_SUPPORT },
+		{ "tent", tent_filter, BASISU_TENT_FILTER_SUPPORT }, 
+		{ "bell", bell_filter, BASISU_BELL_FILTER_SUPPORT }, 
 		{ "b-spline", B_spline_filter, B_SPLINE_SUPPORT },
 		{ "mitchell", mitchell_filter, MITCHELL_SUPPORT }, 
 		{ "blackman", blackman_filter, BLACKMAN_SUPPORT }, 
@ -321,7 +317,7 @@ namespace basisu
 		{ "lanczos6", lanczos6_filter, LANCZOS6_SUPPORT }, 
 		{ "lanczos12", lanczos12_filter, LANCZOS12_SUPPORT }, 
 		{ "kaiser", kaiser_filter, KAISER_SUPPORT }, 
-		{ "gaussian", gaussian_filter, GAUSSIAN_SUPPORT },
+		{ "gaussian", gaussian_filter, BASISU_GAUSSIAN_FILTER_SUPPORT },
 		{ "catmullrom", catmull_rom_filter, CATMULL_ROM_SUPPORT }, 
 		{ "quadratic_interp", quadratic_interp_filter, QUADRATIC_SUPPORT }, 
 		{ "quadratic_approx", quadratic_approx_filter, QUADRATIC_SUPPORT }, 
--- a/thirdparty/basis_universal/encoder/basisu_resampler.h
+++ b/thirdparty/basis_universal/encoder/basisu_resampler.h
@ -113,6 +113,8 @@ namespace basisu
 			Resample_Real filter_scale,
 			Resample_Real src_ofs);

+		static void free_clist(Contrib_List* p) { if (p) { free(p->p); free(p); } } // null-safe: frees p->p first, then p itself
+
 	private:
 		Resampler();
 		Resampler(const Resampler &o);
--- a/thirdparty/basis_universal/encoder/basisu_resampler_filters.h
+++ b/thirdparty/basis_universal/encoder/basisu_resampler_filters.h
@ -30,6 +30,18 @@ namespace basisu
 	extern const resample_filter g_resample_filters[];
 	extern const int g_num_resample_filters;
 		
+	const float BASISU_BOX_FILTER_SUPPORT = 0.5f; // support value listed with box_filter in g_resample_filters[]
+	float box_filter(float t); /* pulse/Fourier window */
+
+	const float BASISU_TENT_FILTER_SUPPORT = 1.0f; // support value listed with tent_filter in g_resample_filters[]
+	float tent_filter(float t); /* box (*) box, bilinear/triangle */
+
+	const float BASISU_GAUSSIAN_FILTER_SUPPORT = 1.25f; // gaussian_filter() returns 0 once |t| reaches this bound
+	float gaussian_filter(float t); // with blackman window
+
+	const float BASISU_BELL_FILTER_SUPPORT = 1.5f; // support value listed with bell_filter in g_resample_filters[]
+	float bell_filter(float t); /* box (*) box (*) box */
+
+
 	int find_resample_filter(const char *pName);

 } // namespace basisu
--- a/thirdparty/basis_universal/encoder/basisu_ssim.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_ssim.cpp
@ -91,6 +91,8 @@ namespace basisu

 	void gaussian_filter(imagef &dst, const imagef &orig_img, uint32_t odd_filter_width, float sigma_sqr, bool wrapping, uint32_t width_divisor, uint32_t height_divisor)
 	{
+		assert(&dst != &orig_img);
+
 		assert(odd_filter_width && (odd_filter_width & 1));
 		odd_filter_width |= 1;

--- a/thirdparty/basis_universal/encoder/basisu_uastc_enc.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_uastc_enc.cpp
@ -3814,31 +3814,6 @@ namespace basisu
 		}
 	};
 				
-	class tracked_stat
-	{
-	public:
-		tracked_stat() { clear(); }
-
-		void clear() { m_num = 0; m_total = 0; m_total2 = 0; }
-
-		void update(uint32_t val) { m_num++; m_total += val; m_total2 += val * val; }
-
-		tracked_stat& operator += (uint32_t val) { update(val); return *this; }
-
-		uint32_t get_number_of_values() { return m_num; }
-		uint64_t get_total() const { return m_total; }
-		uint64_t get_total2() const { return m_total2; }
-
-		float get_average() const { return m_num ? (float)m_total / m_num : 0.0f; };
-		float get_std_dev() const { return m_num ? sqrtf((float)(m_num * m_total2 - m_total * m_total)) / m_num : 0.0f; }
-		float get_variance() const { float s = get_std_dev(); return s * s; }
-
-	private:
-		uint32_t m_num;
-		uint64_t m_total;
-		uint64_t m_total2;
-	};
-		
 	static bool uastc_rdo_blocks(uint32_t first_index, uint32_t last_index, basist::uastc_block* pBlocks, const color_rgba* pBlock_pixels, const uastc_rdo_params& params, uint32_t flags, 
 		uint32_t &total_skipped, uint32_t &total_refined, uint32_t &total_modified, uint32_t &total_smooth)
 	{
@ -4150,9 +4125,7 @@ namespace basisu
 				const uint32_t first_index = block_index_iter;
 				const uint32_t last_index = minimum<uint32_t>(num_blocks, block_index_iter + blocks_per_job);

-#ifndef __EMSCRIPTEN__
 				pJob_pool->add_job([first_index, last_index, pBlocks, pBlock_pixels, &params, flags, &total_skipped, &total_modified, &total_refined, &total_smooth, &all_succeeded, &stat_mutex] {
-#endif

 					uint32_t job_skipped = 0, job_modified = 0, job_refined = 0, job_smooth = 0;

@ -4168,16 +4141,12 @@ namespace basisu
 						total_smooth += job_smooth;
 					}

-#ifndef __EMSCRIPTEN__
 					}
 				);
-#endif

 			} // block_index_iter

-#ifndef __EMSCRIPTEN__
 			pJob_pool->wait_for_all();
-#endif

 			status = all_succeeded;
 		}
--- a/thirdparty/basis_universal/encoder/basisu_uastc_hdr_4x4_enc.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_uastc_hdr_4x4_enc.cpp
--- a/thirdparty/basis_universal/encoder/basisu_uastc_hdr_4x4_enc.h
+++ b/thirdparty/basis_universal/encoder/basisu_uastc_hdr_4x4_enc.h
@ -1,29 +1,20 @@
-// basisu_astc_hdr_enc.h
+// basisu_uastc_hdr_4x4_enc.h
 #pragma once
 #include "basisu_enc.h"
 #include "basisu_gpu_texture.h"
 #include "../transcoder/basisu_astc_helpers.h"
 #include "../transcoder/basisu_astc_hdr_core.h"
+#include "basisu_astc_hdr_common.h"

 namespace basisu
 {
-	// This MUST be called before encoding any blocks.
-	void astc_hdr_enc_init();
-
-	const uint32_t MODE11_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE11_LAST_ISE_RANGE = astc_helpers::BISE_16_LEVELS;
-	const uint32_t MODE7_PART1_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE7_PART1_LAST_ISE_RANGE = astc_helpers::BISE_16_LEVELS;
-	const uint32_t MODE7_PART2_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE7_PART2_LAST_ISE_RANGE = astc_helpers::BISE_8_LEVELS;
-	const uint32_t MODE11_PART2_FIRST_ISE_RANGE = astc_helpers::BISE_3_LEVELS, MODE11_PART2_LAST_ISE_RANGE = astc_helpers::BISE_4_LEVELS;
-	const uint32_t MODE11_TOTAL_SUBMODES = 8; // plus an extra hidden submode, directly encoded, for direct, so really 9 (see tables 99/100 of the ASTC spec)
-	const uint32_t MODE7_TOTAL_SUBMODES = 6;
-		
-	struct astc_hdr_codec_options
+	struct uastc_hdr_4x4_codec_options : astc_hdr_codec_base_options
 	{
 		float m_bc6h_err_weight;

 		bool m_use_solid;

-		bool m_use_mode11;
+		bool m_use_mode11_part1;
 		bool m_mode11_uber_mode;
 		uint32_t m_first_mode11_weight_ise_range;
 		uint32_t m_last_mode11_weight_ise_range;
@ -45,8 +36,6 @@ namespace basisu
 		uint32_t m_first_mode11_part2_weight_ise_range;
 		uint32_t m_last_mode11_part2_weight_ise_range;

-		float m_r_err_scale, m_g_err_scale;
-
 		bool m_refine_weights;

 		uint32_t m_level;
@ -54,10 +43,7 @@ namespace basisu
 		bool m_use_estimated_partitions;
 		uint32_t m_max_estimated_partitions;

-		// If true, the ASTC HDR compressor is allowed to more aggressively vary weight indices for slightly higher compression in non-fastest mode. This will hurt BC6H quality, however.
-		bool m_allow_uber_mode;
-
-		astc_hdr_codec_options();
+		uastc_hdr_4x4_codec_options();

 		void init();

@ -73,7 +59,7 @@ namespace basisu
 		void set_quality_fastest();
 	};

-	struct astc_hdr_pack_results
+	struct astc_hdr_4x4_pack_results
 	{
 		double m_best_block_error;
 		double m_bc6h_block_error; // note this is not used/set by the encoder, here for convenience 
@ -119,35 +105,6 @@ namespace basisu
 		}
 	};
 			
-	void interpolate_qlog12_colors(
-		const int e[2][3],
-		basist::half_float* pDecoded_half,
-		vec3F* pDecoded_float,
-		uint32_t n, uint32_t ise_weight_range);
-		
-	bool get_astc_hdr_mode_11_block_colors(
-		const uint8_t* pEndpoints,
-		basist::half_float* pDecoded_half,
-		vec3F* pDecoded_float,
-		uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range);
-		
-	bool get_astc_hdr_mode_7_block_colors(
-		const uint8_t* pEndpoints,
-		basist::half_float* pDecoded_half,
-		vec3F* pDecoded_float,
-		uint32_t n, uint32_t ise_weight_range, uint32_t ise_endpoint_range);
-
-	double eval_selectors(
-		uint32_t num_pixels,
-		uint8_t* pWeights,
-		const basist::half_float* pBlock_pixels_half,
-		uint32_t num_weight_levels,
-		const basist::half_float* pDecoded_half,
-		const astc_hdr_codec_options& coptions,
-		uint32_t usable_selector_bitmask = UINT32_MAX);
-
-	double compute_block_error(const basist::half_float* pOrig_block, const basist::half_float* pPacked_block, const astc_hdr_codec_options& coptions);
-
 	// Encodes a 4x4 ASTC HDR block given a 4x4 array of source block pixels/texels.
 	// Supports solid color blocks, mode 11 (all submodes), mode 7/1 partition (all submodes), 
 	// and mode 7/2 partitions (all submodes) - 30 patterns, only the ones also in common with the BC6H format.
@ -164,16 +121,16 @@ namespace basisu
 	// astc_hdr_enc_init() MUST have been called first to initialize the codec.
 	// Input pixels are checked and cannot be NaN's, Inf's, signed, or too large (greater than MAX_HALF_FLOAT, or 65504). 
 	// Normal values and denormals are okay.
-	bool astc_hdr_enc_block(
-		const float* pRGBPixels,
-		const astc_hdr_codec_options& coptions,
-		basisu::vector<astc_hdr_pack_results> &all_results);
+	bool astc_hdr_4x4_enc_block(
+		const float* pRGBPixels, const basist::half_float *pRGBPixelsHalf,
+		const uastc_hdr_4x4_codec_options& coptions,
+		basisu::vector<astc_hdr_4x4_pack_results> &all_results);

-	bool astc_hdr_pack_results_to_block(basist::astc_blk& dst_blk, const astc_hdr_pack_results& results);
+	bool astc_hdr_4x4_pack_results_to_block(basist::astc_blk& dst_blk, const astc_hdr_4x4_pack_results& results);
 		
-	bool astc_hdr_refine_weights(const basist::half_float* pSource_block, astc_hdr_pack_results& cur_results, const astc_hdr_codec_options& coptions, float bc6h_weight, bool* pImproved_flag);
+	bool astc_hdr_4x4_refine_weights(const basist::half_float* pSource_block, astc_hdr_4x4_pack_results& cur_results, const uastc_hdr_4x4_codec_options& coptions, float bc6h_weight, bool* pImproved_flag);

-	struct astc_hdr_block_stats
+	struct astc_hdr_4x4_block_stats
 	{
 		std::mutex m_mutex;

@ -195,7 +152,7 @@ namespace basisu

 		uint32_t m_total_refined;
 								
-		astc_hdr_block_stats() { clear(); }
+		astc_hdr_4x4_block_stats() { clear(); }

 		void clear()
 		{
@ -215,7 +172,7 @@ namespace basisu
 			clear_obj(m_part_hist);
 		}

-		void update(const astc_hdr_pack_results& log_blk);
+		void update(const astc_hdr_4x4_pack_results& log_blk);
 		
 		void print();
 	};
--- a/thirdparty/basis_universal/patches/0001-external-zstd-pr344.patch
+++ b/thirdparty/basis_universal/patches/0001-external-zstd-pr344.patch
@ -1,5 +1,5 @@
 diff --git a/thirdparty/basis_universal/encoder/basisu_comp.cpp b/thirdparty/basis_universal/encoder/basisu_comp.cpp
-index f16e75bd46..81813257cd 100644
+index 59a2a50900..e9aa20f313 100644
 --- a/thirdparty/basis_universal/encoder/basisu_comp.cpp
 +++ b/thirdparty/basis_universal/encoder/basisu_comp.cpp
@@ -33,7 +33,7 @@
@ -12,10 +12,10 @@ index f16e75bd46..81813257cd 100644
 
 // Set to 1 to disable the mipPadding alignment workaround (which only seems to be needed when no key-values are written at all)
 diff --git a/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp b/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
-index ea994b0c4f..32018cd282 100644
+index 0f7ca1565f..d7bce42013 100644
 --- a/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
 +++ b/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
-@@ -164,7 +164,7 @@
+@@ -169,7 +169,7 @@
    // If BASISD_SUPPORT_KTX2_ZSTD is 0, UASTC files compressed with Zstd cannot be loaded.
 	#if BASISD_SUPPORT_KTX2_ZSTD
 		// We only use two Zstd API's: ZSTD_decompress() and ZSTD_isError()
--- a/thirdparty/basis_universal/patches/0002-external-jpgd.patch
+++ b/thirdparty/basis_universal/patches/0002-external-jpgd.patch
@ -1,8 +1,8 @@
 diff --git a/thirdparty/basis_universal/encoder/basisu_enc.cpp b/thirdparty/basis_universal/encoder/basisu_enc.cpp
-index 47e8981bc3..6c0ac0ad37 100644
+index b9804090b1..5987685ae7 100644
 --- a/thirdparty/basis_universal/encoder/basisu_enc.cpp
 +++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp
-@@ -458,7 +458,7 @@ namespace basisu
+@@ -492,7 +492,7 @@ namespace basisu
 	bool load_jpg(const char *pFilename, image& img)
 	{
 		int width = 0, height = 0, actual_comps = 0;
@ -11,3 +11,12 @@ index 47e8981bc3..6c0ac0ad37 100644
 		if (!pImage_data)
 			return false;
 		
+@@ -512,7 +512,7 @@ namespace basisu
+ 		}
+ 
+ 		int width = 0, height = 0, actual_comps = 0;
+-		uint8_t* pImage_data = jpgd::decompress_jpeg_image_from_memory(pBuf, (int)buf_size, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering);
++		uint8_t* pImage_data = jpgd::decompress_jpeg_image_from_memory(pBuf, (int)buf_size, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagBoxChromaFiltering);
+ 		if (!pImage_data)
+ 			return false;
+ 
--- a/thirdparty/basis_universal/patches/0003-external-tinyexr.patch
+++ b/thirdparty/basis_universal/patches/0003-external-tinyexr.patch
@ -1,8 +1,8 @@
 diff --git a/thirdparty/basis_universal/encoder/basisu_enc.cpp b/thirdparty/basis_universal/encoder/basisu_enc.cpp
-index 6c0ac0ad37..2bf486a028 100644
+index 7904aab91c..4d885cba16 100644
 --- a/thirdparty/basis_universal/encoder/basisu_enc.cpp
 +++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp
-@@ -27,7 +27,7 @@
+@@ -29,7 +29,7 @@
 #ifndef TINYEXR_USE_ZFP
 #define TINYEXR_USE_ZFP (1)
 #endif
@ -11,7 +11,7 @@ index 6c0ac0ad37..2bf486a028 100644
 
 #ifndef MINIZ_HEADER_FILE_ONLY
 #define MINIZ_HEADER_FILE_ONLY
-@@ -3257,7 +3257,8 @@ namespace basisu
+@@ -3420,7 +3420,8 @@ namespace basisu
 		float* out_rgba = nullptr;
 		const char* err = nullptr;
 		
--- a/thirdparty/basis_universal/patches/0004-remove-tinydds-qoi.patch
+++ b/thirdparty/basis_universal/patches/0004-remove-tinydds-qoi.patch
@ -1,8 +1,8 @@
 diff --git a/thirdparty/basis_universal/encoder/basisu_enc.cpp b/thirdparty/basis_universal/encoder/basisu_enc.cpp
-index 2bf486a028..fff98e8301 100644
+index 4d885cba16..6c2cf0260e 100644
 --- a/thirdparty/basis_universal/encoder/basisu_enc.cpp
 +++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp
-@@ -37,9 +37,6 @@
+@@ -39,9 +39,6 @@
 #endif
 #include "basisu_miniz.h"
 
@ -12,7 +12,7 @@ index 2bf486a028..fff98e8301 100644
 #if defined(_WIN32)
 // For QueryPerformanceCounter/QueryPerformanceFrequency
 #define WIN32_LEAN_AND_MEAN
-@@ -408,16 +405,7 @@ namespace basisu
+@@ -453,16 +450,7 @@ namespace basisu
 
 	bool load_qoi(const char* pFilename, image& img)
 	{
@ -31,7 +31,7 @@ index 2bf486a028..fff98e8301 100644
 
 	bool load_png(const uint8_t *pBuf, size_t buf_size, image &img, const char *pFilename)
 diff --git a/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp b/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp
-index 000869a533..648cfb47ae 100644
+index 339218fcf2..028ac3f314 100644
 --- a/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp
 +++ b/thirdparty/basis_universal/encoder/basisu_gpu_texture.cpp
@@ -19,9 +19,6 @@
@ -41,10 +41,10 @@ index 000869a533..648cfb47ae 100644
 -#define TINYDDS_IMPLEMENTATION
 -#include "3rdparty/tinydds.h"
 -
+ #define BASISU_USE_GOOGLE_ASTC_DECODER (1)
+ 
 namespace basisu
- {
- 	//------------------------------------------------------------------------------------------------
-@@ -1980,207 +1977,7 @@ namespace basisu
+@@ -2049,207 +2046,7 @@ namespace basisu
 	// and cubemap, cubemap mipmapped, and cubemap array mipmapped.
 	bool write_dds_file(uint8_vec &dds_data, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format)
 	{
@ -63,11 +63,11 @@ index 000869a533..648cfb47ae 100644
 -				assert(0);
 -				return false;
 -			}
-			slices = gpu_images.size() / 6;
+-			slices = gpu_images.size_u32() / 6;
 -		}
 -		else
 -		{
-			slices = gpu_images.size();
+-			slices = gpu_images.size_u32();
 -		}
 -
 -		uint32_t width = 0, height = 0, total_levels = 0;
@ -185,7 +185,7 @@ index 000869a533..648cfb47ae 100644
 -		assert(total_levels < 32);
 -		for (uint32_t i = 0; i < total_levels; i++)
 -		{
-			mipmap_sizes[i] = mipmaps[i].size_in_bytes();
+-			mipmap_sizes[i] = mipmaps[i].size_in_bytes_u32();
 -			mipmap_ptrs[i] = mipmaps[i].get_ptr();
 -		}
 -
@ -253,7 +253,7 @@ index 000869a533..648cfb47ae 100644
 	}
 
 	bool write_dds_file(const char* pFilename, const basisu::vector<gpu_image_vec>& gpu_images, bool cubemap_flag, bool use_srgb_format)
-@@ -2201,188 +1998,6 @@ namespace basisu
+@@ -2270,188 +2067,6 @@ namespace basisu
 		
 	bool read_uncompressed_dds_file(const char* pFilename, basisu::vector<image> &ldr_mips,	basisu::vector<imagef>& hdr_mips)
 	{
--- a/thirdparty/basis_universal/patches/0005-windows-illegal-character.patch
+++ b/thirdparty/basis_universal/patches/0005-windows-illegal-character.patch
@ -0,0 +1,13 @@
+diff --git a/thirdparty/basis_universal/encoder/basisu_astc_hdr_6x6_enc.h b/thirdparty/basis_universal/encoder/basisu_astc_hdr_6x6_enc.h
+index 0d6d2ae936..8b82ad8c29 100644
+--- a/thirdparty/basis_universal/encoder/basisu_astc_hdr_6x6_enc.h
+++ b/thirdparty/basis_universal/encoder/basisu_astc_hdr_6x6_enc.h
+@@ -16,7 +16,7 @@ namespace astc_6x6_hdr
+ 	{
+ 		// Important: The Delta ITP colorspace error metric we use internally makes several assumptions about the nature of the HDR RGB inputs supplied to the encoder.
+ 		// This encoder computes colorspace error in the ICtCp (or more accurately the delta ITP, where CT is scaled by .5 vs. ICtCp to become T) colorspace, so getting this correct is important.
+-		// By default the encoder assumes the input is in absolute luminance (in nits or candela per square meter, cd/m▓), specified as positive-only linear light RGB, using the REC 709 colorspace gamut (but NOT the sRGB transfer function, i.e. linear light).
+		// By default the encoder assumes the input is in absolute luminance (in nits or candela per square meter, cd/m^2), specified as positive-only linear light RGB, using the REC 709 colorspace gamut (but NOT the sRGB transfer function, i.e. linear light).
+ 		// If the m_rec2020_bt2100_color_gamut flag is true, the input colorspace is treated as REC 2020/BT.2100 (which is wider than 709).
+ 		// For SDR/LDR->HDR upconversion, the REC 709 sRGB input should be converted to linear light (sRGB->linear) and the resulting normalized linear RGB values scaled by either 80 or 100 nits (the luminance of a typical SDR monitor). 
+ 		// SDR upconversion to normalized [0,1] (i.e. non-absolute) luminances may work but is not supported because ITP errors will not be predicted correctly.
--- a/thirdparty/basis_universal/patches/0006-ambiguous-calls.patch
+++ b/thirdparty/basis_universal/patches/0006-ambiguous-calls.patch
@ -0,0 +1,22 @@
+diff --git a/thirdparty/basis_universal/transcoder/basisu_containers.h b/thirdparty/basis_universal/transcoder/basisu_containers.h
+index 03fae33974..7fff4c243e 100644
+--- a/thirdparty/basis_universal/transcoder/basisu_containers.h
+++ b/thirdparty/basis_universal/transcoder/basisu_containers.h
+@@ -3349,7 +3349,7 @@ namespace basisu
+ 
+ 		inline size_t hash_key(const Key& k) const
+ 		{
+-			assert((safe_shift_left(1ULL, (SIZE_T_BITS - m_hash_shift))) == m_values.size());
+			assert((safe_shift_left(static_cast<uint64_t>(1), (SIZE_T_BITS - m_hash_shift))) == m_values.size());
+ 
+ 			// Fibonacci hashing
+ 			if (SIZE_T_BITS == 32)
+@@ -3433,7 +3433,7 @@ namespace basisu
+ 				return false;
+ 
+ 			new_map.m_hash_shift = SIZE_T_BITS - helpers::floor_log2i((uint64_t)new_hash_size);
+-			assert(new_hash_size == safe_shift_left(1ULL, SIZE_T_BITS - new_map.m_hash_shift));
+			assert(new_hash_size == safe_shift_left(static_cast<uint64_t>(1), SIZE_T_BITS - new_map.m_hash_shift));
+ 
+ 			new_map.m_grow_threshold = std::numeric_limits<size_t>::max();
+ 
--- a/thirdparty/basis_universal/transcoder/basisu.h
+++ b/thirdparty/basis_universal/transcoder/basisu.h
@ -21,33 +21,6 @@
 	#pragma warning (disable : 4127) // warning C4127: conditional expression is constant
 	#pragma warning (disable : 4530) // C++ exception handler used, but unwind semantics are not enabled.
 	
-	// Slamming this off always for v1.16 because we've gotten rid of most std containers.
-	#ifndef BASISU_NO_ITERATOR_DEBUG_LEVEL
-		#define BASISU_NO_ITERATOR_DEBUG_LEVEL (1)
-	#endif
-
-	#ifndef BASISU_NO_ITERATOR_DEBUG_LEVEL
-		//#define _HAS_ITERATOR_DEBUGGING 0
-
-		#if defined(_DEBUG) || defined(DEBUG)
-			// This is madness, but we need to disable iterator debugging in debug builds or the encoder is unsable because MSVC's iterator debugging implementation is totally broken.
-			#ifndef _ITERATOR_DEBUG_LEVEL
-			#define _ITERATOR_DEBUG_LEVEL 1
-			#endif
-			#ifndef _SECURE_SCL
-			#define _SECURE_SCL 1
-			#endif
-		#else // defined(_DEBUG) || defined(DEBUG)
-			#ifndef _SECURE_SCL
-			#define _SECURE_SCL 0
-			#endif
-			#ifndef _ITERATOR_DEBUG_LEVEL
-			#define _ITERATOR_DEBUG_LEVEL 0
-			#endif
-		#endif // defined(_DEBUG) || defined(DEBUG)
-
-	#endif // BASISU_NO_ITERATOR_DEBUG_LEVEL
-
 #endif // _MSC_VER

 #include <stdlib.h>
@ -66,6 +39,7 @@
 #include <type_traits>
 #include <assert.h>
 #include <random>
+#include <inttypes.h>

 #include "basisu_containers.h"

@ -114,6 +88,7 @@ namespace basisu
 	typedef basisu::vector<int16_t> int16_vec;
 	typedef basisu::vector<uint16_t> uint16_vec;
 	typedef basisu::vector<uint32_t> uint_vec;
+	typedef basisu::vector<size_t> size_t_vec;
 	typedef basisu::vector<uint64_t> uint64_vec;
 	typedef basisu::vector<int> int_vec;
 	typedef basisu::vector<bool> bool_vec;
@ -121,6 +96,16 @@ namespace basisu

 	void enable_debug_printf(bool enabled);
 	void debug_printf(const char *pFmt, ...);
+	void debug_puts(const char* p);
+
+	// Formats pFmt with args through fmt_variants() and hands the text to debug_puts(); emits nothing if formatting fails.
+	template <typename... Args>
+	inline void fmt_debug_printf(const char* pFmt, Args&&... args)
+	{
+		std::string formatted;
+		if (fmt_variants(formatted, pFmt, fmt_variant_vec{ fmt_variant(std::forward<Args>(args))... }))
+			debug_puts(formatted.c_str());
+	}

 #ifndef __EMSCRIPTEN__
 #ifdef __GNUC__
@ -137,16 +122,13 @@ namespace basisu
 #endif                            
 #endif

+	constexpr double cPiD = 3.14159265358979323846264338327950288;
+	constexpr float REALLY_SMALL_FLOAT_VAL = .000000125f;
+	constexpr float SMALL_FLOAT_VAL = .0000125f;
+	constexpr float BIG_FLOAT_VAL = 1e+30f;
+
 	template <typename T0, typename T1> inline T0 lerp(T0 a, T0 b, T1 c) { return a + (b - a) * c; }
 		
-	template <typename S> inline S maximum(S a, S b) { return (a > b) ? a : b; }
-	template <typename S> inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); }
-	template <typename S> inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); }
-	
-	template <typename S> inline S minimum(S a, S b) {	return (a < b) ? a : b; }
-	template <typename S> inline S minimum(S a, S b, S c) {	return minimum(minimum(a, b), c); }
-	template <typename S> inline S minimum(S a, S b, S c, S d) { return minimum(minimum(minimum(a, b), c), d); }
-
 	inline float clampf(float value, float low, float high) { if (value < low) value = low; else if (value > high) value = high;	return value; }
 	inline float saturate(float value) { return clampf(value, 0, 1.0f); }
 	inline uint8_t minimumub(uint8_t a, uint8_t b) { return (a < b) ? a : b; }
@ -159,9 +141,30 @@ namespace basisu
 	inline float maximumf(float a, float b) { return (a > b) ? a : b; }
 	inline int squarei(int i) { return i * i; }
 	inline float squaref(float i) { return i * i; }
+	inline double squared(double i) { return i * i; }
 	template<typename T> inline T square(T a) { return a * a; }
+	template<typename T> inline T sign(T a) { return (a < 0) ? (T)-1 : ((a == 0) ? (T)0 : (T)1); }
 		
-	template <typename S> inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); }
+	inline bool equal_tol(float a, float b, float t) { return fabsf(a - b) <= ((maximum(fabsf(a), fabsf(b)) + 1.0f) * t); }
+	inline bool equal_tol(double a, double b, double t) { return fabs(a - b) <= ((maximum(fabs(a), fabs(b)) + 1.0f) * t); }
+
+	// Returns the index before i in a ring of n entries, wrapping to n - 1. Tests i itself: for unsigned T, i - 1 is never < 0.
+	template <class T>
+	inline T prev_wrap(T i, T n)
+	{
+		if (i < static_cast<T>(1))
+			return n - 1;
+		return i - 1;
+	}
+
+	// Returns the index after i in a ring of n entries: i + 1, or 0 once i + 1 reaches n.
+	template <class T>
+	inline T next_wrap(T i, T n)
+	{
+		const T following = i + 1;
+		const bool past_end = (following >= n);
+		return past_end ? static_cast<T>(0) : following;
+	}
 		
 	inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast<uint32_t>(-i) : static_cast<uint32_t>(i);	}
 	inline uint64_t iabs64(int64_t i) {	return (i < 0) ? static_cast<uint64_t>(-i) : static_cast<uint64_t>(i); }
@ -356,6 +359,7 @@ namespace basisu
 			return *this;
 		}

+#if 0
 #ifdef __GNUC__
 #pragma GCC diagnostic push
 #pragma GCC diagnostic ignored "-Warray-bounds"            
@ -414,6 +418,57 @@ namespace basisu
 #ifdef __GNUC__
 #pragma GCC diagnostic pop
 #endif
+#else
+		inline operator uint32_t() const // Decodes m_bytes by NumBytes: 1-3 bytes are combined inline with m_bytes[0] least significant; wider sizes go through read_le_dword().
+		{
+			if constexpr (NumBytes == 1)
+			{
+				return m_bytes[0];
+			}
+			else if constexpr (NumBytes == 2)
+			{
+				return (m_bytes[1] << 8U) | m_bytes[0];
+			}
+			else if constexpr (NumBytes == 3)
+			{
+				return (m_bytes[2] << 16U) | (m_bytes[1] << 8U) | m_bytes[0];
+			}
+			else if constexpr (NumBytes == 4)
+			{
+				return read_le_dword(m_bytes);
+			}
+			else if constexpr (NumBytes == 5) // NOTE(review): in the 5-8 byte branches the 64-bit expression is narrowed to the uint32_t return type, so only `l` reaches the caller - confirm this is intended.
+			{
+				uint32_t l = read_le_dword(m_bytes);
+				uint32_t h = m_bytes[4];
+				return static_cast<uint64_t>(l) | (static_cast<uint64_t>(h) << 32U);
+			}
+			else if constexpr (NumBytes == 6)
+			{
+				uint32_t l = read_le_dword(m_bytes);
+				uint32_t h = (m_bytes[5] << 8U) | m_bytes[4];
+				return static_cast<uint64_t>(l) | (static_cast<uint64_t>(h) << 32U);
+			}
+			else if constexpr (NumBytes == 7)
+			{
+				uint32_t l = read_le_dword(m_bytes);
+				uint32_t h = (m_bytes[6] << 16U) | (m_bytes[5] << 8U) | m_bytes[4];
+				return static_cast<uint64_t>(l) | (static_cast<uint64_t>(h) << 32U);
+			}
+			else if constexpr (NumBytes == 8)
+			{
+				uint32_t l = read_le_dword(m_bytes);
+				uint32_t h = read_le_dword(m_bytes + 4);
+				return static_cast<uint64_t>(l) | (static_cast<uint64_t>(h) << 32U);
+			}
+			else // Reached only when NumBytes is not 1-8; the static_assert below then rejects any size above 8 at compile time.
+			{
+				static_assert(NumBytes <= 8, "Invalid NumBytes");
+				return 0;
+			}
+		}
+		#endif
+
 	};

 	enum eZero { cZero };
@ -446,7 +501,7 @@ namespace basisu
 	static const uint8_t g_huffman_sorted_codelength_codes[] = { cHuffmanSmallZeroRunCode, cHuffmanBigZeroRunCode,	cHuffmanSmallRepeatCode, cHuffmanBigRepeatCode, 0, 8, 7, 9, 6, 0xA, 5, 0xB, 4, 0xC, 3, 0xD, 2, 0xE, 1, 0xF, 0x10 };
 	const uint32_t cHuffmanTotalSortedCodelengthCodes = sizeof(g_huffman_sorted_codelength_codes) / sizeof(g_huffman_sorted_codelength_codes[0]);

-	// GPU texture formats
+	// GPU texture formats and various uncompressed texture formats.

 	enum class texture_format
 	{
@ -466,7 +521,8 @@ namespace basisu
 		cBC6HUnsigned,		// HDR
 		cBC7,
 		cASTC_LDR_4x4,		// ASTC 4x4 LDR only
-		cASTC_HDR_4x4,		// ASTC 4x4 HDR only (but may use LDR ASTC blocks internally)
+		cASTC_HDR_4x4,		// ASTC 4x4 HDR only (but may use LDR ASTC blocks internally, although our encoders don't do this)
+		cASTC_HDR_6x6,		// ASTC 6x6 HDR only (but may use LDR ASTC blocks internally, although our encoders don't do this)
 		cPVRTC1_4_RGB,
 		cPVRTC1_4_RGBA,
 		cATC_RGB,
@ -491,8 +547,33 @@ namespace basisu
 		cRGB_9E5
 	};

+	// Classifies fmt as uncompressed: true only for the eight formats named in the case labels below.
+	inline bool is_uncompressed_texture_format(texture_format fmt)
+	{
+		switch (fmt)
+		{
+		case texture_format::cRGBA32:
+		case texture_format::cRGB565:
+		case texture_format::cBGR565:
+		case texture_format::cRGBA4444:
+		case texture_format::cABGR4444:
+		case texture_format::cRGBA_HALF:
+		case texture_format::cRGB_HALF:
+		case texture_format::cRGB_9E5:
+			return true;
+		default:
+			// Anything not listed above is reported as not uncompressed.
+			return false;
+		}
+	}
+
+	inline bool is_block_based_texture_format(texture_format fmt)
+	{
+		return !is_uncompressed_texture_format(fmt);
+	}
+
 	// This is bytes per block for GPU formats, or bytes per texel for uncompressed formats.
-	inline uint32_t get_bytes_per_block(texture_format fmt)
+	inline uint32_t get_bytes_per_block_or_pixel(texture_format fmt)
 	{
 		switch (fmt)
 		{
@ -534,16 +615,22 @@ namespace basisu
 	// This is qwords per block for GPU formats, or not valid for uncompressed formats.
 	inline uint32_t get_qwords_per_block(texture_format fmt)
 	{
-		return get_bytes_per_block(fmt) >> 3;
+		assert(is_block_based_texture_format(fmt));
+
+		const uint32_t bytes_per_block = get_bytes_per_block_or_pixel(fmt);
+		return bytes_per_block >> 3;
 	}

 	inline uint32_t get_block_width(texture_format fmt)
 	{
-		BASISU_NOTE_UNUSED(fmt);
+		assert(is_block_based_texture_format(fmt));
+
 		switch (fmt)
 		{
 		case texture_format::cFXT1_RGB:
 			return 8;
+		case texture_format::cASTC_HDR_6x6:
+			return 6;
 		default:
 			break;
 		}
@ -552,20 +639,42 @@ namespace basisu

 	inline uint32_t get_block_height(texture_format fmt)
 	{
-		BASISU_NOTE_UNUSED(fmt);
+		assert(is_block_based_texture_format(fmt));
+
+		switch (fmt)
+		{
+		case texture_format::cASTC_HDR_6x6:
+			return 6;
+		default:
+			break;
+		}
 		return 4;
 	}

 	inline bool is_hdr_texture_format(texture_format fmt)
 	{
-		if (fmt == texture_format::cASTC_HDR_4x4)
-			return true;
-		if (fmt == texture_format::cUASTC_HDR_4x4)
-			return true;
-		if ((fmt == texture_format::cBC6HSigned) || (fmt == texture_format::cBC6HUnsigned))
+		switch (fmt)
+		{
+		case texture_format::cASTC_HDR_4x4:
+		case texture_format::cUASTC_HDR_4x4:
+		case texture_format::cASTC_HDR_6x6:
+		case texture_format::cBC6HSigned:
+		case texture_format::cBC6HUnsigned:
+		case texture_format::cRGBA_HALF:
+		case texture_format::cRGB_HALF:
+		case texture_format::cRGB_9E5:
 			return true;
+		default:
+			break;
+		}
+
 		return false;
 	}

+	inline bool is_ldr_texture_format(texture_format fmt)
+	{
+		return !is_hdr_texture_format(fmt);
+	}
+							
 } // namespace basisu

--- a/thirdparty/basis_universal/transcoder/basisu_astc_hdr_core.h
+++ b/thirdparty/basis_universal/transcoder/basisu_astc_hdr_core.h
@ -35,40 +35,17 @@ namespace basist
 	const uint32_t MAX_QLOG16 = 63487;
 	const float MAX_QLOG16_VAL = 65504.0f;

+	// TODO: Should be called something like "NUM_MODE11_ENDPOINT_VALUES"
 	const uint32_t NUM_MODE11_ENDPOINTS = 6, NUM_MODE7_ENDPOINTS = 4;

-	// Notes:
-	// qlog16_to_half(half_to_qlog16(half_val_as_int)) == half_val_as_int (is lossless)
-	// However, this is not lossless in the general sense.
-	inline half_float qlog16_to_half_slow(uint32_t qlog16)
-	{
-		assert(qlog16 <= 0xFFFF);
-
-		int C = qlog16;
-
-		int E = (C & 0xF800) >> 11;
-		int M = C & 0x7FF;
-
-		int Mt;
-		if (M < 512)
-			Mt = 3 * M;
-		else if (M >= 1536)
-			Mt = 5 * M - 2048;
-		else
-			Mt = 4 * M - 512;
-
-		int Cf = (E << 10) + (Mt >> 3);
-		return (half_float)Cf;
-	}
-
 	// This is not lossless
-	inline half_float qlog_to_half_slow(uint32_t qlog, uint32_t bits)
+	inline half_float qlog_to_half(uint32_t qlog, uint32_t bits)
 	{
 		assert((bits >= 7U) && (bits <= 16U));
 		assert(qlog < (1U << bits));

 		int C = qlog << (16 - bits);
-		return qlog16_to_half_slow(C);
+		return astc_helpers::qlog16_to_half(C);
 	}

 	void astc_hdr_core_init();
@ -99,4 +76,131 @@ namespace basist
 	bool astc_hdr_transcode_to_bc6h(const astc_blk& src_blk, bc6h_block& dst_blk);
 	bool astc_hdr_transcode_to_bc6h(const astc_helpers::log_astc_block& log_blk, bc6h_block& dst_blk);

+	namespace astc_6x6_hdr
+	{
+		const uint32_t MAX_ASTC_HDR_6X6_DIM = 32768;
+		const int32_t REUSE_MAX_BUFFER_ROWS = 5; // 1+-(-4), so we need to buffer 5 rows total
+
+		struct block_mode_desc
+		{
+			bool m_dp;
+			uint32_t m_cem;
+			uint32_t m_num_partitions;
+			uint32_t m_grid_x;
+			uint32_t m_grid_y;
+
+			// the coding ISE ranges (which may not be valid ASTC ranges for this configuration)
+			uint32_t m_endpoint_ise_range;
+			uint32_t m_weight_ise_range;
+
+			// the physical/output ASTC decompression ISE ranges (i.e. what the decompressor must output)
+			uint32_t m_transcode_endpoint_ise_range;
+			uint32_t m_transcode_weight_ise_range;
+
+			uint32_t m_flags;
+			int m_dp_channel;
+		};
+
+		// Lack of level flag indicates level 3+
+		const uint32_t BASIST_HDR_6X6_LEVEL0 = 1;
+		const uint32_t BASIST_HDR_6X6_LEVEL1 = 2;
+		const uint32_t BASIST_HDR_6X6_LEVEL2 = 4;
+
+		const uint32_t TOTAL_BLOCK_MODE_DECS = 75;
+		extern const block_mode_desc g_block_mode_descs[TOTAL_BLOCK_MODE_DECS];
+
+		void copy_weight_grid(bool dual_plane, uint32_t grid_x, uint32_t grid_y, const uint8_t* transcode_weights, astc_helpers::log_astc_block& decomp_blk);
+
+		enum class encoding_type
+		{
+			cInvalid = -1,
+			cRun = 0,
+			cSolid = 1,
+			cReuse = 2,
+			cBlock = 3,
+			cTotal
+		};
+
+		const uint32_t REUSE_XY_DELTA_BITS = 5;
+		const uint32_t NUM_REUSE_XY_DELTAS = 1 << REUSE_XY_DELTA_BITS;
+
+		struct reuse_xy_delta
+		{
+			int8_t m_x, m_y;
+		};
+
+		extern const reuse_xy_delta g_reuse_xy_deltas[NUM_REUSE_XY_DELTAS];
+
+		const uint32_t RUN_CODE = 0b000, RUN_CODE_LEN = 3;
+		const uint32_t SOLID_CODE = 0b100, SOLID_CODE_LEN = 3;
+		const uint32_t REUSE_CODE = 0b10, REUSE_CODE_LEN = 2;
+		const uint32_t BLOCK_CODE = 0b1, BLOCK_CODE_LEN = 1;
+
+		enum class endpoint_mode
+		{
+			cInvalid = -1,
+
+			cRaw = 0,
+			cUseLeft,
+			cUseUpper,
+			cUseLeftDelta,
+			cUseUpperDelta,
+
+			cTotal
+		};
+
+		enum class block_mode
+		{
+			cInvalid = -1,
+
+			cBMTotalModes = TOTAL_BLOCK_MODE_DECS
+		};
+
+		const uint32_t NUM_ENDPOINT_DELTA_BITS = 5;
+
+		const uint32_t NUM_UNIQUE_PARTITIONS2 = 521;
+		extern const uint32_t g_part2_unique_index_to_seed[NUM_UNIQUE_PARTITIONS2];
+
+		const uint32_t NUM_UNIQUE_PARTITIONS3 = 333;
+		extern const uint32_t g_part3_unique_index_to_seed[NUM_UNIQUE_PARTITIONS3];
+
+		bool decode_values(basist::bitwise_decoder& decoder, uint32_t total_values, uint32_t ise_range, uint8_t* pValues);
+
+		void requantize_astc_weights(uint32_t n, const uint8_t* pSrc_ise_vals, uint32_t from_ise_range, uint8_t* pDst_ise_vals, uint32_t to_ise_range);
+
+		void requantize_ise_endpoints(uint32_t cem, uint32_t src_ise_endpoint_range, const uint8_t* pSrc_endpoints, uint32_t dst_ise_endpoint_range, uint8_t* pDst_endpoints);
+
+		const uint32_t BC6H_NUM_DIFF_ENDPOINT_MODES_TO_TRY_2 = 2;
+		const uint32_t BC6H_NUM_DIFF_ENDPOINT_MODES_TO_TRY_4 = 4;
+		const uint32_t BC6H_NUM_DIFF_ENDPOINT_MODES_TO_TRY_9 = 9;
+
+		// Tuning knobs taken by fast_encode_bc6h(); a new instance starts with the values init() assigns.
+		struct fast_bc6h_params
+		{
+			uint32_t m_num_diff_endpoint_modes_to_try;
+			uint32_t m_max_2subset_pats_to_try;
+
+			bool m_hq_ls;
+			bool m_brute_force_weight4_assignment;
+
+			fast_bc6h_params() { init(); }
+
+			// Resets every field to its default value.
+			void init()
+			{
+				// Assigned in declaration order.
+				m_num_diff_endpoint_modes_to_try = BC6H_NUM_DIFF_ENDPOINT_MODES_TO_TRY_2;
+				m_max_2subset_pats_to_try = 1;
+				m_hq_ls = true;
+				m_brute_force_weight4_assignment = false;
+			}
+		};
+
+		void fast_encode_bc6h(const basist::half_float* pPixels, basist::bc6h_block* pBlock, const fast_bc6h_params &params);
+
+		bool decode_6x6_hdr(const uint8_t* pComp_data, uint32_t comp_data_size, basisu::vector2D<astc_helpers::astc_block>& decoded_blocks, uint32_t& width, uint32_t& height);
+
+	} // namespace astc_6x6_hdr
+
 } // namespace basist
+
--- a/thirdparty/basis_universal/transcoder/basisu_astc_helpers.h
+++ b/thirdparty/basis_universal/transcoder/basisu_astc_helpers.h
@ -15,6 +15,7 @@ namespace astc_helpers
 	const uint32_t MIN_GRID_DIM = 2; // the minimum dimension of a block's weight grid
 	const uint32_t MIN_BLOCK_DIM = 4, MAX_BLOCK_DIM = 12; // the valid block dimensions in texels
 	const uint32_t MAX_GRID_WEIGHTS = 64; // a block may have a maximum of 64 weight grid values
+	const uint32_t NUM_MODE11_ENDPOINTS = 6, NUM_MODE7_ENDPOINTS = 4;

 	static const uint32_t NUM_ASTC_BLOCK_SIZES = 14;
 	extern const uint8_t g_astc_block_sizes[NUM_ASTC_BLOCK_SIZES][2];
@ -108,25 +109,30 @@ namespace astc_helpers
 		bool m_error_flag;
 		
 		bool m_solid_color_flag_ldr, m_solid_color_flag_hdr;
-		uint16_t m_solid_color[4];
+
+		uint8_t m_user_mode;					// user defined value, not used in this module
 		
 		// Rest is only valid if !m_solid_color_flag_ldr && !m_solid_color_flag_hdr
-		uint32_t m_grid_width, m_grid_height;	// weight grid dimensions, not the dimension of the block
+		uint8_t m_grid_width, m_grid_height;	// weight grid dimensions, not the dimension of the block
 		
 		bool m_dual_plane;

-		uint32_t m_weight_ise_range;			// 0-11
-		uint32_t m_endpoint_ise_range;			// 4-20, this is actually inferred from the size of the other config bits+weights, but this is here for checking
+		uint8_t m_weight_ise_range;				// 0-11
+		uint8_t m_endpoint_ise_range;			// 4-20, this is actually inferred from the size of the other config bits+weights, but this is here for checking

-		uint32_t m_color_component_selector;	// 0-3, 0=GBA R, 1=RBA G, 2=RGA B, 3=RGB A, only used in dual plane mode
+		uint8_t m_color_component_selector;	// 0-3, controls which channel uses the 2nd (odd) weights, only used in dual plane mode

-		uint32_t m_num_partitions;				// or the # of subsets, 1-4 (1-3 if dual plane mode)
-		uint32_t m_partition_id;				// 10-bits, must be 0 if m_num_partitions==1
+		uint8_t m_num_partitions;				// or the # of subsets, 1-4 (1-3 if dual plane mode)
+		uint16_t m_partition_id;				// 10-bits, must be 0 if m_num_partitions==1
 		
-		uint32_t m_color_endpoint_modes[MAX_PARTITIONS]; // each subset's CEM's
+		uint8_t m_color_endpoint_modes[MAX_PARTITIONS]; // each subset's CEM's
 		
+		union
+		{
 			// ISE weight grid values. In dual plane mode, the order is p0,p1,  p0,p1,  etc.
 			uint8_t m_weights[MAX_GRID_WEIGHTS];
+			uint16_t m_solid_color[4];
+		};
 		
 		// ISE endpoint values
 		// Endpoint order examples:
@ -169,7 +175,7 @@ namespace astc_helpers

 	inline int get_ise_sequence_bits(int count, int range)
 	{
-		// See 18.22 Data Size Determination
+		// See 18.22 Data Size Determination - note this will be <= the # of bits actually written by encode_bise(). (It's magic.)
 		int total_bits = g_ise_range_table[range][0] * count;
 		total_bits += (g_ise_range_table[range][1] * 8 * count + 4) / 5;
 		total_bits += (g_ise_range_table[range][2] * 7 * count + 2) / 3;
@ -182,16 +188,26 @@ namespace astc_helpers
 		return (l * (64 - w) + h * w + 32) >> 6;
 	}

-	void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range);
+	void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range, uint32_t *pStats = nullptr);
+
+	// Bit counters that pack_astc_block() adds to when it is handed a non-null pack_stats pointer.
+	struct pack_stats
+	{
+		uint32_t m_header_bits;
+		uint32_t m_endpoint_bits;
+		uint32_t m_weight_bits;
+		inline pack_stats() { clear(); }
+		inline void clear() { m_header_bits = 0; m_endpoint_bits = 0; m_weight_bits = 0; }
+	};

 	// Packs a logical to physical ASTC block. Note this does not validate the block's dimensions (use is_valid_block_size()), just the grid dimensions.
-	bool pack_astc_block(astc_block &phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range = nullptr);
+	bool pack_astc_block(astc_block &phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range = nullptr, pack_stats *pStats = nullptr);

 	// Pack LDR void extent (really solid color) blocks. For LDR, pass in (val | (val << 8)) for each component.
-	void pack_void_extent_ldr(astc_block& blk, uint16_t r, uint16_t g, uint16_t b, uint16_t a);
+	void pack_void_extent_ldr(astc_block& blk, uint16_t r, uint16_t g, uint16_t b, uint16_t a, pack_stats *pStats = nullptr);

 	// Pack HDR void extent (16-bit values are FP16/half floats - no NaN/Inf's)
-	void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah);
+	void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats* pStats = nullptr);

 	// These helpers are all quite slow, but are useful for table preparation.
 	
@ -300,6 +316,24 @@ namespace astc_helpers
 	extern dequant_tables g_dequant_tables;
 	void init_tables(bool init_rank_tabs);

+	struct weighted_sample
+	{
+		uint8_t m_src_x;
+		uint8_t m_src_y;
+		uint8_t m_weights[2][2]; // [y][x], scaled by 16, round by adding 8
+	};
+
+	void compute_upsample_weights(
+		int block_width, int block_height,
+		int weight_grid_width, int weight_grid_height,
+		weighted_sample* pWeights); // there will be block_width * block_height bilinear samples
+
+	void upsample_weight_grid(
+		uint32_t bx, uint32_t by,		// destination/to dimension
+		uint32_t wx, uint32_t wy,		// source/from dimension
+		const uint8_t* pSrc_weights,	// these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
+		uint8_t* pDst_weights);			// [by][bx]
+		
 	// Procedurally returns the texel partition/subset index given the block coordinate and config.
 	int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block);
 		
@ -315,6 +349,27 @@ namespace astc_helpers
 	half_float float_to_half(float val, bool toward_zero);
 	float half_to_float(half_float hval);

+	// Notes:
+	// qlog16_to_half(half_to_qlog16(half_val_as_int)) == half_val_as_int (is lossless)
+	// However, this is not lossless in the general sense.
+	inline half_float qlog16_to_half(int k)
+	{
+		assert((k >= 0) && (k <= 0xFFFF));
+
+		// Bits 11-15 of k become the upper field of the result; bits 0-10 are remapped piecewise below.
+		const int upper5 = (k >> 11) & 0x1F;
+		const int lower11 = k & 0x7FF;
+
+		// Three linear segments that meet at 512 and 1536; start from the middle one.
+		int remapped = 4 * lower11 - 512;
+		if (lower11 < 512)
+			remapped = 3 * lower11;
+		else if (lower11 >= 1536)
+			remapped = 5 * lower11 - 2048;
+
+		return static_cast<half_float>((upper5 << 10) + (remapped >> 3));
+	}
+
 	const int MAX_RGB9E5 = 0xff80;
 	void unpack_rgb9e5(uint32_t packed, float& r, float& g, float& b);
 	uint32_t pack_rgb9e5(float r, float g, float b);
@ -437,7 +492,7 @@ namespace astc_helpers
 	};

 	// Encodes 3 values to output, usable for any range that uses quints and bits
-	static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n)
+	static inline void astc_encode_quints(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n, uint32_t* pStats)
 	{
 		// First extract the quints and the bits from the 3 input values
 		int quints = 0, bits[3];
@ -461,6 +516,9 @@ namespace astc_helpers
 		// Now interleave the 7 encoded quint bits with the bits to form the encoded output. See table 95-96.
 		astc_set_bits(pOutput, bit_pos, bits[0] | (astc_extract_bits(T, 0, 2) << n) | (bits[1] << (3 + n)) | (astc_extract_bits(T, 3, 4) << (3 + n * 2)) |
 			(bits[2] << (5 + n * 2)) | (astc_extract_bits(T, 5, 6) << (5 + n * 3)), 7 + n * 3);
+
+		if (pStats)
+			*pStats += n * 3 + 7;
 	}

 	static const uint8_t g_astc_trit_encode[243] = { 0, 1, 2, 4, 5, 6, 8, 9, 10, 16, 17, 18, 20, 21, 22, 24, 25, 26, 3, 7, 11, 19, 23, 27, 12, 13, 14, 32, 33, 34, 36, 37, 38, 40, 41, 42, 48, 49, 50, 52, 53, 54, 56, 57, 58, 35, 39,
@ -471,7 +529,7 @@ namespace astc_helpers
 		191, 223, 124, 125, 126 };

 	// Encodes 5 values to output, usable for any range that uses trits and bits
-	static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n)
+	static void astc_encode_trits(uint32_t* pOutput, const uint8_t* pValues, uint32_t& bit_pos, int n, uint32_t *pStats)
 	{
 		// First extract the trits and the bits from the 5 input values
 		int trits = 0, bits[5];
@ -497,10 +555,13 @@ namespace astc_helpers
 		
 		astc_set_bits(pOutput, bit_pos, astc_extract_bits(T, 2, 3) | (bits[2] << 2) | (astc_extract_bits(T, 4, 4) << (2 + n)) | (bits[3] << (3 + n)) | (astc_extract_bits(T, 5, 6) << (3 + n * 2)) |
 			(bits[4] << (5 + n * 2)) | (astc_extract_bits(T, 7, 7) << (5 + n * 3)), n * 3 + 6);
+		
+		if (pStats)
+			*pStats += n * 5 + 8;
 	}

 	// Packs values using ASTC's BISE to output buffer.
-	void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range)
+	void encode_bise(uint32_t* pDst, const uint8_t* pSrc_vals, uint32_t bit_pos, int num_vals, int range, uint32_t *pStats)
 	{
 		uint32_t temp[5] = { 0 };

@ -533,19 +594,23 @@ namespace astc_helpers
 				for (int i = 0; i < limit; i++)
 					vals[i] = pSrc_vals[group_index * group_size + i];

+				// Note this always writes a group of 3 or 5 bits values, even for incomplete groups. So it can write more than needed. 
+				// get_ise_sequence_bits() returns the # of bits that must be written for proper decoding.
 				if (group_size == 5)
-					astc_encode_trits(temp, vals, bit_pos, num_bits);
+					astc_encode_trits(temp, vals, bit_pos, num_bits, pStats);
 				else
-					astc_encode_quints(temp, vals, bit_pos, num_bits);
+					astc_encode_quints(temp, vals, bit_pos, num_bits, pStats);
 			}
 		}
 		else
 		{
 			for (int i = 0; i < num_vals; i++)
 				astc_set_bits_1_to_9(temp, bit_pos, pSrc_vals[i], num_bits);
+
+			if (pStats)
+				*pStats += num_vals * num_bits;
 		}

-		// TODO: Could this write too many bits on incomplete blocks?
 		pDst[0] |= temp[0]; pDst[1] |= temp[1];
 		pDst[2] |= temp[2]; pDst[3] |= temp[3];
 	}
@ -652,7 +717,7 @@ namespace astc_helpers
 		return false;
 	}

-	bool pack_astc_block(astc_block& phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range)
+	bool pack_astc_block(astc_block& phys_block, const log_astc_block& log_block, int* pExpected_endpoint_range, pack_stats *pStats)
 	{
 		memset(&phys_block, 0, sizeof(phys_block));

@ -665,12 +730,12 @@ namespace astc_helpers
 				
 		if (log_block.m_solid_color_flag_ldr)
 		{
-			pack_void_extent_ldr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3]);
+			pack_void_extent_ldr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3], pStats);
 			return true;
 		}
 		else if (log_block.m_solid_color_flag_hdr)
 		{
-			pack_void_extent_hdr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3]);
+			pack_void_extent_hdr(phys_block, log_block.m_solid_color[0], log_block.m_solid_color[1], log_block.m_solid_color[2], log_block.m_solid_color[3], pStats);
 			return true;
 		}
 				
@ -688,12 +753,16 @@ namespace astc_helpers
 		if (log_block.m_color_component_selector > 3)
 			return false;

+		// TODO: sanity check grid width/height vs. block's physical width/height
+				
 		uint32_t config_bits = 0;
 		if (!get_config_bits(log_block, config_bits))
 			return false;

 		uint32_t bit_pos = 0;
 		astc_set_bits(&phys_block.m_vals[0], bit_pos, config_bits, 11);
+		if (pStats)
+			pStats->m_header_bits += 11;

 		const uint32_t total_grid_weights = (log_block.m_dual_plane ? 2 : 1) * (log_block.m_grid_width * log_block.m_grid_height);
 		const uint32_t total_weight_bits = get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range);
@ -705,6 +774,8 @@ namespace astc_helpers
 		uint32_t total_extra_bits = 0;

 		astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_num_partitions - 1, 2);
+		if (pStats)
+			pStats->m_header_bits += 2;

 		if (log_block.m_num_partitions > 1)
 		{
@ -712,12 +783,14 @@ namespace astc_helpers
 				return false;

 			astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_partition_id, 10);
+			if (pStats)
+				pStats->m_header_bits += 10;

 			uint32_t highest_cem = 0, lowest_cem = UINT32_MAX;
 			for (uint32_t j = 0; j < log_block.m_num_partitions; j++)
 			{
-				highest_cem = my_max(highest_cem, log_block.m_color_endpoint_modes[j]);
-				lowest_cem = my_min(lowest_cem, log_block.m_color_endpoint_modes[j]);
+				highest_cem = my_max<uint32_t>(highest_cem, log_block.m_color_endpoint_modes[j]);
+				lowest_cem = my_min<uint32_t>(lowest_cem, log_block.m_color_endpoint_modes[j]);
 			}

 			if (highest_cem > 15)
@ -752,9 +825,13 @@ namespace astc_helpers

 				uint32_t cem_bit_pos = 128 - total_weight_bits - total_extra_bits;
 				astc_set_bits(&phys_block.m_vals[0], cem_bit_pos, encoded_cem >> 6, total_extra_bits);
+				if (pStats)
+					pStats->m_header_bits += total_extra_bits;
 			}

 			astc_set_bits(&phys_block.m_vals[0], bit_pos, encoded_cem & 0x3f, 6);
+			if (pStats)
+				pStats->m_header_bits += 6;
 		}
 		else
 		{
@ -764,6 +841,8 @@ namespace astc_helpers
 				return false;

 			astc_set_bits(&phys_block.m_vals[0], bit_pos, log_block.m_color_endpoint_modes[0], 4);
+			if (pStats)
+				pStats->m_header_bits += 4;
 		}

 		if (log_block.m_dual_plane)
@ -775,6 +854,8 @@ namespace astc_helpers
 			
 			uint32_t ccs_bit_pos = 128 - (int)total_weight_bits - (int)total_extra_bits;
 			astc_set_bits(&phys_block.m_vals[0], ccs_bit_pos, log_block.m_color_component_selector, 2);
+			if (pStats)
+				pStats->m_header_bits += 2;
 		}

 		const uint32_t total_config_bits = bit_pos + total_extra_bits;
@ -812,6 +893,12 @@ namespace astc_helpers
 			return false;
 		}

+		if (pStats)
+		{
+			pStats->m_endpoint_bits += get_ise_sequence_bits(total_cem_vals, endpoint_ise_range);
+			pStats->m_weight_bits += get_ise_sequence_bits(total_grid_weights, log_block.m_weight_ise_range);
+		}
+
 		// Pack endpoints forwards
 		encode_bise(&phys_block.m_vals[0], log_block.m_endpoints, bit_pos, total_cem_vals, endpoint_ise_range);
 		
@ -1210,7 +1297,7 @@ namespace astc_helpers
 		}
 	}

-	void pack_void_extent_ldr(astc_block &blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah)
+	void pack_void_extent_ldr(astc_block &blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats* pStats)
 	{
 		uint8_t* pDst = (uint8_t*)&blk.m_vals[0];
 		memset(pDst, 0xFF, 16);
@ -1226,10 +1313,13 @@ namespace astc_helpers
 		pDst[13] = (uint8_t)(bh >> 8);
 		pDst[14] = (uint8_t)ah;
 		pDst[15] = (uint8_t)(ah >> 8);
+
+		if (pStats)
+			pStats->m_header_bits += 128;
 	}

 	// rh-ah are half-floats
-	void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah) 
+	void pack_void_extent_hdr(astc_block& blk, uint16_t rh, uint16_t gh, uint16_t bh, uint16_t ah, pack_stats *pStats) 
 	{
 		uint8_t* pDst = (uint8_t*)&blk.m_vals[0];
 		memset(pDst, 0xFF, 16);
@ -1244,6 +1334,9 @@ namespace astc_helpers
 		pDst[13] = (uint8_t)(bh >> 8);
 		pDst[14] = (uint8_t)ah;
 		pDst[15] = (uint8_t)(ah >> 8);
+
+		if (pStats)
+			pStats->m_header_bits += 128;
 	}
 		
 	bool is_cem_ldr(uint32_t mode)
@ -1323,22 +1416,17 @@ namespace astc_helpers
 	dequant_tables g_dequant_tables;

 	void precompute_texel_partitions_4x4();
+	void precompute_texel_partitions_6x6();

 	void init_tables(bool init_rank_tabs)
 	{
 		g_dequant_tables.init(init_rank_tabs);
 		
 		precompute_texel_partitions_4x4();
+		precompute_texel_partitions_6x6();
 	}
 		
-	struct weighted_sample
-	{
-		uint8_t m_src_x;
-		uint8_t m_src_y;
-		uint8_t m_weights[2][2]; // [y][x], scaled by 16, round by adding 8
-	};
-
-	static void compute_upsample_weights(
+	void compute_upsample_weights(
 		int block_width, int block_height,
 		int weight_grid_width, int weight_grid_height,
 		weighted_sample* pWeights) // there will be block_width * block_height bilinear samples
@ -1373,7 +1461,7 @@ namespace astc_helpers
 	}

 	// Should be dequantized [0,64] weights
-	static void upsample_weight_grid(
+	void upsample_weight_grid(
 		uint32_t bx, uint32_t by,		// destination/to dimension
 		uint32_t wx, uint32_t wy,		// source/from dimension
 		const uint8_t* pSrc_weights,	// these are dequantized [0,64] weights, NOT ISE symbols, [wy][wx]
@ -1429,6 +1517,7 @@ namespace astc_helpers
 		return p;
 	}

+	// small_block = num_blk_pixels < 31
 	int compute_texel_partition(uint32_t seedIn, uint32_t xIn, uint32_t yIn, uint32_t zIn, int num_partitions, bool small_block)
 	{
 		assert(zIn == 0);
@ -1495,8 +1584,12 @@ namespace astc_helpers
 			: 3;
 	}

+	// 4x4, 2 and 3 subsets
 	static uint32_t g_texel_partitions_4x4[1024][2]; 
 	
+	// 6x6, 2 and 3 subsets (2 subsets low 4 bits, 3 subsets high 4 bits)
+	static uint8_t g_texel_partitions_6x6[1024][6 * 6];
+
 	void precompute_texel_partitions_4x4()
 	{
 		for (uint32_t p = 0; p < 1024; p++)
@ -1518,6 +1611,24 @@ namespace astc_helpers
 		}
 	}

+	void precompute_texel_partitions_6x6()
+	{
+		for (uint32_t p = 0; p < 1024; p++)
+		{
+			for (uint32_t y = 0; y < 6; y++)
+			{
+				for (uint32_t x = 0; x < 6; x++)
+				{
+					const uint32_t p2 = compute_texel_partition(p, x, y, 0, 2, false);
+					const uint32_t p3 = compute_texel_partition(p, x, y, 0, 3, false);
+					
+					assert((p2 <= 1) && (p3 <= 2));
+					g_texel_partitions_6x6[p][x + y * 6] = (uint8_t)((p3 << 4) | p2);
+				}
+			}
+		}
+	}
+
 	static inline int get_precompute_texel_partitions_4x4(uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions)
 	{
 		assert(g_texel_partitions_4x4[1][0]);
@ -1529,6 +1640,17 @@ namespace astc_helpers
 		return (g_texel_partitions_4x4[seed][num_partitions - 2] >> shift) & 3;
 	}

+	static inline int get_precompute_texel_partitions_6x6(uint32_t seed, uint32_t x, uint32_t y, uint32_t num_partitions)
+	{
+		assert(g_texel_partitions_6x6[0][0]);
+		assert(seed < 1024);
+		assert((x <= 5) && (y <= 5));
+		assert((num_partitions >= 2) && (num_partitions <= 3));
+
+		const uint32_t shift = (num_partitions == 3) ? 4 : 0;
+		return (g_texel_partitions_6x6[seed][x + y * 6] >> shift) & 3;
+	}
+
 	void blue_contract(
 		int r, int g, int b, int a, 
 		int &dr, int &dg, int &db, int &da)
@ -2145,24 +2267,6 @@ namespace astc_helpers
 		return x.f;
 	}
 		
-	static inline half_float qlog16_to_half(int k)
-	{
-		assert((k >= 0) && (k <= 0xFFFF));
-
-		int E = (k & 0xF800) >> 11;
-		int M = k & 0x7FF;
-
-		int Mt;
-		if (M < 512)
-			Mt = 3 * M;
-		else if (M >= 1536)
-			Mt = 5 * M - 2048;
-		else
-			Mt = 4 * M - 512;
-
-		return (half_float)((E << 10) + (Mt >> 3));
-	}
-
 	// See https://registry.khronos.org/OpenGL/extensions/EXT/EXT_texture_shared_exponent.txt
 	const int RGB9E5_EXPONENT_BITS = 5, RGB9E5_MANTISSA_BITS = 9, RGB9E5_EXP_BIAS = 15, RGB9E5_MAX_VALID_BIASED_EXP = 31;
 	const int MAX_RGB9E5_EXP = (RGB9E5_MAX_VALID_BIASED_EXP - RGB9E5_EXP_BIAS);
@ -2514,7 +2618,8 @@ namespace astc_helpers

 		// Decode texels
 		const bool small_block = num_blk_pixels < 31;
-		const bool use_precomputed_texel_partitions = (blk_width == 4) && (blk_height == 4) && (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3);
+		const bool use_precomputed_texel_partitions_4x4 = (blk_width == 4) && (blk_height == 4) && (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3);
+		const bool use_precomputed_texel_partitions_6x6 = (blk_width == 6) && (blk_height == 6) && (log_blk.m_num_partitions >= 2) && (log_blk.m_num_partitions <= 3);
 		const uint32_t ccs = log_blk.m_dual_plane ? log_blk.m_color_component_selector : UINT32_MAX;
 		
 		bool success = true;
@ -2527,9 +2632,17 @@ namespace astc_helpers
 				for (uint32_t x = 0; x < blk_width; x++)
 				{
 					const uint32_t pixel_index = x + y * blk_width;
-					const uint32_t subset = (log_blk.m_num_partitions > 1) ? 
-						(use_precomputed_texel_partitions ? get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions) : compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block))
-						: 0;
+					
+					uint32_t subset = 0;
+					if (log_blk.m_num_partitions > 1)
+					{
+						if (use_precomputed_texel_partitions_4x4)
+							subset = get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
+						else if (use_precomputed_texel_partitions_6x6)
+							subset = get_precompute_texel_partitions_6x6(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
+						else
+							subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block);
+					}

 					int comp[3];

@ -2592,9 +2705,17 @@ namespace astc_helpers
 				for (uint32_t x = 0; x < blk_width; x++)
 				{
 					const uint32_t pixel_index = x + y * blk_width;
-					const uint32_t subset = (log_blk.m_num_partitions > 1) ?
-						(use_precomputed_texel_partitions ? get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions) : compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block))
-						: 0;
+					
+					uint32_t subset = 0;
+					if (log_blk.m_num_partitions > 1)
+					{
+						if (use_precomputed_texel_partitions_4x4)
+							subset = get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
+						else if (use_precomputed_texel_partitions_6x6)
+							subset = get_precompute_texel_partitions_6x6(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
+						else
+							subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block);
+					}

 					for (uint32_t c = 0; c < 4; c++)
 					{
@ -2653,9 +2774,16 @@ namespace astc_helpers
 				{
 					const uint32_t pixel_index = x + y * blk_width;

-					const uint32_t subset = (log_blk.m_num_partitions > 1) ?
-						(use_precomputed_texel_partitions ? get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions) : compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block))
-						: 0;
+					uint32_t subset = 0;
+					if (log_blk.m_num_partitions > 1)
+					{
+						if (use_precomputed_texel_partitions_4x4)
+							subset = get_precompute_texel_partitions_4x4(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
+						else if (use_precomputed_texel_partitions_6x6)
+							subset = get_precompute_texel_partitions_6x6(log_blk.m_partition_id, x, y, log_blk.m_num_partitions);
+						else
+							subset = compute_texel_partition(log_blk.m_partition_id, x, y, 0, log_blk.m_num_partitions, small_block);
+					}

 					if (!is_ldr_endpoints[subset])
 					{
@ -3235,10 +3363,10 @@ namespace astc_helpers
 		if (p < 2)
 			return false;
 		
-		log_blk.m_grid_width = W;
-		log_blk.m_grid_height = H;
+		log_blk.m_grid_width = (uint8_t)W;
+		log_blk.m_grid_height = (uint8_t)H;
 		
-		log_blk.m_weight_ise_range = (p - 2) + (P * BISE_10_LEVELS);
+		log_blk.m_weight_ise_range = (uint8_t)((p - 2) + (P * BISE_10_LEVELS));
 		assert(log_blk.m_weight_ise_range <= LAST_VALID_WEIGHT_ISE_RANGE);

 		log_blk.m_dual_plane = Dp;
@ -3441,16 +3569,16 @@ namespace astc_helpers

 		// Right before the weight bits, there may be extra CEM bits, then the 2 CCS bits if dual plane.

-		log_blk.m_num_partitions = bits.get_bits(11, 2) + 1;
+		log_blk.m_num_partitions = (uint8_t)(bits.get_bits(11, 2) + 1);
 		if (log_blk.m_num_partitions == 1)
-			log_blk.m_color_endpoint_modes[0] = bits.get_bits(13, 4); // read CEM bits
+			log_blk.m_color_endpoint_modes[0] = (uint8_t)(bits.get_bits(13, 4)); // read CEM bits
 		else
 		{
 			// 2 or more partitions
 			if (log_blk.m_dual_plane && (log_blk.m_num_partitions == 4))
 				return false;

-			log_blk.m_partition_id = bits.get_bits(13, 10);
+			log_blk.m_partition_id = (uint16_t)bits.get_bits(13, 10);

 			uint32_t cem_bits = bits.get_bits(23, 6);

@ -3458,7 +3586,7 @@ namespace astc_helpers
 			{
 				// All CEM's the same
 				for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
-					log_blk.m_color_endpoint_modes[i] = cem_bits >> 2;
+					log_blk.m_color_endpoint_modes[i] = (uint8_t)(cem_bits >> 2);
 			}
 			else
 			{
@ -3511,7 +3639,7 @@ namespace astc_helpers

 				for (uint32_t i = 0; i < log_blk.m_num_partitions; i++)
 				{
-					log_blk.m_color_endpoint_modes[i] = first_cem_index + (c[i] * 4) + m[i];
+					log_blk.m_color_endpoint_modes[i] = (uint8_t)(first_cem_index + (c[i] * 4) + m[i]);
 					assert(log_blk.m_color_endpoint_modes[i] <= 15);
 				}
 			}
@ -3528,7 +3656,7 @@ namespace astc_helpers
 				return false;

 			uint32_t ccs_bit_pos = end_of_weight_bit_ofs - total_extra_bits;
-			log_blk.m_color_component_selector = bits.get_bits(ccs_bit_pos, 2);
+			log_blk.m_color_component_selector = (uint8_t)(bits.get_bits(ccs_bit_pos, 2));
 		}

 		uint32_t config_bit_pos = 11 + 2; // config+num_parts
@ -3569,7 +3697,7 @@ namespace astc_helpers
 		if (endpoint_ise_range < (int)FIRST_VALID_ENDPOINT_ISE_RANGE)
 			return false;

-		log_blk.m_endpoint_ise_range = endpoint_ise_range;
+		log_blk.m_endpoint_ise_range = (uint8_t)endpoint_ise_range;

 		// Decode endpoints forwards in block
 		decode_bise(log_blk.m_endpoint_ise_range, log_blk.m_endpoints, total_cem_vals, bits, config_bit_pos);
--- a/thirdparty/basis_universal/transcoder/basisu_containers.h
+++ b/thirdparty/basis_universal/transcoder/basisu_containers.h
--- a/thirdparty/basis_universal/transcoder/basisu_containers_impl.h
+++ b/thirdparty/basis_universal/transcoder/basisu_containers_impl.h
@ -7,55 +7,112 @@

 namespace basisu
 {
-   bool elemental_vector::increase_capacity(uint32_t min_new_capacity, bool grow_hint, uint32_t element_size, object_mover pMover, bool nofail)
+	// A container operation has internally panicked in an unrecoverable way.
+	// Either an allocation has failed, or a range or consistency check has failed.
+#ifdef _MSC_VER
+	__declspec(noreturn)
+#else
+	[[noreturn]] 
+#endif
+	void container_abort(const char* pMsg, ...)
+	{
+		assert(0);
+
+		va_list args;
+		va_start(args, pMsg);
+
+		char buf[1024] = {};
+
+#ifdef _MSC_VER
+		vsprintf_s(buf, sizeof(buf), pMsg, args);
+#else
+		vsnprintf(buf, sizeof(buf), pMsg, args);
+#endif
+		va_end(args);
+
+		fputs(buf, stderr);
+
+		std::terminate();
+	}
+
+	bool elemental_vector::increase_capacity(size_t min_new_capacity, bool grow_hint, size_t element_size, object_mover pMover, bool nofail_flag)
 	{
 		assert(m_size <= m_capacity);
+		assert(min_new_capacity >= m_size);
+		assert(element_size);
 		
+		// Basic sanity check min_new_capacity
+		if (!can_fit_into_size_t((uint64_t)min_new_capacity * element_size))
+		{
+			assert(0);
+			
+			if (nofail_flag)
+				return false;
+
+			container_abort("elemental_vector::increase_capacity: requesting too many elements\n");
+		}
+
+		// Check for sane library limits
 		if (sizeof(void*) == sizeof(uint64_t))
+		{
+			// 16 GB
 			assert(min_new_capacity < (0x400000000ULL / element_size));
+		}
 		else
+		{
+			// ~1.99 GB
 			assert(min_new_capacity < (0x7FFF0000U / element_size));
+		}

+		// If vector is already large enough just return.
 		if (m_capacity >= min_new_capacity)
 			return true;

 		uint64_t new_capacity_u64 = min_new_capacity;
+
 		if ((grow_hint) && (!helpers::is_power_of_2(new_capacity_u64)))
+		{
 			new_capacity_u64 = helpers::next_pow2(new_capacity_u64);

-      size_t new_capacity = (size_t)new_capacity_u64;
-      if (new_capacity != new_capacity_u64)
+			if (!can_fit_into_size_t(new_capacity_u64))
 			{
-          if (nofail)
+				assert(0);
+
+				if (nofail_flag)
 					return false;
-          fprintf(stderr, "elemental_vector::increase_capacity: vector too large\n");
-          abort();
+
+				container_abort("elemental_vector::increase_capacity: vector too large\n");
+			}
 		}

-      const uint64_t desired_size_u64 = (uint64_t)element_size * new_capacity;
+		const uint64_t desired_size_u64 = element_size * new_capacity_u64;

-      const size_t desired_size = (size_t)desired_size_u64;
-      if (desired_size_u64 != desired_size)
+		if (!can_fit_into_size_t(desired_size_u64))
 		{
-          if (nofail)
+			assert(0);
+
+			if (nofail_flag)
 				return false;
-          fprintf(stderr, "elemental_vector::increase_capacity: vector too large\n");
-          abort();
+
+			container_abort("elemental_vector::increase_capacity: vector too large\n");
 		}

+		const size_t desired_size = static_cast<size_t>(desired_size_u64);
+						
 		size_t actual_size = 0;
+		BASISU_NOTE_UNUSED(actual_size);
+
 		if (!pMover)
 		{
 			void* new_p = realloc(m_p, desired_size);
 			if (!new_p)
 			{
-            if (nofail)
+				assert(0);
+
+				if (nofail_flag)
 					return false;

-            char buf[256];
-            snprintf(buf, sizeof(buf), "elemental_vector::increase_capacity: realloc() failed allocating %zu bytes", desired_size);
-            fprintf(stderr, "%s", buf);
-            abort();
+				container_abort("elemental_vector::increase_capacity: realloc() failed allocating %zu bytes", desired_size);
 			}

 #if BASISU_VECTOR_DETERMINISTIC
@ -74,13 +131,11 @@ namespace basisu
 			void* new_p = malloc(desired_size);
 			if (!new_p)
 			{
-            if (nofail)
+				assert(0);
+				if (nofail_flag)
 					return false;

-            char buf[256];
-            snprintf(buf, sizeof(buf), "elemental_vector::increase_capacity: malloc() failed allocating %zu bytes", desired_size);
-            fprintf(stderr, "%s", buf);
-            abort();
+				container_abort("elemental_vector::increase_capacity: malloc() failed allocating %zu bytes", desired_size);
 			}

 #if BASISU_VECTOR_DETERMINISTIC
@ -101,10 +156,14 @@ namespace basisu
 			m_p = new_p;
 		}

+#if BASISU_VECTOR_DETERMINISTIC
+		m_capacity = static_cast<size_t>(new_capacity_u64);
+#else
 		if (actual_size > desired_size)
-         m_capacity = static_cast<uint32_t>(actual_size / element_size);
+			m_capacity = static_cast<size_t>(actual_size / element_size);
 		else
-         m_capacity = static_cast<uint32_t>(new_capacity);
+			m_capacity = static_cast<size_t>(new_capacity_u64);
+#endif

 		return true;
 	}
@ -115,8 +174,7 @@ namespace basisu

 	static void handle_hashmap_test_verify_failure(int line)
 	{
-      fprintf(stderr, "HASHMAP_TEST_VERIFY() faild on line %i\n", line);
-      abort();
+		container_abort("HASHMAP_TEST_VERIFY() faild on line %i\n", line);
 	}

 	class counted_obj
@ -131,20 +189,40 @@ namespace basisu
 		counted_obj(const counted_obj& obj) :
 			m_val(obj.m_val)
 		{
+			if (m_val != UINT64_MAX)
 				m_count++;
 		}

+		counted_obj(counted_obj&& obj) :
+			m_val(obj.m_val)
+		{
+			obj.m_val = UINT64_MAX;
+		}
+
+		counted_obj& operator= (counted_obj&& rhs)
+		{
+			if (this != &rhs)
+			{
+				m_val = rhs.m_val;
+				rhs.m_val = UINT64_MAX;
+			}
+			return *this;
+		}
+
 		~counted_obj()
+		{
+			if (m_val != UINT64_MAX)
 			{
 				assert(m_count > 0);
 				m_count--;
 			}
+		}

 		static uint32_t m_count;

-      uint32_t m_val;
+		uint64_t m_val;

-      operator size_t() const { return m_val; }
+		operator size_t() const { return (size_t)m_val; }

 		bool operator== (const counted_obj& rhs) const { return m_val == rhs.m_val; }
 		bool operator== (const uint32_t rhs) const { return m_val == rhs; }
@ -180,6 +258,82 @@ namespace basisu

 	void hash_map_test()
 	{
+		{ // Insert 1M keys, copy the map, then erase from both the original (in order) and the copy (shuffled order).
+			basisu::hash_map<uint32_t> s;
+			uint_vec k;
+
+			for (uint32_t i = 0; i < 1000000; i++)
+			{
+				s.insert(i);
+				k.push_back(i);
+			}
+
+			for (uint32_t i = 0; i < k.size(); i++) // shuffle k by swapping each slot with a random later slot
+			{
+				uint32_t r = (uint32_t)rand() ^ ((uint32_t)rand() << 15); // shift as unsigned: rand() << 15 can overflow int
+
+				uint32_t j = i + (r % (k.size() - i));
+
+				std::swap(k[i], k[j]);
+			}
+
+			basisu::hash_map<uint32_t> s1(s);
+
+			for (uint32_t i = 0; i < 1000000; i++)
+			{
+				auto res = s.find(i);
+				HASHMAP_TEST_VERIFY(res != s.end());
+				HASHMAP_TEST_VERIFY(res->first == i);
+				s.erase(i);
+			}
+
+			for (uint32_t it = 0; it < 1000000; it++)
+			{
+				uint32_t i = k[it];
+
+				auto res = s1.find(i);
+				HASHMAP_TEST_VERIFY(res != s1.end()); // must compare against s1's own end(), not s's
+				HASHMAP_TEST_VERIFY(res->first == i);
+				s1.erase(i);
+			}
+
+			for (uint32_t i = 0; i < 1000000; i++)
+			{
+				auto res = s.find(i);
+				HASHMAP_TEST_VERIFY(res == s.end());
+
+				auto res1 = s1.find(i);
+				HASHMAP_TEST_VERIFY(res1 == s1.end());
+			}
+
+			HASHMAP_TEST_VERIFY(s.empty());
+			HASHMAP_TEST_VERIFY(s1.empty());
+		}
+
+		{
+			typedef basisu::hash_map< uint32_t, basisu::vector<uint32_t> > hm;
+			hm q;
+			
+			basisu::vector<uint32_t> a, b;
+			a.push_back(1);
+			b.push_back(2);
+			b.push_back(3);
+
+			basisu::vector<uint32_t> c(b);
+
+			hm::insert_result ir;
+			q.try_insert(ir, 1, std::move(a));
+			q.try_insert(ir, 2, std::move(b));
+			q.try_insert(ir, std::make_pair(3, c));
+		}
+
+		{
+			typedef basisu::hash_map<counted_obj, counted_obj> my_hash_map;
+			my_hash_map m;
+			counted_obj a, b;
+			m.insert(std::move(a), std::move(b));
+		}
+
 		{
 			basisu::hash_map<uint64_t, uint64_t> k;
 			basisu::hash_map<uint64_t, uint64_t> l;
@ -211,7 +365,7 @@ namespace basisu
 			typedef basisu::hash_map<counted_obj, counted_obj> my_hash_map;
 			my_hash_map m;

-         const uint32_t n = irand32(0, 100000);
+			const uint32_t n = irand32(1, 100000);

 			printf("%u\n", n);

@ -251,10 +405,10 @@ namespace basisu

 			for (uint32_t t = 0; t < 2; t++)
 			{
-            const uint32_t nd = irand32(1, q.size() + 1);
+				const uint32_t nd = irand32(1, q.size_u32() + 1);
 				for (uint32_t i = 0; i < nd; i++)
 				{
-               uint32_t p = irand32(0, q.size());
+					uint32_t p = irand32(0, q.size_u32());

 					int k = q[p];
 					if (k >= 0)
@ -311,4 +465,348 @@ namespace basisu

 #endif // BASISU_HASHMAP_TEST

+	// String formatting
+	// to_string(): clears res, then formats this variant into it using the optional printf-style spec in fmt (no leading '%'). Returns false on an invalid spec/type. May modify fmt (appends a default conversion).
+	bool fmt_variant::to_string(std::string& res, std::string& fmt) const
+	{
+		res.resize(0);
+
+		// Scan for allowed formatting characters: digits and ". #+-" anywhere, plus at most one letter as the final character.
+		for (size_t i = 0; i < fmt.size(); i++)
+		{
+			const unsigned char c = (unsigned char)fmt[i]; // <cctype> functions are undefined for negative (non-ASCII) plain char values
+
+			if (isdigit(c) || (c == '.') || (c == ' ') || (c == '#') || (c == '+') || (c == '-'))
+				continue;
+
+			if (isalpha(c))
+			{
+				if ((i + 1) == fmt.size())
+					continue;
+			}
+
+			return false;
+		}
+
+		if (fmt.size() && (fmt.back() == 'c'))
+		{
+			if ((m_type == variant_type::cI32) || (m_type == variant_type::cU32))
+			{
+				if (m_u32 > 255)
+					return false;
+
+				// Explicitly allowing caller to pass in a char of 0, which is ignored.
+				if (m_u32)
+					res.push_back((uint8_t)m_u32);
+				return true;
+			}
+			else
+				return false;
+		}
+
+		switch (m_type)
+		{
+		case variant_type::cInvalid:
+		{
+			return false;
+		}
+		case variant_type::cI32:
+		{
+			if (fmt.size())
+			{
+				int e = (unsigned char)fmt.back(); // keep e non-negative for isalpha()
+				if (isalpha(e))
+				{
+					if ((e != 'x') && (e != 'X') && (e != 'i') && (e != 'd') && (e != 'u'))
+						return false;
+				}
+				else
+				{
+					fmt += "i";
+				}
+
+				res = string_format((std::string("%") + fmt).c_str(), m_i32);
+			}
+			else
+			{
+				res = string_format("%i", m_i32);
+			}
+			break;
+		}
+		case variant_type::cU32:
+		{
+			if (fmt.size())
+			{
+				int e = (unsigned char)fmt.back(); // keep e non-negative for isalpha()
+				if (isalpha(e))
+				{
+					if ((e != 'x') && (e != 'X') && (e != 'i') && (e != 'd') && (e != 'u'))
+						return false;
+				}
+				else
+				{
+					fmt += "u";
+				}
+
+				res = string_format((std::string("%") + fmt).c_str(), m_u32);
+			}
+			else
+			{
+				res = string_format("%u", m_u32);
+			}
+			break;
+		}
+		case variant_type::cI64:
+		{
+			if (fmt.size())
+			{
+				int e = (unsigned char)fmt.back(); // keep e non-negative for isalpha()
+				if (isalpha(e))
+				{
+					if (e == 'x')
+					{
+						fmt.pop_back();
+						fmt += PRIx64;
+					}
+					else if (e == 'X')
+					{
+						fmt.pop_back();
+						fmt += PRIX64;
+					}
+					else
+						return false;
+				}
+				else
+				{
+					fmt += PRId64;
+				}
+
+				res = string_format((std::string("%") + fmt).c_str(), m_i64);
+			}
+			else
+			{
+				res = string_format("%" PRId64, m_i64);
+			}
+			break;
+		}
+		case variant_type::cU64:
+		{
+			if (fmt.size())
+			{
+				int e = (unsigned char)fmt.back(); // keep e non-negative for isalpha()
+				if (isalpha(e))
+				{
+					if (e == 'x')
+					{
+						fmt.pop_back();
+						fmt += PRIx64;
+					}
+					else if (e == 'X')
+					{
+						fmt.pop_back();
+						fmt += PRIX64;
+					}
+					else
+						return false;
+				}
+				else
+				{
+					fmt += PRIu64;
+				}
+
+				res = string_format((std::string("%") + fmt).c_str(), m_u64);
+			}
+			else
+			{
+				res = string_format("%" PRIu64, m_u64);
+			}
+			break;
+		}
+		case variant_type::cFlt:
+		{
+			if (fmt.size())
+			{
+				int e = (unsigned char)fmt.back(); // keep e non-negative for isalpha()
+				if (isalpha(e))
+				{
+					if ((e != 'f') && (e != 'g') && (e != 'e') && (e != 'E'))
+						return false;
+				}
+				else
+				{
+					fmt += "f";
+				}
+
+				res = string_format((std::string("%") + fmt).c_str(), m_flt);
+			}
+			else
+			{
+				res = string_format("%f", m_flt);
+			}
+			break;
+		}
+		case variant_type::cDbl:
+		{
+			if (fmt.size())
+			{
+				int e = (unsigned char)fmt.back(); // keep e non-negative for isalpha()
+				if (isalpha(e))
+				{
+					if ((e != 'f') && (e != 'g') && (e != 'e') && (e != 'E'))
+						return false;
+				}
+				else
+				{
+					fmt += "f";
+				}
+
+				res = string_format((std::string("%") + fmt).c_str(), m_dbl);
+			}
+			else
+			{
+				res = string_format("%f", m_dbl);
+			}
+			break;
+		}
+		case variant_type::cStrPtr:
+		{
+			if (fmt.size())
+				return false;
+			if (!m_pStr)
+				return false;
+			res = m_pStr;
+			break;
+		}
+		case variant_type::cBool:
+		{
+			if (fmt.size())
+				return false;
+			res = m_bool ? "true" : "false";
+			break;
+		}
+		case variant_type::cStdStr:
+		{
+			if (fmt.size())
+				return false;
+			res = m_str;
+			break;
+		}
+		default:
+		{
+			return false;
+		}
+		}
+
+		return true;
+	}
+
+	bool fmt_variants(std::string& res, const char* pFmt, const fmt_variant_vec& variants)
+	{
+		res.resize(0);
+
+		// Must specify a format string
+		if (!pFmt)
+		{
+			assert(0);
+			return false;
+		}
+
+		// Check format string's length
+		const size_t fmt_len = strlen(pFmt);
+		if (!fmt_len)
+		{
+			if (variants.size())
+			{
+				assert(0);
+				return false;
+			}
+			return true;
+		}
+
+		// Wildly estimate output length
+		res.reserve(fmt_len + 32);
+
+		std::string var_fmt;
+		var_fmt.reserve(16);
+
+		std::string tmp;
+		tmp.reserve(16);
+
+		size_t variant_index = 0;
+		bool inside_brackets = false;
+		const char* p = pFmt;
+
+		while (*p)
+		{
+			const uint8_t c = *p++;
+
+			if (inside_brackets)
+			{
+				if (c == '}')
+				{
+					inside_brackets = false;
+
+					if (variant_index >= variants.size())
+					{
+						assert(0);
+						return false;
+					}
+
+					if (!variants[variant_index].to_string(tmp, var_fmt))
+					{
+						assert(0);
+						return false;
+					}
+
+					res += tmp;
+
+					variant_index++;
+				}
+				else
+				{
+					// Check for forbidden formatting characters.
+					if ((c == '*') || (c == 'n') || (c == '%'))
+					{
+						assert(0);
+						return false;
+					}
+
+					var_fmt.push_back(c);
+				}
+			}
+			else if (c == '{')
+			{
+				// Check for escaped '{'
+				if (*p == '{')
+				{
+					res.push_back((char)c);
+					p++;
+				}
+				else
+				{
+					inside_brackets = true;
+					var_fmt.resize(0);
+				}
+			}
+			else
+			{
+				res.push_back((char)c);
+			}
+		}
+
+		if (inside_brackets)
+		{
+			assert(0);
+			return false;
+		}
+
+		if (variant_index != variants.size())
+		{
+			assert(0);
+			return false;
+		}
+
+		return true;
+	}
+
 } // namespace basisu
--- a/thirdparty/basis_universal/transcoder/basisu_file_headers.h
+++ b/thirdparty/basis_universal/transcoder/basisu_file_headers.h
@ -38,7 +38,7 @@ namespace basist
 		basisu::packed_uint<2> m_orig_width;	// The original image width (may not be a multiple of 4 pixels)
 		basisu::packed_uint<2> m_orig_height;  // The original image height (may not be a multiple of 4 pixels)

-		basisu::packed_uint<2> m_num_blocks_x;	// The slice's block X dimensions. Each block is 4x4 pixels. The slice's pixel resolution may or may not be a power of 2.
+		basisu::packed_uint<2> m_num_blocks_x;	// The slice's block X dimensions. Each block is 4x4 or 6x6 pixels. The slice's pixel resolution may or may not be a power of 2.
 		basisu::packed_uint<2> m_num_blocks_y;	// The slice's block Y dimensions. 

 		basisu::packed_uint<4> m_file_ofs;		// Offset from the start of the file to the start of the slice's data
@ -90,7 +90,10 @@ namespace basist
 	{
 		cETC1S = 0,
 		cUASTC4x4 = 1,
-		cUASTC_HDR_4x4 = 2
+		cUASTC_HDR_4x4 = 2,
+		cASTC_HDR_6x6 = 3,
+		cASTC_HDR_6x6_INTERMEDIATE = 4,
+		cTotalFormats
 	};

 	struct basis_file_header
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder.cpp
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder.h
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder.h
@ -86,7 +86,7 @@ namespace basist
 		cTFETC2_EAC_RG11 = 21,						// RG only (ETC2 EAC RG11 unsigned), R=opaque.r, G=alpha - for tangent space normal maps

 		cTFBC6H = 22,								// HDR, RGB only, unsigned
-		cTFASTC_HDR_4x4_RGBA = 23,				// HDR, RGBA (currently UASTC HDR is only RGB), unsigned
+		cTFASTC_HDR_4x4_RGBA = 23,					// HDR, RGBA (currently UASTC HDR 4x4 encoders are only RGB), unsigned

 		// Uncompressed (raw pixel) formats
 		// Note these uncompressed formats (RGBA32, 565, and 4444) can only be transcoded to from LDR input files (ETC1S or UASTC LDR).
@ -95,14 +95,16 @@ namespace basist
 		cTFBGR565 = 15,								// 16bpp RGB image stored in raster (not block) order in memory, R at bit position 0
 		cTFRGBA4444 = 16,							// 16bpp RGBA image stored in raster (not block) order in memory, R at bit position 12, A at bit position 0
 		
-		// Note these uncompressed formats (HALF and 9E5) can only be transcoded to from HDR input files (UASTC HDR).
+		// Note these uncompressed formats (HALF and 9E5) can only be transcoded to from HDR input files (UASTC HDR 4x4 or ASTC HDR 6x6).
 		cTFRGB_HALF = 24,							// 48bpp RGB half (16-bits/component, 3 components)
 		cTFRGBA_HALF = 25,							// 64bpp RGBA half (16-bits/component, 4 components) (A will always currently 1.0, UASTC_HDR doesn't support alpha)
 		cTFRGB_9E5 = 26,							// 32bpp RGB 9E5 (shared exponent, positive only, see GL_EXT_texture_shared_exponent)

-		cTFTotalTextureFormats = 27,
+		cTFASTC_HDR_6x6_RGBA = 27,					// HDR, RGBA (currently our ASTC HDR 6x6 encoders are only RGB), unsigned

-		// Old enums for compatibility with code compiled against previous versions
+		cTFTotalTextureFormats = 28,
+
+		// ----- The following are old/legacy enums for compatibility with code compiled against previous versions
 		cTFETC1 = cTFETC1_RGB,
 		cTFETC2 = cTFETC2_RGBA,
 		cTFBC1 = cTFBC1_RGB,
@ -138,6 +140,9 @@ namespace basist
 	// Returns true if the format is HDR.
 	bool basis_transcoder_format_is_hdr(transcoder_texture_format fmt);

+	// Returns true if the format is LDR.
+	inline bool basis_transcoder_format_is_ldr(transcoder_texture_format fmt) { return !basis_transcoder_format_is_hdr(fmt); }
+
 	// Returns the basisu::texture_format corresponding to the specified transcoder_texture_format.
 	basisu::texture_format basis_get_basisu_texture_format(transcoder_texture_format fmt);

@ -159,14 +164,25 @@ namespace basist
 	// Returns true if the specified format was enabled at compile time, and is supported for the specific basis/ktx2 texture format (ETC1S, UASTC, or UASTC HDR).
 	bool basis_is_format_supported(transcoder_texture_format tex_type, basis_tex_format fmt = basis_tex_format::cETC1S);

+	// Returns the block width/height for the specified basis texture file format.
+	uint32_t basis_tex_format_get_block_width(basis_tex_format fmt);
+	uint32_t basis_tex_format_get_block_height(basis_tex_format fmt);
+		
+	bool basis_tex_format_is_hdr(basis_tex_format fmt);
+	inline bool basis_tex_format_is_ldr(basis_tex_format fmt) { return !basis_tex_format_is_hdr(fmt); }
+		
 	// Validates that the output buffer is large enough to hold the entire transcoded texture.
 	// For uncompressed texture formats, most input parameters are in pixels, not blocks. Blocks are 4x4 pixels.
 	bool basis_validate_output_buffer_size(transcoder_texture_format target_format,
 		uint32_t output_blocks_buf_size_in_blocks_or_pixels,
 		uint32_t orig_width, uint32_t orig_height,
 		uint32_t output_row_pitch_in_blocks_or_pixels,
-		uint32_t output_rows_in_pixels,
-		uint32_t total_slice_blocks);
+		uint32_t output_rows_in_pixels);
+
+	// Computes the size in bytes of a transcoded image or texture, taking into account the format's block width/height and any minimum size PVRTC1 requirements required by OpenGL.
+	// Note the returned value is not necessarily the # of bytes a transcoder could write to the output buffer due to these minimum PVRTC1 requirements.
+	// (These PVRTC1 requirements are not ours, but OpenGL's.)
+	uint32_t basis_compute_transcoded_image_size_in_bytes(transcoder_texture_format target_format, uint32_t orig_width, uint32_t orig_height);

 	class basisu_transcoder;

@ -197,7 +213,9 @@ namespace basist
 		}
 	};

-	// Low-level helper class that does the actual transcoding.
+	// Low-level helper classes that do the actual transcoding.
+	
+	// ETC1S
 	class basisu_lowlevel_etc1s_transcoder
 	{
 		friend class basisu_transcoder;
@ -216,18 +234,18 @@ namespace basist

 		bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
 			uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const bool is_video, const bool is_alpha_slice, const uint32_t level_index, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
-			basisu_transcoder_state* pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0);
+			basisu_transcoder_state* pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0, uint32_t decode_flags = 0);

 		bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
 			uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
-			basisu_transcoder_state* pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0)
+			basisu_transcoder_state* pState = nullptr, bool astc_transcode_alpha = false, void* pAlpha_blocks = nullptr, uint32_t output_rows_in_pixels = 0, uint32_t decode_flags = 0)
 		{
 			return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt, output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks,
 				header.m_tex_type == cBASISTexTypeVideoFrames, (slice_desc.m_flags & cSliceDescFlagsHasAlpha) != 0, slice_desc.m_level_index,
 				slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels, pState,
 				astc_transcode_alpha,
 				pAlpha_blocks,
-				output_rows_in_pixels);
+				output_rows_in_pixels, decode_flags);
 		}

 		// Container independent transcoding
@ -292,15 +310,18 @@ namespace basist
 		// Used internally when decoding formats like ASTC that require both color and alpha data to be available when transcoding to the output format.
 		cDecodeFlagsOutputHasAlphaIndices = 16,

-		cDecodeFlagsHighQuality = 32
+		cDecodeFlagsHighQuality = 32,
+
+		cDecodeFlagsNoETC1SChromaFiltering = 64
 	};

-	class basisu_lowlevel_uastc_transcoder
+	// UASTC LDR 4x4
+	class basisu_lowlevel_uastc_ldr_4x4_transcoder
 	{
 		friend class basisu_transcoder;

 	public:
-		basisu_lowlevel_uastc_transcoder();
+		basisu_lowlevel_uastc_ldr_4x4_transcoder();

 		bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
 			uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
@ -331,12 +352,87 @@ namespace basist
 			int channel0 = -1, int channel1 = -1);
 	};

-	class basisu_lowlevel_uastc_hdr_transcoder
+	// UASTC HDR 4x4
+	class basisu_lowlevel_uastc_hdr_4x4_transcoder
 	{
 		friend class basisu_transcoder;

 	public:
-		basisu_lowlevel_uastc_hdr_transcoder();
+		basisu_lowlevel_uastc_hdr_4x4_transcoder();
+
+		bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
+			uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
+			basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0);
+
+		bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
+			uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
+			basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0)
+		{
+			return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt,
+				output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels,
+				pState, output_rows_in_pixels, channel0, channel1, decode_flags);
+		}
+
+		// Container independent transcoding
+		bool transcode_image(
+			transcoder_texture_format target_format,
+			void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
+			const uint8_t* pCompressed_data, uint32_t compressed_data_length,
+			uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
+			uint32_t slice_offset, uint32_t slice_length,
+			uint32_t decode_flags = 0,
+			bool has_alpha = false,
+			bool is_video = false,
+			uint32_t output_row_pitch_in_blocks_or_pixels = 0,
+			basisu_transcoder_state* pState = nullptr,
+			uint32_t output_rows_in_pixels = 0,
+			int channel0 = -1, int channel1 = -1);
+	};
+
+	// ASTC HDR 6x6
+	class basisu_lowlevel_astc_hdr_6x6_transcoder
+	{
+		friend class basisu_transcoder;
+
+	public:
+		basisu_lowlevel_astc_hdr_6x6_transcoder();
+
+		bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
+			uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
+			basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0);
+
+		bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
+			uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, const basis_file_header& header, const basis_slice_desc& slice_desc, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
+			basisu_transcoder_state* pState = nullptr, uint32_t output_rows_in_pixels = 0, int channel0 = -1, int channel1 = -1, uint32_t decode_flags = 0)
+		{
+			return transcode_slice(pDst_blocks, num_blocks_x, num_blocks_y, pImage_data, image_data_size, fmt,
+				output_block_or_pixel_stride_in_bytes, bc1_allow_threecolor_blocks, (header.m_flags & cBASISHeaderFlagHasAlphaSlices) != 0, slice_desc.m_orig_width, slice_desc.m_orig_height, output_row_pitch_in_blocks_or_pixels,
+				pState, output_rows_in_pixels, channel0, channel1, decode_flags);
+		}
+
+		// Container independent transcoding
+		bool transcode_image(
+			transcoder_texture_format target_format,
+			void* pOutput_blocks, uint32_t output_blocks_buf_size_in_blocks_or_pixels,
+			const uint8_t* pCompressed_data, uint32_t compressed_data_length,
+			uint32_t num_blocks_x, uint32_t num_blocks_y, uint32_t orig_width, uint32_t orig_height, uint32_t level_index,
+			uint32_t slice_offset, uint32_t slice_length,
+			uint32_t decode_flags = 0,
+			bool has_alpha = false,
+			bool is_video = false,
+			uint32_t output_row_pitch_in_blocks_or_pixels = 0,
+			basisu_transcoder_state* pState = nullptr,
+			uint32_t output_rows_in_pixels = 0,
+			int channel0 = -1, int channel1 = -1);
+	};
+
+	// ASTC HDR 6x6 intermediate
+	class basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder
+	{
+		friend class basisu_transcoder;
+
+	public:
+		basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder();

 		bool transcode_slice(void* pDst_blocks, uint32_t num_blocks_x, uint32_t num_blocks_y, const uint8_t* pImage_data, uint32_t image_data_size, block_format fmt,
 			uint32_t output_block_or_pixel_stride_in_bytes, bool bc1_allow_threecolor_blocks, bool has_alpha, const uint32_t orig_width, const uint32_t orig_height, uint32_t output_row_pitch_in_blocks_or_pixels = 0,
@ -379,6 +475,9 @@ namespace basist
 		uint32_t m_num_blocks_y;
 		uint32_t m_total_blocks;

+		uint32_t m_block_width;
+		uint32_t m_block_height;
+
 		uint32_t m_compressed_size;

 		uint32_t m_slice_index;	// the slice index in the .basis file
@ -404,6 +503,9 @@ namespace basist
 		uint32_t m_width;
 		uint32_t m_height;

+		uint32_t m_block_width;
+		uint32_t m_block_height;
+
 		uint32_t m_num_blocks_x;
 		uint32_t m_num_blocks_y;
 		uint32_t m_total_blocks;
@ -425,6 +527,9 @@ namespace basist
 		uint32_t m_width;
 		uint32_t m_height;

+		uint32_t m_block_width;
+		uint32_t m_block_height;
+
 		uint32_t m_num_blocks_x;
 		uint32_t m_num_blocks_y;
 		uint32_t m_total_blocks;
@ -474,6 +579,9 @@ namespace basist

 		basis_tex_format m_tex_format; // ETC1S, UASTC, etc.

+		uint32_t m_block_width;
+		uint32_t m_block_height;
+
 		bool m_y_flipped;				// true if the image was Y flipped
 		bool m_etc1s;					// true if the file is ETC1S
 		bool m_has_alpha_slices;	// true if the texture has alpha slices (for ETC1S: even slices RGB, odd slices alpha)
@ -502,7 +610,7 @@ namespace basist
 		// Note that the number of mipmap levels for each image may differ, and that images may have different resolutions.
 		uint32_t get_total_images(const void* pData, uint32_t data_size) const;

-		basis_tex_format get_tex_format(const void* pData, uint32_t data_size) const;
+		basis_tex_format get_basis_tex_format(const void* pData, uint32_t data_size) const;

 		// Returns the number of mipmap levels in an image.
 		uint32_t get_total_image_levels(const void* pData, uint32_t data_size, uint32_t image_index) const;
@ -532,7 +640,7 @@ namespace basist
 		// It'll first find the slice(s) to transcode, then call transcode_slice() one or two times to decode both the color and alpha texture data (or RG texture data from two slices for BC5).
 		// If the .basis file doesn't have alpha slices, the output alpha blocks will be set to fully opaque (all 255's).
 		// Currently, to decode to PVRTC1 the basis texture's dimensions in pixels must be a power of 2, due to PVRTC1 format requirements. 
-		// output_blocks_buf_size_in_blocks_or_pixels should be at least the image level's total_blocks (num_blocks_x * num_blocks_y), or the total number of output pixels if fmt==cTFRGBA32.
+		// output_blocks_buf_size_in_blocks_or_pixels should be at least the image level's total_blocks (num_blocks_x * num_blocks_y), or the total number of output pixels if fmt==cTFRGBA32 etc.
 		// output_row_pitch_in_blocks_or_pixels: Number of blocks or pixels per row. If 0, the transcoder uses the slice's num_blocks_x or orig_width (NOT num_blocks_x * 4). Ignored for PVRTC1 (due to texture swizzling).
 		// output_rows_in_pixels: Ignored unless fmt is uncompressed (cRGBA32, etc.). The total number of output rows in the output buffer. If 0, the transcoder assumes the slice's orig_height (NOT num_blocks_y * 4).
 		// Notes: 
@ -574,13 +682,15 @@ namespace basist
 		const basisu_lowlevel_etc1s_transcoder& get_lowlevel_etc1s_decoder() const { return m_lowlevel_etc1s_decoder; }
 		basisu_lowlevel_etc1s_transcoder& get_lowlevel_etc1s_decoder() { return m_lowlevel_etc1s_decoder; }

-		const basisu_lowlevel_uastc_transcoder& get_lowlevel_uastc_decoder() const { return m_lowlevel_uastc_decoder; }
-		basisu_lowlevel_uastc_transcoder& get_lowlevel_uastc_decoder() { return m_lowlevel_uastc_decoder; }
+		const basisu_lowlevel_uastc_ldr_4x4_transcoder& get_lowlevel_uastc_decoder() const { return m_lowlevel_uastc_decoder; }
+		basisu_lowlevel_uastc_ldr_4x4_transcoder& get_lowlevel_uastc_decoder() { return m_lowlevel_uastc_decoder; }

 	private:
 		mutable basisu_lowlevel_etc1s_transcoder m_lowlevel_etc1s_decoder;
-		mutable basisu_lowlevel_uastc_transcoder m_lowlevel_uastc_decoder;
-		mutable basisu_lowlevel_uastc_hdr_transcoder m_lowlevel_uastc_hdr_decoder;
+		mutable basisu_lowlevel_uastc_ldr_4x4_transcoder m_lowlevel_uastc_decoder;
+		mutable basisu_lowlevel_uastc_hdr_4x4_transcoder m_lowlevel_uastc_4x4_hdr_decoder;
+		mutable basisu_lowlevel_astc_hdr_6x6_transcoder m_lowlevel_astc_6x6_hdr_decoder;
+		mutable basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder m_lowlevel_astc_6x6_hdr_intermediate_decoder;

 		bool m_ready_to_transcode;

@ -654,6 +764,12 @@ namespace basist
 		basisu::packed_uint<4> m_alpha_slice_byte_length;
 	};

+	struct ktx2_astc_hdr_6x6_intermediate_image_desc
+	{
+		basisu::packed_uint<4> m_rgb_slice_byte_offset;
+		basisu::packed_uint<4> m_rgb_slice_byte_length;
+	};
+
 	struct ktx2_animdata
 	{
 		basisu::packed_uint<4> m_duration;
@ -663,10 +779,22 @@ namespace basist
 #pragma pack(pop)

 	const uint32_t KTX2_VK_FORMAT_UNDEFINED = 0;
-	const uint32_t KTX2_FORMAT_UASTC_4x4_SFLOAT_BLOCK = 1000066000; // TODO, is this correct?
-	const uint32_t KTX2_KDF_DF_MODEL_UASTC = 166;
-	const uint32_t KTX2_KDF_DF_MODEL_UASTC_HDR = 167;
-	const uint32_t KTX2_KDF_DF_MODEL_ETC1S = 163;
+	
+	// These are standard Vulkan texture VkFormat ID's, see https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkFormat.html
+	const uint32_t KTX2_FORMAT_ASTC_4x4_SFLOAT_BLOCK = 1000066000;
+	const uint32_t KTX2_FORMAT_ASTC_5x4_SFLOAT_BLOCK = 1000066001;
+	const uint32_t KTX2_FORMAT_ASTC_5x5_SFLOAT_BLOCK = 1000066002;
+	const uint32_t KTX2_FORMAT_ASTC_6x5_SFLOAT_BLOCK = 1000066003;
+	const uint32_t KTX2_FORMAT_ASTC_6x6_SFLOAT_BLOCK = 1000066004;
+	const uint32_t KTX2_FORMAT_ASTC_8x5_SFLOAT_BLOCK = 1000066005;
+	const uint32_t KTX2_FORMAT_ASTC_8x6_SFLOAT_BLOCK = 1000066006;
+
+	const uint32_t KTX2_KDF_DF_MODEL_ASTC = 162; // 0xA2
+	const uint32_t KTX2_KDF_DF_MODEL_ETC1S = 163; // 0xA3
+	const uint32_t KTX2_KDF_DF_MODEL_UASTC_LDR_4X4 = 166; // 0xA6
+	const uint32_t KTX2_KDF_DF_MODEL_UASTC_HDR_4X4 = 167; // 0xA7
+	const uint32_t KTX2_KDF_DF_MODEL_ASTC_HDR_6X6_INTERMEDIATE = 168; // 0xA8, TODO - coordinate with Khronos on this
+	
 	const uint32_t KTX2_IMAGE_IS_P_FRAME = 2;
 	const uint32_t KTX2_UASTC_BLOCK_SIZE = 16; // also the block size for UASTC_HDR
 	const uint32_t KTX2_MAX_SUPPORTED_LEVEL_COUNT = 16; // this is an implementation specific constraint and can be increased
@ -679,7 +807,8 @@ namespace basist
 	{
 		KTX2_SS_NONE = 0,
 		KTX2_SS_BASISLZ = 1,
-		KTX2_SS_ZSTANDARD = 2
+		KTX2_SS_ZSTANDARD = 2,
+		KTX2_SS_BASIS
 	};

 	extern const uint8_t g_ktx2_file_identifier[12];
@ -780,10 +909,14 @@ namespace basist
 		uint32_t m_width;
 		uint32_t m_height;
 				
-		// The texture's dimensions in 4x4 texel blocks.
+		// The texture's dimensions in 4x4 or 6x6 texel blocks.
 		uint32_t m_num_blocks_x;
 		uint32_t m_num_blocks_y;

+		// The format's block width/height (currently either 4 or 6).
+		uint32_t m_block_width;
+		uint32_t m_block_height;
+
 		// The total number of blocks
 		uint32_t m_total_blocks;

@ -853,14 +986,38 @@ namespace basist
 		// Returns 0 or the number of layers in the texture array or texture video. Valid after init().
 		uint32_t get_layers() const { return m_header.m_layer_count; }

-		// Returns cETC1S, cUASTC4x4, or cUASTC_HDR_4x4. Valid after init().
-		basist::basis_tex_format get_format() const { return m_format; } 
+		// Returns cETC1S, cUASTC4x4, cUASTC_HDR_4x4, cASTC_HDR_6x6, cASTC_HDR_6x6_INTERMEDIATE. Valid after init().
+		basist::basis_tex_format get_basis_tex_format() const { return m_format; }

-		bool is_etc1s() const { return get_format() == basist::basis_tex_format::cETC1S; }
+		// ETC1S LDR 4x4
+		bool is_etc1s() const { return get_basis_tex_format() == basist::basis_tex_format::cETC1S; }

-		bool is_uastc() const { return get_format() == basist::basis_tex_format::cUASTC4x4; }
+		// UASTC LDR 4x4 (only)
+		bool is_uastc() const { return get_basis_tex_format() == basist::basis_tex_format::cUASTC4x4; }

-		bool is_hdr() const { return get_format() == basist::basis_tex_format::cUASTC_HDR_4x4; }
+		// Is ASTC HDR 4x4 or 6x6
+		bool is_hdr() const
+		{
+			return basis_tex_format_is_hdr(get_basis_tex_format());
+		}
+
+		bool is_ldr() const
+		{
+			return !is_hdr();
+		}
+
+		bool is_hdr_4x4() const
+		{
+			return (get_basis_tex_format() == basist::basis_tex_format::cUASTC_HDR_4x4);
+		}
+
+		bool is_hdr_6x6() const
+		{
+			return (get_basis_tex_format() == basist::basis_tex_format::cASTC_HDR_6x6) || (get_basis_tex_format() == basist::basis_tex_format::cASTC_HDR_6x6_INTERMEDIATE);
+		}
+
+		uint32_t get_block_width() const { return basis_tex_format_get_block_width(get_basis_tex_format()); }
+		uint32_t get_block_height() const { return basis_tex_format_get_block_height(get_basis_tex_format());	}

 		// Returns true if the ETC1S file has two planes (typically RGBA, or RRRG), or true if the UASTC file has alpha data. Valid after init().
 		uint32_t get_has_alpha() const { return m_has_alpha; }
@ -894,9 +1051,11 @@ namespace basist
 		struct key_value
 		{
 			// The key field is UTF8 and always zero terminated. 
+			// In memory we always append a zero terminator to the key.
 			basisu::uint8_vec m_key;

-			// The value may be empty. It consists of raw bytes which may or may not be zero terminated.
+			// The value may be empty. In the KTX2 file it consists of raw bytes which may or may not be zero terminated. 
+			// In memory we always append a zero terminator to the value.
 			basisu::uint8_vec m_value;

 			bool operator< (const key_value& rhs) const { return strcmp((const char*)m_key.data(), (const char *)rhs.m_key.data()) < 0; }
@ -917,6 +1076,8 @@ namespace basist
 		// Returns the array of ETC1S image descriptors, which is only valid after get_etc1s_image_descs() is called.
 		const basisu::vector<ktx2_etc1s_image_desc>& get_etc1s_image_descs() const { return m_etc1s_image_descs; }

+		const basisu::vector<ktx2_astc_hdr_6x6_intermediate_image_desc>& get_astc_hdr_6x6_intermediate_image_descs() const { return m_astc_6x6_intermediate_image_descs; }
+
 		// Must have called startTranscoding() first
 		uint32_t get_etc1s_image_descs_image_flags(uint32_t level_index, uint32_t layer_index, uint32_t face_index) const;

@ -924,6 +1085,9 @@ namespace basist
 		// For ETC1S data, if this returns true you must currently transcode the file from first to last frame, in order, without skipping any frames.
 		bool is_video() const { return m_is_video; }
 		
+		// Defaults to 0, only non-zero if the key existed in the source KTX2 file.
+		float get_ldr_hdr_upconversion_nit_multiplier() const { return m_ldr_hdr_upconversion_nit_multiplier; }
+				
 		// start_transcoding() MUST be called before calling transcode_image().
 		// This method decompresses the ETC1S global endpoint/selector codebooks, which is not free, so try to avoid calling it excessively.
 		bool start_transcoding();
@ -956,6 +1120,7 @@ namespace basist
 		
 		ktx2_etc1s_global_data_header m_etc1s_header;
 		basisu::vector<ktx2_etc1s_image_desc> m_etc1s_image_descs;
+		basisu::vector<ktx2_astc_hdr_6x6_intermediate_image_desc> m_astc_6x6_intermediate_image_descs;

 		basist::basis_tex_format m_format;
 					
@ -967,19 +1132,54 @@ namespace basist
 		ktx2_df_channel_id m_dfd_chan0, m_dfd_chan1;
 								
 		basist::basisu_lowlevel_etc1s_transcoder m_etc1s_transcoder;
-		basist::basisu_lowlevel_uastc_transcoder m_uastc_transcoder;
-		basist::basisu_lowlevel_uastc_hdr_transcoder m_uastc_hdr_transcoder;
+		basist::basisu_lowlevel_uastc_ldr_4x4_transcoder m_uastc_transcoder;
+		basist::basisu_lowlevel_uastc_hdr_4x4_transcoder m_uastc_hdr_transcoder;
+		basist::basisu_lowlevel_astc_hdr_6x6_transcoder m_astc_hdr_6x6_transcoder;
+		basist::basisu_lowlevel_astc_hdr_6x6_intermediate_transcoder m_astc_hdr_6x6_intermediate_transcoder;
 				
 		ktx2_transcoder_state m_def_transcoder_state;

 		bool m_has_alpha;
 		bool m_is_video;
+		float m_ldr_hdr_upconversion_nit_multiplier;

 		bool decompress_level_data(uint32_t level_index, basisu::uint8_vec& uncomp_data);
+		bool read_astc_6x6_hdr_intermediate_global_data();
 		bool decompress_etc1s_global_data();
 		bool read_key_values();
 	};

+	// Replaces if the key already exists
+	inline void ktx2_add_key_value(ktx2_transcoder::key_value_vec& key_values, const std::string& key, const std::string& val)
+	{
+		assert(key.size());
+
+		basist::ktx2_transcoder::key_value* p = nullptr;
+
+		// Try to find an existing key
+		for (size_t i = 0; i < key_values.size(); i++)
+		{
+			if (strcmp((const char*)key_values[i].m_key.data(), key.c_str()) == 0)
+			{
+				p = &key_values[i];
+				break;
+			}
+		}
+		
+		if (!p)
+			p = key_values.enlarge(1);
+
+		p->m_key.resize(0);
+		p->m_value.resize(0);
+
+		p->m_key.resize(key.size() + 1);
+		memcpy(p->m_key.data(), key.c_str(), key.size());
+
+		p->m_value.resize(val.size() + 1);
+		if (val.size())
+			memcpy(p->m_value.data(), val.c_str(), val.size());
+	}
+
 #endif // BASISD_SUPPORT_KTX2

 	// Returns true if the transcoder was compiled with KTX2 support.
--- a/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h
+++ b/thirdparty/basis_universal/transcoder/basisu_transcoder_internal.h
@ -20,9 +20,10 @@
 #pragma warning (disable: 4127) //  conditional expression is constant
 #endif

-// v1.50: Added UASTC HDR support
-#define BASISD_LIB_VERSION 150
-#define BASISD_VERSION_STRING "01.50"
+// v1.50: Added UASTC HDR 4x4 support
+// v1.60: Added RDO ASTC HDR 6x6 and intermediate support
+#define BASISD_LIB_VERSION 160
+#define BASISD_VERSION_STRING "01.60"

 #ifdef _DEBUG
 #define BASISD_BUILD_DEBUG
@ -91,10 +92,37 @@ namespace basist
 		cUASTC_HDR_4x4,						// HDR, transcodes only to 4x4 HDR ASTC, BC6H, or uncompressed
 		cBC6H,
 		cASTC_HDR_4x4,
+		cASTC_HDR_6x6,
 								
 		cTotalBlockFormats
 	};

+	inline uint32_t get_block_width(block_format fmt)
+	{
+		switch (fmt)
+		{
+		case block_format::cFXT1_RGB:
+			return 8;
+		case block_format::cASTC_HDR_6x6:
+			return 6;
+		default:
+			break;
+		}
+		return 4;
+	}
+
+	inline uint32_t get_block_height(block_format fmt)
+	{
+		switch (fmt)
+		{
+		case block_format::cASTC_HDR_6x6:
+			return 6;
+		default:
+			break;
+		}
+		return 4;
+	}
+
 	const int COLOR5_PAL0_PREV_HI = 9, COLOR5_PAL0_DELTA_LO = -9, COLOR5_PAL0_DELTA_HI = 31;
 	const int COLOR5_PAL1_PREV_HI = 21, COLOR5_PAL1_DELTA_LO = -21, COLOR5_PAL1_DELTA_HI = 21;
 	const int COLOR5_PAL2_PREV_HI = 31, COLOR5_PAL2_DELTA_LO = -31, COLOR5_PAL2_DELTA_HI = 9;
@ -559,6 +587,12 @@ namespace basist
 			return ct.init(total_used_syms, &code_sizes[0]);
 		}

+		size_t get_bits_remaining() const
+		{
+			size_t total_bytes_remaining = m_pBuf_end - m_pBuf;
+			return total_bytes_remaining * 8 + m_bit_buf_size;
+		}
+
 	private:
 		uint32_t m_buf_size;
 		const uint8_t *m_pBuf;
@ -804,6 +838,7 @@ namespace basist
 	const double MIN_DENORM_HALF_FLOAT = 0.000000059604645; // smallest positive subnormal number
 	const double MIN_HALF_FLOAT = 0.00006103515625; // smallest positive normal number
 	const double MAX_HALF_FLOAT = 65504.0; // largest normal number
+	const uint32_t MAX_HALF_FLOAT_AS_INT_BITS = 0x7BFF; // the half float rep for 65504.0

 	inline uint32_t get_bits(uint32_t val, int low, int high)
 	{
@ -975,6 +1010,13 @@ namespace basist
 		return (h * 64 + 30) / 31;
 	}

+	// Suboptimal, but very close.
+	inline uint32_t bc6h_half_to_blog(half_float h, uint32_t num_bits)
+	{
+		assert(h <= MAX_BC6H_HALF_FLOAT_AS_UINT);
+		return (h * 64 + 30) / (31 * (1 << (16 - num_bits)));
+	}
+
 	struct bc6h_block
 	{
 		uint8_t m_bytes[16];
@ -988,6 +1030,26 @@ namespace basist
 	void bc6h_enc_block_2subset_3bit_weights(bc6h_block* pPacked_block, uint32_t common_part_index, const half_float pEndpoints[2][3][2], const uint8_t* pWeights); // pEndpoints[subset][comp][lh_index]
 	bool bc6h_enc_block_solid_color(bc6h_block* pPacked_block, const half_float pColor[3]);

+	struct bc6h_logical_block
+	{
+		uint32_t m_mode;
+		uint32_t m_partition_pattern;	// must be 0 if 1 subset
+		uint32_t m_endpoints[3][4];		// [comp][subset*2+lh_index] - must be already properly packed
+		uint8_t m_weights[16];			// weights must be of the proper size, taking into account skipped MSB's which must be 0
+
+		void clear()
+		{
+			basisu::clear_obj(*this);
+		}
+	};
+
+	void pack_bc6h_block(bc6h_block& dst_blk, bc6h_logical_block& log_blk);
+		
+	namespace bc7_mode_5_encoder
+	{
+		void encode_bc7_mode_5_block(void* pDst_block, color32* pPixels, bool hq_mode);
+	}
+		
 } // namespace basist


--- a/thirdparty/libktx/lib/basis_transcode.cpp
+++ b/thirdparty/libktx/lib/basis_transcode.cpp
@ -658,7 +658,7 @@ ktxTexture2_transcodeUastc(ktxTexture2* This,
    ktxLevelIndexEntry* protoLevelIndex = protoPriv._levelIndex;
    ktx_size_t levelOffsetWrite = 0;

-    basisu_lowlevel_uastc_transcoder uit;
+    basisu_lowlevel_uastc_ldr_4x4_transcoder uit;
    // See comment on same declaration in transcodeEtc1s.
    std::vector<basisu_transcoder_state> xcoderStates;
    xcoderStates.resize(This->isVideo ? This->numFaces : 1);
--- a/thirdparty/libktx/patches/0003-basisu-1.60.patch
+++ b/thirdparty/libktx/patches/0003-basisu-1.60.patch
@ -0,0 +1,13 @@
+diff --git a/thirdparty/libktx/lib/basis_transcode.cpp b/thirdparty/libktx/lib/basis_transcode.cpp
+index d7ecb7a0fd..43ad059150 100644
+--- a/thirdparty/libktx/lib/basis_transcode.cpp
++++ b/thirdparty/libktx/lib/basis_transcode.cpp
+@@ -658,7 +658,7 @@ ktxTexture2_transcodeUastc(ktxTexture2* This,
+     ktxLevelIndexEntry* protoLevelIndex = protoPriv._levelIndex;
+     ktx_size_t levelOffsetWrite = 0;
+ 
+-    basisu_lowlevel_uastc_transcoder uit;
++    basisu_lowlevel_uastc_ldr_4x4_transcoder uit;
+     // See comment on same declaration in transcodeEtc1s.
+     std::vector<basisu_transcoder_state> xcoderStates;
+     xcoderStates.resize(This->isVideo ? This->numFaces : 1);