diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt
index 288b12030fc..ca0d1976708 100644
--- a/COPYRIGHT.txt
+++ b/COPYRIGHT.txt
@@ -324,16 +324,18 @@ Comment: Jolt Physics
 Copyright: 2021, Jorrit Rouwe
 License: Expat
 
-Files: thirdparty/jpeg-compressor/*
-Comment: jpeg-compressor
-Copyright: 2012, Rich Geldreich
-License: public-domain or Apache-2.0
-
 Files: thirdparty/libbacktrace/*
 Comment: libbacktrace
 Copyright: 2012-2021, Free Software Foundation, Inc.
 License: BSD-3-clause
 
+Files: thirdparty/libjpeg-turbo/*
+Comment: libjpeg-turbo
+Copyright: 2009-2024, D. R. Commander
+ 2015, Viktor Szathmáry.
+ 1991-2020, Thomas G. Lane, Guido Vollbeding
+License: BSD-3-clause and IJG
+
 Files: thirdparty/libktx/*
 Comment: KTX
 Copyright: 2013-2020, Mark Callow
@@ -1683,6 +1685,42 @@ License: HarfBuzz
  ON AN "AS IS" BASIS, AND THE COPYRIGHT HOLDER HAS NO OBLIGATION TO
  PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
 
+License: IJG
+ The authors make NO WARRANTY or representation, either express or implied,
+ with respect to this software, its quality, accuracy, merchantability, or
+ fitness for a particular purpose.  This software is provided "AS IS", and you,
+ its user, assume the entire risk as to its quality and accuracy.
+ .
+ This software is copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
+ All Rights Reserved except as specified below.
+ .
+ Permission is hereby granted to use, copy, modify, and distribute this
+ software (or portions thereof) for any purpose, without fee, subject to these
+ conditions:
+ (1) If any part of the source code for this software is distributed, then this
+ README file must be included, with this copyright and no-warranty notice
+ unaltered; and any additions, deletions, or changes to the original files
+ must be clearly indicated in accompanying documentation.
+ (2) If only executable code is distributed, then the accompanying
+ documentation must state that "this software is based in part on the work of
+ the Independent JPEG Group".
+ (3) Permission for use of this software is granted only if the user accepts
+ full responsibility for any undesirable consequences; the authors accept
+ NO LIABILITY for damages of any kind.
+ .
+ These conditions apply to any software derived from or based on the IJG code,
+ not just to the unmodified library.  If you use our work, you ought to
+ acknowledge us.
+ .
+ Permission is NOT granted for the use of any IJG author's name or company name
+ in advertising or publicity relating to this software or products derived from
+ it.  This software may be referred to only as "the Independent JPEG Group's
+ software".
+ .
+ We specifically permit and encourage the use of this software as the basis of
+ commercial products, provided that all warranty or liability claims are
+ assumed by the product vendor.
+
 License: MPL-2.0
  Mozilla Public License Version 2.0
  ==================================
diff --git a/SConstruct b/SConstruct
index 7037ca4eb8b..e895cffdab1 100644
--- a/SConstruct
+++ b/SConstruct
@@ -276,6 +276,7 @@ opts.Add(BoolVariable("builtin_glslang", "Use the built-in glslang library", Tru
 opts.Add(BoolVariable("builtin_graphite", "Use the built-in Graphite library", True))
 opts.Add(BoolVariable("builtin_harfbuzz", "Use the built-in HarfBuzz library", True))
 opts.Add(BoolVariable("builtin_icu4c", "Use the built-in ICU library", True))
+opts.Add(BoolVariable("builtin_libjpeg_turbo", "Use the built-in libjpeg-turbo library", True))
 opts.Add(BoolVariable("builtin_libogg", "Use the built-in libogg library", True))
 opts.Add(BoolVariable("builtin_libpng", "Use the built-in libpng library", True))
 opts.Add(BoolVariable("builtin_libtheora", "Use the built-in libtheora library", True))
diff --git a/misc/error_suppressions/tsan.txt b/misc/error_suppressions/tsan.txt
index 9aa1d78753d..f8ca0900e93 100644
--- a/misc/error_suppressions/tsan.txt
+++ b/misc/error_suppressions/tsan.txt
@@ -6,3 +6,4 @@ deadlock:modules/text_server_adv/text_server_adv.cpp
 deadlock:modules/text_server_fb/text_server_fb.cpp
 race:modules/navigation_2d/nav_map_2d.cpp
 race:modules/navigation_3d/nav_map_3d.cpp
+race:thirdparty/thorvg/src/loaders/external_jpg/tvgJpgLoader.cpp
diff --git a/misc/error_suppressions/ubsan.txt b/misc/error_suppressions/ubsan.txt
index a6e63ade24e..2edac7ac959 100644
--- a/misc/error_suppressions/ubsan.txt
+++ b/misc/error_suppressions/ubsan.txt
@@ -6,6 +6,7 @@ float-divide-by-zero:thirdparty/thorvg/src/renderer/sw_engine/tvgSwFill.cpp
 function:thirdparty/embree/common/sys/thread.cpp
 function:thirdparty/embree/kernels/common/accel.h
 function:thirdparty/xatlas/xatlas.cpp
+implicit-integer-sign-change:thirdparty/basis_universal/encoder/jpgd.cpp
 implicit-integer-sign-change:thirdparty/basis_universal/transcoder/basisu_astc_helpers.h
 implicit-integer-sign-change:thirdparty/embree/common/lexers/../sys/ref.h
 implicit-integer-sign-change:thirdparty/embree/common/lexers/tokenstream.cpp
@@ -38,7 +39,6 @@ implicit-integer-sign-change:thirdparty/icu4c/common/unicode/unistr.h
 implicit-integer-sign-change:thirdparty/icu4c/common/unistr.cpp
 implicit-integer-sign-change:thirdparty/icu4c/common/uresbund.cpp
 implicit-integer-sign-change:thirdparty/icu4c/common/ustrtrns.cpp
-implicit-integer-sign-change:thirdparty/jpeg-compressor/jpgd.cpp
 implicit-integer-sign-change:thirdparty/libogg/bitwise.c
 implicit-integer-sign-change:thirdparty/libvorbis/info.c
 implicit-integer-sign-change:thirdparty/libvorbis/sharedbook.c
diff --git a/modules/basis_universal/SCsub b/modules/basis_universal/SCsub
index 41dbc9c56a3..395ca5ee952 100644
--- a/modules/basis_universal/SCsub
+++ b/modules/basis_universal/SCsub
@@ -38,6 +38,7 @@ if basisu_encoder:
         "basisu_ssim.cpp",
         "basisu_uastc_enc.cpp",
         "basisu_uastc_hdr_4x4_enc.cpp",
+        "jpgd.cpp",
         "pvpngreader.cpp",
     ]
     encoder_sources = [thirdparty_dir + "encoder/" + file for file in encoder_sources]
@@ -47,7 +48,6 @@ transcoder_sources = [thirdparty_dir + "transcoder/basisu_transcoder.cpp"]
 env_basisu.Prepend(CPPEXTPATH=[thirdparty_dir])
 
 if basisu_encoder:
-    env_basisu.Prepend(CPPEXTPATH=["#thirdparty/jpeg-compressor"])
     env_basisu.Prepend(CPPEXTPATH=["#thirdparty/tinyexr"])
 
 if env["builtin_zstd"]:
diff --git a/modules/basis_universal/config.py b/modules/basis_universal/config.py
index 0f723cbb6b5..c469d165e95 100644
--- a/modules/basis_universal/config.py
+++ b/modules/basis_universal/config.py
@@ -1,6 +1,6 @@
 def can_build(env, platform):
     if env.editor_build:  # Encoder dependencies
-        env.module_add_dependencies("basis_universal", ["jpg", "tinyexr"])
+        env.module_add_dependencies("basis_universal", ["tinyexr"])
     return True
 
 
diff --git a/modules/jpg/SCsub b/modules/jpg/SCsub
index fbc69a86fd7..e5b1bc19c0b 100644
--- a/modules/jpg/SCsub
+++ b/modules/jpg/SCsub
@@ -6,23 +6,99 @@ Import("env_modules")
 
 env_jpg = env_modules.Clone()
 
-# Thirdparty source files
-
 thirdparty_obj = []
 
-# Not unbundled for now as they are not commonly available as shared library
-thirdparty_dir = "#thirdparty/jpeg-compressor/"
-thirdparty_sources = [
-    "jpgd.cpp",
-    "jpge.cpp",
-]
-thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
+thirdparty_dir = "#thirdparty/libjpeg-turbo"
 
-env_jpg.Prepend(CPPEXTPATH=[thirdparty_dir])
+thirdparty_sources_common = [
+    "jaricom.c",
+    "jcapimin.c",
+    "jcarith.c",
+    "jchuff.c",
+    "jcicc.c",
+    "jcinit.c",
+    "jcmarker.c",
+    "jcmaster.c",
+    "jcomapi.c",
+    "jcparam.c",
+    "jcphuff.c",
+    "jctrans.c",
+    "jdapimin.c",
+    "jdarith.c",
+    "jdatadst.c",
+    "jdatadst-tj.c",
+    "jdatasrc.c",
+    "jdatasrc-tj.c",
+    "jdhuff.c",
+    "jdicc.c",
+    "jdinput.c",
+    "jdmarker.c",
+    "jdmaster.c",
+    "jdphuff.c",
+    "jdtrans.c",
+    "jerror.c",
+    "jfdctflt.c",
+    "jmemmgr.c",
+    "jmemnobs.c",
+    "jpeg_nbits.c",
+    "transupp.c",
+    "turbojpeg.c",
+]
+
+thirdparty_sources_bit_dependent = [
+    "jcapistd.c",
+    "jccoefct.c",
+    "jccolor.c",
+    "jcdctmgr.c",
+    "jcmainct.c",
+    "jcprepct.c",
+    "jcsample.c",
+    "jdcoefct.c",
+    "jdcolor.c",
+    "jdapistd.c",
+    "jddctmgr.c",
+    "jdmainct.c",
+    "jdmerge.c",
+    "jdpostct.c",
+    "jdsample.c",
+    "jfdctfst.c",
+    "jfdctint.c",
+    "jidctflt.c",
+    "jidctfst.c",
+    "jidctint.c",
+    "jidctred.c",
+    "jutils.c",
+    "jquant1.c",
+    "jquant2.c",
+]
+
+thirdparty_sources_by_bits = {
+    8: list(thirdparty_sources_bit_dependent),
+    12: list(thirdparty_sources_bit_dependent),
+}
+
+
+def source_paths(files):
+    return [thirdparty_dir + "/src/" + f for f in files]
+
+
+env_jpg.Prepend(CPPEXTPATH=[thirdparty_dir + "/src"])
+
+
+def add_bit_depth(bit_depth: int):
+    env_bit_depth = env_jpg.Clone()
+    env_bit_depth.disable_warnings()
+    env_bit_depth["OBJSUFFIX"] = f"_{bit_depth}{env_bit_depth['OBJSUFFIX']}"
+    env_bit_depth.Append(CPPDEFINES=[f"BITS_IN_JSAMPLE={bit_depth}"])
+    env_bit_depth.add_source_files(thirdparty_obj, source_paths(thirdparty_sources_by_bits[bit_depth]))
+
+
+add_bit_depth(8)
+add_bit_depth(12)
 
 env_thirdparty = env_jpg.Clone()
 env_thirdparty.disable_warnings()
-env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources)
+env_thirdparty.add_source_files(thirdparty_obj, source_paths(thirdparty_sources_common))
 env.modules_sources += thirdparty_obj
 
 # Godot source files
diff --git a/modules/jpg/image_loader_jpegd.cpp b/modules/jpg/image_loader_jpegd.cpp
deleted file mode 100644
index 2e615f55e85..00000000000
--- a/modules/jpg/image_loader_jpegd.cpp
+++ /dev/null
@@ -1,209 +0,0 @@
-/**************************************************************************/
-/*  image_loader_jpegd.cpp                                                */
-/**************************************************************************/
-/*                         This file is part of:                          */
-/*                             GODOT ENGINE                               */
-/*                        https://godotengine.org                         */
-/**************************************************************************/
-/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
-/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
-/*                                                                        */
-/* Permission is hereby granted, free of charge, to any person obtaining  */
-/* a copy of this software and associated documentation files (the        */
-/* "Software"), to deal in the Software without restriction, including    */
-/* without limitation the rights to use, copy, modify, merge, publish,    */
-/* distribute, sublicense, and/or sell copies of the Software, and to     */
-/* permit persons to whom the Software is furnished to do so, subject to  */
-/* the following conditions:                                              */
-/*                                                                        */
-/* The above copyright notice and this permission notice shall be         */
-/* included in all copies or substantial portions of the Software.        */
-/*                                                                        */
-/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
-/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
-/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
-/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
-/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
-/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
-/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
-/**************************************************************************/
-
-#include "image_loader_jpegd.h"
-
-#include <jpgd.h>
-#include <jpge.h>
-
-#include <string.h>
-
-Error jpeg_load_image_from_buffer(Image *p_image, const uint8_t *p_buffer, int p_buffer_len) {
-	jpgd::jpeg_decoder_mem_stream mem_stream(p_buffer, p_buffer_len);
-
-	jpgd::jpeg_decoder decoder(&mem_stream);
-
-	if (decoder.get_error_code() != jpgd::JPGD_SUCCESS) {
-		return ERR_CANT_OPEN;
-	}
-
-	const int image_width = decoder.get_width();
-	const int image_height = decoder.get_height();
-	const int comps = decoder.get_num_components();
-	if (comps != 1 && comps != 3) {
-		return ERR_FILE_CORRUPT;
-	}
-
-	if (decoder.begin_decoding() != jpgd::JPGD_SUCCESS) {
-		return ERR_FILE_CORRUPT;
-	}
-
-	const int dst_bpl = image_width * comps;
-
-	Vector<uint8_t> data;
-
-	data.resize(dst_bpl * image_height);
-
-	uint8_t *dw = data.ptrw();
-
-	jpgd::uint8 *pImage_data = (jpgd::uint8 *)dw;
-
-	for (int y = 0; y < image_height; y++) {
-		const jpgd::uint8 *pScan_line;
-		jpgd::uint scan_line_len;
-		if (decoder.decode((const void **)&pScan_line, &scan_line_len) != jpgd::JPGD_SUCCESS) {
-			return ERR_FILE_CORRUPT;
-		}
-
-		jpgd::uint8 *pDst = pImage_data + y * dst_bpl;
-
-		if (comps == 1) {
-			memcpy(pDst, pScan_line, dst_bpl);
-		} else {
-			// For images with more than 1 channel pScan_line will always point to a buffer
-			// containing 32-bit RGBA pixels. Alpha is always 255 and we ignore it.
-			for (int x = 0; x < image_width; x++) {
-				pDst[0] = pScan_line[x * 4 + 0];
-				pDst[1] = pScan_line[x * 4 + 1];
-				pDst[2] = pScan_line[x * 4 + 2];
-				pDst += 3;
-			}
-		}
-	}
-
-	//all good
-
-	Image::Format fmt;
-	if (comps == 1) {
-		fmt = Image::FORMAT_L8;
-	} else {
-		fmt = Image::FORMAT_RGB8;
-	}
-
-	p_image->set_data(image_width, image_height, false, fmt, data);
-
-	return OK;
-}
-
-Error ImageLoaderJPG::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField<ImageFormatLoader::LoaderFlags> p_flags, float p_scale) {
-	Vector<uint8_t> src_image;
-	uint64_t src_image_len = f->get_length();
-	ERR_FAIL_COND_V(src_image_len == 0, ERR_FILE_CORRUPT);
-	src_image.resize(src_image_len);
-
-	uint8_t *w = src_image.ptrw();
-
-	f->get_buffer(&w[0], src_image_len);
-
-	Error err = jpeg_load_image_from_buffer(p_image.ptr(), w, src_image_len);
-
-	return err;
-}
-
-void ImageLoaderJPG::get_recognized_extensions(List<String> *p_extensions) const {
-	p_extensions->push_back("jpg");
-	p_extensions->push_back("jpeg");
-}
-
-static Ref<Image> _jpegd_mem_loader_func(const uint8_t *p_png, int p_size) {
-	Ref<Image> img;
-	img.instantiate();
-	Error err = jpeg_load_image_from_buffer(img.ptr(), p_png, p_size);
-	ERR_FAIL_COND_V(err, Ref<Image>());
-	return img;
-}
-
-class ImageLoaderJPGOSFile : public jpge::output_stream {
-public:
-	Ref<FileAccess> f;
-
-	virtual bool put_buf(const void *Pbuf, int len) {
-		f->store_buffer((const uint8_t *)Pbuf, len);
-		return true;
-	}
-};
-
-class ImageLoaderJPGOSBuffer : public jpge::output_stream {
-public:
-	Vector<uint8_t> *buffer = nullptr;
-	virtual bool put_buf(const void *Pbuf, int len) {
-		uint32_t base = buffer->size();
-		buffer->resize(base + len);
-		memcpy(buffer->ptrw() + base, Pbuf, len);
-		return true;
-	}
-};
-
-static Error _jpgd_save_to_output_stream(jpge::output_stream *p_output_stream, const Ref<Image> &p_img, float p_quality) {
-	ERR_FAIL_COND_V(p_img.is_null() || p_img->is_empty(), ERR_INVALID_PARAMETER);
-	Ref<Image> image = p_img->duplicate();
-	if (image->is_compressed()) {
-		Error error = image->decompress();
-		ERR_FAIL_COND_V_MSG(error != OK, error, "Couldn't decompress image.");
-	}
-	if (image->get_format() != Image::FORMAT_RGB8) {
-		image = image->duplicate();
-		image->convert(Image::FORMAT_RGB8);
-	}
-
-	jpge::params p;
-	p.m_quality = CLAMP(p_quality * 100, 1, 100);
-
-	jpge::jpeg_encoder enc;
-	enc.init(p_output_stream, image->get_width(), image->get_height(), 3, p);
-
-	const uint8_t *src_data = image->get_data().ptr();
-	for (int i = 0; i < image->get_height(); i++) {
-		if (!enc.process_scanline(&src_data[i * image->get_width() * 3])) {
-			return FAILED;
-		}
-	}
-
-	if (enc.process_scanline(nullptr)) {
-		return OK;
-	} else {
-		return FAILED;
-	}
-}
-
-static Vector<uint8_t> _jpgd_buffer_save_func(const Ref<Image> &p_img, float p_quality) {
-	Vector<uint8_t> output;
-	ImageLoaderJPGOSBuffer ob;
-	ob.buffer = &output;
-	if (_jpgd_save_to_output_stream(&ob, p_img, p_quality) != OK) {
-		return Vector<uint8_t>();
-	}
-	return output;
-}
-
-static Error _jpgd_save_func(const String &p_path, const Ref<Image> &p_img, float p_quality) {
-	Error err;
-	Ref<FileAccess> file = FileAccess::open(p_path, FileAccess::WRITE, &err);
-	ERR_FAIL_COND_V_MSG(err, err, vformat("Can't save JPG at path: '%s'.", p_path));
-	ImageLoaderJPGOSFile ob;
-	ob.f = file;
-	return _jpgd_save_to_output_stream(&ob, p_img, p_quality);
-}
-
-ImageLoaderJPG::ImageLoaderJPG() {
-	Image::_jpg_mem_loader_func = _jpegd_mem_loader_func;
-	Image::save_jpg_func = _jpgd_save_func;
-	Image::save_jpg_buffer_func = _jpgd_buffer_save_func;
-}
diff --git a/modules/jpg/image_loader_libjpeg_turbo.cpp b/modules/jpg/image_loader_libjpeg_turbo.cpp
new file mode 100644
index 00000000000..f4199d713cf
--- /dev/null
+++ b/modules/jpg/image_loader_libjpeg_turbo.cpp
@@ -0,0 +1,191 @@
+/**************************************************************************/
+/*  image_loader_libjpeg_turbo.cpp                                        */
+/**************************************************************************/
+/*                         This file is part of:                          */
+/*                             GODOT ENGINE                               */
+/*                        https://godotengine.org                         */
+/**************************************************************************/
+/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
+/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
+/*                                                                        */
+/* Permission is hereby granted, free of charge, to any person obtaining  */
+/* a copy of this software and associated documentation files (the        */
+/* "Software"), to deal in the Software without restriction, including    */
+/* without limitation the rights to use, copy, modify, merge, publish,    */
+/* distribute, sublicense, and/or sell copies of the Software, and to     */
+/* permit persons to whom the Software is furnished to do so, subject to  */
+/* the following conditions:                                              */
+/*                                                                        */
+/* The above copyright notice and this permission notice shall be         */
+/* included in all copies or substantial portions of the Software.        */
+/*                                                                        */
+/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
+/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
+/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
+/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
+/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
+/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
+/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
+/**************************************************************************/
+
+#include "image_loader_libjpeg_turbo.h"
+
+#include <turbojpeg.h>
+
+#include <string.h>
+
+Error jpeg_turbo_load_image_from_buffer(Image *p_image, const uint8_t *p_buffer, int p_buffer_len) {
+	tjhandle tj_instance = tj3Init(TJINIT_DECOMPRESS);
+	if (tj_instance == NULL) {
+		return FAILED;
+	}
+
+	if (tj3DecompressHeader(tj_instance, p_buffer, p_buffer_len) < 0) {
+		tj3Destroy(tj_instance);
+		return ERR_FILE_CORRUPT;
+	}
+
+	const unsigned int width = tj3Get(tj_instance, TJPARAM_JPEGWIDTH);
+	const unsigned int height = tj3Get(tj_instance, TJPARAM_JPEGHEIGHT);
+	const TJCS colorspace = (TJCS)tj3Get(tj_instance, TJPARAM_COLORSPACE);
+
+	if (tj3Get(tj_instance, TJPARAM_PRECISION) > 8) {
+		// Proceed anyway and convert to rgb8?
+		tj3Destroy(tj_instance);
+		return ERR_UNAVAILABLE;
+	}
+
+	TJPF tj_pixel_format;
+	Image::Format gd_pixel_format;
+	if (colorspace == TJCS_GRAY) {
+		tj_pixel_format = TJPF_GRAY;
+		gd_pixel_format = Image::FORMAT_L8;
+	} else {
+		// Force everything else (RGB, CMYK etc) into RGB8.
+		tj_pixel_format = TJPF_RGB;
+		gd_pixel_format = Image::FORMAT_RGB8;
+	}
+
+	Vector<uint8_t> data;
+	data.resize(width * height * tjPixelSize[tj_pixel_format]);
+
+	if (tj3Decompress8(tj_instance, p_buffer, p_buffer_len, data.ptrw(), 0, tj_pixel_format) < 0) {
+		tj3Destroy(tj_instance);
+		return ERR_FILE_CORRUPT;
+	}
+
+	tj3Destroy(tj_instance);
+	p_image->set_data(width, height, false, gd_pixel_format, data);
+	return OK;
+}
+
+Error ImageLoaderLibJPEGTurbo::load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField<ImageFormatLoader::LoaderFlags> p_flags, float p_scale) {
+	Vector<uint8_t> src_image;
+	uint64_t src_image_len = f->get_length();
+	ERR_FAIL_COND_V(src_image_len == 0, ERR_FILE_CORRUPT);
+	src_image.resize(src_image_len);
+
+	uint8_t *w = src_image.ptrw();
+
+	f->get_buffer(&w[0], src_image_len);
+
+	Error err = jpeg_turbo_load_image_from_buffer(p_image.ptr(), w, src_image_len);
+
+	return err;
+}
+
+void ImageLoaderLibJPEGTurbo::get_recognized_extensions(List<String> *p_extensions) const {
+	p_extensions->push_back("jpg");
+	p_extensions->push_back("jpeg");
+}
+
+static Ref<Image> _jpeg_turbo_mem_loader_func(const uint8_t *p_png, int p_size) {
+	Ref<Image> img;
+	img.instantiate();
+	Error err = jpeg_turbo_load_image_from_buffer(img.ptr(), p_png, p_size);
+	ERR_FAIL_COND_V(err, Ref<Image>());
+	return img;
+}
+
+static Vector<uint8_t> _jpeg_turbo_buffer_save_func(const Ref<Image> &p_img, float p_quality) {
+	Vector<uint8_t> output;
+
+	ERR_FAIL_COND_V(p_img.is_null() || p_img->is_empty(), output);
+
+	Ref<Image> image = p_img->duplicate();
+	if (image->is_compressed()) {
+		Error error = image->decompress();
+		ERR_FAIL_COND_V_MSG(error != OK, output, "Couldn't decompress image.");
+	}
+
+	if (image->get_format() != Image::FORMAT_RGB8) {
+		// Allow grayscale L8?
+		image = image->duplicate();
+		image->convert(Image::FORMAT_RGB8);
+	}
+
+	tjhandle tj_instance = tj3Init(TJINIT_COMPRESS);
+	ERR_FAIL_COND_V_MSG(tj_instance == NULL, output, "Couldn't create tjhandle");
+
+	if (tj3Set(tj_instance, TJPARAM_QUALITY, (int)(p_quality * 100)) < 0) {
+		tj3Destroy(tj_instance);
+		ERR_FAIL_V_MSG(output, "Couldn't set jpg quality");
+	}
+
+	if (tj3Set(tj_instance, TJPARAM_PRECISION, 8) < 0) {
+		tj3Destroy(tj_instance);
+		ERR_FAIL_V_MSG(output, "Couldn't set jpg precision");
+	}
+
+	if (tj3Set(tj_instance, TJPARAM_SUBSAMP, TJSAMP_420) < 0) {
+		tj3Destroy(tj_instance);
+		ERR_FAIL_V_MSG(output, "Couldn't set jpg subsamples");
+	}
+
+	// If the godot image format is `Image::FORMAT_L8` we could set the appropriate
+	// color space here rather than defaulting to RGB.
+
+	unsigned char *jpeg_buff = NULL;
+	size_t jpeg_size = 0;
+	int code = tj3Compress8(
+			tj_instance,
+			image->get_data().ptr(),
+			image->get_width(),
+			0,
+			image->get_height(),
+			TJPF_RGB,
+			&jpeg_buff,
+			&jpeg_size);
+
+	if (code < 0) {
+		tj3Destroy(tj_instance);
+		tj3Free(jpeg_buff);
+		ERR_FAIL_V_MSG(output, "Couldn't compress jpg");
+	}
+
+	output.resize(jpeg_size);
+	memcpy(output.ptrw(), jpeg_buff, jpeg_size);
+
+	tj3Destroy(tj_instance);
+	tj3Free(jpeg_buff);
+
+	return output;
+}
+
+static Error _jpeg_turbo_save_func(const String &p_path, const Ref<Image> &p_img, float p_quality) {
+	Error err;
+	Ref<FileAccess> file = FileAccess::open(p_path, FileAccess::WRITE, &err);
+	ERR_FAIL_COND_V_MSG(err, err, vformat("Can't save JPG at path: '%s'.", p_path));
+
+	Vector<uint8_t> data = _jpeg_turbo_buffer_save_func(p_img, p_quality);
+	ERR_FAIL_COND_V(data.size() == 0, FAILED);
+	ERR_FAIL_COND_V_MSG(!file->store_buffer(data.ptr(), data.size()), FAILED, "Failed writing jpg to file");
+
+	return OK;
+}
+
+ImageLoaderLibJPEGTurbo::ImageLoaderLibJPEGTurbo() {
+	Image::_jpg_mem_loader_func = _jpeg_turbo_mem_loader_func;
+	Image::save_jpg_func = _jpeg_turbo_save_func;
+	Image::save_jpg_buffer_func = _jpeg_turbo_buffer_save_func;
+}
diff --git a/modules/jpg/image_loader_jpegd.h b/modules/jpg/image_loader_libjpeg_turbo.h
similarity index 94%
rename from modules/jpg/image_loader_jpegd.h
rename to modules/jpg/image_loader_libjpeg_turbo.h
index 699bf5f5994..8483860ef99 100644
--- a/modules/jpg/image_loader_jpegd.h
+++ b/modules/jpg/image_loader_libjpeg_turbo.h
@@ -1,5 +1,5 @@
 /**************************************************************************/
-/*  image_loader_jpegd.h                                                  */
+/*  image_loader_libjpeg_turbo.h                                          */
 /**************************************************************************/
 /*                         This file is part of:                          */
 /*                             GODOT ENGINE                               */
@@ -32,9 +32,9 @@
 
 #include "core/io/image_loader.h"
 
-class ImageLoaderJPG : public ImageFormatLoader {
+class ImageLoaderLibJPEGTurbo : public ImageFormatLoader {
 public:
 	virtual Error load_image(Ref<Image> p_image, Ref<FileAccess> f, BitField<ImageFormatLoader::LoaderFlags> p_flags, float p_scale);
 	virtual void get_recognized_extensions(List<String> *p_extensions) const;
-	ImageLoaderJPG();
+	ImageLoaderLibJPEGTurbo();
 };
diff --git a/modules/jpg/register_types.cpp b/modules/jpg/register_types.cpp
index 6400aee43bf..aedb6e8da38 100644
--- a/modules/jpg/register_types.cpp
+++ b/modules/jpg/register_types.cpp
@@ -30,17 +30,17 @@
 
 #include "register_types.h"
 
-#include "image_loader_jpegd.h"
+#include "image_loader_libjpeg_turbo.h"
 
-static Ref<ImageLoaderJPG> image_loader_jpg;
+static Ref<ImageLoaderLibJPEGTurbo> image_loader_libjpeg_turbo;
 
 void initialize_jpg_module(ModuleInitializationLevel p_level) {
 	if (p_level != MODULE_INITIALIZATION_LEVEL_SCENE) {
 		return;
 	}
 
-	image_loader_jpg.instantiate();
-	ImageLoader::add_image_format_loader(image_loader_jpg);
+	image_loader_libjpeg_turbo.instantiate();
+	ImageLoader::add_image_format_loader(image_loader_libjpeg_turbo);
 }
 
 void uninitialize_jpg_module(ModuleInitializationLevel p_level) {
@@ -48,6 +48,6 @@ void uninitialize_jpg_module(ModuleInitializationLevel p_level) {
 		return;
 	}
 
-	ImageLoader::remove_image_format_loader(image_loader_jpg);
-	image_loader_jpg.unref();
+	ImageLoader::remove_image_format_loader(image_loader_libjpeg_turbo);
+	image_loader_libjpeg_turbo.unref();
 }
diff --git a/modules/svg/SCsub b/modules/svg/SCsub
index aa7f6a2263b..b7cadf69a64 100644
--- a/modules/svg/SCsub
+++ b/modules/svg/SCsub
@@ -26,8 +26,6 @@ thirdparty_sources = [
     "src/loaders/raw/tvgRawLoader.cpp",
     # image loaders
     "src/loaders/external_png/tvgPngLoader.cpp",
-    "src/loaders/jpg/tvgJpgd.cpp",
-    "src/loaders/jpg/tvgJpgLoader.cpp",
     # renderer common
     "src/renderer/tvgAccessor.cpp",
     # "src/renderer/tvgAnimation.cpp",
@@ -62,6 +60,9 @@ thirdparty_sources = [
 if env["module_webp_enabled"]:
     thirdparty_sources += ["src/loaders/external_webp/tvgWebpLoader.cpp"]
     env_svg.Append(CPPDEFINES=["THORVG_WEBP_LOADER_SUPPORT"])
+if env["module_jpg_enabled"]:
+    thirdparty_sources += ["src/loaders/external_jpg/tvgJpgLoader.cpp"]
+    env_svg.Append(CPPDEFINES=["THORVG_JPG_LOADER_SUPPORT"])
 
 thirdparty_sources = [thirdparty_dir + file for file in thirdparty_sources]
 
@@ -82,7 +83,6 @@ env_thirdparty.Prepend(
         thirdparty_dir + "src/renderer/sw_engine",
         thirdparty_dir + "src/loaders/raw",
         thirdparty_dir + "src/loaders/external_png",
-        thirdparty_dir + "src/loaders/jpg",
     ]
 )
 if env["builtin_libpng"]:
@@ -91,6 +91,10 @@ if env["module_webp_enabled"]:
     env_thirdparty.Prepend(CPPEXTPATH=[thirdparty_dir + "src/loaders/external_webp"])
     if env["builtin_libwebp"]:
         env_thirdparty.Prepend(CPPEXTPATH=["#thirdparty/libwebp/src"])
+if env["module_jpg_enabled"]:
+    env_thirdparty.Prepend(CPPEXTPATH=[thirdparty_dir + "src/loaders/external_jpg"])
+    if env["builtin_libjpeg_turbo"]:
+        env_thirdparty.Prepend(CPPEXTPATH=["#thirdparty/libjpeg-turbo/src"])
 
 env_thirdparty.add_source_files(thirdparty_obj, thirdparty_sources)
 env.modules_sources += thirdparty_obj
diff --git a/modules/svg/config.py b/modules/svg/config.py
index d22f9454ed2..d37f02188fe 100644
--- a/modules/svg/config.py
+++ b/modules/svg/config.py
@@ -1,4 +1,5 @@
 def can_build(env, platform):
+    env.module_add_dependencies("svg", ["jpg", "webp"], True)
     return True
 
 
diff --git a/tests/core/io/test_image.h b/tests/core/io/test_image.h
index 846c2de04f9..72f99380e33 100644
--- a/tests/core/io/test_image.h
+++ b/tests/core/io/test_image.h
@@ -133,6 +133,34 @@ TEST_CASE("[Image] Saving and loading") {
 	CHECK_MESSAGE(
 			image_jpg->load_jpg_from_buffer(data_jpg) == OK,
 			"The JPG image should load successfully.");
+
+	Ref<Image> image_grayscale_jpg = memnew(Image());
+	Ref<FileAccess> f_grayscale_jpg = FileAccess::open(TestUtils::get_data_path("images/grayscale.jpg"), FileAccess::READ, &err);
+	REQUIRE(f_grayscale_jpg.is_valid());
+	PackedByteArray data_grayscale_jpg;
+	data_grayscale_jpg.resize(f_grayscale_jpg->get_length() + 1);
+	f_grayscale_jpg->get_buffer(data_grayscale_jpg.ptrw(), f_grayscale_jpg->get_length());
+	CHECK_MESSAGE(
+			image_jpg->load_jpg_from_buffer(data_grayscale_jpg) == OK,
+			"The grayscale JPG image should load successfully.");
+
+	// Save JPG
+	const String save_path_jpg = TestUtils::get_temp_path("image.jpg");
+	CHECK_MESSAGE(image->save_jpg(save_path_jpg) == OK,
+			"The image should be saved successfully as a .jpg file.");
+
+#ifdef MODULE_SVG_ENABLED
+	// Load SVG with embedded jpg image
+	Ref<Image> image_svg = memnew(Image());
+	Ref<FileAccess> f_svg = FileAccess::open(TestUtils::get_data_path("images/embedded_jpg.svg"), FileAccess::READ, &err);
+	REQUIRE(f_svg.is_valid());
+	PackedByteArray data_svg;
+	data_svg.resize(f_svg->get_length() + 1);
+	f_svg->get_buffer(data_svg.ptrw(), f_svg->get_length());
+	CHECK_MESSAGE(
+			image_svg->load_svg_from_buffer(data_svg) == OK,
+			"The SVG image should load successfully.");
+#endif // MODULE_SVG_ENABLED
 #endif // MODULE_JPG_ENABLED
 
 #ifdef MODULE_WEBP_ENABLED
diff --git a/tests/data/images/embedded_jpg.svg b/tests/data/images/embedded_jpg.svg
new file mode 100644
index 00000000000..0a60876b56a
--- /dev/null
+++ b/tests/data/images/embedded_jpg.svg
@@ -0,0 +1 @@
+<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" xml:space="preserve" width="100mm" height="100mm" viewBox="0 0 100 100"><path fill="#5297ff" stroke="#00f" stroke-width=".669" d="M 42.8459,50.960656 24.590845,39.298179 4.6345054,47.724621 10.085058,26.759129 -4.0958127,10.383435 17.527869,9.0885256 28.719948,-9.4586514 36.633566,10.706543 57.731522,15.61945 40.998726,29.377135 Z" transform="translate(85.652 80.23)scale(.17903)"/><text xml:space="preserve" x="45.443" y="88.938" fill="#5297ff" stroke="#00f" stroke-width=".669" font-family="Noto Sans" font-size="21.167" font-weight="900" style="-inkscape-font-specification:&quot;Noto Sans, Heavy&quot;;text-align:center" text-anchor="middle"><tspan x="45.443" y="88.938">godot</tspan></text><image xlink:href="data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wBDAAMCAgICAgMCAgIDAwMDBAYEBAQEBAgGBgUGCQgKCgkI CQkKDA8MCgsOCwkJDRENDg8QEBEQCgwSExIQEw8QEBD/2wBDAQMDAwQDBAgEBAgQCwkLEBAQEBAQ EBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBD/wAARCAEAAQADAREA AhEBAxEB/8QAHQABAAIDAQEBAQAAAAAAAAAAAAcIBQYJBAMCAf/EAE0QAAEDAwMCAgQFDQ8EAwAA AAEAAgMEBREGBxIIIRMxFCJBURU3YaG0IzIzQlJTcXKBhJGSsgkWFxgkJVRWYnWipbPC0zZzgrFV Y8H/xAAbAQEAAgMBAQAAAAAAAAAAAAAABgcDBAUCAf/EAD8RAAIBAgIHBAgDCAIDAQEAAAABAgMR BAUGEiExQVGBYaGx0RMiMjVxkbLBFHLhFjRCQ1JTYvAVIzOSovGC/9oADAMBAAIRAxEAPwDlUgCA IAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAI AgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCA+tJSVVfVQ0NDTS1FTUSNihhiYXvke44a 1rR3JJIAA7kleoxlOSjFXbPkpKCcpOyRKmkOl7d7VsQqZLJDYqZ0bnslvEhgc5zXhvDwmtdK1x7k cmBpDSc9256+GyDHYiz1NVPjLZ3b+44uK0hwGFdtbWf+O3v2LvuSxproltcfhzaw1vVVHKnHiU9t p2w+HOcZxLJz5sHrD7G0nse3ku3Q0Ugtteo3s3JW2/F3uuiOHiNLZvZh6aW3e3fZ8Faz6sgzenaq u2n1lPZeNVNaan6ta62ZjR48WByaS3sXsceLvInAdxaHtUezXLpZbXcNuq/Zb4rzW5/O21EkynMo 5lh1U2KS9pLg/J718ruzNBXMOmEAQBAb9sttTX7saxhs/CrhtFN9WulbAwH0eLBLWgu7B7y3i3zI 9Z3FwY5dPKsulmNdQ26q9prh+r3L52dmcvNszhllBz2a79lPjz6K930V02ic9S9Etrk8SbR+t6qn 4058OnuVM2bxJxnGZY+HBh9UfY3Edz63kpDX0Ug9tCo1s3NX2/FWsujI5h9LZrZXpp7d6dtnwd7v qiJ9X9L272kojUx2WG+0zY2vfLZ5DO5rnPLeHhOa2Vzh2ceLC0BwOezscTEZBjsPd6mslxjt7t/c dvC6Q4DEuzlqv/LZ37V839iK6ukqqCqmoa6mlp6mnkdFNDKwsfG9pw5rmnuCCCCD3BC5EoyhJxkr NHbjJTSlF3TPkvJ9CAIAgCAIAgCAIAgCAIAgCAIAgCAIAgLObQdJNDfLXa9Ya8vnjUVwp4a6nttA XN5xSNjkYJpXAEZBe1zGAHyLZFLss0bjWhGviZXTSaS5Oz2vuaXRkOzTSaVGcqGGjZptNvmrrYu9 N9UWR0foDRugKF1v0fp6ktsT/sjo2l0suHOI8SVxL34L3Y5OOAcDA7KWYXB0MFHUoRUV4/F73v4k SxWNxGNlrV5uXgvgty6GwLZNUICLOo7bX+Ebbmq+D6TxbzZc19v4R8pJOI+qwNw1zzzZnDG45SNi ycBcbPMB+Owr1V60dq+63N7Vw4ux28hx/wCBxa1n6ktj+z322PjwVygqrUs0IAgCAv1047a/wdbc 0vp9J4V5vXG4XDnHxkj5D6lCcsa8cGYyx2eMjpcHBVlZHgPwOFWsvWltf2W5PYuHO5WOfY/8di3q v1Y7Fy7Xva2vit6SJTXZOKEBr+sNAaN1/Qtt+sNPUlyiZ9jdI0tliy5pPhytIezJY3PFwyBg5HZa 2KwdDGx1K8VJd6+D3rdwNrC43EYKWtQm4+D+K3PqVu3g6SqCx2u56w0HfPAorfTz11TbrgS7hFG2 SR4hlaCTgBjWseCfMmRRPM9G40oSr4aWxJtp8ld7H3JPqyW5XpNKtONDExu20k1zdltXe2uiKxqI kxCAIAgCAIAgCAIAgCAIAgCAIAgCAIDpHtRn+C3R2f8A4C3/AEditbLv3Ol+WPgipMy/fa35peLN qW6aQQBAEBQXqO22G3W49UKClEVnvWbhQcI+McfI/VIRhrWDg/OGNzxjdFk5KrXPMB+BxT1V6ktq +63W2PhwVizsizD8fhFrP147H9nvb2ri97uRYuMdoICU+nHbX+EbcWlNfSeLZrJxr7hzj5RycT9S gOWuYeb8ZY7HKNsuDkLsZHgfxuLWsvVjtf2XLa+HK5xc+x/4HCPVfry2L7vensXFbnYv0rLKxCAI AgNV3X+K3WP9wXD6M9aWY/udX8svBm7lv77R/NHxRzcVUlthAEAQBAEAQBAEAQBAEAQBAEAQBAEB f/poq6qt2Q0xNWVMs8jY6mEPleXEMZUysY0E+xrGtaB5AAAdgrMyKcp5fTcnff3NpfJbCr8/hGnm NVQVlsezm0m31e19pJy65xwgCAIDFXzSmltT+B++XTVqu3o3LwPTqOOfwuWOXHmDxzxbnHnge5Ya 2Go4i3pYKVuaT8TPRxNbD39DNxvvs2vAy8ssk0j5ppHSSSOLnPcclzickk+0lZt5qUqUKFONKlFR jFJJJWSS2JJLYkuCEUskMjJoZHRyRuDmvacFrgcgg+whNwq0oV6cqVWKlGSaaaumnsaaexp8UYix 6U0tpjx/3taatVp9J4+P6DRxweLxzx5cAOWOTsZ8sn3rDRw1HD39FBRvySXgbdbE1sRb003K267b 8TKrMYAgCAICMupatrKDZDU89DVzU8joqeFz4pCxxjkqYmSMJH2rmOc0jyIcQexXHz9tZdUt2fUj sZBCM8xpKSvv7k2vk9pz/VaFoBAEAQBAEAQBAEAQBAEAQBAEAQBAEBerpMuNXXbMUFNU2yWljt9Z VU1PK/OKqMyGUytyB2D5Xx9iRmM985AsbRybngIpq1m18dt799uO4rbSWnGGYSad7pN9my1vkk+p Ma7pwAgCAIAgCAIAgCAIAgCAhzqzuNXQ7MV9NTWyWqjuFZS01RKzOKWMSCUSuwD2L4mR9yBmQd84 B4Wkc3DASSV7tL4bb37rdTv6NU4zzCMm7WTa7dlrfJ36FFVXJZIQBAEAQBAEAQBAEAQBAbntVtVq PdnUbbJZW+BSwcZK+vkYTFSRE+Z8uTzghrAQXEHuGhzm9DLsuq5lV9HT2Jb3wS8+S4/C7OfmWZUs to+kqbW9y4t+XN8PjZOXtxOja+23nX7a3X4XgGP5vrnsiqh9YPVl9WOTuXuPLw8AADmV28dovUp+ thJay5PY+HHc+PLqcLA6VUqnqYuOq+a2rjw3rhz6FeLpabrY6+W13u2VdvrYOPi01VC6KVnJocOT HAEZBBGR5EFRepTnSk4VE01wexkqp1YVoqdNpp8VtR5V4PZ77BYLzqm80mn9P2+WuuFdIIoIIh3c fMnJ7AAAkuJAABJIAJWSjRqYiapUleT3IxVq1PD03Vqu0VvZb7bDpF0tpjhctwpoNRXSKZxbBA9/ wc0Ne0xu4uax8pw0kh4DCHlpYccjOcv0bo0Ep4r15cv4ezk312bbW4kAxmlOIxcV+Hi6cWlsdtdN ram05RVr29VvarqVmWAUmI2EAQBAEAQBAEAQBAEAQBAPPsUBAm8HSpp7WbpL5oIUWn7wRBGaRsbY bdKxg4k8ImExvLeJLmghxYct5Pc9RnNdHqWIi6uFWrPlsUXbwfdz3tkkyXPqmCaoYluUG27tuUk5 O+9ttxTbsuCso2jFRKeX+wXnS15q9P6gt8tDcKGTwp4JR3afMEEdiCCCHAkEEEEggqDVqNTDzdKq rSW9E/o1qeIpqrSd4vczwLGZT1Wu03S+V0VrsttqrhWz8vCp6WF0sr8AuPFjQScAEnA8gSvdOnOr JQpptvgtrPFSpCjFzqNJLi9iLEbd9G19uXCv3KuvwRAc/wA30L2S1R+vHrS+tHH3DHDj4mWkg8Cp RgtF6lT1sXLVXJbX89y4c+hFcdpVSp+rhI6z5vYuHDe+K4W7SId1dqdR7TaidZb0zx6Sfk+33BjC IquIEdx58XjIDmEktJHctLXO4eY5dVy2r6OptT3Pg1581w+Fmd3LcypZlS9JT2Nb1xT8uT4/G6Wl rQOgEAQBAEBmtGaRvOvNT2/SVgZE6uuMhZGZpODGBrS573H3Na1zjgEkDABOAdjC4apjK0aFLe// AN8DWxmLp4GhKvV9lctvYu/pzsjoVtnoC2bZaModIWyUTim5SVFUYWxvqZ3nL5HBv5GtyXEMaxvI 8cqzcvwMMvoKjDbxb5vn/vAq3MMdPMMRKvPZfcuS5efbd2NpW8aRq24W2ej9z7THadXW0zinMjqS ojeY5qV728S6Nw/A0lrgWksbyaeIWjjcvw+Pjq143fB8V1+z2dhu4HMMRl89eg7X3rg/j/t99mVh 1J0aa5j1W+i0jcrbUWKodNJBW19SY30rGt5MZO1rCXPcfUDomuBI5OEbSeMPr6MYmFVQpSTi77Xs tsvt379ytfttcl1PSzDxw6nVhJz2JqKW27s2rtK0V6zTd7JqOtKydjtrNn9JbU2aGjtFJFU3Mxlt XdZYWioqC4tLhnuWR5a3EYOBxBPJ2XGW5fllDLqajTV5cZcX5Ls8XtItmOaV8yqOVR2jwjwXm+3w Ww3ldE5oQBAEAQBAEAQBAEAQBAEAQBAEBou6ez2kt1rPNSXejip7oIw2jusULTUU5aXFoJ7F8eXO zGTg8iRxdhw52Y5ZQzGm4zVpcJcV5rs++06WXZrXy2adN3jxjwfk+3xWwrdpjo313U6n9D1bX2+j slNJGZqyjqPEkqoy0uc2BpblpBAYTK1oBPJokAwYjQ0YxdSbjVajFcd99+5L723kvr6VYOnBOinK T4brbtjb+PDWWzeWi2+2z0dthan2rSNs9H9I8N1VUSPMk9S9jeIc95/KeLQGAucWtGSpjgstw2Aj ajHbz4vr03LZ2ENxuZYnMJXry2cuC6dd7u+02lbxoGq7m7fWrc7R1bpK6SeB6RxkpqoRNkfTTtOW SNDvytdggljntyOWVo5hgKeY0HRqbOKfJ/f4ffab+XZhUy2uq1NX4Nc19vj9thz21npG86D1PX6S 1BHE2ut0gZJ4Ugexwc0PY9p9zmua4ZAIzggHIFY4rDVMJWlQqb0WjhMVTxlGNeluf++JhVgNgIAg CAvf067LfwWacfcb9TUrtS3UB1TIwcnUsGGltMH5IOCOTy3ALiB6wY1xsfJMq/46lrVUvSS39i5X 73bjzsmVpnubf8jV1KTfo47u18/K/Dldol5dw4QQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQB AEAQERdRGyh3V07HW6fpaNup7Yc00sp4OqoMO5UxeSGjLiHNL8gOBGWB73LhZ7lf4+jr01/2R3dq 5eXbyuzu5Hm3/G1XGq36OW/sfO3c7cOdkih6rkssIAgJ36RNv6XVOu6nVVyEUlNpeOOaOF2CXVUv IROLS0gtYGSOzkEPEZGe6kmjWDWIxLrT3Q8Xe3y2vmnYjWk+NlhsMqMN8/Bb/ndL4XLrKfleBAEA QBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQFKur3QFLpfXlNqu3NijptURySywsAaW1UXE SuDQ0AB4fG7JLnF5kJxkKAaS4NYfEqvHdPxVr/PY+bdywtF8Y8RhXQlvh4O9uPDauCSskQOo2SYI Df8AZXdau2m1jDeOVXNaKn6jdKKB7R6RFghrgHdi9jncm+RPrN5ND3FdHK8e8uxKrWutzXZ/+2fS 1zm5rgFmWGdHc96fb/t11vY6EUdXSXCkgr6CpiqKapjbNDNE8OZJG4Za5rh2IIIII96tCMlNKUXd MquUZQk4yVmj7L0eQgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgCAIAgPlVVVLQ0s1dXVMVNTU 8bpZppnhkcbGjLnOcezQACST2AC8ylGCcpOyR6hCVSShFXbOe29O69x3Z1fNdjJVxWelJitVDM8E U8WGhzuLewfIWhzvM/Wt5ODGqsM1zB5jiHV3R3Jcl+r2919haWU5bDLcOqezWftPm/Jbl87JtmgL mnUCAIC0nSVvNQUdN/BZqq5sp8yj4Ac6ENY50jnulgfLn64vc0xgt9Yue3lnw2GYaN5pGC/B1nb+ nrvV/Dqr7kQnSfK6sqixlCN42evtd1a2q1G21WvrPWVrR9V3k1alTMhoQBAEAQBAEAQBAEAQBAEA QBAEAQBAEAQBAEAQBAEAQFVurTeegrKY7WaVuTKjEp+H3Nha5jXRuY6KBkuT6we1xkAaOJaxvLPi MEM0kzSMl+Dou/8AV03K/wAd/RX3omWjOV1YVHjK8bRstTa73d9ZuNtitbVes73l6qtFurah5Ngg CAID60lXVUFVDX0FTLTVNNI2aGaF5Y+N7TlrmuHcEEAgjuCEB0P2Q1deddbW2HVGoHxSXCrjmZPJ FHwEhimkiDy0dgXBgJxgZJwAMAWllOJqYvBwrVfad79G19iqc3wtPB42dGl7Ktbqk/ubyuic0IAg ABJAAySgJN0l037v6xgZW0WlZKKkkGWz3GQUwcD5ENd65HyhuFycTneBwz1ZTu+S2/p3nXw2RY7F LWjCy5vZ+vcbTU9Ge78EJlinsFQ4DPhxVrw4/rRgfOtOOk+Cbs9ZdP1NyWi2OSutV9f0Iw1ntlrz b6UR6v0xWW9rncWTOaHwvPubKwlhPyA5XXwuPw2MV6E0/H5bzkYrAYnBO1eDXh89xrC2zTCAIAgC AIAgCAIAgNn0ZtlrzcGUx6Q0xWXBrXcXzNaGQsPudK8hgPyZytTFY/DYNXrzS8flvNzC4DE412oQ b8PnuJPpujPd+eESyz2CncRnw5a15cP1YyPnXIlpPgk7LWfT9Trx0WxzV24rr+hq2rum/d/R0D62 t0rJW0kYy6e3SCoAHtJa31wPlLcLcw2d4HEvVjOz5PZ+neaeJyLHYVa0oXXNbf17iMiCDgjBC6xy AgCAIDRt79XXnQm1t91Tp98UdwpI4WQSSx8xGZZo4i/iexLQ8kZyMgZBGQedm2JqYPBzrUvaVrdW l9zpZRhaeMxsKNX2Xe/RN/Y541tbWXGsnuFwq5qqqqpXTTzzSF8ksjiS57nHu5xJJJPckqrm23dl qQhGEVGKsluR8V8PQQBAEAQF+ul/4i9M/nv0yZWVo/7up9fqZWOkPvKp0+lEprsnFCA/cUUs8rII I3SSSODGMYMuc4nAAA8yvjaSuz6k5OyLxbAdN9n0Db6XVGr6GGt1PM0StZKA+O357hrB5GQe1/sP ZvbJdX+cZ3PGSdGg7U/H9Oz5liZLkUMHFVq6vUf/AM/r2/Imm8Xqz6ets95v91o7bQUrec9VVzth hib73PcQGj8JUdJGQ9V9avS7RVxt027tudKHceUNJVSxZ/7jIizHy5wvtgSRp/VW3m7GnZqjTl8s uqLNUjwZxTzR1MRyPrJG9+J/suAPyL1CcqUlODs0eZwjUi4TV0yoXUl0+M21qBq3SUcj9OVkvCSE kudQSnyaSe5jd9qT3B7E9wTPsjzn8cvQ1vbXevMr3Pcl/AP09D/xvuflyIHUiI2EAQBAEAQBAEBP HTb0+N3KqDq3VscjNN0kvCOEEtdXyjzaCO4jH2xHcnsD2JEdzzOfwK9DR9t9y8ySZFkv49+nr/8A jXe/It5f9U7d7S6chqNRXuyaWs1OPBgFRLHTRdvtI29uR/stBKgM5zqyc5u7ZYNOnClFQgrJcERx SdavS7W1wt0O7tubKXceU1JVRRZ/7j4gzHy5wvNj2TDZ7zZ9Q22C82C60dyoKpvOCqpJ2zQyt97X tJDh+Ar4CFuoDpwtGv7fU6o0jRRUWp4WmVzYwGR3DHctePISe5/tPZ3bBbIcnzueDkqNd3p+H6dn yI5nWRQxsXWoK1Rf/X69vzKOSxSwSvgnjdHJG4sex4w5rgcEEHyKsFNNXRXbTi7M/K+nwICLOqD4 i9TfmX0yFcbSD3dU6fUjtaPe8qfX6WUFValnBAEAQBAEBfrpf+IvTP579MmVlaP+7qfX6mVjpD7y qdPpRKa7JxQgJv6RdEU+qt0BeK+ESUunac1wDhkGoJDYs/gJc8fKwKP6R4p4fCakd83bpx8upIdG sIsRjPSS3QV+vDz6F29Q3+06VsNx1NfqxlJbbVSy1lXO/wAo4o2lznfL2B7KvCxzjn1G9SOtOoTV 09yutXPSaepZXC0WZsn1Kmj8g54HZ8pHdzznzwMNAC9JAiFfQbXtnujrfaLVVNrDQd8mt1fTuHNo JMVRHnvFKzyew+4/hGCAR8B172z13pXqi2OivYpmxU9+pJKG40nLm6jq2jD2A+9rsPYexILHdsrJ h688NVjVhvTuYcRQhiaUqM90lYoJd7ZU2W7VtmrW8aigqJKaYe57HFrvnBVtU6iqwVSO5q/zKgq0 3Sm6ct6bXyPIvZ4CAtNsl0mWTUel6LV+4VXWH4TibUUlBTSCMNhcMsfI7BJLhggDGARkk9hEM10i qUKzoYZLZsbfPsJllOjdOvRVfFN+ttSWzZ2n83t6TLLpzS9bq/b2rrB8GROqKu31LxIHQtGXvjdg EFoySDnIBwQexZVpFUr1lQxKW3YmufaM20bp0KLr4Vv1drT27Owq0peQ0ID12i2VV7u1FZqJvKor 6iOlhHve9wa35yFjqVFSg6ktyV/ke6VN1ZqnHe2l8y/e5uvNK9Lux0t8NM2WmsNJHQ26k5cHVlU7 sxmfe52XvPcgB7u+FU2IrTxNWVWe9u5b+HoQw1KNGG6KschNzN0db7u6qqdYa8vk1xr6gng0nEVP HntFEzyYwe4fhOSSTjMxqi+gl7py6kdadPeroLlaquer09UytF3szpPqVTF5FzAezJQO7XjHlg5a SF8aB2N09f7Tqqw27U1irGVVuutLFWUk7PKSKRoc135QQvIKR9XOiKfSu6BvFBCI6XUVOK4taMAV AJbLj8JDXn5XlWHo5iniMJqS3wdunDy6FcaS4RYbGekjumr9ePn1IQUgI8EBFnVB8RepvzL6ZCuN pB7uqdPqR2tHveVPr9LKCqtSzggCAIAgCAv10v8AxF6Z/PfpkysrR/3dT6/UysdIfeVTp9KJTXZO KEBazoVMPPWjTjxiLeR7+P8AKM4/Lj5lDtLL2o8vW+xNNELf93P1fubZ17XCut/S1q40LnN9IfQU 8zm+YidWRB35D9afkcociaHIdegEAQHRb9yxuFdLo7Xtqkc70OmudHUQg+Qlkie2THy4ij+ZeWCP N53Qu3b1iafHH4brM4+68V3L58q08sv+CpX/AKV4FTZrb8dVt/U/E0xb5oBAdPdAgDQmnAB2+CaP /Raqlxn7zU/M/Et/Bfu1P8q8Br4A6E1GCMg2is/0XL5g/wB4p/mXiMZ+7VPyvwOYStsqAIDc9mHQ t3b0cajHD4bo8Z+68VvH58LQzO/4Krb+l+Bv5Vb8dSv/AFLxJD/dTrhXRaP0Fao3O9DqblW1Ew9h ljijbHn5cSyfpKqxFsnOlegEAQHXnoLuNdcelrSDq9znGndXU8T3ebomVkwb+QfWj5Grywaj11GH xNFtGPGDbgT7+P8AJ8Z/Ln51MdE72rcvV+5CtL7Xo8/W+xVRTEhgQEWdUHxF6m/MvpkK42kHu6p0 +pHa0e95U+v0soKq1LOCAIAgCAIC/XS/8Remfz36ZMrK0f8Ad1Pr9TKx0h95VOn0olNdk4oQE0dJ +u6fR26MVvuE4iotQwm3uc44a2YuDoifwuHD/wA1wdIcI8Tg3KO+G3px8+h39HMYsLjFGT2T2deH l1Lh7vbd0G7O2eoturjKIY75RPgZKRnwZgQ+KTHt4yNY7HyKuiyTidrPR2o9v9UXLRurLbJQXa1T up6mB/scPIg+TmkEOa4diCCOxXsGFQH7hhmqJmU9PE+WWVwYxjGlznOJwAAPMk+xAdZOmHbpnSz0 3VN61tF6PeKwSXy6wuIDo5HMayGlz90AGNx9293sWXDYeeLrRow3tmvisRDCUZVp7kv9+ZUG5XCq u1xqrrWv51FbO+omd9097i5x/SSrYhBU4qEdy2FRVJurNzlvbueZezwEB090D/0Lpz+6aP8A0Wqp cZ+81PzPxLfwX7tT/KvAa+/6F1H/AHTWf6Ll8wf7xT/MvEYz92qflfgcwlbZUAQHptlwqrTcqS60 T+FRRTx1ELvc9jg5p/SAvFSCqQcJbnsPdObpTU4707lvep/bpnVN03Ut60TF6ReKMR3y1QNILpJG sc2elz90QXtx92xoVT4nDywlaVGe9P8A35lu4XEQxdGNaG5r/fkcnJoZaeV8E8T45Y3Fj2PaQ5rg cEEHyIKxGwfhAZrRmjtRbgaotujdJ22Svu11nbT00EY83HzcT9q1oy5zj2ABJ7BAdstotu6HafbT Tu3dulE0djoWU75gMCaY5dLJj2cpHPdj+0vAKd9WGuqfWO6U1vt84lotPQi3Nc05a6YOLpSPwOPD /wAFYuj2EeGwalLfPb04efUrbSPGLFYxxg9kNnXj5dCF13jgBARZ1QfEXqb8y+mQrjaQe7qnT6kd rR73lT6/SygqrUs4IAgCAIAgL9dL/wARemfz36ZMrK0f93U+v1MrHSH3lU6fSiU12TihAf1rnMcH scWuacgg4IPvXzeE7bUXS6f+pu06ooKXSO4Fxjor7C1sMFbO4NirwOw5OPZsvvB7OPcdzhQTOMhn h5OvhleD4cV+ngT/ACXP4YiKoYp2mtz4P9fE3HfLpl2p6gKKMa0tMkN1pozHSXeheIquFuc8eRBb IzOfVeHAZOME5UY3EpKs1f7lW11cTQb2llGXZAm0/wApWj3ZFQA4/LgfgX24Jp2l6Qdh+mpn7/r7 cPhW7UA5tvF6cxsdI73wRD1WuPsJ5vz2aRnCyUqVTETVOkrt8EYq1anh4OpVkklxZEXURv8Ay7q1 zLBp7xqfTNDJzYHji+slGQJXj2NAJ4t8+5J7kBtgZLk6y+PpKu2o+5cvMrzO85eYy9FS2U13vn5E KrvEfCAIDp3t89kugtNSRuDmvtFG5pHkQYWd1UuNVsTUT/qfiW/gnfDU2v6V4DcGRkWgdSyyODWM tFY5xPkAIXpg1fE00v6l4jGtLDVG/wCl+BzEVtFQBAEBNXTvv/NtVXPsGoRLUaarpObwzLn0cp7G Vg9rT25N8+wI7gh3BzrJ1mEfSUtlRd65eRIMkzl5dL0VXbTfc+fmS9u10hbD9S0f7/rFcPgq7V45 uvNlcx8dU7HnPEfVc4e0jg/PZx7YVf1aVTDzdOqrNcGWHRrU68FUpNNPiiFqT9yra2uBr97S+jDs kQ6f4yuHuyaghp+XB/AsdzKWk2N6Zdqen+jk/eXaZJ7rUx+HVXiveJauZvnxDgA2NmceqwNBwM5I yl7g0/qA6m7RpegqtI7f3GOtvszXQz1sDg6KhB7Hi4dnS+4Ds09z3GFJsnyGdeSr4lWguD3v9PEi 2dZ/DDxdDCu83xW5fr4FLXOc9xe9xc5xySTkkqd7iAbz+L6AgIs6oPiL1N+ZfTIVxtIPd1Tp9SO1 o97yp9fpZQVVqWcEAQBAEAQF+ul/4i9M/nv0yZWVo/7up9fqZWOkPvKp0+lEprsnFCAIAgN70lvl utoiBlHYNZ1rKWMcWU1RxqImt9zWyBwaPxcLnYnKcHinrVaavzWx9x0sNm+Nwi1aVR25Pau82mp6 tt7aiExR36ip3EY8SK3w8v8AECPmWnHRzAJ3cW+rNyWkmYSVlJLoiNtUa21drWqFbqvUVddJW54e kTFzY8+fBv1rR8gAXVw+Fo4WOrRio/A5OIxdfFy1q83L4mEWwa4QBAEBPmzvVdd9u7FBpPUdkN6t tIOFJLHN4c8DPuO4Ie0ezyIHbJAAEbzPR6GNqOtSlqye/k/IkuV6RzwNNUKsdaK3c15jeLquu+4l in0npyyGy2yrHGrlkm8Sednnw7ABjT7R3JHbIBIP3LNHoYKoq1WWtJbuS8xmmkdTHU3QpR1Yvfzf kQGpGRoIAgCAzel9bau0VVGt0pqKvtcrsc/R5i1smPLm361w/CCtfEYWjio6taKl8TYw+Lr4SWtQ m4/Akmm6tt7aeERSX6iqHAY8SW3w8v8ACAPmXKlo5gG7qLXVnWjpLmCVnJPojVtW75bra3gfR6g1 nWvpZBxfTU/Gnic33ObGGhw/GytzDZTg8K9alTV+b2vvNPE5vjcWtWrUduS2LuNEXROaEAQBARZ1 QfEXqb8y+mQrjaQe7qnT6kdrR73lT6/SygqrUs4IAgCAIAgL99MDSNi9MEjAcK0j5f5ZOrK0f93U +v1MrHSL3lU6fSiUl2TihAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEBFvU+1zti9TkDIaKIn 5B6bAP8A9XG0g93VOn1I7WjvvKn1+llBFWpZwQBAEAQBAd7P3MrTWnLx0K7aPu9gttc53wyCamlj lJ/nit+6BWWFerS9iTXwbMU6FKrtnFP4pMsRXbLbSXEEVO3GnhnzMVvjiP6WAFbUM0xsN1WXzb8T UnlWBnvpR+SXga9X9Lux1fknRLYHfdQVtQzH5A/HzLZhn+YQ/mX+KXka09H8un/Lt8G/M12v6M9o asH0ae/0RPl4Nax2P12OW1DSfGx3qL6eTNSei2Blucl180zXa7oc0vJn4M13dIPd49LHN+yWLahp XWXt00/g2vM1Z6JUX7FRr4pPyK/bzbOXjZy/09quFwiuFJXRGakrI4zH4gacOa5hJ4uBI7ZIw4d/ YJJlmZ08zpucVZreiMZpldTK6qhJ3T3Mj5dM5gQBAEB+44ZpQTFE9+PPi0nC+Npbz6ot7j9+h1f9 Fm/UK+a0eZ91Jch6HV/0Wb9QprR5jUlyHodX/RZv1CmtHmNSXIeh1f8ARZv1CmtHmNSXI/EkM0QB liezPlyaRlfU09x8cWt5+F9PgQBAEBIOzOzl43jv9RarfcIrfSUMQmrKySMyeGHHDWtYCOTiQe2Q MNPfyB5mZ5nTyympyV29yOnleV1M0quEXZLeywND0OaXjx8J67uk/v8AApY4f2i9RueldZ+xTS+L b8iTw0Sor26jfwSXmbBRdGO0dLg1FZqGsPt8asjaP8EbVqz0nxstyiuj+7NqGi2Bjvcn1X2RsNB0 tbHUOCdGmpePtp6+od83MN+Za08/zCf8y3wS8jaho9l0P5d/i35mxUGyW0VtwabbmwEt8jNRMmP6 XgrVnmuNnvqy+dvA2oZTgae6lH5X8SvP7prprTln6Fdy32iwW2hc34GANNSxxEfzxRfcgLVnXq1f bk38WzbhQpUvYil8EkcE1iMoQBAEAQBAdD+knVmqbDsTpllj1Ldbc0em4FJWSQgfyyc/akKxMjw9 Gtl1N1IJ796T/iZW+e4mtRzKp6ObW7c2v4UTvQ757wW7Ho+499fj7/Vun/1OS3p5TgZ76Ueit4Gh DN8dDdVl1d/E2Kh6q98aMBr9WxVTR7J7fTn52sB+da09H8vl/Bbq/M2oaRZjD+O/xS8jYqDrS3Wp cNrLXp2sb7S+llY79LZAPmWrPRfBy9lyXVeRtw0qxsfaUX0fmTz09b73PeV97p7tY6S3y2ltO9hp 5HOEokMgOQ7yxwHt9qjuc5RDLNRwk3rX39liSZJnE8011OKTjbd238iPOuil5Uejq4D7HLXRE/jC Ej9krp6Jy9arH8v3OXpdH1aMvzfYqYpmQkIAgPtRUktfWQUMAzLUStiYPe5xAHzleZSUIuT4HqEX OSiuJ1C03pyz6SsdHp+x0UVLRUUTYo2MaG5wMFxx5uPmSe5Jyqjr154mo6tR3bLgoUKeGpqlTVkj IeND99Z+sFjszLdDxofvrP1glmLoeND99Z+sEsxdDxofvrP1glmLox2pdOWfVtjrNPX2jiqqKtid FIx7QcZHZwz5OHmCO4IystCvPDVFVpuzRir0KeJpulUV0zl9W0stDWT0M4xLTyuiePc5pIPzhW3G SnFSXEp+cXCTi+B8V6PIQBAWz6F6TjR6xriPsktDED+KJif2goZpZL1qUfzfYm2iMfVrS/L9yQ+o Xfe57NPslPabHSXCW7NqHvNRI5oiEZjAwG+eeZ9vsXMybKIZnrucmtW27tudTO84nleooRTcr7+y 3mQRXdam6tRltJaNOUjfYW0sz3D8rpSPmUihovg4+1KT6ryI3PSrGy9mMV0fma9XdV2+FZkRapgp AfZBb6f/ANuY4/OtmGj2Xx3wv1fmas9I8xlunb4JeRrtfvvvFcs+kbjXtmf6PUmD/T4rahlGBp7q S6q/ias84x9TfVl0dvAgXq21Xqm/bE6mZfNS3W4tPoWRV1kkwP8ALIPuiVo55h6NHLqjpwS3bkl/ EjeyLE1q2ZU/STb3723/AAs54KuyyAgCAIAgCAv10v8AxF6Z/PfpkysrR/3dT6/UysdIfeVTp9KJ TXZOKEAQFleh2q4aw1JRZ+y22OXH4koH+9RTSuN6FOXb9v0JbolK1epHs+/6m79b9Lz0JYK3H2K7 mLP48Lz/ALFoaKytiJx/x+68zoaWxvhqcv8AL7PyKZqdECCAIDctmbT8N7r6Ttxbya67U8rx72Rv D3D9DStDM6nosHVl/i+/Yb+V0/S42lH/ACXdtL/7q3b4D201TdQ7i+ntFUYz/wDYYnBn+IhVvl9P 0uLpw5yXiWbmNT0OEqz5Rfgczla5UYQBAEB0x2qu3w5tppa6l3J9RaKUyH+2Imh/+IFVRmFP0WLq Q5SfiW3l1T02EpT5xXgUA3mtPwJuvqy3BvFrbtUSsHuZI8vaP0OCsjLKnpcHSl/iu7YVnmlP0WNq w/yfftNNW+aAQBAXM6IKXhoS/wBdj7LdxFn8SFh/3qC6VSviIR/x+78ie6JRthqkv8vsvM0jriqu esNN0OfsVtklx+PKR/sW/opG1CpLt+36nP0tlevTj2ff9CtSlZEggCAizqg+IzUv5l9MhXG0g93V On1I7Wj3vKn1+llBVWpZwQBAEAQBAXG6LdSfCGh71piWWrkls9wbUM8R2Yo4Khnqsj75HrxTOIwB l+e5JxO9Fa+vh50Xf1Xfss+C6pvqQHSygqeJhVVvWXW8Xtb6NL/UWHUoIqEAQE3dIWprXpzdh0V2 roqWO7W2WhhfK4NaZjJG9rcnsCfDcB7yQPMhR/SShOvgrwV9Vp9LNfckOjOIhQxtpu2smut0/sXm rKGhuMPo9wo4KmLPLhNGHtz78Ht7Sq+jOUHeLsWLKEaitJXRgqrbbbquya3QOnKgn77aoHf+2rYj jsVD2akl1fma8sBhZ+1Si/8A+V5GIq9itnq0ETbc2NufvVKIv2MLPHNsdHdVl87mvLKMBPfSj8re Bhavpe2Mq8l2h2xO98NdUs+YSY+ZZ45/mEf5ncvIwS0fy6X8vvfmfTSXTftfojVFFq7T1BXQ11AZ DCH1bpIwXscw5Ds57OOO/nhfMTneLxVF0KrVn2dT7hsiweErKvSTTXb2WPH1WXb4L2SvUTX8X18t NSMP4ZmucP1WOWTR6n6TMIPld9xj0iqejy+a52XeUBVkFZhAEAQF/ulO7fCmyVlic7k+glqaR5/B M5zR+q9qrbSGn6PMJvnZ9xZmjtT0mXwXK67z2at6b9r9b6ordXahoK6aurzGZgyrdHGSxjWDAbjH Zoz388rxhs7xeFoqhSasuzqZMTkWDxdZ16qbb7eyx8qbpb2LpsEaIErh7ZbhVOz+TxMfMvss/wAx l/M7l5HmOj2XR/l978zMUmwuzlEAIdurM7H32Dxf2yVglm+Olvqvw8DYjk+AjupL5X8TNUm2u3VB j0HQOnYMeRjtcDT+kNWCWOxU/aqSfVmeOAwsPZpRXReRnaOhobdD6Pb6OCmizy4Qxhjc+/A7ewLX lOU3eTubMYRpq0VZFGer3U1r1Huw2K010VVHabbDQzPicHNEwkke5uR2JHiAH3EEeYVg6N0J0MFe atrNvpZL7FdaTYiFfG2g76qS63b+5CKkBHggCArz1pakdb9DWXTMUlXHJebg6d5jPGKSGnZ60cnf 1vXmhcG4IyzPYgZi2lOJUKEKCe2Tv0XPq1b4dhKtFMOqmJnWdvVXVN8V0TXUpwoKT4IAgCAIAgJB 2I3CbttuVbL3WVLYLZVO+D7o9zSQylkc3lIeLHvxG4MkwxvJ3h8R9cV08oxv4DFRqN+q9j+D+b2b 9nKxyM9wjxmBnGEXKcVrRStdySdkruMby9lazSV7t7DoPSVdLX0sNdQ1MVRTVEbZYZonh7JGOGWu a4diCCCCOxBVnRlGaUou6ZWEoyhJxkrNH1Xo8hAEBkKTUWoLeAKC+3CmDfLwap7MfoKxSo05+1FP oZY16sPZk11ZmaXdTc6iwKXcTUsQH2rbrPx/RywsEsvwkt9KP/qvIzxzHGQ9mrL/ANn5mXpN/wDe aiIMO4l3dj79IJf2wVglk+AlvpLw8DPHOcfDdVfj4mapOqrfKlwH6wjqGj2TW6mPziMH51glo/l8 v4LdX5mxHSLMY/zL9F5GZpOsneGnx4wsVVj77QuGf1HtWCWjOBlu1l180bEdKMfHfqvp5M1DdLfb Xe7kFLRalfQ09FRyeMykoYnRxGXBHN3JznEgEgd8DJwO63cvynD5c3Kldt8X/qNDMM4xOZJRq2SX BbvuR2uocsIAgCAkTa3fbXe0cFVQ6bfRVFDVv8Z9JXROkiEuAObeLmuBIAB74OBkdguXmGU4fMWp Vbpriv8AWdTL84xOWpxpWafB7r8+Bt9X1k7w1GfBFipc/eqFxx+u9y0o6M4GO/WfXyRvy0ox8t2q unmzD1XVZvnUE+Hq+KnB9kVtpv8A26MlZ46PZfHfC/V+Zry0izGW6pbovIw9X1Cb0VpJm3Dujc/e iyL9hoWeOTYCO6kvHxMEs6zCe+q/DwMNWbrbn1+fS9xNSSA/am6Thv6A7Czxy/CQ9mlH5IwSzHGT 9qrL/wBmYWr1FqC4AivvtwqQ7z8aqe/P6Ss8aNOHsxS6GvKvVn7Um+rMespiCAID5VdXS2+kmr66 pipqamjdNNNK8MZGxoy5znHsAACST5BeZSUIuUnZI9RjKclGKu2c8N59ypd1dd1ep2wSwULI2Ulv gl4c4qdmSORaPNznPeRl2C/iHEAFVfmmPeY4l1rWW5Ls/wBu+trlqZTl6y3DKje8t7fa/wBLLhuv Y0Zc46QQBAEAQBAEBY3po6hKXScVNtvraaKCzukcLdcCA0Uj3vLnRzH725ziQ892EnkeBzHKshzq OHSwmI2R4Plfg+xvjw47N0Tz/I3iW8Xhts+K52VrrtSW7jw277geanJAwgCAIAgCAIAgCAIAgCAI AgCAIAgCAIAgCAICn3Ux1B0mrI6jbjRU0U9nbI34RuAAc2sexwc2OE/e2uaCXj68gcfUGZILn2dR xKeFw+2PF87cF2dvF7tm+e6P5JLC2xeIVp8FyT4vtfLgt+3dXNRYlYQBAEAQBAEAQBATVs71Oan2 8Is+qHVuorGWwxRRy1JdPQMYGsAhc/OY2xNAEJLW5a3iWety7uWZ9XwLUKnrQ7d6W7Z5btmy28jm YaN4TExcsPFU57XsSScpNybkkt7bbct7bbdy3+hNw9Jbj2aO86Vu0VS0xsfPTF7RUUhcXAMmjBJY cscB7HcSWlwwTO8JjaGOpqpRlfs4r4rh9+GwguMwNfA1HTrRt28H8Hx39Nzs9hsi2zUCAIAgCAIA gCAIAgCAIAgCAIAgCAIDW9d7iaS24s0l51VdoqZoje+CmD2moqy3iCyGMkF5y5oPsbyBcWjJGpjM dQwMNetK3JcX8F/vabeDwNfH1PR0I35vgvi+H34bSoG9HUxf9yWssumI63T9iY2WOeNtV9WuHIvb mUsA4xmNwBhy5vIuJc/1Q2C5tnk8xXoqa1YcuL5X8tu3i9lp1kuQ/wDGylVrSU5vdZW1VZXW1u7v f1rR9Vpaqs3KFVwSRhAEAQBAEAQBAEAQBAeq13a62OviulkudXb62Hl4VTSzOilZyaWni9pBGWkg 4PkSF7p1J0pKdNtNcVsZ4qUoVouFRJp8HtRPuhusnWNrnp6TXlqpb3RD1ZaqmYKesGZAS/A+pP4s 5AMDWZw3Lh3JkmD0nxFK0cQlJc9z8n8LL4kYxmi2HqpywzcXye1ea+N38Cd9IdSO0Or42hmqIrPU +G6R9Pd8UpY0O495HHwiTkENa8nB8uxxJcNnmBxK2T1Xyls793yb8SNYrIcfhX7Gsucdvdv7iT11 YyjOKlF3TORKMoScZKzQXo8hAEAQBAEAQBAEAQBAEAXmUowi5Sdkj1GMpyUYq7ZGOrupHaDSMbvE 1VFd6kRtlZTWjFU6QF/HAkafCaRguLXPBwM47tzycTnuBwy9vWfKO3v3d51sLkOPxT2Q1Vzls4X3 b/knt6kD656ytY3SappNB2qkstEfVhqqlgqKztISH4P1JnJgaCwtfjLsOPYiOYvSivUvHDxUVze1 7/lu4WfxJLhNFaFNKWJk5PilsW7dz2Pbe64bN94Cul2ut8r5bpernV3Ctm4+LU1UzpZX8Who5PcS TgAAZPkAFGqlSdWTnUbbfF7WSinThRioU0klwWxHlXg9hAEAQBAEAQBAEAQBAEAQBAEBlLHqvVGm PHOmtSXW0+k8fG9BrJIPF4548uBHLHI4z5ZPvWajiK2Hv6Kbjfk2vAw1sNRxFlWgpW5pPxJPsvVn vPa6p9RXXi33iN0ZYIK23xsY05B5gwCN2exHdxGCe2cEdilpJj6crykpdjS+1jjVNGcvnHVhFx7U 23/9X8zdLL1uX+ClezUWgrfXVJkJZJRVr6VgZgYBY9spJzk55AYIGO2Tv0tK6qX/AG0k32O3jfxO fW0RpSl/1VWl2q/hbwN0pOtTbh9LC+v01qSGpdG0zRwxQSMY/HrNa8ytLgDnBLWkjvgeS346VYRx WtCV+nmvBHPloni1J6s426+T8TZLV1W7KXChjq6vUVXbJX55UtVbp3SswSBkxNezuBkYcexGcHIW 3T0iy+cVKU2uxp37k13mpU0bzGEtWMFJc01bvs+49f8AGg2L/rx/llZ/xL3+0GXf3O6XkY/2ezL+ 33x8x/Gg2L/rx/llZ/xJ+0GXf3O6XkP2ezL+33x8x/Gg2L/rx/llZ/xJ+0GXf3O6XkP2ezL+33x8 x/Gg2L/rx/llZ/xJ+0GXf3O6XkP2ezL+33x8zyXXqt2Ut9DJV0moqu5ysxxpaW3TtlfkgHBlaxnY HJy4dgcZOAvFTSLL4RcozbfJJ370l3mSno3mM5asoKK5tq3dd9xrdX1qbcMpZn0GmtSTVLY3GGOa GCNj349VrniVxaCcZIa4jzwfJaktKsKk9WEr9PNm3HRPFuS1pxt18l4mo3PrfuctBNHZtu6WlrXB vgzVVydPEw8hnlG2OMuy3kBh7cEg98YOhV0sqOP/AFUkn2u/dZeJvU9EYKSdSq2uxWfzu/A0S99W W891qmz0N4oLPG2MMMFFQRuY45J5kziR2e4HZwGAO2ck8+rpHj6jvGSj2JL73Z0qOjWX0laUXLtb f2su4jK+as1TqfwBqXUt1u3o3LwPTqySfwuWOXHmTxzxbnHnge5citia2It6ablbm2/E7NHDUcPf 0MFG/JJeBilhMwQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQB AEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEAQBAEB//2Q==" width="67.733" height="67.733" x="16.053" y="2.079" preserveAspectRatio="none"/></svg>
diff --git a/tests/data/images/grayscale.jpg b/tests/data/images/grayscale.jpg
new file mode 100644
index 00000000000..9a4f4773fd9
Binary files /dev/null and b/tests/data/images/grayscale.jpg differ
diff --git a/thirdparty/README.md b/thirdparty/README.md
index 8bd36d1b153..8e2483a8c66 100644
--- a/thirdparty/README.md
+++ b/thirdparty/README.md
@@ -80,17 +80,15 @@ Files extracted from upstream source:
 Files extracted from upstream source:
 
 - `encoder/` and `transcoder/` folders, with the following files removed from `encoder`:
-  `jpgd.{cpp,h}`, `3rdparty/{qoi.h,tinydds.h,tinyexr.cpp,tinyexr.h}`
+  `3rdparty/{qoi.h,tinydds.h,tinyexr.cpp,tinyexr.h}`
 - `LICENSE`
 
 Patches:
 
 - `0001-external-zstd-pr344.patch` (GH-73441)
-- `0002-external-jpgd.patch` (GH-88508)
-- `0003-external-tinyexr.patch` (GH-97582)
-- `0004-remove-tinydds-qoi.patch` (GH-97582)
-- `0005-ambiguous-calls.patch` (GH-103968)
-
+- `0002-external-tinyexr.patch` (GH-97582)
+- `0003-remove-tinydds-qoi.patch` (GH-97582)
+- `0004-ambiguous-calls.patch` (GH-103968)
 
 ## brotli
 
@@ -469,22 +467,6 @@ Files extracted from upstream source:
 - `LICENSE`
 
 
-## jpeg-compressor
-
-- Upstream: https://github.com/richgel999/jpeg-compressor
-- Version: 2.00 (aeb7d3b463aa8228b87a28013c15ee50a7e6fcf3, 2020)
-- License: Public domain or MIT
-
-Files extracted from upstream source:
-
-- `jpgd*.{c,h}`
-- `jpge*.{c,h}`
-
-Patches:
-
-- `0001-clang-fortify-fix.patch` (GH-101927)
-
-
 ## libbacktrace
 
 - Upstream: https://github.com/ianlancetaylor/libbacktrace
@@ -503,6 +485,25 @@ Patches:
 - `0001-big-files-support.patch` (GH-100281)
 
 
+## libjpeg-turbo
+
+- Upstream: https://github.com/libjpeg-turbo/libjpeg-turbo
+- Version: git (20ade4dea9589515a69793e447a6c6220b464535, 2024)
+- License: BSD-3-Clause and IJG
+
+Files extracted from upstream source:
+
+- `src/*.{c,h}` except for:
+  * `cdjpeg.c cjpeg.c djpeg.c example.c jcdiffct.c jclhuff.c jclossls.c jcstest.c jddiffct.c jdlhuff.c jdlossls.c jlossls.h jpegtran.c rdbmp.c rdcolmap.c rdgif.c rdjpgcom.c rdppm.c rdswitch.c rdtarga.c strtest.c tjbench.c tjcomp.c tjdecomp.c tjtran.c tjunittest.c tjutil.c wrbmp.c wrgif.c wrjpgcom.c wrppm.c wrtarga.c`
+- `LICENSE.md`
+- `README.ijg`
+
+Patches:
+
+- `0001-cmake-generated-headers.patch` (GH-104347)
+- `0002-disable-16bitlossless.patch` (GH-104347)
+- `0003-remove-bmp-ppm-support.patch` (GH-104347)
+
 ## libktx
 
 - Upstream: https://github.com/KhronosGroup/KTX-Software
diff --git a/thirdparty/basis_universal/encoder/basisu_enc.cpp b/thirdparty/basis_universal/encoder/basisu_enc.cpp
index 5987685ae71..b9804090b15 100644
--- a/thirdparty/basis_universal/encoder/basisu_enc.cpp
+++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp
@@ -492,7 +492,7 @@ namespace basisu
 	bool load_jpg(const char *pFilename, image& img)
 	{
 		int width = 0, height = 0, actual_comps = 0;
-		uint8_t *pImage_data = jpgd::decompress_jpeg_image_from_file(pFilename, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagBoxChromaFiltering);
+		uint8_t *pImage_data = jpgd::decompress_jpeg_image_from_file(pFilename, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering);
 		if (!pImage_data)
 			return false;
 		
@@ -512,7 +512,7 @@ namespace basisu
 		}
 
 		int width = 0, height = 0, actual_comps = 0;
-		uint8_t* pImage_data = jpgd::decompress_jpeg_image_from_memory(pBuf, (int)buf_size, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagBoxChromaFiltering);
+		uint8_t* pImage_data = jpgd::decompress_jpeg_image_from_memory(pBuf, (int)buf_size, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering);
 		if (!pImage_data)
 			return false;
 
diff --git a/thirdparty/jpeg-compressor/jpgd.cpp b/thirdparty/basis_universal/encoder/jpgd.cpp
similarity index 93%
rename from thirdparty/jpeg-compressor/jpgd.cpp
rename to thirdparty/basis_universal/encoder/jpgd.cpp
index d6b5d96aac0..539d3ed8fa7 100644
--- a/thirdparty/jpeg-compressor/jpgd.cpp
+++ b/thirdparty/basis_universal/encoder/jpgd.cpp
@@ -23,9 +23,7 @@
 // v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings
 // v2.00, March 20, 2020: Fuzzed with zzuf and afl. Fixed several issues, converted most assert()'s to run-time checks. Added chroma upsampling. Removed freq. domain upsampling. gcc/clang warnings.
 //
-// Important:
-// #define JPGD_USE_SSE2 to 0 to completely disable SSE2 usage.
-//
+
 #include "jpgd.h"
 #include <string.h>
 #include <algorithm>
@@ -35,20 +33,6 @@
 #pragma warning (disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable
 #endif
 
-#ifndef JPGD_USE_SSE2
-
-	#if defined(__GNUC__)
-		#if defined(__SSE2__)
-			#define JPGD_USE_SSE2 (1)
-		#endif
-	#elif defined(_MSC_VER)
-		#if defined(_M_X64)
-			#define JPGD_USE_SSE2 (1)
-		#endif
-	#endif
-
-#endif
-
 #define JPGD_TRUE (1)
 #define JPGD_FALSE (0)
 
@@ -74,10 +58,6 @@ namespace jpgd {
 
 	enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 };
 
-#if JPGD_USE_SSE2
-#include "jpgd_idct.h"
-#endif
-
 #define CONST_BITS  13
 #define PASS1_BITS  2
 #define SCALEDONE ((int32)1)
@@ -111,7 +91,7 @@ namespace jpgd {
 	template <int NONZERO_COLS>
 	struct Row
 	{
-		static void idct(int* pTemp, const jpgd_block_coeff_t* pSrc)
+		static void idct(int* pTemp, const jpgd_block_t* pSrc)
 		{
 			// ACCESS_COL() will be optimized at compile time to either an array access, or 0. Good compilers will then optimize out muls against 0.
 #define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0)
@@ -156,7 +136,7 @@ namespace jpgd {
 	template <>
 	struct Row<0>
 	{
-		static void idct(int* pTemp, const jpgd_block_coeff_t* pSrc)
+		static void idct(int* pTemp, const jpgd_block_t* pSrc)
 		{
 			(void)pTemp; 
 			(void)pSrc;
@@ -166,7 +146,7 @@ namespace jpgd {
 	template <>
 	struct Row<1>
 	{
-		static void idct(int* pTemp, const jpgd_block_coeff_t* pSrc)
+		static void idct(int* pTemp, const jpgd_block_t* pSrc)
 		{
 			const int dcval = left_shifti(pSrc[0], PASS1_BITS);
 
@@ -280,13 +260,11 @@ namespace jpgd {
 	};
 
 	// Scalar "fast pathing" IDCT.
-	static void idct(const jpgd_block_coeff_t* pSrc_ptr, uint8* pDst_ptr, int block_max_zag, bool use_simd)
+	static void idct(const jpgd_block_t* pSrc_ptr, uint8* pDst_ptr, int block_max_zag)
 	{
-		(void)use_simd;
-
 		assert(block_max_zag >= 1);
 		assert(block_max_zag <= 64);
-				
+
 		if (block_max_zag <= 1)
 		{
 			int k = ((pSrc_ptr[0] + 4) >> 3) + 128;
@@ -303,19 +281,9 @@ namespace jpgd {
 			return;
 		}
 
-#if JPGD_USE_SSE2
-		if (use_simd)
-		{
-			assert((((uintptr_t)pSrc_ptr) & 15) == 0);
-			assert((((uintptr_t)pDst_ptr) & 15) == 0);
-			idctSSEShortU8(pSrc_ptr, pDst_ptr);
-			return;
-		}
-#endif
-
 		int temp[64];
 
-		const jpgd_block_coeff_t* pSrc = pSrc_ptr;
+		const jpgd_block_t* pSrc = pSrc_ptr;
 		int* pTemp = temp;
 
 		const uint8* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8];
@@ -642,7 +610,7 @@ namespace jpgd {
 		free_all_blocks();
 		longjmp(m_jmp_state, status);
 	}
-		
+
 	void* jpeg_decoder::alloc(size_t nSize, bool zero)
 	{
 		nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
@@ -658,7 +626,7 @@ namespace jpgd {
 		}
 		if (!rv)
 		{
-			int capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
+			int capacity = JPGD_MAX(32768 - 256, ((int)nSize + 2047) & ~2047);
 			mem_block* b = (mem_block*)jpgd_malloc(sizeof(mem_block) + capacity);
 			if (!b)
 			{
@@ -675,14 +643,6 @@ namespace jpgd {
 		return rv;
 	}
 
-	void* jpeg_decoder::alloc_aligned(size_t nSize, uint32_t align, bool zero)
-	{
-		assert((align >= 1U) && ((align & (align - 1U)) == 0U));
-		void *p = alloc(nSize + align - 1U, zero);
-		p = (void *)( ((uintptr_t)p + (align - 1U)) & ~((uintptr_t)(align - 1U)) );
-		return p;
-	}
-
 	void jpeg_decoder::word_clear(void* p, uint16 c, uint n)
 	{
 		uint8* pD = (uint8*)p;
@@ -987,15 +947,15 @@ namespace jpgd {
 	// Finds the next marker.
 	int jpeg_decoder::next_marker()
 	{
-		uint c, bytes;
+		uint c;// , bytes;
 
-		bytes = 0;
+		//bytes = 0;
 
 		do
 		{
 			do
 			{
-				bytes++;
+				//bytes++;
 				c = get_bits(8);
 			} while (c != 0xFF);
 
@@ -1191,7 +1151,7 @@ namespace jpgd {
 		m_image_x_size = m_image_y_size = 0;
 		m_pStream = pStream;
 		m_progressive_flag = JPGD_FALSE;
-				
+
 		memset(m_huff_ac, 0, sizeof(m_huff_ac));
 		memset(m_huff_num, 0, sizeof(m_huff_num));
 		memset(m_huff_val, 0, sizeof(m_huff_val));
@@ -1280,19 +1240,6 @@ namespace jpgd {
 
 		for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++)
 			m_mcu_block_max_zag[i] = 64;
-
-		m_has_sse2 = false;
-
-#if JPGD_USE_SSE2
-#ifdef _MSC_VER
-		int cpu_info[4];
-		__cpuid(cpu_info, 1);
-		const int cpu_info3 = cpu_info[3];
-		m_has_sse2 = ((cpu_info3 >> 26U) & 1U) != 0U;
-#else
-		m_has_sse2 = true;
-#endif
-#endif
 	}
 
 #define SCALEBITS 16
@@ -1335,7 +1282,7 @@ namespace jpgd {
 
 	void jpeg_decoder::transform_mcu(int mcu_row)
 	{
-		jpgd_block_coeff_t* pSrc_ptr = m_pMCU_coefficients;
+		jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
 		if (mcu_row * m_blocks_per_mcu >= m_max_blocks_per_row)
 			stop_decoding(JPGD_DECODE_ERROR);
 
@@ -1343,7 +1290,7 @@ namespace jpgd {
 
 		for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++)
 		{
-			idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block], ((m_flags & cFlagDisableSIMD) == 0) && m_has_sse2);
+			idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
 			pSrc_ptr += 64;
 			pDst_ptr += 64;
 		}
@@ -1354,9 +1301,9 @@ namespace jpgd {
 	void jpeg_decoder::load_next_row()
 	{
 		int i;
-		jpgd_block_coeff_t* p;
+		jpgd_block_t* p;
 		jpgd_quant_t* q;
-		int mcu_row, mcu_block, row_block = 0;
+		int mcu_row, mcu_block;// , row_block = 0;
 		int component_num, component_id;
 		int block_x_mcu[JPGD_MAX_COMPONENTS];
 
@@ -1376,10 +1323,10 @@ namespace jpgd {
 
 				p = m_pMCU_coefficients + 64 * mcu_block;
 
-				jpgd_block_coeff_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
-				jpgd_block_coeff_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
+				jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
+				jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
 				p[0] = pDC[0];
-				memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_coeff_t));
+				memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_t));
 
 				for (i = 63; i > 0; i--)
 					if (p[g_ZAG[i]])
@@ -1389,9 +1336,9 @@ namespace jpgd {
 
 				for (; i >= 0; i--)
 					if (p[g_ZAG[i]])
-						p[g_ZAG[i]] = static_cast<jpgd_block_coeff_t>(p[g_ZAG[i]] * q[i]);
+						p[g_ZAG[i]] = static_cast<jpgd_block_t>(p[g_ZAG[i]] * q[i]);
 
-				row_block++;
+				//row_block++;
 
 				if (m_comps_in_scan == 1)
 					block_x_mcu[component_id]++;
@@ -1478,14 +1425,14 @@ namespace jpgd {
 	// Decodes and dequantizes the next row of coefficients.
 	void jpeg_decoder::decode_next_row()
 	{
-		int row_block = 0;
+		//int row_block = 0;
 
 		for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++)
 		{
 			if ((m_restart_interval) && (m_restarts_left == 0))
 				process_restart();
 
-			jpgd_block_coeff_t* p = m_pMCU_coefficients;
+			jpgd_block_t* p = m_pMCU_coefficients;
 			for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64)
 			{
 				int component_id = m_mcu_org[mcu_block];
@@ -1503,7 +1450,7 @@ namespace jpgd {
 
 				m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]);
 
-				p[0] = static_cast<jpgd_block_coeff_t>(s * q[0]);
+				p[0] = static_cast<jpgd_block_t>(s * q[0]);
 
 				int prev_num_set = m_mcu_block_max_zag[mcu_block];
 
@@ -1541,7 +1488,7 @@ namespace jpgd {
 						if (k >= 64)
 							stop_decoding(JPGD_DECODE_ERROR);
 
-						p[g_ZAG[k]] = static_cast<jpgd_block_coeff_t>(dequantize_ac(s, q[k])); //s * q[k];
+						p[g_ZAG[k]] = static_cast<jpgd_block_t>(dequantize_ac(s, q[k])); //s * q[k];
 					}
 					else
 					{
@@ -1581,7 +1528,7 @@ namespace jpgd {
 
 				m_mcu_block_max_zag[mcu_block] = k;
 
-				row_block++;
+				//row_block++;
 			}
 
 			transform_mcu(mcu_row);
@@ -1669,7 +1616,7 @@ namespace jpgd {
 		int row = m_max_mcu_y_size - m_mcu_lines_left;
 		uint8* d0 = m_pScan_line_0;
 
-		const int half_image_x_size = (m_image_x_size == 1) ? 0 : (m_image_x_size >> 1) - 1;
+		const int half_image_x_size = (m_image_x_size >> 1) - 1;
 		const int row_x8 = row * 8;
 
 		for (int x = 0; x < m_image_x_size; x++)
@@ -1762,7 +1709,7 @@ namespace jpgd {
 		int y = m_image_y_size - m_total_lines_left;
 		int row = y & 15;
 
-		const int half_image_y_size = (m_image_y_size == 1) ? 0 : (m_image_y_size >> 1) - 1;
+		const int half_image_y_size = (m_image_y_size >> 1) - 1;
 
 		uint8* d0 = m_pScan_line_0;
 
@@ -1891,7 +1838,7 @@ namespace jpgd {
 		int y = m_image_y_size - m_total_lines_left;
 		int row = y & 15;
 
-		const int half_image_y_size = (m_image_y_size == 1) ? 0 : (m_image_y_size >> 1) - 1;
+		const int half_image_y_size = (m_image_y_size >> 1) - 1;
 
 		uint8* d0 = m_pScan_line_0;
 
@@ -1915,7 +1862,7 @@ namespace jpgd {
 		const int y0_base = (c_y0 & 7) * 8 + 256;
 		const int y1_base = (c_y1 & 7) * 8 + 256;
 
-		const int half_image_x_size = (m_image_x_size == 1) ? 0 : (m_image_x_size >> 1) - 1;
+		const int half_image_x_size = (m_image_x_size >> 1) - 1;
 
 		static const uint8_t s_muls[2][2][4] =
 		{
@@ -2124,10 +2071,10 @@ namespace jpgd {
 
 	int jpeg_decoder::decode_next_mcu_row()
 	{
-		if (::setjmp(m_jmp_state))
+		if (setjmp(m_jmp_state))
 			return JPGD_FAILED;
 
-		const bool chroma_y_filtering = ((m_flags & cFlagBoxChromaFiltering) == 0) && ((m_scan_type == JPGD_YH2V2) || (m_scan_type == JPGD_YH1V2));
+		const bool chroma_y_filtering = (m_flags & cFlagLinearChromaFiltering) && ((m_scan_type == JPGD_YH2V2) || (m_scan_type == JPGD_YH1V2)) && (m_image_x_size >= 2) && (m_image_y_size >= 2);
 		if (chroma_y_filtering)
 		{
 			std::swap(m_pSample_buf, m_pSample_buf_prev);
@@ -2156,7 +2103,7 @@ namespace jpgd {
 		if (m_total_lines_left == 0)
 			return JPGD_DONE;
 
-		const bool chroma_y_filtering = ((m_flags & cFlagBoxChromaFiltering) == 0) && ((m_scan_type == JPGD_YH2V2) || (m_scan_type == JPGD_YH1V2));
+		const bool chroma_y_filtering = (m_flags & cFlagLinearChromaFiltering) && ((m_scan_type == JPGD_YH2V2) || (m_scan_type == JPGD_YH1V2)) && (m_image_x_size >= 2) && (m_image_y_size >= 2);
 
 		bool get_another_mcu_row = false;
 		bool got_mcu_early = false;
@@ -2186,7 +2133,7 @@ namespace jpgd {
 		{
 		case JPGD_YH2V2:
 		{
-			if ((m_flags & cFlagBoxChromaFiltering) == 0)
+			if ((m_flags & cFlagLinearChromaFiltering) && (m_image_x_size >= 2) && (m_image_y_size >= 2))
 			{
 				if (m_num_buffered_scanlines == 1)
 				{
@@ -2215,7 +2162,7 @@ namespace jpgd {
 		}
 		case JPGD_YH2V1:
 		{
-			if ((m_flags & cFlagBoxChromaFiltering) == 0)
+			if ((m_flags & cFlagLinearChromaFiltering) && (m_image_x_size >= 2) && (m_image_y_size >= 2))
 				H2V1ConvertFiltered();
 			else
 				H2V1Convert();
@@ -2630,9 +2577,9 @@ namespace jpgd {
 		m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
 
 		// Initialize two scan line buffers.
-		m_pScan_line_0 = (uint8*)alloc_aligned(m_dest_bytes_per_scan_line, true);
+		m_pScan_line_0 = (uint8*)alloc(m_dest_bytes_per_scan_line, true);
 		if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2))
-			m_pScan_line_1 = (uint8*)alloc_aligned(m_dest_bytes_per_scan_line, true);
+			m_pScan_line_1 = (uint8*)alloc(m_dest_bytes_per_scan_line, true);
 
 		m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
 
@@ -2641,13 +2588,13 @@ namespace jpgd {
 			stop_decoding(JPGD_DECODE_ERROR);
 
 		// Allocate the coefficient buffer, enough for one MCU
-		m_pMCU_coefficients = (jpgd_block_coeff_t *)alloc_aligned(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_coeff_t));
-				
+		m_pMCU_coefficients = (jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_t));
+
 		for (i = 0; i < m_max_blocks_per_mcu; i++)
 			m_mcu_block_max_zag[i] = 64;
 
-		m_pSample_buf = (uint8*)alloc_aligned(m_max_blocks_per_row * 64);
-		m_pSample_buf_prev = (uint8*)alloc_aligned(m_max_blocks_per_row * 64);
+		m_pSample_buf = (uint8*)alloc(m_max_blocks_per_row * 64);
+		m_pSample_buf_prev = (uint8*)alloc(m_max_blocks_per_row * 64);
 
 		m_total_lines_left = m_image_y_size;
 
@@ -2668,17 +2615,17 @@ namespace jpgd {
 		cb->block_num_y = block_num_y;
 		cb->block_len_x = block_len_x;
 		cb->block_len_y = block_len_y;
-		cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_coeff_t);
+		cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_t);
 		cb->pData = (uint8*)alloc(cb->block_size * block_num_x * block_num_y, true);
 		return cb;
 	}
 
-	inline jpgd_block_coeff_t* jpeg_decoder::coeff_buf_getp(coeff_buf* cb, int block_x, int block_y)
+	inline jpgd_block_t* jpeg_decoder::coeff_buf_getp(coeff_buf* cb, int block_x, int block_y)
 	{
 		if ((block_x >= cb->block_num_x) || (block_y >= cb->block_num_y))
 			stop_decoding(JPGD_DECODE_ERROR);
 
-		return (jpgd_block_coeff_t*)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x));
+		return (jpgd_block_t*)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x));
 	}
 
 	// The following methods decode the various types of m_blocks encountered
@@ -2686,7 +2633,7 @@ namespace jpgd {
 	void jpeg_decoder::decode_block_dc_first(jpeg_decoder* pD, int component_id, int block_x, int block_y)
 	{
 		int s, r;
-		jpgd_block_coeff_t* p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
+		jpgd_block_t* p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
 
 		if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0)
 		{
@@ -2699,14 +2646,14 @@ namespace jpgd {
 
 		pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]);
 
-		p[0] = static_cast<jpgd_block_coeff_t>(s << pD->m_successive_low);
+		p[0] = static_cast<jpgd_block_t>(s << pD->m_successive_low);
 	}
 
 	void jpeg_decoder::decode_block_dc_refine(jpeg_decoder* pD, int component_id, int block_x, int block_y)
 	{
 		if (pD->get_bits_no_markers(1))
 		{
-			jpgd_block_coeff_t* p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
+			jpgd_block_t* p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
 
 			p[0] |= (1 << pD->m_successive_low);
 		}
@@ -2722,7 +2669,7 @@ namespace jpgd {
 			return;
 		}
 
-		jpgd_block_coeff_t* p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
+		jpgd_block_t* p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
 
 		for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++)
 		{
@@ -2743,7 +2690,7 @@ namespace jpgd {
 				r = pD->get_bits_no_markers(s);
 				s = JPGD_HUFF_EXTEND(r, s);
 
-				p[g_ZAG[k]] = static_cast<jpgd_block_coeff_t>(s << pD->m_successive_low);
+				p[g_ZAG[k]] = static_cast<jpgd_block_t>(s << pD->m_successive_low);
 			}
 			else
 			{
@@ -2776,7 +2723,7 @@ namespace jpgd {
 		//int m1 = (-1) << pD->m_successive_low;
 		int m1 = static_cast<int>((UINT32_MAX << pD->m_successive_low));
 
-		jpgd_block_coeff_t* p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
+		jpgd_block_t* p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
 		if (pD->m_spectral_end > 63)
 			pD->stop_decoding(JPGD_DECODE_ERROR);
 
@@ -2820,7 +2767,7 @@ namespace jpgd {
 
 				do
 				{
-					jpgd_block_coeff_t* this_coef = p + g_ZAG[k & 63];
+					jpgd_block_t* this_coef = p + g_ZAG[k & 63];
 
 					if (*this_coef != 0)
 					{
@@ -2829,9 +2776,9 @@ namespace jpgd {
 							if ((*this_coef & p1) == 0)
 							{
 								if (*this_coef >= 0)
-									*this_coef = static_cast<jpgd_block_coeff_t>(*this_coef + p1);
+									*this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
 								else
-									*this_coef = static_cast<jpgd_block_coeff_t>(*this_coef + m1);
+									*this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
 							}
 						}
 					}
@@ -2847,7 +2794,7 @@ namespace jpgd {
 
 				if ((s) && (k < 64))
 				{
-					p[g_ZAG[k]] = static_cast<jpgd_block_coeff_t>(s);
+					p[g_ZAG[k]] = static_cast<jpgd_block_t>(s);
 				}
 			}
 		}
@@ -2856,7 +2803,7 @@ namespace jpgd {
 		{
 			for (; k <= pD->m_spectral_end; k++)
 			{
-				jpgd_block_coeff_t* this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis
+				jpgd_block_t* this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis
 
 				if (*this_coef != 0)
 				{
@@ -2865,9 +2812,9 @@ namespace jpgd {
 						if ((*this_coef & p1) == 0)
 						{
 							if (*this_coef >= 0)
-								*this_coef = static_cast<jpgd_block_coeff_t>(*this_coef + p1);
+								*this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
 							else
-								*this_coef = static_cast<jpgd_block_coeff_t>(*this_coef + m1);
+								*this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
 						}
 					}
 				}
@@ -3040,7 +2987,7 @@ namespace jpgd {
 
 	jpeg_decoder::jpeg_decoder(jpeg_decoder_stream* pStream, uint32_t flags)
 	{
-		if (::setjmp(m_jmp_state))
+		if (setjmp(m_jmp_state))
 			return;
 		decode_init(pStream, flags);
 	}
@@ -3053,7 +3000,7 @@ namespace jpgd {
 		if (m_error_code)
 			return JPGD_FAILED;
 
-		if (::setjmp(m_jmp_state))
+		if (setjmp(m_jmp_state))
 			return JPGD_FAILED;
 
 		decode_start();
diff --git a/thirdparty/jpeg-compressor/jpgd.h b/thirdparty/basis_universal/encoder/jpgd.h
similarity index 96%
rename from thirdparty/jpeg-compressor/jpgd.h
rename to thirdparty/basis_universal/encoder/jpgd.h
index 39136696ba5..86a7814cae5 100644
--- a/thirdparty/jpeg-compressor/jpgd.h
+++ b/thirdparty/basis_universal/encoder/jpgd.h
@@ -1,6 +1,5 @@
 // jpgd.h - C++ class for JPEG decompression.
-// Richard Geldreich <richgel99@gmail.com>
-// See jpgd.cpp for license (Public Domain or Apache 2.0).
+// Public domain, Rich Geldreich <richgel99@gmail.com>
 #ifndef JPEG_DECODER_H
 #define JPEG_DECODER_H
 
@@ -119,20 +118,19 @@ namespace jpgd
 	};
 
 	typedef int16 jpgd_quant_t;
-	typedef int16 jpgd_block_coeff_t;
+	typedef int16 jpgd_block_t;
 
 	class jpeg_decoder
 	{
 	public:
 		enum
 		{
-			cFlagBoxChromaFiltering = 1,
-			cFlagDisableSIMD = 2
+			cFlagLinearChromaFiltering = 1
 		};
 
 		// Call get_error_code() after constructing to determine if the stream is valid or not. You may call the get_width(), get_height(), etc.
 		// methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
-		jpeg_decoder(jpeg_decoder_stream* pStream, uint32_t flags = 0);
+		jpeg_decoder(jpeg_decoder_stream* pStream, uint32_t flags = cFlagLinearChromaFiltering);
 
 		~jpeg_decoder();
 
@@ -257,7 +255,7 @@ namespace jpgd
 
 		int m_max_mcus_per_col;
 		uint m_last_dc_val[JPGD_MAX_COMPONENTS];
-		jpgd_block_coeff_t* m_pMCU_coefficients;
+		jpgd_block_t* m_pMCU_coefficients;
 		int m_mcu_block_max_zag[JPGD_MAX_BLOCKS_PER_MCU];
 		uint8* m_pSample_buf;
 		uint8* m_pSample_buf_prev;
@@ -273,13 +271,11 @@ namespace jpgd
 		bool m_ready_flag;
 		bool m_eof_flag;
 		bool m_sample_buf_prev_valid;
-		bool m_has_sse2;
 
 		inline int check_sample_buf_ofs(int ofs) const { assert(ofs >= 0); assert(ofs < m_max_blocks_per_row * 64); return ofs; }
 		void free_all_blocks();
 		JPGD_NORETURN void stop_decoding(jpgd_status status);
 		void* alloc(size_t n, bool zero = false);
-		void* alloc_aligned(size_t nSize, uint32_t align = 16, bool zero = false);
 		void word_clear(void* p, uint16 c, uint n);
 		void prep_in_buffer();
 		void read_dht_marker();
@@ -298,7 +294,7 @@ namespace jpgd
 		void fix_in_buffer();
 		void transform_mcu(int mcu_row);
 		coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y);
-		inline jpgd_block_coeff_t* coeff_buf_getp(coeff_buf* cb, int block_x, int block_y);
+		inline jpgd_block_t* coeff_buf_getp(coeff_buf* cb, int block_x, int block_y);
 		void load_next_row();
 		void decode_next_row();
 		void make_huff_table(int index, huff_tables* pH);
diff --git a/thirdparty/basis_universal/patches/0002-external-jpgd.patch b/thirdparty/basis_universal/patches/0002-external-jpgd.patch
deleted file mode 100644
index ac415e3e732..00000000000
--- a/thirdparty/basis_universal/patches/0002-external-jpgd.patch
+++ /dev/null
@@ -1,22 +0,0 @@
-diff --git a/thirdparty/basis_universal/encoder/basisu_enc.cpp b/thirdparty/basis_universal/encoder/basisu_enc.cpp
-index 1f870c5de4..1cc982b134 100644
---- a/thirdparty/basis_universal/encoder/basisu_enc.cpp
-+++ b/thirdparty/basis_universal/encoder/basisu_enc.cpp
-@@ -504,7 +504,7 @@ namespace basisu
- 	bool load_jpg(const char *pFilename, image& img)
- 	{
- 		int width = 0, height = 0, actual_comps = 0;
--		uint8_t *pImage_data = jpgd::decompress_jpeg_image_from_file(pFilename, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering);
-+		uint8_t *pImage_data = jpgd::decompress_jpeg_image_from_file(pFilename, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagBoxChromaFiltering);
- 		if (!pImage_data)
- 			return false;
- 		
-@@ -524,7 +524,7 @@ namespace basisu
- 		}
- 
- 		int width = 0, height = 0, actual_comps = 0;
--		uint8_t* pImage_data = jpgd::decompress_jpeg_image_from_memory(pBuf, (int)buf_size, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagLinearChromaFiltering);
-+		uint8_t* pImage_data = jpgd::decompress_jpeg_image_from_memory(pBuf, (int)buf_size, &width, &height, &actual_comps, 4, jpgd::jpeg_decoder::cFlagBoxChromaFiltering);
- 		if (!pImage_data)
- 			return false;
- 
diff --git a/thirdparty/basis_universal/patches/0003-external-tinyexr.patch b/thirdparty/basis_universal/patches/0002-external-tinyexr.patch
similarity index 100%
rename from thirdparty/basis_universal/patches/0003-external-tinyexr.patch
rename to thirdparty/basis_universal/patches/0002-external-tinyexr.patch
diff --git a/thirdparty/basis_universal/patches/0004-remove-tinydds-qoi.patch b/thirdparty/basis_universal/patches/0003-remove-tinydds-qoi.patch
similarity index 100%
rename from thirdparty/basis_universal/patches/0004-remove-tinydds-qoi.patch
rename to thirdparty/basis_universal/patches/0003-remove-tinydds-qoi.patch
diff --git a/thirdparty/basis_universal/patches/0005-ambiguous-calls.patch b/thirdparty/basis_universal/patches/0004-ambiguous-calls.patch
similarity index 100%
rename from thirdparty/basis_universal/patches/0005-ambiguous-calls.patch
rename to thirdparty/basis_universal/patches/0004-ambiguous-calls.patch
diff --git a/thirdparty/jpeg-compressor/jpgd_idct.h b/thirdparty/jpeg-compressor/jpgd_idct.h
deleted file mode 100644
index 876425a959e..00000000000
--- a/thirdparty/jpeg-compressor/jpgd_idct.h
+++ /dev/null
@@ -1,462 +0,0 @@
-// Copyright 2009 Intel Corporation
-// All Rights Reserved
-//
-// Permission is granted to use, copy, distribute and prepare derivative works of this
-// software for any purpose and without fee, provided, that the above copyright notice
-// and this statement appear in all copies.  Intel makes no representations about the
-// suitability of this software for any purpose.  THIS SOFTWARE IS PROVIDED "AS IS."
-// INTEL SPECIFICALLY DISCLAIMS ALL WARRANTIES, EXPRESS OR IMPLIED, AND ALL LIABILITY,
-// INCLUDING CONSEQUENTIAL AND OTHER INDIRECT DAMAGES, FOR THE USE OF THIS SOFTWARE,
-// INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PROPRIETARY RIGHTS, AND INCLUDING THE
-// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.  Intel does not
-// assume any responsibility for any errors which may appear in this software nor any
-// responsibility to update it.
-//
-// From:
-// https://software.intel.com/sites/default/files/m/d/4/1/d/8/UsingIntelAVXToImplementIDCT-r1_5.pdf
-// https://software.intel.com/file/29048
-//
-// Requires SSE
-//
-#ifdef _MSC_VER
-#include <intrin.h>
-#endif
-#include <immintrin.h>
-
-#ifdef _MSC_VER
-	#define JPGD_SIMD_ALIGN(type, name) __declspec(align(16)) type name
-#else
-	#define JPGD_SIMD_ALIGN(type, name) type name __attribute__((aligned(16)))
-#endif
-
-#define BITS_INV_ACC 4
-#define SHIFT_INV_ROW 16 - BITS_INV_ACC
-#define SHIFT_INV_COL 1 + BITS_INV_ACC
-const short IRND_INV_ROW = 1024 * (6 - BITS_INV_ACC);	//1 << (SHIFT_INV_ROW-1)
-const short IRND_INV_COL = 16 * (BITS_INV_ACC - 3);		// 1 << (SHIFT_INV_COL-1)
-const short IRND_INV_CORR = IRND_INV_COL - 1;			// correction -1.0 and round
-
-JPGD_SIMD_ALIGN(short, shortM128_one_corr[8]) = {1, 1, 1, 1, 1, 1, 1, 1};
-JPGD_SIMD_ALIGN(short, shortM128_round_inv_row[8]) = {IRND_INV_ROW, 0, IRND_INV_ROW, 0, IRND_INV_ROW, 0, IRND_INV_ROW, 0};
-JPGD_SIMD_ALIGN(short, shortM128_round_inv_col[8]) = {IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL, IRND_INV_COL};
-JPGD_SIMD_ALIGN(short, shortM128_round_inv_corr[8])= {IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR, IRND_INV_CORR};
-JPGD_SIMD_ALIGN(short, shortM128_tg_1_16[8]) = {13036, 13036, 13036, 13036, 13036, 13036, 13036, 13036}; // tg * (2<<16) + 0.5
-JPGD_SIMD_ALIGN(short, shortM128_tg_2_16[8]) = {27146, 27146, 27146, 27146, 27146, 27146, 27146, 27146}; // tg * (2<<16) + 0.5
-JPGD_SIMD_ALIGN(short, shortM128_tg_3_16[8]) = {-21746, -21746, -21746, -21746, -21746, -21746, -21746, -21746}; // tg * (2<<16) + 0.5
-JPGD_SIMD_ALIGN(short, shortM128_cos_4_16[8]) = {-19195, -19195, -19195, -19195, -19195, -19195, -19195, -19195};// cos * (2<<16) + 0.5
-
-//-----------------------------------------------------------------------------
-// Table for rows 0,4 - constants are multiplied on cos_4_16
-// w15 w14 w11 w10 w07 w06 w03 w02
-// w29 w28 w25 w24 w21 w20 w17 w16
-// w31 w30 w27 w26 w23 w22 w19 w18
-//movq -> w05 w04 w01 w00
-JPGD_SIMD_ALIGN(short, shortM128_tab_i_04[]) = {
-	16384, 21407, 16384, 8867,
-	16384, -8867, 16384, -21407, // w13 w12 w09 w08
-	16384, 8867, -16384, -21407, // w07 w06 w03 w02
-	-16384, 21407, 16384, -8867, // w15 w14 w11 w10
-	22725, 19266, 19266, -4520, // w21 w20 w17 w16
-	12873, -22725, 4520, -12873, // w29 w28 w25 w24
-	12873, 4520, -22725, -12873, // w23 w22 w19 w18
-	4520, 19266, 19266, -22725}; // w31 w30 w27 w26
-
-	// Table for rows 1,7 - constants are multiplied on cos_1_16
-//movq -> w05 w04 w01 w00
-JPGD_SIMD_ALIGN(short, shortM128_tab_i_17[]) = {
-	22725, 29692, 22725, 12299,
-	22725, -12299, 22725, -29692, // w13 w12 w09 w08
-	22725, 12299, -22725, -29692, // w07 w06 w03 w02
-	-22725, 29692, 22725, -12299, // w15 w14 w11 w10
-	31521, 26722, 26722, -6270, // w21 w20 w17 w16
-	17855, -31521, 6270, -17855, // w29 w28 w25 w24
-	17855, 6270, -31521, -17855, // w23 w22 w19 w18
-	6270, 26722, 26722, -31521}; // w31 w30 w27 w26
-
-// Table for rows 2,6 - constants are multiplied on cos_2_16
-//movq -> w05 w04 w01 w00
-JPGD_SIMD_ALIGN(short, shortM128_tab_i_26[]) = {
-	21407, 27969, 21407, 11585,
-	21407, -11585, 21407, -27969, // w13 w12 w09 w08
-	21407, 11585, -21407, -27969, // w07 w06 w03 w02
-	-21407, 27969, 21407, -11585, // w15 w14 w11 w10
-	29692, 25172, 25172, -5906,	// w21 w20 w17 w16
-	16819, -29692, 5906, -16819, // w29 w28 w25 w24
-	16819, 5906, -29692, -16819, // w23 w22 w19 w18
-	5906, 25172, 25172, -29692}; // w31 w30 w27 w26
-// Table for rows 3,5 - constants are multiplied on cos_3_16
-//movq -> w05 w04 w01 w00
-JPGD_SIMD_ALIGN(short, shortM128_tab_i_35[]) = {
-	19266, 25172, 19266, 10426,
-	19266, -10426, 19266, -25172, // w13 w12 w09 w08
-	19266, 10426, -19266, -25172, // w07 w06 w03 w02
-	-19266, 25172, 19266, -10426, // w15 w14 w11 w10
-	26722, 22654, 22654, -5315, // w21 w20 w17 w16
-	15137, -26722, 5315, -15137, // w29 w28 w25 w24
-	15137, 5315, -26722, -15137, // w23 w22 w19 w18
-	5315, 22654, 22654, -26722}; // w31 w30 w27 w26
-
-JPGD_SIMD_ALIGN(short, shortM128_128[8]) = { 128, 128, 128, 128, 128, 128, 128, 128 };
-
-void idctSSEShortU8(const short *pInput, uint8_t * pOutputUB)
-{
-	__m128i r_xmm0, r_xmm4;
-	__m128i r_xmm1, r_xmm2, r_xmm3, r_xmm5, r_xmm6, r_xmm7;
-	__m128i row0, row1, row2, row3, row4, row5, row6, row7;
-	short * pTab_i_04 = shortM128_tab_i_04;
-	short * pTab_i_26 = shortM128_tab_i_26;
-
-	//Get pointers for this input and output
-	pTab_i_04 = shortM128_tab_i_04;
-	pTab_i_26 = shortM128_tab_i_26;
-
-	//Row 1 and Row 3
-	r_xmm0 = _mm_load_si128((__m128i *) pInput);
-	r_xmm4 = _mm_load_si128((__m128i *) (&pInput[2*8]));
-
-	// *** Work on the data in xmm0
-	//low shuffle mask = 0xd8 = 11 01 10 00
-	//get short 2 and short 0 into ls 32-bits
-	r_xmm0 = _mm_shufflelo_epi16(r_xmm0, 0xd8);
-
-	// copy short 2 and short 0 to all locations
-	r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0);
-		
-	// add to those copies
-	r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04));
-
-	// shuffle mask = 0x55 = 01 01 01 01
-	// copy short 3 and short 1 to all locations
-	r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55);
-		
-	// high shuffle mask = 0xd8 = 11 01 10 00
-	// get short 6 and short 4 into bit positions 64-95
-	// get short 7 and short 5 into bit positions 96-127
-	r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8);
-		
-	// add to short 3 and short 1
-	r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16]));
-		
-	// shuffle mask = 0xaa = 10 10 10 10
-	// copy short 6 and short 4 to all locations
-	r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa);
-		
-	// shuffle mask = 0xaa = 11 11 11 11
-	// copy short 7 and short 5 to all locations
-	r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff);
-		
-	// add to short 6 and short 4
-	r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8])); 
-		
-	// *** Work on the data in xmm4
-	// high shuffle mask = 0xd8 11 01 10 00
-	// get short 6 and short 4 into bit positions 64-95
-	// get short 7 and short 5 into bit positions 96-127
-	r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8);
-		
-	// (xmm0 short 2 and short 0 plus pSi) + some constants
-	r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row));
-	r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8);
-	r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24]));
-	r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0);
-	r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa);
-	r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &shortM128_tab_i_26[0]));
-	r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2);
-	r_xmm2 = r_xmm1;
-	r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55);
-	r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &shortM128_tab_i_26[8])); 
-	r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3);
-	r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff);
-	r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0);
-	r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &shortM128_tab_i_26[16])); 
-	r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1);
-	r_xmm2 = _mm_srai_epi32(r_xmm2, 12);
-	r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row));
-	r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &shortM128_tab_i_26[24]));
-	r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6);
-	r_xmm6 = r_xmm5;
-	r_xmm0 = _mm_srai_epi32(r_xmm0, 12);
-	r_xmm2 = _mm_shuffle_epi32(r_xmm2, 0x1b);
-	row0 = _mm_packs_epi32(r_xmm0, r_xmm2);
-	r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm7);
-	r_xmm6 = _mm_sub_epi32(r_xmm6, r_xmm4);
-	r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm5);
-	r_xmm6 = _mm_srai_epi32(r_xmm6, 12);
-	r_xmm4 = _mm_srai_epi32(r_xmm4, 12);
-	r_xmm6 = _mm_shuffle_epi32(r_xmm6, 0x1b);
-	row2 = _mm_packs_epi32(r_xmm4, r_xmm6);
-
-	//Row 5 and row 7
-	r_xmm0 = _mm_load_si128((__m128i *) (&pInput[4*8]));
-	r_xmm4 = _mm_load_si128((__m128i *) (&pInput[6*8]));
-
-	r_xmm0 = _mm_shufflelo_epi16(r_xmm0, 0xd8);
-	r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0);
-	r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04));
-	r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55);
-	r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8);
-	r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16]));
-	r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa);
-	r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff);
-	r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8])); 
-	r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8);
-	r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row));
-	r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8);
-	r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24]));
-	r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0);
-	r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa);
-	r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &shortM128_tab_i_26[0]));
-	r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2);
-	r_xmm2 = r_xmm1;
-	r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55);
-	r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &shortM128_tab_i_26[8])); 
-	r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3);
-	r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff);
-	r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0);
-	r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &shortM128_tab_i_26[16])); 
-	r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1);
-	r_xmm2 = _mm_srai_epi32(r_xmm2, 12);
-	r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row));
-	r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &shortM128_tab_i_26[24]));
-	r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6);
-	r_xmm6 = r_xmm5;
-	r_xmm0 = _mm_srai_epi32(r_xmm0, 12);
-	r_xmm2 = _mm_shuffle_epi32(r_xmm2, 0x1b);
-	row4 = _mm_packs_epi32(r_xmm0, r_xmm2);
-	r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm7);
-	r_xmm6 = _mm_sub_epi32(r_xmm6, r_xmm4);
-	r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm5);
-	r_xmm6 = _mm_srai_epi32(r_xmm6, 12);
-	r_xmm4 = _mm_srai_epi32(r_xmm4, 12);
-	r_xmm6 = _mm_shuffle_epi32(r_xmm6, 0x1b);
-	row6 = _mm_packs_epi32(r_xmm4, r_xmm6);
-
-	//Row 4 and row 2
-	pTab_i_04 = shortM128_tab_i_35;
-	pTab_i_26 = shortM128_tab_i_17;
-	r_xmm0 = _mm_load_si128((__m128i *) (&pInput[3*8]));
-	r_xmm4 = _mm_load_si128((__m128i *) (&pInput[1*8]));
-
-	r_xmm0 = _mm_shufflelo_epi16(r_xmm0, 0xd8);
-	r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0);
-	r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04));
-	r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55);
-	r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8);
-	r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16]));
-	r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa);
-	r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff);
-	r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8])); 
-	r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8);
-	r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row));
-	r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8);
-	r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24]));
-	r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0);
-	r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa);
-	r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &pTab_i_26[0]));
-	r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2);
-	r_xmm2 = r_xmm1;
-	r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55);
-	r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &pTab_i_26[8])); 
-	r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3);
-	r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff);
-	r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0);
-	r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &pTab_i_26[16])); 
-	r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1);
-	r_xmm2 = _mm_srai_epi32(r_xmm2, 12);
-	r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row));
-	r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &pTab_i_26[24]));
-	r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6);
-	r_xmm6 = r_xmm5;
-	r_xmm0 = _mm_srai_epi32(r_xmm0, 12);
-	r_xmm2 = _mm_shuffle_epi32(r_xmm2, 0x1b);
-	row3 = _mm_packs_epi32(r_xmm0, r_xmm2);
-	r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm7);
-	r_xmm6 = _mm_sub_epi32(r_xmm6, r_xmm4);
-	r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm5);
-	r_xmm6 = _mm_srai_epi32(r_xmm6, 12);
-	r_xmm4 = _mm_srai_epi32(r_xmm4, 12);
-	r_xmm6 = _mm_shuffle_epi32(r_xmm6, 0x1b);
-	row1 = _mm_packs_epi32(r_xmm4, r_xmm6);
-
-	//Row 6 and row 8
-	r_xmm0 = _mm_load_si128((__m128i *) (&pInput[5*8]));
-	r_xmm4 = _mm_load_si128((__m128i *) (&pInput[7*8]));
-
-	r_xmm0 = _mm_shufflelo_epi16(r_xmm0, 0xd8);
-	r_xmm1 = _mm_shuffle_epi32(r_xmm0, 0);
-	r_xmm1 = _mm_madd_epi16(r_xmm1, *((__m128i *) pTab_i_04));
-	r_xmm3 = _mm_shuffle_epi32(r_xmm0, 0x55);
-	r_xmm0 = _mm_shufflehi_epi16(r_xmm0, 0xd8);
-	r_xmm3 = _mm_madd_epi16(r_xmm3, *((__m128i *) &pTab_i_04[16]));
-	r_xmm2 = _mm_shuffle_epi32(r_xmm0, 0xaa);
-	r_xmm0 = _mm_shuffle_epi32(r_xmm0, 0xff);
-	r_xmm2 = _mm_madd_epi16(r_xmm2, *((__m128i *) &pTab_i_04[8])); 
-	r_xmm4 = _mm_shufflehi_epi16(r_xmm4, 0xd8);
-	r_xmm1 = _mm_add_epi32(r_xmm1, *((__m128i *) shortM128_round_inv_row));
-	r_xmm4 = _mm_shufflelo_epi16(r_xmm4, 0xd8);
-	r_xmm0 = _mm_madd_epi16(r_xmm0, *((__m128i *) &pTab_i_04[24]));
-	r_xmm5 = _mm_shuffle_epi32(r_xmm4, 0);
-	r_xmm6 = _mm_shuffle_epi32(r_xmm4, 0xaa);
-	r_xmm5 = _mm_madd_epi16(r_xmm5, *((__m128i *) &pTab_i_26[0]));
-	r_xmm1 = _mm_add_epi32(r_xmm1, r_xmm2);
-	r_xmm2 = r_xmm1;
-	r_xmm7 = _mm_shuffle_epi32(r_xmm4, 0x55);
-	r_xmm6 = _mm_madd_epi16(r_xmm6, *((__m128i *) &pTab_i_26[8])); 
-	r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm3);
-	r_xmm4 = _mm_shuffle_epi32(r_xmm4, 0xff);
-	r_xmm2 = _mm_sub_epi32(r_xmm2, r_xmm0);
-	r_xmm7 = _mm_madd_epi16(r_xmm7, *((__m128i *) &pTab_i_26[16])); 
-	r_xmm0 = _mm_add_epi32(r_xmm0, r_xmm1);
-	r_xmm2 = _mm_srai_epi32(r_xmm2, 12);
-	r_xmm5 = _mm_add_epi32(r_xmm5, *((__m128i *) shortM128_round_inv_row));
-	r_xmm4 = _mm_madd_epi16(r_xmm4, *((__m128i *) &pTab_i_26[24]));
-	r_xmm5 = _mm_add_epi32(r_xmm5, r_xmm6);
-	r_xmm6 = r_xmm5;
-	r_xmm0 = _mm_srai_epi32(r_xmm0, 12);
-	r_xmm2 = _mm_shuffle_epi32(r_xmm2, 0x1b);
-	row5 = _mm_packs_epi32(r_xmm0, r_xmm2);
-	r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm7);
-	r_xmm6 = _mm_sub_epi32(r_xmm6, r_xmm4);
-	r_xmm4 = _mm_add_epi32(r_xmm4, r_xmm5);
-	r_xmm6 = _mm_srai_epi32(r_xmm6, 12);
-	r_xmm4 = _mm_srai_epi32(r_xmm4, 12);
-	r_xmm6 = _mm_shuffle_epi32(r_xmm6, 0x1b);
-	row7 = _mm_packs_epi32(r_xmm4, r_xmm6);
-
-	r_xmm1 = _mm_load_si128((__m128i *) shortM128_tg_3_16);
-	r_xmm2 = row5;
-	r_xmm3 = row3;
-	r_xmm0 = _mm_mulhi_epi16(row5, r_xmm1);
-
-	r_xmm1 = _mm_mulhi_epi16(r_xmm1, r_xmm3);
-	r_xmm5 = _mm_load_si128((__m128i *) shortM128_tg_1_16);
-	r_xmm6 = row7;
-	r_xmm4 = _mm_mulhi_epi16(row7, r_xmm5);
-
-	r_xmm0 = _mm_adds_epi16(r_xmm0, r_xmm2);
-	r_xmm5 = _mm_mulhi_epi16(r_xmm5, row1);
-	r_xmm1 = _mm_adds_epi16(r_xmm1, r_xmm3);
-	r_xmm7 = row6;
-
-	r_xmm0 = _mm_adds_epi16(r_xmm0, r_xmm3);
-	r_xmm3 = _mm_load_si128((__m128i *) shortM128_tg_2_16);
-	r_xmm2 = _mm_subs_epi16(r_xmm2, r_xmm1);
-	r_xmm7 = _mm_mulhi_epi16(r_xmm7, r_xmm3);
-	r_xmm1 = r_xmm0;
-	r_xmm3 = _mm_mulhi_epi16(r_xmm3, row2);
-	r_xmm5 = _mm_subs_epi16(r_xmm5, r_xmm6);
-	r_xmm4 = _mm_adds_epi16(r_xmm4, row1);
-	r_xmm0 = _mm_adds_epi16(r_xmm0, r_xmm4);
-	r_xmm0 = _mm_adds_epi16(r_xmm0, *((__m128i *) shortM128_one_corr));
-	r_xmm4 = _mm_subs_epi16(r_xmm4, r_xmm1);
-	r_xmm6 = r_xmm5;
-	r_xmm5 = _mm_subs_epi16(r_xmm5, r_xmm2);
-	r_xmm5 = _mm_adds_epi16(r_xmm5, *((__m128i *) shortM128_one_corr));
-	r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm2);
-
-	//Intermediate results, needed later
-	__m128i temp3, temp7;
-	temp7 = r_xmm0;
-
-	r_xmm1 = r_xmm4;
-	r_xmm0 = _mm_load_si128((__m128i *) shortM128_cos_4_16);
-	r_xmm4 = _mm_adds_epi16(r_xmm4, r_xmm5);
-	r_xmm2 = _mm_load_si128((__m128i *) shortM128_cos_4_16);
-	r_xmm2 = _mm_mulhi_epi16(r_xmm2, r_xmm4);
-
-	//Intermediate results, needed later
-	temp3 = r_xmm6;
-
-	r_xmm1 = _mm_subs_epi16(r_xmm1, r_xmm5);
-	r_xmm7 = _mm_adds_epi16(r_xmm7, row2);
-	r_xmm3 = _mm_subs_epi16(r_xmm3, row6);
-	r_xmm6 = row0;
-	r_xmm0 = _mm_mulhi_epi16(r_xmm0, r_xmm1);
-	r_xmm5 = row4;
-	r_xmm5 = _mm_adds_epi16(r_xmm5, r_xmm6);
-	r_xmm6 = _mm_subs_epi16(r_xmm6, row4);
-	r_xmm4 = _mm_adds_epi16(r_xmm4, r_xmm2);
-
-	r_xmm4 = _mm_or_si128(r_xmm4, *((__m128i *) shortM128_one_corr));
-	r_xmm0 = _mm_adds_epi16(r_xmm0, r_xmm1);
-	r_xmm0 = _mm_or_si128(r_xmm0, *((__m128i *) shortM128_one_corr));
-
-	r_xmm2 = r_xmm5;
-	r_xmm5 = _mm_adds_epi16(r_xmm5, r_xmm7);
-	r_xmm1 = r_xmm6;
-	r_xmm5 = _mm_adds_epi16(r_xmm5, *((__m128i *) shortM128_round_inv_col));
-	r_xmm2 = _mm_subs_epi16(r_xmm2, r_xmm7);
-	r_xmm7 = temp7;
-	r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm3);
-	r_xmm6 = _mm_adds_epi16(r_xmm6, *((__m128i *) shortM128_round_inv_col));
-	r_xmm7 = _mm_adds_epi16(r_xmm7, r_xmm5);
-	r_xmm7 = _mm_srai_epi16(r_xmm7, SHIFT_INV_COL);
-	r_xmm1 = _mm_subs_epi16(r_xmm1, r_xmm3);
-	r_xmm1 = _mm_adds_epi16(r_xmm1, *((__m128i *) shortM128_round_inv_corr));
-	r_xmm3 = r_xmm6;
-	r_xmm2 = _mm_adds_epi16(r_xmm2, *((__m128i *) shortM128_round_inv_corr));
-	r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm4);
-
-	//Store results for row 0
-	//_mm_store_si128((__m128i *) pOutput, r_xmm7);
-	__m128i r0 = r_xmm7;
-
-	r_xmm6 = _mm_srai_epi16(r_xmm6, SHIFT_INV_COL);
-	r_xmm7 = r_xmm1;
-	r_xmm1 = _mm_adds_epi16(r_xmm1, r_xmm0);
-
-	//Store results for row 1
-	//_mm_store_si128((__m128i *) (&pOutput[1*8]), r_xmm6); 
-	__m128i r1 = r_xmm6;
-
-	r_xmm1 = _mm_srai_epi16(r_xmm1, SHIFT_INV_COL);
-	r_xmm6 = temp3;
-	r_xmm7 = _mm_subs_epi16(r_xmm7, r_xmm0);
-	r_xmm7 = _mm_srai_epi16(r_xmm7, SHIFT_INV_COL);
-
-	//Store results for row 2
-	//_mm_store_si128((__m128i *) (&pOutput[2*8]), r_xmm1); 
-	__m128i r2 = r_xmm1;
-
-	r_xmm5 = _mm_subs_epi16(r_xmm5, temp7); 
-	r_xmm5 = _mm_srai_epi16(r_xmm5, SHIFT_INV_COL);
-
-	//Store results for row 7
-	//_mm_store_si128((__m128i *) (&pOutput[7*8]), r_xmm5); 
-	__m128i r7 = r_xmm5;
-
-	r_xmm3 = _mm_subs_epi16(r_xmm3, r_xmm4);
-	r_xmm6 = _mm_adds_epi16(r_xmm6, r_xmm2);
-	r_xmm2 = _mm_subs_epi16(r_xmm2, temp3); 
-	r_xmm6 = _mm_srai_epi16(r_xmm6, SHIFT_INV_COL);
-	r_xmm2 = _mm_srai_epi16(r_xmm2, SHIFT_INV_COL);
-
-	//Store results for row 3
-	//_mm_store_si128((__m128i *) (&pOutput[3*8]), r_xmm6); 
-	__m128i r3 = r_xmm6;
-
-	r_xmm3 = _mm_srai_epi16(r_xmm3, SHIFT_INV_COL);
-
-	//Store results for rows 4, 5, and 6
-	//_mm_store_si128((__m128i *) (&pOutput[4*8]), r_xmm2);
-	//_mm_store_si128((__m128i *) (&pOutput[5*8]), r_xmm7);
-	//_mm_store_si128((__m128i *) (&pOutput[6*8]), r_xmm3);
-
-	__m128i r4 = r_xmm2;
-	__m128i r5 = r_xmm7;
-	__m128i r6 = r_xmm3;
-
-	r0 = _mm_add_epi16(*(const __m128i *)shortM128_128, r0);
-	r1 = _mm_add_epi16(*(const __m128i *)shortM128_128, r1);
-	r2 = _mm_add_epi16(*(const __m128i *)shortM128_128, r2);
-	r3 = _mm_add_epi16(*(const __m128i *)shortM128_128, r3);
-	r4 = _mm_add_epi16(*(const __m128i *)shortM128_128, r4);
-	r5 = _mm_add_epi16(*(const __m128i *)shortM128_128, r5);
-	r6 = _mm_add_epi16(*(const __m128i *)shortM128_128, r6);
-	r7 = _mm_add_epi16(*(const __m128i *)shortM128_128, r7);
-
-	((__m128i *)pOutputUB)[0] = _mm_packus_epi16(r0, r1);
-	((__m128i *)pOutputUB)[1] = _mm_packus_epi16(r2, r3);
-	((__m128i *)pOutputUB)[2] = _mm_packus_epi16(r4, r5);
-	((__m128i *)pOutputUB)[3] = _mm_packus_epi16(r6, r7);
-}
diff --git a/thirdparty/jpeg-compressor/jpge.cpp b/thirdparty/jpeg-compressor/jpge.cpp
deleted file mode 100644
index bb0c54bbf0d..00000000000
--- a/thirdparty/jpeg-compressor/jpge.cpp
+++ /dev/null
@@ -1,1076 +0,0 @@
-// jpge.cpp - C++ class for JPEG compression. Richard Geldreich <richgel99@gmail.com>
-// Supports grayscale, H1V1, H2V1, and H2V2 chroma subsampling factors, one or two pass Huffman table optimization, libjpeg-style quality 1-100 quality factors.
-// Also supports using luma quantization tables for chroma.
-//
-// Released under two licenses. You are free to choose which license you want:
-// License 1: 
-// Public Domain
-//
-// License 2:
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//    http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-// v1.01, Dec. 18, 2010 - Initial release
-// v1.02, Apr. 6, 2011 - Removed 2x2 ordered dither in H2V1 chroma subsampling method load_block_16_8_8(). (The rounding factor was 2, when it should have been 1. Either way, it wasn't helping.)
-// v1.03, Apr. 16, 2011 - Added support for optimized Huffman code tables, optimized dynamic memory allocation down to only 1 alloc.
-//                        Also from Alex Evans: Added RGBA support, linear memory allocator (no longer needed in v1.03).
-// v1.04, May. 19, 2012: Forgot to set m_pFile ptr to NULL in cfile_stream::close(). Thanks to Owen Kaluza for reporting this bug.
-//                       Code tweaks to fix VS2008 static code analysis warnings (all looked harmless).
-//                       Code review revealed method load_block_16_8_8() (used for the non-default H2V1 sampling mode to downsample chroma) somehow didn't get the rounding factor fix from v1.02.
-// v1.05, March 25, 2020: Added Apache 2.0 alternate license
-
-#include "jpge.h"
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#define JPGE_MAX(a,b) (((a)>(b))?(a):(b))
-#define JPGE_MIN(a,b) (((a)<(b))?(a):(b))
-
-namespace jpge {
-
-	static inline void* jpge_malloc(size_t nSize) { return malloc(nSize); }
-	static inline void jpge_free(void* p) { free(p); }
-
-	// Various JPEG enums and tables.
-	enum { M_SOF0 = 0xC0, M_DHT = 0xC4, M_SOI = 0xD8, M_EOI = 0xD9, M_SOS = 0xDA, M_DQT = 0xDB, M_APP0 = 0xE0 };
-	enum { DC_LUM_CODES = 12, AC_LUM_CODES = 256, DC_CHROMA_CODES = 12, AC_CHROMA_CODES = 256, MAX_HUFF_SYMBOLS = 257, MAX_HUFF_CODESIZE = 32 };
-
-	static uint8 s_zag[64] = { 0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 };
-	static int16 s_std_lum_quant[64] = { 16,11,12,14,12,10,16,14,13,14,18,17,16,19,24,40,26,24,22,22,24,49,35,37,29,40,58,51,61,60,57,51,56,55,64,72,92,78,64,68,87,69,55,56,80,109,81,87,95,98,103,104,103,62,77,113,121,112,100,120,92,101,103,99 };
-	static int16 s_std_croma_quant[64] = { 17,18,18,24,21,24,47,26,26,47,99,66,56,66,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99,99 };
-
-	// Table from http://www.imagemagick.org/discourse-server/viewtopic.php?f=22&t=20333&p=98008#p98008
-	// This is mozjpeg's default table, in zag order.
-	static int16 s_alt_quant[64] = { 16,16,16,16,17,16,18,20,20,18,25,27,24,27,25,37,34,31,31,34,37,56,40,43,40,43,40,56,85,53,62,53,53,62,53,85,75,91,74,69,74,91,75,135,106,94,94,106,135,156,131,124,131,156,189,169,169,189,238,226,238,311,311,418 };
-
-	static uint8 s_dc_lum_bits[17] = { 0,0,1,5,1,1,1,1,1,1,0,0,0,0,0,0,0 };
-	static uint8 s_dc_lum_val[DC_LUM_CODES] = { 0,1,2,3,4,5,6,7,8,9,10,11 };
-	static uint8 s_ac_lum_bits[17] = { 0,0,2,1,3,3,2,4,3,5,5,4,4,0,0,1,0x7d };
-	static uint8 s_ac_lum_val[AC_LUM_CODES] =
-	{
-	  0x01,0x02,0x03,0x00,0x04,0x11,0x05,0x12,0x21,0x31,0x41,0x06,0x13,0x51,0x61,0x07,0x22,0x71,0x14,0x32,0x81,0x91,0xa1,0x08,0x23,0x42,0xb1,0xc1,0x15,0x52,0xd1,0xf0,
-	  0x24,0x33,0x62,0x72,0x82,0x09,0x0a,0x16,0x17,0x18,0x19,0x1a,0x25,0x26,0x27,0x28,0x29,0x2a,0x34,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,0x49,
-	  0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x83,0x84,0x85,0x86,0x87,0x88,0x89,
-	  0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,0xc4,0xc5,
-	  0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe1,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf1,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
-	  0xf9,0xfa
-	};
-	static uint8 s_dc_chroma_bits[17] = { 0,0,3,1,1,1,1,1,1,1,1,1,0,0,0,0,0 };
-	static uint8 s_dc_chroma_val[DC_CHROMA_CODES] = { 0,1,2,3,4,5,6,7,8,9,10,11 };
-	static uint8 s_ac_chroma_bits[17] = { 0,0,2,1,2,4,4,3,4,7,5,4,4,0,1,2,0x77 };
-	static uint8 s_ac_chroma_val[AC_CHROMA_CODES] =
-	{
-	  0x00,0x01,0x02,0x03,0x11,0x04,0x05,0x21,0x31,0x06,0x12,0x41,0x51,0x07,0x61,0x71,0x13,0x22,0x32,0x81,0x08,0x14,0x42,0x91,0xa1,0xb1,0xc1,0x09,0x23,0x33,0x52,0xf0,
-	  0x15,0x62,0x72,0xd1,0x0a,0x16,0x24,0x34,0xe1,0x25,0xf1,0x17,0x18,0x19,0x1a,0x26,0x27,0x28,0x29,0x2a,0x35,0x36,0x37,0x38,0x39,0x3a,0x43,0x44,0x45,0x46,0x47,0x48,
-	  0x49,0x4a,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5a,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0x6a,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7a,0x82,0x83,0x84,0x85,0x86,0x87,
-	  0x88,0x89,0x8a,0x92,0x93,0x94,0x95,0x96,0x97,0x98,0x99,0x9a,0xa2,0xa3,0xa4,0xa5,0xa6,0xa7,0xa8,0xa9,0xaa,0xb2,0xb3,0xb4,0xb5,0xb6,0xb7,0xb8,0xb9,0xba,0xc2,0xc3,
-	  0xc4,0xc5,0xc6,0xc7,0xc8,0xc9,0xca,0xd2,0xd3,0xd4,0xd5,0xd6,0xd7,0xd8,0xd9,0xda,0xe2,0xe3,0xe4,0xe5,0xe6,0xe7,0xe8,0xe9,0xea,0xf2,0xf3,0xf4,0xf5,0xf6,0xf7,0xf8,
-	  0xf9,0xfa
-	};
-
-	// Low-level helper functions.
-	template <class T> inline void clear_obj(T& obj) { memset(&obj, 0, sizeof(obj)); }
-
-	const int YR = 19595, YG = 38470, YB = 7471, CB_R = -11059, CB_G = -21709, CB_B = 32768, CR_R = 32768, CR_G = -27439, CR_B = -5329;
-	static inline uint8 clamp(int i) { if (static_cast<uint>(i) > 255U) { if (i < 0) i = 0; else if (i > 255) i = 255; } return static_cast<uint8>(i); }
-
-	static inline int left_shifti(int val, uint32 bits)
-	{
-		return static_cast<int>(static_cast<uint32>(val) << bits);
-	}
-
-	static void RGB_to_YCC(uint8* pDst, const uint8* pSrc, int num_pixels)
-	{
-		for (; num_pixels; pDst += 3, pSrc += 3, num_pixels--)
-		{
-			const int r = pSrc[0], g = pSrc[1], b = pSrc[2];
-			pDst[0] = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16);
-			pDst[1] = clamp(128 + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16));
-			pDst[2] = clamp(128 + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16));
-		}
-	}
-
-	static void RGB_to_Y(uint8* pDst, const uint8* pSrc, int num_pixels)
-	{
-		for (; num_pixels; pDst++, pSrc += 3, num_pixels--)
-			pDst[0] = static_cast<uint8>((pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) >> 16);
-	}
-
-	static void RGBA_to_YCC(uint8* pDst, const uint8* pSrc, int num_pixels)
-	{
-		for (; num_pixels; pDst += 3, pSrc += 4, num_pixels--)
-		{
-			const int r = pSrc[0], g = pSrc[1], b = pSrc[2];
-			pDst[0] = static_cast<uint8>((r * YR + g * YG + b * YB + 32768) >> 16);
-			pDst[1] = clamp(128 + ((r * CB_R + g * CB_G + b * CB_B + 32768) >> 16));
-			pDst[2] = clamp(128 + ((r * CR_R + g * CR_G + b * CR_B + 32768) >> 16));
-		}
-	}
-
-	static void RGBA_to_Y(uint8* pDst, const uint8* pSrc, int num_pixels)
-	{
-		for (; num_pixels; pDst++, pSrc += 4, num_pixels--)
-			pDst[0] = static_cast<uint8>((pSrc[0] * YR + pSrc[1] * YG + pSrc[2] * YB + 32768) >> 16);
-	}
-
-	static void Y_to_YCC(uint8* pDst, const uint8* pSrc, int num_pixels)
-	{
-		for (; num_pixels; pDst += 3, pSrc++, num_pixels--) { pDst[0] = pSrc[0]; pDst[1] = 128; pDst[2] = 128; }
-	}
-
-	// Forward DCT - DCT derived from jfdctint.
-	enum { CONST_BITS = 13, ROW_BITS = 2 };
-#define DCT_DESCALE(x, n) (((x) + (((int32)1) << ((n) - 1))) >> (n))
-#define DCT_MUL(var, c) (static_cast<int16>(var) * static_cast<int32>(c))
-#define DCT1D(s0, s1, s2, s3, s4, s5, s6, s7) \
-  int32 t0 = s0 + s7, t7 = s0 - s7, t1 = s1 + s6, t6 = s1 - s6, t2 = s2 + s5, t5 = s2 - s5, t3 = s3 + s4, t4 = s3 - s4; \
-  int32 t10 = t0 + t3, t13 = t0 - t3, t11 = t1 + t2, t12 = t1 - t2; \
-  int32 u1 = DCT_MUL(t12 + t13, 4433); \
-  s2 = u1 + DCT_MUL(t13, 6270); \
-  s6 = u1 + DCT_MUL(t12, -15137); \
-  u1 = t4 + t7; \
-  int32 u2 = t5 + t6, u3 = t4 + t6, u4 = t5 + t7; \
-  int32 z5 = DCT_MUL(u3 + u4, 9633); \
-  t4 = DCT_MUL(t4, 2446); t5 = DCT_MUL(t5, 16819); \
-  t6 = DCT_MUL(t6, 25172); t7 = DCT_MUL(t7, 12299); \
-  u1 = DCT_MUL(u1, -7373); u2 = DCT_MUL(u2, -20995); \
-  u3 = DCT_MUL(u3, -16069); u4 = DCT_MUL(u4, -3196); \
-  u3 += z5; u4 += z5; \
-  s0 = t10 + t11; s1 = t7 + u1 + u4; s3 = t6 + u2 + u3; s4 = t10 - t11; s5 = t5 + u2 + u4; s7 = t4 + u1 + u3;
-
-	static void DCT2D(int32* p)
-	{
-		int32 c, * q = p;
-		for (c = 7; c >= 0; c--, q += 8)
-		{
-			int32 s0 = q[0], s1 = q[1], s2 = q[2], s3 = q[3], s4 = q[4], s5 = q[5], s6 = q[6], s7 = q[7];
-			DCT1D(s0, s1, s2, s3, s4, s5, s6, s7);
-			q[0] = left_shifti(s0, ROW_BITS); q[1] = DCT_DESCALE(s1, CONST_BITS - ROW_BITS); q[2] = DCT_DESCALE(s2, CONST_BITS - ROW_BITS); q[3] = DCT_DESCALE(s3, CONST_BITS - ROW_BITS);
-			q[4] = left_shifti(s4, ROW_BITS); q[5] = DCT_DESCALE(s5, CONST_BITS - ROW_BITS); q[6] = DCT_DESCALE(s6, CONST_BITS - ROW_BITS); q[7] = DCT_DESCALE(s7, CONST_BITS - ROW_BITS);
-		}
-		for (q = p, c = 7; c >= 0; c--, q++)
-		{
-			int32 s0 = q[0 * 8], s1 = q[1 * 8], s2 = q[2 * 8], s3 = q[3 * 8], s4 = q[4 * 8], s5 = q[5 * 8], s6 = q[6 * 8], s7 = q[7 * 8];
-			DCT1D(s0, s1, s2, s3, s4, s5, s6, s7);
-			q[0 * 8] = DCT_DESCALE(s0, ROW_BITS + 3); q[1 * 8] = DCT_DESCALE(s1, CONST_BITS + ROW_BITS + 3); q[2 * 8] = DCT_DESCALE(s2, CONST_BITS + ROW_BITS + 3); q[3 * 8] = DCT_DESCALE(s3, CONST_BITS + ROW_BITS + 3);
-			q[4 * 8] = DCT_DESCALE(s4, ROW_BITS + 3); q[5 * 8] = DCT_DESCALE(s5, CONST_BITS + ROW_BITS + 3); q[6 * 8] = DCT_DESCALE(s6, CONST_BITS + ROW_BITS + 3); q[7 * 8] = DCT_DESCALE(s7, CONST_BITS + ROW_BITS + 3);
-		}
-	}
-
-	struct sym_freq { uint m_key, m_sym_index; };
-
-	// Radix sorts sym_freq[] array by 32-bit key m_key. Returns ptr to sorted values.
-	static inline sym_freq* radix_sort_syms(uint num_syms, sym_freq* pSyms0, sym_freq* pSyms1)
-	{
-		const uint cMaxPasses = 4;
-		uint32 hist[256 * cMaxPasses]; clear_obj(hist);
-		for (uint i = 0; i < num_syms; i++) { uint freq = pSyms0[i].m_key; hist[freq & 0xFF]++; hist[256 + ((freq >> 8) & 0xFF)]++; hist[256 * 2 + ((freq >> 16) & 0xFF)]++; hist[256 * 3 + ((freq >> 24) & 0xFF)]++; }
-		sym_freq* pCur_syms = pSyms0, * pNew_syms = pSyms1;
-		uint total_passes = cMaxPasses; while ((total_passes > 1) && (num_syms == hist[(total_passes - 1) * 256])) total_passes--;
-		for (uint pass_shift = 0, pass = 0; pass < total_passes; pass++, pass_shift += 8)
-		{
-			const uint32* pHist = &hist[pass << 8];
-			uint offsets[256], cur_ofs = 0;
-			for (uint i = 0; i < 256; i++) { offsets[i] = cur_ofs; cur_ofs += pHist[i]; }
-			for (uint i = 0; i < num_syms; i++)
-				pNew_syms[offsets[(pCur_syms[i].m_key >> pass_shift) & 0xFF]++] = pCur_syms[i];
-			sym_freq* t = pCur_syms; pCur_syms = pNew_syms; pNew_syms = t;
-		}
-		return pCur_syms;
-	}
-
-	// calculate_minimum_redundancy() originally written by: Alistair Moffat, alistair@cs.mu.oz.au, Jyrki Katajainen, jyrki@diku.dk, November 1996.
-	static void calculate_minimum_redundancy(sym_freq* A, int n)
-	{
-		int root, leaf, next, avbl, used, dpth;
-		if (n == 0) return; else if (n == 1) { A[0].m_key = 1; return; }
-		A[0].m_key += A[1].m_key; root = 0; leaf = 2;
-		for (next = 1; next < n - 1; next++)
-		{
-			if (leaf >= n || A[root].m_key < A[leaf].m_key) { A[next].m_key = A[root].m_key; A[root++].m_key = next; }
-			else A[next].m_key = A[leaf++].m_key;
-			if (leaf >= n || (root < next && A[root].m_key < A[leaf].m_key)) { A[next].m_key += A[root].m_key; A[root++].m_key = next; }
-			else A[next].m_key += A[leaf++].m_key;
-		}
-		A[n - 2].m_key = 0;
-		for (next = n - 3; next >= 0; next--) A[next].m_key = A[A[next].m_key].m_key + 1;
-		avbl = 1; used = dpth = 0; root = n - 2; next = n - 1;
-		while (avbl > 0)
-		{
-			while (root >= 0 && (int)A[root].m_key == dpth) { used++; root--; }
-			while (avbl > used) { A[next--].m_key = dpth; avbl--; }
-			avbl = 2 * used; dpth++; used = 0;
-		}
-	}
-
-	// Limits canonical Huffman code table's max code size to max_code_size.
-	static void huffman_enforce_max_code_size(int* pNum_codes, int code_list_len, int max_code_size)
-	{
-		if (code_list_len <= 1) return;
-
-		for (int i = max_code_size + 1; i <= MAX_HUFF_CODESIZE; i++) pNum_codes[max_code_size] += pNum_codes[i];
-
-		uint32 total = 0;
-		for (int i = max_code_size; i > 0; i--)
-			total += (((uint32)pNum_codes[i]) << (max_code_size - i));
-
-		while (total != (1UL << max_code_size))
-		{
-			pNum_codes[max_code_size]--;
-			for (int i = max_code_size - 1; i > 0; i--)
-			{
-				if (pNum_codes[i]) { pNum_codes[i]--; pNum_codes[i + 1] += 2; break; }
-			}
-			total--;
-		}
-	}
-
-	// Generates an optimized offman table.
-	void jpeg_encoder::optimize_huffman_table(int table_num, int table_len)
-	{
-		sym_freq syms0[MAX_HUFF_SYMBOLS], syms1[MAX_HUFF_SYMBOLS];
-		syms0[0].m_key = 1; syms0[0].m_sym_index = 0;  // dummy symbol, assures that no valid code contains all 1's
-		int num_used_syms = 1;
-		const uint32* pSym_count = &m_huff_count[table_num][0];
-		for (int i = 0; i < table_len; i++)
-			if (pSym_count[i]) { syms0[num_used_syms].m_key = pSym_count[i]; syms0[num_used_syms++].m_sym_index = i + 1; }
-		sym_freq* pSyms = radix_sort_syms(num_used_syms, syms0, syms1);
-		calculate_minimum_redundancy(pSyms, num_used_syms);
-
-		// Count the # of symbols of each code size.
-		int num_codes[1 + MAX_HUFF_CODESIZE]; clear_obj(num_codes);
-		for (int i = 0; i < num_used_syms; i++)
-			num_codes[pSyms[i].m_key]++;
-
-		const uint JPGE_CODE_SIZE_LIMIT = 16; // the maximum possible size of a JPEG Huffman code (valid range is [9,16] - 9 vs. 8 because of the dummy symbol)
-		huffman_enforce_max_code_size(num_codes, num_used_syms, JPGE_CODE_SIZE_LIMIT);
-
-		// Compute m_huff_bits array, which contains the # of symbols per code size.
-		clear_obj(m_huff_bits[table_num]);
-		for (int i = 1; i <= (int)JPGE_CODE_SIZE_LIMIT; i++)
-			m_huff_bits[table_num][i] = static_cast<uint8>(num_codes[i]);
-
-		// Remove the dummy symbol added above, which must be in largest bucket.
-		for (int i = JPGE_CODE_SIZE_LIMIT; i >= 1; i--)
-		{
-			if (m_huff_bits[table_num][i]) { m_huff_bits[table_num][i]--; break; }
-		}
-
-		// Compute the m_huff_val array, which contains the symbol indices sorted by code size (smallest to largest).
-		for (int i = num_used_syms - 1; i >= 1; i--)
-			m_huff_val[table_num][num_used_syms - 1 - i] = static_cast<uint8>(pSyms[i].m_sym_index - 1);
-	}
-
-	// JPEG marker generation.
-	void jpeg_encoder::emit_byte(uint8 i)
-	{
-		m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && m_pStream->put_obj(i);
-	}
-
-	void jpeg_encoder::emit_word(uint i)
-	{
-		emit_byte(uint8(i >> 8)); emit_byte(uint8(i & 0xFF));
-	}
-
-	void jpeg_encoder::emit_marker(int marker)
-	{
-		emit_byte(uint8(0xFF)); emit_byte(uint8(marker));
-	}
-
-	// Emit JFIF marker
-	void jpeg_encoder::emit_jfif_app0()
-	{
-		emit_marker(M_APP0);
-		emit_word(2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1);
-		emit_byte(0x4A); emit_byte(0x46); emit_byte(0x49); emit_byte(0x46); /* Identifier: ASCII "JFIF" */
-		emit_byte(0);
-		emit_byte(1);      /* Major version */
-		emit_byte(1);      /* Minor version */
-		emit_byte(0);      /* Density unit */
-		emit_word(1);
-		emit_word(1);
-		emit_byte(0);      /* No thumbnail image */
-		emit_byte(0);
-	}
-
-	// Emit quantization tables
-	void jpeg_encoder::emit_dqt()
-	{
-		for (int i = 0; i < ((m_num_components == 3) ? 2 : 1); i++)
-		{
-			emit_marker(M_DQT);
-			emit_word(64 + 1 + 2);
-			emit_byte(static_cast<uint8>(i));
-			for (int j = 0; j < 64; j++)
-				emit_byte(static_cast<uint8>(m_quantization_tables[i][j]));
-		}
-	}
-
-	// Emit start of frame marker
-	void jpeg_encoder::emit_sof()
-	{
-		emit_marker(M_SOF0);                           /* baseline */
-		emit_word(3 * m_num_components + 2 + 5 + 1);
-		emit_byte(8);                                  /* precision */
-		emit_word(m_image_y);
-		emit_word(m_image_x);
-		emit_byte(m_num_components);
-		for (int i = 0; i < m_num_components; i++)
-		{
-			emit_byte(static_cast<uint8>(i + 1));                                   /* component ID     */
-			emit_byte((m_comp_h_samp[i] << 4) + m_comp_v_samp[i]);  /* h and v sampling */
-			emit_byte(i > 0);                                   /* quant. table num */
-		}
-	}
-
-	// Emit Huffman table.
-	void jpeg_encoder::emit_dht(uint8* bits, uint8* val, int index, bool ac_flag)
-	{
-		emit_marker(M_DHT);
-
-		int length = 0;
-		for (int i = 1; i <= 16; i++)
-			length += bits[i];
-
-		emit_word(length + 2 + 1 + 16);
-		emit_byte(static_cast<uint8>(index + (ac_flag << 4)));
-
-		for (int i = 1; i <= 16; i++)
-			emit_byte(bits[i]);
-
-		for (int i = 0; i < length; i++)
-			emit_byte(val[i]);
-	}
-
-	// Emit all Huffman tables.
-	void jpeg_encoder::emit_dhts()
-	{
-		emit_dht(m_huff_bits[0 + 0], m_huff_val[0 + 0], 0, false);
-		emit_dht(m_huff_bits[2 + 0], m_huff_val[2 + 0], 0, true);
-		if (m_num_components == 3)
-		{
-			emit_dht(m_huff_bits[0 + 1], m_huff_val[0 + 1], 1, false);
-			emit_dht(m_huff_bits[2 + 1], m_huff_val[2 + 1], 1, true);
-		}
-	}
-
-	// emit start of scan
-	void jpeg_encoder::emit_sos()
-	{
-		emit_marker(M_SOS);
-		emit_word(2 * m_num_components + 2 + 1 + 3);
-		emit_byte(m_num_components);
-		for (int i = 0; i < m_num_components; i++)
-		{
-			emit_byte(static_cast<uint8>(i + 1));
-			if (i == 0)
-				emit_byte((0 << 4) + 0);
-			else
-				emit_byte((1 << 4) + 1);
-		}
-		emit_byte(0);     /* spectral selection */
-		emit_byte(63);
-		emit_byte(0);
-	}
-
-	// Emit all markers at beginning of image file.
-	void jpeg_encoder::emit_markers()
-	{
-		emit_marker(M_SOI);
-		emit_jfif_app0();
-		emit_dqt();
-		emit_sof();
-		emit_dhts();
-		emit_sos();
-	}
-
-	// Compute the actual canonical Huffman codes/code sizes given the JPEG huff bits and val arrays.
-	void jpeg_encoder::compute_huffman_table(uint* codes, uint8* code_sizes, uint8* bits, uint8* val)
-	{
-		int i, l, last_p, si;
-		uint8 huff_size[257];
-		uint huff_code[257];
-		uint code;
-
-		int p = 0;
-		for (l = 1; l <= 16; l++)
-			for (i = 1; i <= bits[l]; i++)
-				huff_size[p++] = (char)l;
-
-		huff_size[p] = 0; last_p = p; // write sentinel
-
-		code = 0; si = huff_size[0]; p = 0;
-
-		while (huff_size[p])
-		{
-			while (huff_size[p] == si)
-				huff_code[p++] = code++;
-			code <<= 1;
-			si++;
-		}
-
-		memset(codes, 0, sizeof(codes[0]) * 256);
-		memset(code_sizes, 0, sizeof(code_sizes[0]) * 256);
-		for (p = 0; p < last_p; p++)
-		{
-			codes[val[p]] = huff_code[p];
-			code_sizes[val[p]] = huff_size[p];
-		}
-	}
-
-	// Quantization table generation.
-	void jpeg_encoder::compute_quant_table(int32* pDst, int16* pSrc)
-	{
-		int32 q;
-		if (m_params.m_quality < 50)
-			q = 5000 / m_params.m_quality;
-		else
-			q = 200 - m_params.m_quality * 2;
-		for (int i = 0; i < 64; i++)
-		{
-			int32 j = *pSrc++; j = (j * q + 50L) / 100L;
-			*pDst++ = JPGE_MIN(JPGE_MAX(j, 1), 255);
-		}
-	}
-
-	// Higher-level methods.
-	void jpeg_encoder::first_pass_init()
-	{
-		m_bit_buffer = 0; m_bits_in = 0;
-		memset(m_last_dc_val, 0, 3 * sizeof(m_last_dc_val[0]));
-		m_mcu_y_ofs = 0;
-		m_pass_num = 1;
-	}
-
-	bool jpeg_encoder::second_pass_init()
-	{
-		compute_huffman_table(&m_huff_codes[0 + 0][0], &m_huff_code_sizes[0 + 0][0], m_huff_bits[0 + 0], m_huff_val[0 + 0]);
-		compute_huffman_table(&m_huff_codes[2 + 0][0], &m_huff_code_sizes[2 + 0][0], m_huff_bits[2 + 0], m_huff_val[2 + 0]);
-		if (m_num_components > 1)
-		{
-			compute_huffman_table(&m_huff_codes[0 + 1][0], &m_huff_code_sizes[0 + 1][0], m_huff_bits[0 + 1], m_huff_val[0 + 1]);
-			compute_huffman_table(&m_huff_codes[2 + 1][0], &m_huff_code_sizes[2 + 1][0], m_huff_bits[2 + 1], m_huff_val[2 + 1]);
-		}
-		first_pass_init();
-		emit_markers();
-		m_pass_num = 2;
-		return true;
-	}
-
-	bool jpeg_encoder::jpg_open(int p_x_res, int p_y_res, int src_channels)
-	{
-		m_num_components = 3;
-		switch (m_params.m_subsampling)
-		{
-		case Y_ONLY:
-		{
-			m_num_components = 1;
-			m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1;
-			m_mcu_x = 8; m_mcu_y = 8;
-			break;
-		}
-		case H1V1:
-		{
-			m_comp_h_samp[0] = 1; m_comp_v_samp[0] = 1;
-			m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
-			m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
-			m_mcu_x = 8; m_mcu_y = 8;
-			break;
-		}
-		case H2V1:
-		{
-			m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 1;
-			m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
-			m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
-			m_mcu_x = 16; m_mcu_y = 8;
-			break;
-		}
-		case H2V2:
-		{
-			m_comp_h_samp[0] = 2; m_comp_v_samp[0] = 2;
-			m_comp_h_samp[1] = 1; m_comp_v_samp[1] = 1;
-			m_comp_h_samp[2] = 1; m_comp_v_samp[2] = 1;
-			m_mcu_x = 16; m_mcu_y = 16;
-		}
-		}
-
-		m_image_x = p_x_res; m_image_y = p_y_res;
-		m_image_bpp = src_channels;
-		m_image_bpl = m_image_x * src_channels;
-		m_image_x_mcu = (m_image_x + m_mcu_x - 1) & (~(m_mcu_x - 1));
-		m_image_y_mcu = (m_image_y + m_mcu_y - 1) & (~(m_mcu_y - 1));
-		m_image_bpl_xlt = m_image_x * m_num_components;
-		m_image_bpl_mcu = m_image_x_mcu * m_num_components;
-		m_mcus_per_row = m_image_x_mcu / m_mcu_x;
-
-		if ((m_mcu_lines[0] = static_cast<uint8*>(jpge_malloc(m_image_bpl_mcu * m_mcu_y))) == NULL) return false;
-		for (int i = 1; i < m_mcu_y; i++)
-			m_mcu_lines[i] = m_mcu_lines[i - 1] + m_image_bpl_mcu;
-
-		if (m_params.m_use_std_tables)
-		{
-			compute_quant_table(m_quantization_tables[0], s_std_lum_quant);
-			compute_quant_table(m_quantization_tables[1], m_params.m_no_chroma_discrim_flag ? s_std_lum_quant : s_std_croma_quant);
-		}
-		else
-		{
-			compute_quant_table(m_quantization_tables[0], s_alt_quant);
-			memcpy(m_quantization_tables[1], m_quantization_tables[0], sizeof(m_quantization_tables[1]));
-		}
-
-		m_out_buf_left = JPGE_OUT_BUF_SIZE;
-		m_pOut_buf = m_out_buf;
-
-		if (m_params.m_two_pass_flag)
-		{
-			clear_obj(m_huff_count);
-			first_pass_init();
-		}
-		else
-		{
-			memcpy(m_huff_bits[0 + 0], s_dc_lum_bits, 17);    memcpy(m_huff_val[0 + 0], s_dc_lum_val, DC_LUM_CODES);
-			memcpy(m_huff_bits[2 + 0], s_ac_lum_bits, 17);    memcpy(m_huff_val[2 + 0], s_ac_lum_val, AC_LUM_CODES);
-			memcpy(m_huff_bits[0 + 1], s_dc_chroma_bits, 17); memcpy(m_huff_val[0 + 1], s_dc_chroma_val, DC_CHROMA_CODES);
-			memcpy(m_huff_bits[2 + 1], s_ac_chroma_bits, 17); memcpy(m_huff_val[2 + 1], s_ac_chroma_val, AC_CHROMA_CODES);
-			if (!second_pass_init()) return false;   // in effect, skip over the first pass
-		}
-		return m_all_stream_writes_succeeded;
-	}
-
-	void jpeg_encoder::load_block_8_8_grey(int x)
-	{
-		uint8* pSrc;
-		sample_array_t* pDst = m_sample_array;
-		x <<= 3;
-		for (int i = 0; i < 8; i++, pDst += 8)
-		{
-			pSrc = m_mcu_lines[i] + x;
-			pDst[0] = pSrc[0] - 128; pDst[1] = pSrc[1] - 128; pDst[2] = pSrc[2] - 128; pDst[3] = pSrc[3] - 128;
-			pDst[4] = pSrc[4] - 128; pDst[5] = pSrc[5] - 128; pDst[6] = pSrc[6] - 128; pDst[7] = pSrc[7] - 128;
-		}
-	}
-
-	void jpeg_encoder::load_block_8_8(int x, int y, int c)
-	{
-		uint8* pSrc;
-		sample_array_t* pDst = m_sample_array;
-		x = (x * (8 * 3)) + c;
-		y <<= 3;
-		for (int i = 0; i < 8; i++, pDst += 8)
-		{
-			pSrc = m_mcu_lines[y + i] + x;
-			pDst[0] = pSrc[0 * 3] - 128; pDst[1] = pSrc[1 * 3] - 128; pDst[2] = pSrc[2 * 3] - 128; pDst[3] = pSrc[3 * 3] - 128;
-			pDst[4] = pSrc[4 * 3] - 128; pDst[5] = pSrc[5 * 3] - 128; pDst[6] = pSrc[6 * 3] - 128; pDst[7] = pSrc[7 * 3] - 128;
-		}
-	}
-
-	void jpeg_encoder::load_block_16_8(int x, int c)
-	{
-		uint8* pSrc1, * pSrc2;
-		sample_array_t* pDst = m_sample_array;
-		x = (x * (16 * 3)) + c;
-		for (int i = 0; i < 16; i += 2, pDst += 8)
-		{
-			pSrc1 = m_mcu_lines[i + 0] + x;
-			pSrc2 = m_mcu_lines[i + 1] + x;
-			pDst[0] = ((pSrc1[0 * 3] + pSrc1[1 * 3] + pSrc2[0 * 3] + pSrc2[1 * 3] + 2) >> 2) - 128; pDst[1] = ((pSrc1[2 * 3] + pSrc1[3 * 3] + pSrc2[2 * 3] + pSrc2[3 * 3] + 2) >> 2) - 128;
-			pDst[2] = ((pSrc1[4 * 3] + pSrc1[5 * 3] + pSrc2[4 * 3] + pSrc2[5 * 3] + 2) >> 2) - 128; pDst[3] = ((pSrc1[6 * 3] + pSrc1[7 * 3] + pSrc2[6 * 3] + pSrc2[7 * 3] + 2) >> 2) - 128;
-			pDst[4] = ((pSrc1[8 * 3] + pSrc1[9 * 3] + pSrc2[8 * 3] + pSrc2[9 * 3] + 2) >> 2) - 128; pDst[5] = ((pSrc1[10 * 3] + pSrc1[11 * 3] + pSrc2[10 * 3] + pSrc2[11 * 3] + 2) >> 2) - 128;
-			pDst[6] = ((pSrc1[12 * 3] + pSrc1[13 * 3] + pSrc2[12 * 3] + pSrc2[13 * 3] + 2) >> 2) - 128; pDst[7] = ((pSrc1[14 * 3] + pSrc1[15 * 3] + pSrc2[14 * 3] + pSrc2[15 * 3] + 2) >> 2) - 128;
-		}
-	}
-
-	void jpeg_encoder::load_block_16_8_8(int x, int c)
-	{
-		uint8* pSrc1;
-		sample_array_t* pDst = m_sample_array;
-		x = (x * (16 * 3)) + c;
-		for (int i = 0; i < 8; i++, pDst += 8)
-		{
-			pSrc1 = m_mcu_lines[i + 0] + x;
-			pDst[0] = ((pSrc1[0 * 3] + pSrc1[1 * 3] + 1) >> 1) - 128; pDst[1] = ((pSrc1[2 * 3] + pSrc1[3 * 3] + 1) >> 1) - 128;
-			pDst[2] = ((pSrc1[4 * 3] + pSrc1[5 * 3] + 1) >> 1) - 128; pDst[3] = ((pSrc1[6 * 3] + pSrc1[7 * 3] + 1) >> 1) - 128;
-			pDst[4] = ((pSrc1[8 * 3] + pSrc1[9 * 3] + 1) >> 1) - 128; pDst[5] = ((pSrc1[10 * 3] + pSrc1[11 * 3] + 1) >> 1) - 128;
-			pDst[6] = ((pSrc1[12 * 3] + pSrc1[13 * 3] + 1) >> 1) - 128; pDst[7] = ((pSrc1[14 * 3] + pSrc1[15 * 3] + 1) >> 1) - 128;
-		}
-	}
-
-	void jpeg_encoder::load_quantized_coefficients(int component_num)
-	{
-		int32* q = m_quantization_tables[component_num > 0];
-		int16* pDst = m_coefficient_array;
-		for (int i = 0; i < 64; i++)
-		{
-			sample_array_t j = m_sample_array[s_zag[i]];
-			if (j < 0)
-			{
-				if ((j = -j + (*q >> 1)) < *q)
-					*pDst++ = 0;
-				else
-					*pDst++ = static_cast<int16>(-(j / *q));
-			}
-			else
-			{
-				if ((j = j + (*q >> 1)) < *q)
-					*pDst++ = 0;
-				else
-					*pDst++ = static_cast<int16>((j / *q));
-			}
-			q++;
-		}
-	}
-
-	void jpeg_encoder::flush_output_buffer()
-	{
-		if (m_out_buf_left != JPGE_OUT_BUF_SIZE)
-			m_all_stream_writes_succeeded = m_all_stream_writes_succeeded && m_pStream->put_buf(m_out_buf, JPGE_OUT_BUF_SIZE - m_out_buf_left);
-		m_pOut_buf = m_out_buf;
-		m_out_buf_left = JPGE_OUT_BUF_SIZE;
-	}
-
-	void jpeg_encoder::put_bits(uint bits, uint len)
-	{
-		m_bit_buffer |= ((uint32)bits << (24 - (m_bits_in += len)));
-		while (m_bits_in >= 8)
-		{
-			uint8 c;
-#define JPGE_PUT_BYTE(c) { *m_pOut_buf++ = (c); if (--m_out_buf_left == 0) flush_output_buffer(); }
-			JPGE_PUT_BYTE(c = (uint8)((m_bit_buffer >> 16) & 0xFF));
-			if (c == 0xFF) JPGE_PUT_BYTE(0);
-			m_bit_buffer <<= 8;
-			m_bits_in -= 8;
-		}
-	}
-
-	void jpeg_encoder::code_coefficients_pass_one(int component_num)
-	{
-		if (component_num >= 3) return; // just to shut up static analysis
-		int i, run_len, nbits, temp1;
-		int16* src = m_coefficient_array;
-		uint32* dc_count = component_num ? m_huff_count[0 + 1] : m_huff_count[0 + 0], * ac_count = component_num ? m_huff_count[2 + 1] : m_huff_count[2 + 0];
-
-		temp1 = src[0] - m_last_dc_val[component_num];
-		m_last_dc_val[component_num] = src[0];
-		if (temp1 < 0) temp1 = -temp1;
-
-		nbits = 0;
-		while (temp1)
-		{
-			nbits++; temp1 >>= 1;
-		}
-
-		dc_count[nbits]++;
-		for (run_len = 0, i = 1; i < 64; i++)
-		{
-			if ((temp1 = m_coefficient_array[i]) == 0)
-				run_len++;
-			else
-			{
-				while (run_len >= 16)
-				{
-					ac_count[0xF0]++;
-					run_len -= 16;
-				}
-				if (temp1 < 0) temp1 = -temp1;
-				nbits = 1;
-				while (temp1 >>= 1) nbits++;
-				ac_count[(run_len << 4) + nbits]++;
-				run_len = 0;
-			}
-		}
-		if (run_len) ac_count[0]++;
-	}
-
-	void jpeg_encoder::code_coefficients_pass_two(int component_num)
-	{
-		int i, j, run_len, nbits, temp1, temp2;
-		int16* pSrc = m_coefficient_array;
-		uint* codes[2];
-		uint8* code_sizes[2];
-
-		if (component_num == 0)
-		{
-			codes[0] = m_huff_codes[0 + 0]; codes[1] = m_huff_codes[2 + 0];
-			code_sizes[0] = m_huff_code_sizes[0 + 0]; code_sizes[1] = m_huff_code_sizes[2 + 0];
-		}
-		else
-		{
-			codes[0] = m_huff_codes[0 + 1]; codes[1] = m_huff_codes[2 + 1];
-			code_sizes[0] = m_huff_code_sizes[0 + 1]; code_sizes[1] = m_huff_code_sizes[2 + 1];
-		}
-
-		temp1 = temp2 = pSrc[0] - m_last_dc_val[component_num];
-		m_last_dc_val[component_num] = pSrc[0];
-
-		if (temp1 < 0)
-		{
-			temp1 = -temp1; temp2--;
-		}
-
-		nbits = 0;
-		while (temp1)
-		{
-			nbits++; temp1 >>= 1;
-		}
-
-		put_bits(codes[0][nbits], code_sizes[0][nbits]);
-		if (nbits) put_bits(temp2 & ((1 << nbits) - 1), nbits);
-
-		for (run_len = 0, i = 1; i < 64; i++)
-		{
-			if ((temp1 = m_coefficient_array[i]) == 0)
-				run_len++;
-			else
-			{
-				while (run_len >= 16)
-				{
-					put_bits(codes[1][0xF0], code_sizes[1][0xF0]);
-					run_len -= 16;
-				}
-				if ((temp2 = temp1) < 0)
-				{
-					temp1 = -temp1;
-					temp2--;
-				}
-				nbits = 1;
-				while (temp1 >>= 1)
-					nbits++;
-				j = (run_len << 4) + nbits;
-				put_bits(codes[1][j], code_sizes[1][j]);
-				put_bits(temp2 & ((1 << nbits) - 1), nbits);
-				run_len = 0;
-			}
-		}
-		if (run_len)
-			put_bits(codes[1][0], code_sizes[1][0]);
-	}
-
-	void jpeg_encoder::code_block(int component_num)
-	{
-		DCT2D(m_sample_array);
-		load_quantized_coefficients(component_num);
-		if (m_pass_num == 1)
-			code_coefficients_pass_one(component_num);
-		else
-			code_coefficients_pass_two(component_num);
-	}
-
-	void jpeg_encoder::process_mcu_row()
-	{
-		if (m_num_components == 1)
-		{
-			for (int i = 0; i < m_mcus_per_row; i++)
-			{
-				load_block_8_8_grey(i); code_block(0);
-			}
-		}
-		else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1))
-		{
-			for (int i = 0; i < m_mcus_per_row; i++)
-			{
-				load_block_8_8(i, 0, 0); code_block(0); load_block_8_8(i, 0, 1); code_block(1); load_block_8_8(i, 0, 2); code_block(2);
-			}
-		}
-		else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1))
-		{
-			for (int i = 0; i < m_mcus_per_row; i++)
-			{
-				load_block_8_8(i * 2 + 0, 0, 0); code_block(0); load_block_8_8(i * 2 + 1, 0, 0); code_block(0);
-				load_block_16_8_8(i, 1); code_block(1); load_block_16_8_8(i, 2); code_block(2);
-			}
-		}
-		else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2))
-		{
-			for (int i = 0; i < m_mcus_per_row; i++)
-			{
-				load_block_8_8(i * 2 + 0, 0, 0); code_block(0); load_block_8_8(i * 2 + 1, 0, 0); code_block(0);
-				load_block_8_8(i * 2 + 0, 1, 0); code_block(0); load_block_8_8(i * 2 + 1, 1, 0); code_block(0);
-				load_block_16_8(i, 1); code_block(1); load_block_16_8(i, 2); code_block(2);
-			}
-		}
-	}
-
-	bool jpeg_encoder::terminate_pass_one()
-	{
-		optimize_huffman_table(0 + 0, DC_LUM_CODES); optimize_huffman_table(2 + 0, AC_LUM_CODES);
-		if (m_num_components > 1)
-		{
-			optimize_huffman_table(0 + 1, DC_CHROMA_CODES); optimize_huffman_table(2 + 1, AC_CHROMA_CODES);
-		}
-		return second_pass_init();
-	}
-
-	bool jpeg_encoder::terminate_pass_two()
-	{
-		put_bits(0x7F, 7);
-		flush_output_buffer();
-		emit_marker(M_EOI);
-		m_pass_num++; // purposely bump up m_pass_num, for debugging
-		return true;
-	}
-
-	bool jpeg_encoder::process_end_of_image()
-	{
-		if (m_mcu_y_ofs)
-		{
-			if (m_mcu_y_ofs < 16) // check here just to shut up static analysis
-			{
-				for (int i = m_mcu_y_ofs; i < m_mcu_y; i++)
-					memcpy(m_mcu_lines[i], m_mcu_lines[m_mcu_y_ofs - 1], m_image_bpl_mcu);
-			}
-
-			process_mcu_row();
-		}
-
-		if (m_pass_num == 1)
-			return terminate_pass_one();
-		else
-			return terminate_pass_two();
-	}
-
-	void jpeg_encoder::load_mcu(const void* pSrc)
-	{
-		const uint8* Psrc = reinterpret_cast<const uint8*>(pSrc);
-
-		uint8* pDst = m_mcu_lines[m_mcu_y_ofs]; // OK to write up to m_image_bpl_xlt bytes to pDst
-
-		if (m_num_components == 1)
-		{
-			if (m_image_bpp == 4)
-				RGBA_to_Y(pDst, Psrc, m_image_x);
-			else if (m_image_bpp == 3)
-				RGB_to_Y(pDst, Psrc, m_image_x);
-			else
-				memcpy(pDst, Psrc, m_image_x);
-		}
-		else
-		{
-			if (m_image_bpp == 4)
-				RGBA_to_YCC(pDst, Psrc, m_image_x);
-			else if (m_image_bpp == 3)
-				RGB_to_YCC(pDst, Psrc, m_image_x);
-			else
-				Y_to_YCC(pDst, Psrc, m_image_x);
-		}
-
-		// Possibly duplicate pixels at end of scanline if not a multiple of 8 or 16
-		if (m_num_components == 1)
-			memset(m_mcu_lines[m_mcu_y_ofs] + m_image_bpl_xlt, pDst[m_image_bpl_xlt - 1], m_image_x_mcu - m_image_x);
-		else
-		{
-			const uint8 y = pDst[m_image_bpl_xlt - 3 + 0], cb = pDst[m_image_bpl_xlt - 3 + 1], cr = pDst[m_image_bpl_xlt - 3 + 2];
-			uint8* q = m_mcu_lines[m_mcu_y_ofs] + m_image_bpl_xlt;
-			for (int i = m_image_x; i < m_image_x_mcu; i++)
-			{
-				*q++ = y; *q++ = cb; *q++ = cr;
-			}
-		}
-
-		if (++m_mcu_y_ofs == m_mcu_y)
-		{
-			process_mcu_row();
-			m_mcu_y_ofs = 0;
-		}
-	}
-
-	void jpeg_encoder::clear()
-	{
-		m_mcu_lines[0] = NULL;
-		m_pass_num = 0;
-		m_all_stream_writes_succeeded = true;
-	}
-
-	jpeg_encoder::jpeg_encoder()
-	{
-		clear();
-	}
-
-	jpeg_encoder::~jpeg_encoder()
-	{
-		deinit();
-	}
-
-	bool jpeg_encoder::init(output_stream* pStream, int width, int height, int src_channels, const params& comp_params)
-	{
-		deinit();
-		if (((!pStream) || (width < 1) || (height < 1)) || ((src_channels != 1) && (src_channels != 3) && (src_channels != 4)) || (!comp_params.check())) return false;
-		m_pStream = pStream;
-		m_params = comp_params;
-		return jpg_open(width, height, src_channels);
-	}
-
-	void jpeg_encoder::deinit()
-	{
-		jpge_free(m_mcu_lines[0]);
-		clear();
-	}
-
-	bool jpeg_encoder::process_scanline(const void* pScanline)
-	{
-		if ((m_pass_num < 1) || (m_pass_num > 2)) return false;
-		if (m_all_stream_writes_succeeded)
-		{
-			if (!pScanline)
-			{
-				if (!process_end_of_image()) return false;
-			}
-			else
-			{
-				load_mcu(pScanline);
-			}
-		}
-		return m_all_stream_writes_succeeded;
-	}
-
-	// Higher level wrappers/examples (optional).
-
-	class cfile_stream : public output_stream
-	{
-		cfile_stream(const cfile_stream&);
-		cfile_stream& operator= (const cfile_stream&);
-
-		FILE* m_pFile;
-		bool m_bStatus;
-
-	public:
-		cfile_stream() : m_pFile(NULL), m_bStatus(false) { }
-
-		virtual ~cfile_stream()
-		{
-			close();
-		}
-
-		bool open(const char* pFilename)
-		{
-			close();
-			m_pFile = fopen(pFilename, "wb");
-			m_bStatus = (m_pFile != NULL);
-			return m_bStatus;
-		}
-
-		bool close()
-		{
-			if (m_pFile)
-			{
-				if (fclose(m_pFile) == EOF)
-				{
-					m_bStatus = false;
-				}
-				m_pFile = NULL;
-			}
-			return m_bStatus;
-		}
-
-		virtual bool put_buf(const void* pBuf, int len)
-		{
-			m_bStatus = m_bStatus && (fwrite(pBuf, len, 1, m_pFile) == 1);
-			return m_bStatus;
-		}
-
-		uint get_size() const
-		{
-			return m_pFile ? ftell(m_pFile) : 0;
-		}
-	};
-
-	// Writes JPEG image to file.
-	bool compress_image_to_jpeg_file(const char* pFilename, int width, int height, int num_channels, const uint8* pImage_data, const params& comp_params)
-	{
-		cfile_stream dst_stream;
-		if (!dst_stream.open(pFilename))
-			return false;
-
-		jpge::jpeg_encoder dst_image;
-		if (!dst_image.init(&dst_stream, width, height, num_channels, comp_params))
-			return false;
-
-		for (uint pass_index = 0; pass_index < dst_image.get_total_passes(); pass_index++)
-		{
-			for (int i = 0; i < height; i++)
-			{
-				const uint8* pBuf = pImage_data + i * width * num_channels;
-				if (!dst_image.process_scanline(pBuf))
-					return false;
-			}
-			if (!dst_image.process_scanline(NULL))
-				return false;
-		}
-
-		dst_image.deinit();
-
-		return dst_stream.close();
-	}
-
-	class memory_stream : public output_stream
-	{
-		memory_stream(const memory_stream&);
-		memory_stream& operator= (const memory_stream&);
-
-		uint8* m_pBuf;
-		uint m_buf_size, m_buf_ofs;
-
-	public:
-		memory_stream(void* pBuf, uint buf_size) : m_pBuf(static_cast<uint8*>(pBuf)), m_buf_size(buf_size), m_buf_ofs(0) { }
-
-		virtual ~memory_stream() { }
-
-		virtual bool put_buf(const void* pBuf, int len)
-		{
-			uint buf_remaining = m_buf_size - m_buf_ofs;
-			if ((uint)len > buf_remaining)
-				return false;
-			memcpy(m_pBuf + m_buf_ofs, pBuf, len);
-			m_buf_ofs += len;
-			return true;
-		}
-
-		uint get_size() const
-		{
-			return m_buf_ofs;
-		}
-	};
-
-	bool compress_image_to_jpeg_file_in_memory(void* pDstBuf, int& buf_size, int width, int height, int num_channels, const uint8* pImage_data, const params& comp_params)
-	{
-		if ((!pDstBuf) || (!buf_size))
-			return false;
-
-		memory_stream dst_stream(pDstBuf, buf_size);
-
-		buf_size = 0;
-
-		jpge::jpeg_encoder dst_image;
-		if (!dst_image.init(&dst_stream, width, height, num_channels, comp_params))
-			return false;
-
-		for (uint pass_index = 0; pass_index < dst_image.get_total_passes(); pass_index++)
-		{
-			for (int i = 0; i < height; i++)
-			{
-				const uint8* pScanline = pImage_data + i * width * num_channels;
-				if (!dst_image.process_scanline(pScanline))
-					return false;
-			}
-			if (!dst_image.process_scanline(NULL))
-				return false;
-		}
-
-		dst_image.deinit();
-
-		buf_size = dst_stream.get_size();
-		return true;
-	}
-
-} // namespace jpge
-
diff --git a/thirdparty/jpeg-compressor/jpge.h b/thirdparty/jpeg-compressor/jpge.h
deleted file mode 100644
index d10510e553f..00000000000
--- a/thirdparty/jpeg-compressor/jpge.h
+++ /dev/null
@@ -1,174 +0,0 @@
-// jpge.h - C++ class for JPEG compression.
-// Public Domain or Apache 2.0, Richard Geldreich <richgel99@gmail.com>
-// Alex Evans: Added RGBA support, linear memory allocator.
-#ifndef JPEG_ENCODER_H
-#define JPEG_ENCODER_H
-
-namespace jpge
-{
-	typedef unsigned char  uint8;
-	typedef signed short   int16;
-	typedef signed int     int32;
-	typedef unsigned short uint16;
-	typedef unsigned int   uint32;
-	typedef unsigned int   uint;
-
-	// JPEG chroma subsampling factors. Y_ONLY (grayscale images) and H2V2 (color images) are the most common.
-	enum subsampling_t { Y_ONLY = 0, H1V1 = 1, H2V1 = 2, H2V2 = 3 };
-
-	// JPEG compression parameters structure.
-	struct params
-	{
-		inline params() : m_quality(85), m_subsampling(H2V2), m_no_chroma_discrim_flag(false), m_two_pass_flag(false), m_use_std_tables(false) { }
-
-		inline bool check() const
-		{
-			if ((m_quality < 1) || (m_quality > 100)) return false;
-			if ((uint)m_subsampling > (uint)H2V2) return false;
-			return true;
-		}
-
-		// Quality: 1-100, higher is better. Typical values are around 50-95.
-		int m_quality;
-
-		// m_subsampling:
-		// 0 = Y (grayscale) only
-		// 1 = YCbCr, no subsampling (H1V1, YCbCr 1x1x1, 3 blocks per MCU)
-		// 2 = YCbCr, H2V1 subsampling (YCbCr 2x1x1, 4 blocks per MCU)
-		// 3 = YCbCr, H2V2 subsampling (YCbCr 4x1x1, 6 blocks per MCU-- very common)
-		subsampling_t m_subsampling;
-
-		// Disables CbCr discrimination - only intended for testing.
-		// If true, the Y quantization table is also used for the CbCr channels.
-		bool m_no_chroma_discrim_flag;
-
-		bool m_two_pass_flag;
-
-		// By default we use the same quantization tables as mozjpeg's default. 
-		// Set to true to use the traditional tables from JPEG Annex K.
-		bool m_use_std_tables;
-	};
-
-	// Writes JPEG image to a file. 
-	// num_channels must be 1 (Y) or 3 (RGB), image pitch must be width*num_channels.
-	bool compress_image_to_jpeg_file(const char* pFilename, int width, int height, int num_channels, const uint8* pImage_data, const params& comp_params = params());
-
-	// Writes JPEG image to memory buffer. 
-	// On entry, buf_size is the size of the output buffer pointed at by pBuf, which should be at least ~1024 bytes. 
-	// If return value is true, buf_size will be set to the size of the compressed data.
-	bool compress_image_to_jpeg_file_in_memory(void* pBuf, int& buf_size, int width, int height, int num_channels, const uint8* pImage_data, const params& comp_params = params());
-
-	// Output stream abstract class - used by the jpeg_encoder class to write to the output stream. 
-	// put_buf() is generally called with len==JPGE_OUT_BUF_SIZE bytes, but for headers it'll be called with smaller amounts.
-	class output_stream
-	{
-	public:
-		virtual ~output_stream() { };
-		virtual bool put_buf(const void* Pbuf, int len) = 0;
-		template<class T> inline bool put_obj(const T& obj) { return put_buf(&obj, sizeof(T)); }
-	};
-
-	// Lower level jpeg_encoder class - useful if more control is needed than the above helper functions.
-	class jpeg_encoder
-	{
-	public:
-		jpeg_encoder();
-		~jpeg_encoder();
-
-		// Initializes the compressor.
-		// pStream: The stream object to use for writing compressed data.
-		// params - Compression parameters structure, defined above.
-		// width, height  - Image dimensions.
-		// channels - May be 1, or 3. 1 indicates grayscale, 3 indicates RGB source data.
-		// Returns false on out of memory or if a stream write fails.
-		bool init(output_stream* pStream, int width, int height, int src_channels, const params& comp_params = params());
-
-		const params& get_params() const { return m_params; }
-
-		// Deinitializes the compressor, freeing any allocated memory. May be called at any time.
-		void deinit();
-
-		uint get_total_passes() const { return m_params.m_two_pass_flag ? 2 : 1; }
-		inline uint get_cur_pass() { return m_pass_num; }
-
-		// Call this method with each source scanline.
-		// width * src_channels bytes per scanline is expected (RGB or Y format).
-		// You must call with NULL after all scanlines are processed to finish compression.
-		// Returns false on out of memory or if a stream write fails.
-		bool process_scanline(const void* pScanline);
-
-	private:
-		jpeg_encoder(const jpeg_encoder&);
-		jpeg_encoder& operator =(const jpeg_encoder&);
-
-		typedef int32 sample_array_t;
-
-		output_stream* m_pStream;
-		params m_params;
-		uint8 m_num_components;
-		uint8 m_comp_h_samp[3], m_comp_v_samp[3];
-		int m_image_x, m_image_y, m_image_bpp, m_image_bpl;
-		int m_image_x_mcu, m_image_y_mcu;
-		int m_image_bpl_xlt, m_image_bpl_mcu;
-		int m_mcus_per_row;
-		int m_mcu_x, m_mcu_y;
-		uint8* m_mcu_lines[16];
-		uint8 m_mcu_y_ofs;
-		sample_array_t m_sample_array[64];
-		int16 m_coefficient_array[64];
-		int32 m_quantization_tables[2][64];
-		uint m_huff_codes[4][256];
-		uint8 m_huff_code_sizes[4][256];
-		uint8 m_huff_bits[4][17];
-		uint8 m_huff_val[4][256];
-		uint32 m_huff_count[4][256];
-		int m_last_dc_val[3];
-		enum { JPGE_OUT_BUF_SIZE = 2048 };
-		uint8 m_out_buf[JPGE_OUT_BUF_SIZE];
-		uint8* m_pOut_buf;
-		uint m_out_buf_left;
-		uint32 m_bit_buffer;
-		uint m_bits_in;
-		uint8 m_pass_num;
-		bool m_all_stream_writes_succeeded;
-
-		void optimize_huffman_table(int table_num, int table_len);
-		void emit_byte(uint8 i);
-		void emit_word(uint i);
-		void emit_marker(int marker);
-		void emit_jfif_app0();
-		void emit_dqt();
-		void emit_sof();
-		void emit_dht(uint8* bits, uint8* val, int index, bool ac_flag);
-		void emit_dhts();
-		void emit_sos();
-		void emit_markers();
-		void compute_huffman_table(uint* codes, uint8* code_sizes, uint8* bits, uint8* val);
-		void compute_quant_table(int32* dst, int16* src);
-		void adjust_quant_table(int32* dst, int32* src);
-		void first_pass_init();
-		bool second_pass_init();
-		bool jpg_open(int p_x_res, int p_y_res, int src_channels);
-		void load_block_8_8_grey(int x);
-		void load_block_8_8(int x, int y, int c);
-		void load_block_16_8(int x, int c);
-		void load_block_16_8_8(int x, int c);
-		void load_quantized_coefficients(int component_num);
-		void flush_output_buffer();
-		void put_bits(uint bits, uint len);
-		void code_coefficients_pass_one(int component_num);
-		void code_coefficients_pass_two(int component_num);
-		void code_block(int component_num);
-		void process_mcu_row();
-		bool terminate_pass_one();
-		bool terminate_pass_two();
-		bool process_end_of_image();
-		void load_mcu(const void* src);
-		void clear();
-		void init();
-	};
-
-} // namespace jpge
-
-#endif // JPEG_ENCODER
-
diff --git a/thirdparty/jpeg-compressor/patches/0001-clang-fortify-fix.patch b/thirdparty/jpeg-compressor/patches/0001-clang-fortify-fix.patch
deleted file mode 100644
index bcfebef7b68..00000000000
--- a/thirdparty/jpeg-compressor/patches/0001-clang-fortify-fix.patch
+++ /dev/null
@@ -1,20 +0,0 @@
-diff --git a/thirdparty/jpeg-compressor/jpge.cpp b/thirdparty/jpeg-compressor/jpge.cpp
-index 5a36c19653..bb0c54bbf0 100644
---- a/thirdparty/jpeg-compressor/jpge.cpp
-+++ b/thirdparty/jpeg-compressor/jpge.cpp
-@@ -30,6 +30,7 @@
- 
- #include "jpge.h"
- 
-+#include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- 
-@@ -933,7 +934,6 @@ namespace jpge {
- 	}
- 
- 	// Higher level wrappers/examples (optional).
--#include <stdio.h>
- 
- 	class cfile_stream : public output_stream
- 	{
diff --git a/thirdparty/libjpeg-turbo/LICENSE.md b/thirdparty/libjpeg-turbo/LICENSE.md
new file mode 100644
index 00000000000..a785258bcda
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/LICENSE.md
@@ -0,0 +1,135 @@
+libjpeg-turbo Licenses
+======================
+
+libjpeg-turbo is covered by two compatible BSD-style open source licenses:
+
+- The IJG (Independent JPEG Group) License, which is listed in
+  [README.ijg](README.ijg)
+
+  This license applies to the libjpeg API library and associated programs,
+  including any code inherited from libjpeg and any modifications to that
+  code.  Note that the libjpeg-turbo SIMD source code bears the
+  [zlib License](https://opensource.org/licenses/Zlib), but in the context of
+  the overall libjpeg API library, the terms of the zlib License are subsumed
+  by the terms of the IJG License.
+
+- The Modified (3-clause) BSD License, which is listed below
+
+  This license applies to the TurboJPEG API library and associated programs, as
+  well as the build system.  Note that the TurboJPEG API library wraps the
+  libjpeg API library, so in the context of the overall TurboJPEG API library,
+  both the terms of the IJG License and the terms of the Modified (3-clause)
+  BSD License apply.
+
+
+Complying with the libjpeg-turbo Licenses
+=========================================
+
+This section provides a roll-up of the libjpeg-turbo licensing terms, to the
+best of our understanding.  This is not a license in and of itself.  It is
+intended solely for clarification.
+
+1.  If you are distributing a modified version of the libjpeg-turbo source,
+    then:
+
+    1.  You cannot alter or remove any existing copyright or license notices
+        from the source.
+
+        **Origin**
+        - Clause 1 of the IJG License
+        - Clause 1 of the Modified BSD License
+        - Clauses 1 and 3 of the zlib License
+
+    2.  You must add your own copyright notice to the header of each source
+        file you modified, so others can tell that you modified that file.  (If
+        there is not an existing copyright header in that file, then you can
+        simply add a notice stating that you modified the file.)
+
+        **Origin**
+        - Clause 1 of the IJG License
+        - Clause 2 of the zlib License
+
+    3.  You must include the IJG README file, and you must not alter any of the
+        copyright or license text in that file.
+
+        **Origin**
+        - Clause 1 of the IJG License
+
+2.  If you are distributing only libjpeg-turbo binaries without the source, or
+    if you are distributing an application that statically links with
+    libjpeg-turbo, then:
+
+    1.  Your product documentation must include a message stating:
+
+        This software is based in part on the work of the Independent JPEG
+        Group.
+
+        **Origin**
+        - Clause 2 of the IJG license
+
+    2.  If your binary distribution includes or uses the TurboJPEG API, then
+        your product documentation must include the text of the Modified BSD
+        License (see below.)
+
+        **Origin**
+        - Clause 2 of the Modified BSD License
+
+3.  You cannot use the name of the IJG or The libjpeg-turbo Project or the
+    contributors thereof in advertising, publicity, etc.
+
+    **Origin**
+    - IJG License
+    - Clause 3 of the Modified BSD License
+
+4.  The IJG and The libjpeg-turbo Project do not warrant libjpeg-turbo to be
+    free of defects, nor do we accept any liability for undesirable
+    consequences resulting from your use of the software.
+
+    **Origin**
+    - IJG License
+    - Modified BSD License
+    - zlib License
+
+
+The Modified (3-clause) BSD License
+===================================
+
+Copyright (C)2009-2024 D. R. Commander.  All Rights Reserved.<br>
+Copyright (C)2015 Viktor Szathmáry.  All Rights Reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+- Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+- Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
+- Neither the name of the libjpeg-turbo Project nor the names of its
+  contributors may be used to endorse or promote products derived from this
+  software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+
+
+Why Two Licenses?
+=================
+
+The zlib License could have been used instead of the Modified (3-clause) BSD
+License, and since the IJG License effectively subsumes the distribution
+conditions of the zlib License, this would have effectively placed
+libjpeg-turbo binary distributions under the IJG License.  However, the IJG
+License specifically refers to the Independent JPEG Group and does not extend
+attribution and endorsement protections to other entities.  Thus, it was
+desirable to choose a license that granted us the same protections for new code
+that were granted to the IJG for code derived from their software.
diff --git a/thirdparty/libjpeg-turbo/README.ijg b/thirdparty/libjpeg-turbo/README.ijg
new file mode 100644
index 00000000000..dbf8070cac8
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/README.ijg
@@ -0,0 +1,260 @@
+libjpeg-turbo note:  This file has been modified by The libjpeg-turbo Project
+to include only information relevant to libjpeg-turbo, to wordsmith certain
+sections, and to remove impolitic language that existed in the libjpeg v8
+README.  It is included only for reference.  Please see README.md for
+information specific to libjpeg-turbo.
+
+
+The Independent JPEG Group's JPEG software
+==========================================
+
+This distribution contains a release of the Independent JPEG Group's free JPEG
+software.  You are welcome to redistribute this software and to use it for any
+purpose, subject to the conditions under LEGAL ISSUES, below.
+
+This software is the work of Tom Lane, Guido Vollbeding, Philip Gladstone,
+Bill Allombert, Jim Boucher, Lee Crocker, Bob Friesenhahn, Ben Jackson,
+Julian Minguillon, Luis Ortiz, George Phillips, Davide Rossi, Ge' Weijers,
+and other members of the Independent JPEG Group.
+
+IJG is not affiliated with the ISO/IEC JTC1/SC29/WG1 standards committee
+(also known as JPEG, together with ITU-T SG16).
+
+
+DOCUMENTATION ROADMAP
+=====================
+
+This file contains the following sections:
+
+OVERVIEW            General description of JPEG and the IJG software.
+LEGAL ISSUES        Copyright, lack of warranty, terms of distribution.
+REFERENCES          Where to learn more about JPEG.
+ARCHIVE LOCATIONS   Where to find newer versions of this software.
+FILE FORMAT WARS    Software *not* to get.
+TO DO               Plans for future IJG releases.
+
+Other documentation files in the distribution are:
+
+User documentation:
+  doc/usage.txt         Usage instructions for cjpeg, djpeg, jpegtran,
+                        rdjpgcom, and wrjpgcom.
+  doc/*.1               Unix-style man pages for programs (same info as
+                        usage.txt).
+  doc/wizard.txt        Advanced usage instructions for JPEG wizards only.
+  doc/change.log        Version-to-version change highlights.
+Programmer and internal documentation:
+  doc/libjpeg.txt       How to use the JPEG library in your own programs.
+  src/example.c         Sample code for calling the JPEG library.
+  doc/structure.txt     Overview of the JPEG library's internal structure.
+  doc/coderules.txt     Coding style rules --- please read if you contribute
+                        code.
+
+Please read at least usage.txt.  Some information can also be found in the JPEG
+FAQ (Frequently Asked Questions) article.  See ARCHIVE LOCATIONS below to find
+out where to obtain the FAQ article.
+
+If you want to understand how the JPEG code works, we suggest reading one or
+more of the REFERENCES, then looking at the documentation files (in roughly
+the order listed) before diving into the code.
+
+
+OVERVIEW
+========
+
+This package contains C software to implement JPEG image encoding, decoding,
+and transcoding.  JPEG (pronounced "jay-peg") is a standardized compression
+method for full-color and grayscale images.  JPEG's strong suit is compressing
+photographic images or other types of images that have smooth color and
+brightness transitions between neighboring pixels.  Images with sharp lines or
+other abrupt features may not compress well with JPEG, and a higher JPEG
+quality may have to be used to avoid visible compression artifacts with such
+images.
+
+JPEG is normally lossy, meaning that the output pixels are not necessarily
+identical to the input pixels.  However, on photographic content and other
+"smooth" images, very good compression ratios can be obtained with no visible
+compression artifacts, and extremely high compression ratios are possible if
+you are willing to sacrifice image quality (by reducing the "quality" setting
+in the compressor.)
+
+This software implements JPEG baseline, extended-sequential, progressive, and
+lossless compression processes.  Provision is made for supporting all variants
+of these processes, although some uncommon parameter settings aren't
+implemented yet.  We have made no provision for supporting the hierarchical
+processes defined in the standard.
+
+We provide a set of library routines for reading and writing JPEG image files,
+plus two sample applications "cjpeg" and "djpeg", which use the library to
+perform conversion between JPEG and some other popular image file formats.
+The library is intended to be reused in other applications.
+
+In order to support file conversion and viewing software, we have included
+considerable functionality beyond the bare JPEG coding/decoding capability;
+for example, the color quantization modules are not strictly part of JPEG
+decoding, but they are essential for output to colormapped file formats.  These
+extra functions can be compiled out of the library if not required for a
+particular application.
+
+We have also included "jpegtran", a utility for lossless transcoding between
+different JPEG processes, and "rdjpgcom" and "wrjpgcom", two simple
+applications for inserting and extracting textual comments in JFIF files.
+
+The emphasis in designing this software has been on achieving portability and
+flexibility, while also making it fast enough to be useful.  In particular,
+the software is not intended to be read as a tutorial on JPEG.  (See the
+REFERENCES section for introductory material.)  Rather, it is intended to
+be reliable, portable, industrial-strength code.  We do not claim to have
+achieved that goal in every aspect of the software, but we strive for it.
+
+We welcome the use of this software as a component of commercial products.
+No royalty is required, but we do ask for an acknowledgement in product
+documentation, as described under LEGAL ISSUES.
+
+
+LEGAL ISSUES
+============
+
+In plain English:
+
+1. We don't promise that this software works.  (But if you find any bugs,
+   please let us know!)
+2. You can use this software for whatever you want.  You don't have to pay us.
+3. You may not pretend that you wrote this software.  If you use it in a
+   program, you must acknowledge somewhere in your documentation that
+   you've used the IJG code.
+
+In legalese:
+
+The authors make NO WARRANTY or representation, either express or implied,
+with respect to this software, its quality, accuracy, merchantability, or
+fitness for a particular purpose.  This software is provided "AS IS", and you,
+its user, assume the entire risk as to its quality and accuracy.
+
+This software is copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
+All Rights Reserved except as specified below.
+
+Permission is hereby granted to use, copy, modify, and distribute this
+software (or portions thereof) for any purpose, without fee, subject to these
+conditions:
+(1) If any part of the source code for this software is distributed, then this
+README file must be included, with this copyright and no-warranty notice
+unaltered; and any additions, deletions, or changes to the original files
+must be clearly indicated in accompanying documentation.
+(2) If only executable code is distributed, then the accompanying
+documentation must state that "this software is based in part on the work of
+the Independent JPEG Group".
+(3) Permission for use of this software is granted only if the user accepts
+full responsibility for any undesirable consequences; the authors accept
+NO LIABILITY for damages of any kind.
+
+These conditions apply to any software derived from or based on the IJG code,
+not just to the unmodified library.  If you use our work, you ought to
+acknowledge us.
+
+Permission is NOT granted for the use of any IJG author's name or company name
+in advertising or publicity relating to this software or products derived from
+it.  This software may be referred to only as "the Independent JPEG Group's
+software".
+
+We specifically permit and encourage the use of this software as the basis of
+commercial products, provided that all warranty or liability claims are
+assumed by the product vendor.
+
+
+REFERENCES
+==========
+
+We recommend reading one or more of these references before trying to
+understand the innards of the JPEG software.
+
+The best short technical introduction to the JPEG compression algorithm is
+        Wallace, Gregory K.  "The JPEG Still Picture Compression Standard",
+        Communications of the ACM, April 1991 (vol. 34 no. 4), pp. 30-44.
+(Adjacent articles in that issue discuss MPEG motion picture compression,
+applications of JPEG, and related topics.)  If you don't have the CACM issue
+handy, a PDF file containing a revised version of Wallace's article is
+available at http://www.ijg.org/files/Wallace.JPEG.pdf.  The file (actually
+a preprint for an article that appeared in IEEE Trans. Consumer Electronics)
+omits the sample images that appeared in CACM, but it includes corrections
+and some added material.  Note: the Wallace article is copyright ACM and IEEE,
+and it may not be used for commercial purposes.
+
+A somewhat less technical, more leisurely introduction to JPEG can be found in
+"The Data Compression Book" by Mark Nelson and Jean-loup Gailly, published by
+M&T Books (New York), 2nd ed. 1996, ISBN 1-55851-434-1.  This book provides
+good explanations and example C code for a multitude of compression methods
+including JPEG.  It is an excellent source if you are comfortable reading C
+code but don't know much about data compression in general.  The book's JPEG
+sample code is far from industrial-strength, but when you are ready to look
+at a full implementation, you've got one here...
+
+The best currently available description of JPEG is the textbook "JPEG Still
+Image Data Compression Standard" by William B. Pennebaker and Joan L.
+Mitchell, published by Van Nostrand Reinhold, 1993, ISBN 0-442-01272-1.
+Price US$59.95, 638 pp.  The book includes the complete text of the ISO JPEG
+standards (DIS 10918-1 and draft DIS 10918-2).
+
+The original JPEG standard is divided into two parts, Part 1 being the actual
+specification, while Part 2 covers compliance testing methods.  Part 1 is
+titled "Digital Compression and Coding of Continuous-tone Still Images,
+Part 1: Requirements and guidelines" and has document numbers ISO/IEC IS
+10918-1, ITU-T T.81.  Part 2 is titled "Digital Compression and Coding of
+Continuous-tone Still Images, Part 2: Compliance testing" and has document
+numbers ISO/IEC IS 10918-2, ITU-T T.83.
+
+The JPEG standard does not specify all details of an interchangeable file
+format.  For the omitted details, we follow the "JFIF" conventions, revision
+1.02.  JFIF version 1 has been adopted as ISO/IEC 10918-5 (05/2013) and
+Recommendation ITU-T T.871 (05/2011): Information technology - Digital
+compression and coding of continuous-tone still images: JPEG File Interchange
+Format (JFIF).  It is available as a free download in PDF file format from
+https://www.iso.org/standard/54989.html and http://www.itu.int/rec/T-REC-T.871.
+A PDF file of the older JFIF 1.02 specification is available at
+http://www.w3.org/Graphics/JPEG/jfif3.pdf.
+
+The TIFF 6.0 file format specification can be obtained from
+http://mirrors.ctan.org/graphics/tiff/TIFF6.ps.gz.  The JPEG incorporation
+scheme found in the TIFF 6.0 spec of 3-June-92 has a number of serious
+problems.  IJG does not recommend use of the TIFF 6.0 design (TIFF Compression
+tag 6).  Instead, we recommend the JPEG design proposed by TIFF Technical Note
+#2 (Compression tag 7).  Copies of this Note can be obtained from
+http://www.ijg.org/files/.  It is expected that the next revision
+of the TIFF spec will replace the 6.0 JPEG design with the Note's design.
+Although IJG's own code does not support TIFF/JPEG, the free libtiff library
+uses our library to implement TIFF/JPEG per the Note.
+
+
+ARCHIVE LOCATIONS
+=================
+
+The "official" archive site for this software is www.ijg.org.
+The most recent released version can always be found there in
+directory "files".
+
+The JPEG FAQ (Frequently Asked Questions) article is a source of some
+general information about JPEG.  It is available at
+http://www.faqs.org/faqs/jpeg-faq.
+
+
+FILE FORMAT COMPATIBILITY
+=========================
+
+This software implements ITU T.81 | ISO/IEC 10918 with some extensions from
+ITU T.871 | ISO/IEC 10918-5 (JPEG File Interchange Format-- see REFERENCES).
+Informally, the term "JPEG image" or "JPEG file" most often refers to JFIF or
+a subset thereof, but there are other formats containing the name "JPEG" that
+are incompatible with the original JPEG standard or with JFIF (for instance,
+JPEG 2000 and JPEG XR).  This software therefore does not support these
+formats.  Indeed, one of the original reasons for developing this free software
+was to help force convergence on a common, interoperable format standard for
+JPEG files.
+
+JFIF is a minimal or "low end" representation.  TIFF/JPEG (TIFF revision 6.0 as
+modified by TIFF Technical Note #2) can be used for "high end" applications
+that need to record a lot of additional data about an image.
+
+
+TO DO
+=====
+
+Please send bug reports, offers of help, etc. to jpeg-info@jpegclub.org.
diff --git a/thirdparty/libjpeg-turbo/patches/0001-cmake-generated-headers.patch b/thirdparty/libjpeg-turbo/patches/0001-cmake-generated-headers.patch
new file mode 100644
index 00000000000..2a018164a32
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/patches/0001-cmake-generated-headers.patch
@@ -0,0 +1,629 @@
+diff --git a/thirdparty/libjpeg-turbo/src/jconfig.h b/thirdparty/libjpeg-turbo/src/jconfig.h
+new file mode 100644
+index 0000000000..22ed4b4c32
+--- /dev/null
++++ b/thirdparty/libjpeg-turbo/src/jconfig.h
+@@ -0,0 +1,62 @@
++// Originally generated by libjpeg-turbo's cmake build, then modified to support multiple platforms.
++
++/* Version ID for the JPEG library.
++ * Might be useful for tests like "#if JPEG_LIB_VERSION >= 60".
++ */
++#define JPEG_LIB_VERSION  62
++
++/* libjpeg-turbo version */
++#define LIBJPEG_TURBO_VERSION  3.1.0
++
++/* libjpeg-turbo version in integer form */
++#define LIBJPEG_TURBO_VERSION_NUMBER  3001000
++
++/* Support arithmetic encoding when using 8-bit samples */
++#define C_ARITH_CODING_SUPPORTED 1
++
++/* Support arithmetic decoding when using 8-bit samples */
++#define D_ARITH_CODING_SUPPORTED 1
++
++/* Support in-memory source/destination managers */
++#define MEM_SRCDST_SUPPORTED  1
++
++/* Use accelerated SIMD routines when using 8-bit samples */
++//#define WITH_SIMD 1
++
++/* This version of libjpeg-turbo supports run-time selection of data precision,
++ * so BITS_IN_JSAMPLE is no longer used to specify the data precision at build
++ * time.  However, some downstream software expects the macro to be defined.
++ * Since 12-bit data precision is an opt-in feature that requires explicitly
++ * calling 12-bit-specific libjpeg API functions and using 12-bit-specific data
++ * types, the unmodified portion of the libjpeg API still behaves as if it were
++ * built for 8-bit precision, and JSAMPLE is still literally an 8-bit data
++ * type.  Thus, it is correct to define BITS_IN_JSAMPLE to 8 here.
++ */
++#ifndef BITS_IN_JSAMPLE
++#define BITS_IN_JSAMPLE  8
++#endif
++
++#ifdef _WIN32
++
++#undef RIGHT_SHIFT_IS_UNSIGNED
++
++/* Define "boolean" as unsigned char, not int, per Windows custom */
++#ifndef __RPCNDR_H__            /* don't conflict if rpcndr.h already read */
++typedef unsigned char boolean;
++#endif
++#define HAVE_BOOLEAN            /* prevent jmorecfg.h from redefining it */
++
++/* Define "INT32" as int, not long, per Windows custom */
++#if !(defined(_BASETSD_H_) || defined(_BASETSD_H))   /* don't conflict if basetsd.h already read */
++typedef short INT16;
++typedef signed int INT32;
++#endif
++#define XMD_H                   /* prevent jmorecfg.h from redefining it */
++
++#else
++
++/* Define if your (broken) compiler shifts signed values as if they were
++   unsigned. */
++/* #undef RIGHT_SHIFT_IS_UNSIGNED */
++
++#endif
+diff --git a/thirdparty/libjpeg-turbo/src/jconfigint.h b/thirdparty/libjpeg-turbo/src/jconfigint.h
+new file mode 100644
+index 0000000000..f6171bf846
+--- /dev/null
++++ b/thirdparty/libjpeg-turbo/src/jconfigint.h
+@@ -0,0 +1,94 @@
++// Originally generated by libjpeg-turbo's cmake build, then modified to support multiple platforms.
++
++/* libjpeg-turbo build number */
++#define BUILD  "20250317"
++
++/* How to hide global symbols. */
++#ifndef HIDDEN
++	#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
++		#define HIDDEN  __attribute__((visibility("hidden")))
++	#else
++		#define HIDDEN
++	#endif
++#endif
++
++/* Compiler's inline keyword */
++#undef inline
++
++/* How to obtain function inlining. */
++#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
++	#define INLINE  __inline__ __attribute__((always_inline))
++#else
++	#define INLINE inline
++#endif
++
++/* How to obtain thread-local storage */
++#if defined(_MSC_VER)
++#define THREAD_LOCAL  __declspec(thread)
++#else
++#define THREAD_LOCAL  __thread
++#endif
++
++/* Define to the full name of this package. */
++#define PACKAGE_NAME  "libjpeg-turbo"
++
++/* Version number of package */
++#define VERSION  "3.1.1"
++
++/* The size of `size_t', as computed by sizeof. */
++#define SIZEOF_SIZE_T  8
++
++/* Define if your compiler has __builtin_ctzl() and sizeof(unsigned long) == sizeof(size_t). */
++#if defined(__GNUC__)
++	#define HAVE_BUILTIN_CTZL
++#endif
++
++/* Define to 1 if you have the <intrin.h> header file. */
++/* #undef HAVE_INTRIN_H */
++
++#if defined(_MSC_VER) && defined(HAVE_INTRIN_H)
++#if (SIZEOF_SIZE_T == 8)
++#define HAVE_BITSCANFORWARD64
++#elif (SIZEOF_SIZE_T == 4)
++#define HAVE_BITSCANFORWARD
++#endif
++#endif
++
++#if defined(__has_attribute)
++#if __has_attribute(fallthrough)
++#define FALLTHROUGH  __attribute__((fallthrough));
++#else
++#define FALLTHROUGH
++#endif
++#else
++#define FALLTHROUGH
++#endif
++
++/*
++ * Define BITS_IN_JSAMPLE as either
++ *   8   for 8-bit sample values (the usual setting)
++ *   12  for 12-bit sample values
++ * Only 8 and 12 are legal data precisions for lossy JPEG according to the
++ * JPEG standard, and the IJG code does not support anything else!
++ */
++
++#ifndef BITS_IN_JSAMPLE
++#define BITS_IN_JSAMPLE  8      /* use 8 or 12 */
++#endif
++
++#undef C_ARITH_CODING_SUPPORTED
++#undef D_ARITH_CODING_SUPPORTED
++#undef WITH_SIMD
++
++#if BITS_IN_JSAMPLE == 8
++
++/* Support arithmetic encoding */
++#define C_ARITH_CODING_SUPPORTED 1
++
++/* Support arithmetic decoding */
++#define D_ARITH_CODING_SUPPORTED 1
++
++/* Use accelerated SIMD routines. */
++//#define WITH_SIMD 1
++
++#endif
+diff --git a/thirdparty/libjpeg-turbo/src/jmorecfg.h b/thirdparty/libjpeg-turbo/src/jmorecfg.h
+new file mode 100644
+index 0000000000..f7de737edb
+--- /dev/null
++++ b/thirdparty/libjpeg-turbo/src/jmorecfg.h
+@@ -0,0 +1,389 @@
++// Originally generated by libjpeg-turbo's cmake build, then modified to support multiple platforms.
++
++/*
++ * jmorecfg.h
++ *
++ * This file was part of the Independent JPEG Group's software:
++ * Copyright (C) 1991-1997, Thomas G. Lane.
++ * Modified 1997-2009 by Guido Vollbeding.
++ * Lossless JPEG Modifications:
++ * Copyright (C) 1999, Ken Murchison.
++ * libjpeg-turbo Modifications:
++ * Copyright (C) 2009, 2011, 2014-2015, 2018, 2020, 2022, D. R. Commander.
++ * Godot modifications:
++ * Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md).
++ * For conditions of distribution and use, see the accompanying README.ijg
++ * file.
++ *
++ * This file contains additional configuration options that customize the
++ * JPEG software for special applications or support machine-dependent
++ * optimizations.  Most users will not need to touch this file.
++ */
++
++
++/*
++ * Maximum number of components (color channels) allowed in JPEG image.
++ * To meet the letter of Rec. ITU-T T.81 | ISO/IEC 10918-1, set this to 255.
++ * However, darn few applications need more than 4 channels (maybe 5 for CMYK +
++ * alpha mask).  We recommend 10 as a reasonable compromise; use 4 if you are
++ * really short on memory.  (Each allowed component costs a hundred or so
++ * bytes of storage, whether actually used in an image or not.)
++ */
++
++#define MAX_COMPONENTS  10      /* maximum number of image components */
++
++
++/*
++ * Basic data types.
++ * You may need to change these if you have a machine with unusual data
++ * type sizes; for example, "char" not 8 bits, "short" not 16 bits,
++ * or "long" not 32 bits.  We don't care whether "int" is 16 or 32 bits,
++ * but it had better be at least 16.
++ */
++
++/* Representation of a single sample (pixel element value).
++ * We frequently allocate large arrays of these, so it's important to keep
++ * them small.  But if you have memory to burn and access to char or short
++ * arrays is very slow on your hardware, you might want to change these.
++ */
++
++/* JSAMPLE should be the smallest type that will hold the values 0..255. */
++
++typedef unsigned char JSAMPLE;
++#define GETJSAMPLE(value)  ((int)(value))
++
++#define MAXJSAMPLE       255
++#define CENTERJSAMPLE    128
++
++
++/* J12SAMPLE should be the smallest type that will hold the values 0..4095. */
++
++typedef short J12SAMPLE;
++
++#define MAXJ12SAMPLE     4095
++#define CENTERJ12SAMPLE  2048
++
++
++/* J16SAMPLE should be the smallest type that will hold the values 0..65535. */
++
++typedef unsigned short J16SAMPLE;
++
++#define MAXJ16SAMPLE     65535
++#define CENTERJ16SAMPLE  32768
++
++
++/* Representation of a DCT frequency coefficient.
++ * This should be a signed value of at least 16 bits; "short" is usually OK.
++ * Again, we allocate large arrays of these, but you can change to int
++ * if you have memory to burn and "short" is really slow.
++ */
++
++typedef short JCOEF;
++
++
++/* Compressed datastreams are represented as arrays of JOCTET.
++ * These must be EXACTLY 8 bits wide, at least once they are written to
++ * external storage.  Note that when using the stdio data source/destination
++ * managers, this is also the data type passed to fread/fwrite.
++ */
++
++typedef unsigned char JOCTET;
++#define GETJOCTET(value)  (value)
++
++
++/* These typedefs are used for various table entries and so forth.
++ * They must be at least as wide as specified; but making them too big
++ * won't cost a huge amount of memory, so we don't provide special
++ * extraction code like we did for JSAMPLE.  (In other words, these
++ * typedefs live at a different point on the speed/space tradeoff curve.)
++ */
++
++/* UINT8 must hold at least the values 0..255. */
++
++typedef unsigned char UINT8;
++
++/* UINT16 must hold at least the values 0..65535. */
++
++typedef unsigned short UINT16;
++
++/* INT16 must hold at least the values -32768..32767. */
++
++#ifndef XMD_H                   /* X11/xmd.h correctly defines INT16 */
++typedef short INT16;
++#endif
++
++/* INT32 must hold at least signed 32-bit values.
++ *
++ * NOTE: The INT32 typedef dates back to libjpeg v5 (1994.)  Integers were
++ * sometimes 16-bit back then (MS-DOS), which is why INT32 is typedef'd to
++ * long.  It also wasn't common (or at least as common) in 1994 for INT32 to be
++ * defined by platform headers.  Since then, however, INT32 is defined in
++ * several other common places:
++ *
++ * Xmd.h (X11 header) typedefs INT32 to int on 64-bit platforms and long on
++ * 32-bit platforms (i.e always a 32-bit signed type.)
++ *
++ * basetsd.h (Win32 header) typedefs INT32 to int (always a 32-bit signed type
++ * on modern platforms.)
++ *
++ * qglobal.h (Qt header) typedefs INT32 to int (always a 32-bit signed type on
++ * modern platforms.)
++ *
++ * This is a recipe for conflict, since "long" and "int" aren't always
++ * compatible types.  Since the definition of INT32 has technically been part
++ * of the libjpeg API for more than 20 years, we can't remove it, but we do not
++ * use it internally any longer.  We instead define a separate type (JLONG)
++ * for internal use, which ensures that internal behavior will always be the
++ * same regardless of any external headers that may be included.
++ */
++
++#ifndef XMD_H                   /* X11/xmd.h correctly defines INT32 */
++#ifndef _BASETSD_H_             /* Microsoft defines it in basetsd.h */
++#ifndef _BASETSD_H              /* MinGW is slightly different */
++#ifndef QGLOBAL_H               /* Qt defines it in qglobal.h */
++typedef long INT32;
++#endif
++#endif
++#endif
++#endif
++
++/* Datatype used for image dimensions.  The JPEG standard only supports
++ * images up to 64K*64K due to 16-bit fields in SOF markers.  Therefore
++ * "unsigned int" is sufficient on all machines.  However, if you need to
++ * handle larger images and you don't mind deviating from the spec, you
++ * can change this datatype.  (Note that changing this datatype will
++ * potentially require modifying the SIMD code.  The x86-64 SIMD extensions,
++ * in particular, assume a 32-bit JDIMENSION.)
++ */
++
++typedef unsigned int JDIMENSION;
++
++#define JPEG_MAX_DIMENSION  65500L  /* a tad under 64K to prevent overflows */
++
++
++/* These macros are used in all function definitions and extern declarations.
++ * You could modify them if you need to change function linkage conventions;
++ * in particular, you'll need to do that to make the library a Windows DLL.
++ * Another application is to make all functions global for use with debuggers
++ * or code profilers that require it.
++ */
++
++/* a function called through method pointers: */
++#define METHODDEF(type)         static type
++/* a function used only in its module: */
++#define LOCAL(type)             static type
++/* a function referenced thru EXTERNs: */
++#define GLOBAL(type)            type
++/* a reference to a GLOBAL function: */
++#define EXTERN(type)            extern type
++
++
++/* Originally, this macro was used as a way of defining function prototypes
++ * for both modern compilers as well as older compilers that did not support
++ * prototype parameters.  libjpeg-turbo has never supported these older,
++ * non-ANSI compilers, but the macro is still included because there is some
++ * software out there that uses it.
++ */
++
++#define JMETHOD(type, methodname, arglist)  type (*methodname) arglist
++
++
++/* libjpeg-turbo no longer supports platforms that have far symbols (MS-DOS),
++ * but again, some software relies on this macro.
++ */
++
++#undef FAR
++#define FAR
++
++
++/*
++ * On a few systems, type boolean and/or its values FALSE, TRUE may appear
++ * in standard header files.  Or you may have conflicts with application-
++ * specific header files that you want to include together with these files.
++ * Defining HAVE_BOOLEAN before including jpeglib.h should make it work.
++ */
++
++#ifndef HAVE_BOOLEAN
++typedef int boolean;
++#endif
++#ifndef FALSE                   /* in case these macros already exist */
++#define FALSE   0               /* values of boolean */
++#endif
++#ifndef TRUE
++#define TRUE    1
++#endif
++
++
++/*
++ * The remaining options affect code selection within the JPEG library,
++ * but they don't need to be visible to most applications using the library.
++ * To minimize application namespace pollution, the symbols won't be
++ * defined unless JPEG_INTERNALS or JPEG_INTERNAL_OPTIONS has been defined.
++ */
++
++#ifdef JPEG_INTERNALS
++#define JPEG_INTERNAL_OPTIONS
++#endif
++
++#ifdef JPEG_INTERNAL_OPTIONS
++
++
++/*
++ * These defines indicate whether to include various optional functions.
++ * Undefining some of these symbols will produce a smaller but less capable
++ * library.  Note that you can leave certain source files out of the
++ * compilation/linking process if you've #undef'd the corresponding symbols.
++ * (You may HAVE to do that if your compiler doesn't like null source files.)
++ */
++
++/* Capability options common to encoder and decoder: */
++
++#define DCT_ISLOW_SUPPORTED     /* accurate integer method */
++#define DCT_IFAST_SUPPORTED     /* less accurate int method [legacy feature] */
++#define DCT_FLOAT_SUPPORTED     /* floating-point method [legacy feature] */
++
++/* Encoder capability options: */
++
++#define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
++#define C_PROGRESSIVE_SUPPORTED     /* Progressive JPEG? (Requires MULTISCAN)*/
++//#define C_LOSSLESS_SUPPORTED        /* Lossless JPEG? */
++#define ENTROPY_OPT_SUPPORTED       /* Optimization of entropy coding parms? */
++/* Note: if you selected 12-bit data precision, it is dangerous to turn off
++ * ENTROPY_OPT_SUPPORTED.  The standard Huffman tables are only good for 8-bit
++ * precision, so jchuff.c normally uses entropy optimization to compute
++ * usable tables for higher precision.  If you don't want to do optimization,
++ * you'll have to supply different default Huffman tables.
++ * The exact same statements apply for progressive and lossless JPEG:
++ * the default tables don't work for progressive mode or lossless mode.
++ * (This may get fixed, however.)
++ */
++#define INPUT_SMOOTHING_SUPPORTED   /* Input image smoothing option? */
++
++/* Decoder capability options: */
++
++#define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
++#define D_PROGRESSIVE_SUPPORTED     /* Progressive JPEG? (Requires MULTISCAN)*/
++//#define D_LOSSLESS_SUPPORTED        /* Lossless JPEG? */
++#define SAVE_MARKERS_SUPPORTED      /* jpeg_save_markers() needed? */
++#define BLOCK_SMOOTHING_SUPPORTED   /* Block smoothing? (Progressive only) */
++#define IDCT_SCALING_SUPPORTED      /* Output rescaling via IDCT? */
++#undef  UPSAMPLE_SCALING_SUPPORTED  /* Output rescaling at upsample stage? */
++#define UPSAMPLE_MERGING_SUPPORTED  /* Fast path for sloppy upsampling? */
++#define QUANT_1PASS_SUPPORTED       /* 1-pass color quantization? */
++#define QUANT_2PASS_SUPPORTED       /* 2-pass color quantization? */
++
++/* more capability options later, no doubt */
++
++
++/*
++ * The RGB_RED, RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE macros are a vestigial
++ * feature of libjpeg.  The idea was that, if an application developer needed
++ * to compress from/decompress to a BGR/BGRX/RGBX/XBGR/XRGB buffer, they could
++ * change these macros, rebuild libjpeg, and link their application statically
++ * with it.  In reality, few people ever did this, because there were some
++ * severe restrictions involved (cjpeg and djpeg no longer worked properly,
++ * compressing/decompressing RGB JPEGs no longer worked properly, and the color
++ * quantizer wouldn't work with pixel sizes other than 3.)  Furthermore, since
++ * all of the O/S-supplied versions of libjpeg were built with the default
++ * values of RGB_RED, RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE, many applications
++ * have come to regard these values as immutable.
++ *
++ * The libjpeg-turbo colorspace extensions provide a much cleaner way of
++ * compressing from/decompressing to buffers with arbitrary component orders
++ * and pixel sizes.  Thus, we do not support changing the values of RGB_RED,
++ * RGB_GREEN, RGB_BLUE, or RGB_PIXELSIZE.  In addition to the restrictions
++ * listed above, changing these values will also break the SIMD extensions and
++ * the regression tests.
++ */
++
++#define RGB_RED         0       /* Offset of Red in an RGB scanline element */
++#define RGB_GREEN       1       /* Offset of Green */
++#define RGB_BLUE        2       /* Offset of Blue */
++#define RGB_PIXELSIZE   3       /* JSAMPLEs per RGB scanline element */
++
++#define JPEG_NUMCS  17
++
++#define EXT_RGB_RED         0
++#define EXT_RGB_GREEN       1
++#define EXT_RGB_BLUE        2
++#define EXT_RGB_PIXELSIZE   3
++
++#define EXT_RGBX_RED        0
++#define EXT_RGBX_GREEN      1
++#define EXT_RGBX_BLUE       2
++#define EXT_RGBX_PIXELSIZE  4
++
++#define EXT_BGR_RED         2
++#define EXT_BGR_GREEN       1
++#define EXT_BGR_BLUE        0
++#define EXT_BGR_PIXELSIZE   3
++
++#define EXT_BGRX_RED        2
++#define EXT_BGRX_GREEN      1
++#define EXT_BGRX_BLUE       0
++#define EXT_BGRX_PIXELSIZE  4
++
++#define EXT_XBGR_RED        3
++#define EXT_XBGR_GREEN      2
++#define EXT_XBGR_BLUE       1
++#define EXT_XBGR_PIXELSIZE  4
++
++#define EXT_XRGB_RED        1
++#define EXT_XRGB_GREEN      2
++#define EXT_XRGB_BLUE       3
++#define EXT_XRGB_PIXELSIZE  4
++
++static const int rgb_red[JPEG_NUMCS] = {
++  -1, -1, RGB_RED, -1, -1, -1, EXT_RGB_RED, EXT_RGBX_RED,
++  EXT_BGR_RED, EXT_BGRX_RED, EXT_XBGR_RED, EXT_XRGB_RED,
++  EXT_RGBX_RED, EXT_BGRX_RED, EXT_XBGR_RED, EXT_XRGB_RED,
++  -1
++};
++
++static const int rgb_green[JPEG_NUMCS] = {
++  -1, -1, RGB_GREEN, -1, -1, -1, EXT_RGB_GREEN, EXT_RGBX_GREEN,
++  EXT_BGR_GREEN, EXT_BGRX_GREEN, EXT_XBGR_GREEN, EXT_XRGB_GREEN,
++  EXT_RGBX_GREEN, EXT_BGRX_GREEN, EXT_XBGR_GREEN, EXT_XRGB_GREEN,
++  -1
++};
++
++static const int rgb_blue[JPEG_NUMCS] = {
++  -1, -1, RGB_BLUE, -1, -1, -1, EXT_RGB_BLUE, EXT_RGBX_BLUE,
++  EXT_BGR_BLUE, EXT_BGRX_BLUE, EXT_XBGR_BLUE, EXT_XRGB_BLUE,
++  EXT_RGBX_BLUE, EXT_BGRX_BLUE, EXT_XBGR_BLUE, EXT_XRGB_BLUE,
++  -1
++};
++
++static const int rgb_pixelsize[JPEG_NUMCS] = {
++  -1, -1, RGB_PIXELSIZE, -1, -1, -1, EXT_RGB_PIXELSIZE, EXT_RGBX_PIXELSIZE,
++  EXT_BGR_PIXELSIZE, EXT_BGRX_PIXELSIZE, EXT_XBGR_PIXELSIZE, EXT_XRGB_PIXELSIZE,
++  EXT_RGBX_PIXELSIZE, EXT_BGRX_PIXELSIZE, EXT_XBGR_PIXELSIZE, EXT_XRGB_PIXELSIZE,
++  -1
++};
++
++/* Definitions for speed-related optimizations. */
++
++/* On some machines (notably 68000 series) "int" is 32 bits, but multiplying
++ * two 16-bit shorts is faster than multiplying two ints.  Define MULTIPLIER
++ * as short on such a machine.  MULTIPLIER must be at least 16 bits wide.
++ */
++
++#ifndef MULTIPLIER
++#ifndef WITH_SIMD
++#define MULTIPLIER  int         /* type for fastest integer multiply */
++#else
++#define MULTIPLIER  short       /* prefer 16-bit with SIMD for parellelism */
++#endif
++#endif
++
++
++/* FAST_FLOAT should be either float or double, whichever is done faster
++ * by your compiler.  (Note that this type is only used in the floating point
++ * DCT routines, so it only matters if you've defined DCT_FLOAT_SUPPORTED.)
++ */
++
++#ifndef FAST_FLOAT
++#define FAST_FLOAT  float
++#endif
++
++#endif /* JPEG_INTERNAL_OPTIONS */
+diff --git a/thirdparty/libjpeg-turbo/src/jversion.h b/thirdparty/libjpeg-turbo/src/jversion.h
+new file mode 100644
+index 0000000000..8e4f4ef749
+--- /dev/null
++++ b/thirdparty/libjpeg-turbo/src/jversion.h
+@@ -0,0 +1,60 @@
++// Originally generated by libjpeg-turbo's cmake build, then modified to support multiple platforms.
++
++/*
++ * jversion.h
++ *
++ * This file was part of the Independent JPEG Group's software:
++ * Copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
++ * libjpeg-turbo Modifications:
++ * Copyright (C) 2010, 2012-2024, D. R. Commander.
++ * Godot modifications:
++ * Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md).
++ * For conditions of distribution and use, see the accompanying README.ijg
++ * file.
++ *
++ * This file contains software version identification.
++ */
++
++
++#if JPEG_LIB_VERSION >= 80
++
++#define JVERSION        "8d  15-Jan-2012"
++
++#elif JPEG_LIB_VERSION >= 70
++
++#define JVERSION        "7  27-Jun-2009"
++
++#else
++
++#define JVERSION        "6b  27-Mar-1998"
++
++#endif
++
++/*
++ * NOTE: It is our convention to place the authors in the following order:
++ * - libjpeg-turbo authors (2009-) in descending order of the date of their
++ *   most recent contribution to the project, then in ascending order of the
++ *   date of their first contribution to the project, then in alphabetical
++ *   order
++ * - Upstream authors in descending order of the date of the first inclusion of
++ *   their code
++ */
++
++#define JCOPYRIGHT1 \
++  "Copyright (C) 2009-2024 D. R. Commander\n" \
++  "Copyright (C) 2015, 2020 Google, Inc.\n" \
++  "Copyright (C) 2019-2020 Arm Limited\n" \
++  "Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \
++  "Copyright (C) 2011-2016 Siarhei Siamashka\n" \
++  "Copyright (C) 2015 Intel Corporation\n"
++#define JCOPYRIGHT2 \
++  "Copyright (C) 2013-2014 Linaro Limited\n" \
++  "Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \
++  "Copyright (C) 2009, 2012 Pierre Ossman for Cendio AB\n" \
++  "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
++  "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
++  "Copyright (C) 1999 Ken Murchison\n" \
++  "Copyright (C) 1991-2020 Thomas G. Lane, Guido Vollbeding\n"
++
++#define JCOPYRIGHT_SHORT \
++  "Copyright (C) 1991-2024 The libjpeg-turbo Project and many others"
diff --git a/thirdparty/libjpeg-turbo/patches/0002-disable-16bitlossless.patch b/thirdparty/libjpeg-turbo/patches/0002-disable-16bitlossless.patch
new file mode 100644
index 00000000000..beb2a9b0aa1
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/patches/0002-disable-16bitlossless.patch
@@ -0,0 +1,20 @@
+commit 462c1cd875ae8f6b5f6406dda01881fb173ac30c
+Author: Daniel Kinsman <danielkinsman@riseup.net>
+Date:   Thu Mar 20 12:21:28 2025 +1100
+
+    remove unneeded source files and lossless jpeg support
+
+diff --git a/thirdparty/libjpeg-turbo/src/turbojpeg.c b/thirdparty/libjpeg-turbo/src/turbojpeg.c
+index 389aea55d3..eec8e2a616 100644
+--- a/thirdparty/libjpeg-turbo/src/turbojpeg.c
++++ b/thirdparty/libjpeg-turbo/src/turbojpeg.c
+@@ -1200,9 +1200,6 @@ bailout:
+ #define BITS_IN_JSAMPLE  12
+ #include "turbojpeg-mp.c"
+ #undef BITS_IN_JSAMPLE
+-#define BITS_IN_JSAMPLE  16
+-#include "turbojpeg-mp.c"
+-#undef BITS_IN_JSAMPLE
+ 
+ /* TurboJPEG 1.2+ */
+ DLLEXPORT int tjCompress2(tjhandle handle, const unsigned char *srcBuf,
diff --git a/thirdparty/libjpeg-turbo/patches/0003-remove-bmp-ppm-support.patch b/thirdparty/libjpeg-turbo/patches/0003-remove-bmp-ppm-support.patch
new file mode 100644
index 00000000000..e4214269a25
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/patches/0003-remove-bmp-ppm-support.patch
@@ -0,0 +1,329 @@
+diff --git a/thirdparty/libjpeg-turbo/src/turbojpeg-mp.c b/thirdparty/libjpeg-turbo/src/turbojpeg-mp.c
+index 1fa63b8185..72f99e236a 100644
+--- a/thirdparty/libjpeg-turbo/src/turbojpeg-mp.c
++++ b/thirdparty/libjpeg-turbo/src/turbojpeg-mp.c
+@@ -286,271 +286,6 @@ bailout:
+   return retval;
+ }
+ 
+-
+-/*************************** Packed-Pixel Image I/O **************************/
+-
+-/* TurboJPEG 3.0+ */
+-DLLEXPORT _JSAMPLE *GET_NAME(tj3LoadImage, BITS_IN_JSAMPLE)
+-  (tjhandle handle, const char *filename, int *width, int align, int *height,
+-   int *pixelFormat)
+-{
+-  static const char FUNCTION_NAME[] =
+-    GET_STRING(tj3LoadImage, BITS_IN_JSAMPLE);
+-
+-#if BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)
+-
+-  int retval = 0, tempc;
+-  size_t pitch;
+-  tjhandle handle2 = NULL;
+-  tjinstance *this2;
+-  j_compress_ptr cinfo = NULL;
+-  cjpeg_source_ptr src;
+-  _JSAMPLE *dstBuf = NULL;
+-  FILE *file = NULL;
+-  boolean invert;
+-
+-  GET_TJINSTANCE(handle, NULL)
+-
+-  if (!filename || !width || align < 1 || !height || !pixelFormat ||
+-      *pixelFormat < TJPF_UNKNOWN || *pixelFormat >= TJ_NUMPF)
+-    THROW("Invalid argument");
+-  if ((align & (align - 1)) != 0)
+-    THROW("Alignment must be a power of 2");
+-
+-  /* The instance handle passed to this function is used only for parameter
+-     retrieval.  Create a new temporary instance to avoid interfering with the
+-     libjpeg state of the primary instance. */
+-  if ((handle2 = tj3Init(TJINIT_COMPRESS)) == NULL) return NULL;
+-  this2 = (tjinstance *)handle2;
+-  cinfo = &this2->cinfo;
+-
+-#ifdef _MSC_VER
+-  if (fopen_s(&file, filename, "rb") || file == NULL)
+-#else
+-  if ((file = fopen(filename, "rb")) == NULL)
+-#endif
+-    THROW_UNIX("Cannot open input file");
+-
+-  if ((tempc = getc(file)) < 0 || ungetc(tempc, file) == EOF)
+-    THROW_UNIX("Could not read input file")
+-  else if (tempc == EOF)
+-    THROW("Input file contains no data");
+-
+-  if (setjmp(this2->jerr.setjmp_buffer)) {
+-    /* If we get here, the JPEG code has signaled an error. */
+-    retval = -1;  goto bailout;
+-  }
+-
+-  cinfo->data_precision = BITS_IN_JSAMPLE;
+-  if (*pixelFormat == TJPF_UNKNOWN) cinfo->in_color_space = JCS_UNKNOWN;
+-  else cinfo->in_color_space = pf2cs[*pixelFormat];
+-  if (tempc == 'B') {
+-    if ((src = jinit_read_bmp(cinfo, FALSE)) == NULL)
+-      THROW("Could not initialize bitmap loader");
+-    invert = !this->bottomUp;
+-  } else if (tempc == 'P') {
+-#if BITS_IN_JSAMPLE == 8
+-    if (this->precision >= 2 && this->precision <= BITS_IN_JSAMPLE)
+-#else
+-    if (this->precision >= BITS_IN_JSAMPLE - 3 &&
+-        this->precision <= BITS_IN_JSAMPLE)
+-#endif
+-      cinfo->data_precision = this->precision;
+-    if ((src = _jinit_read_ppm(cinfo)) == NULL)
+-      THROW("Could not initialize PPM loader");
+-    invert = this->bottomUp;
+-  } else
+-    THROW("Unsupported file type");
+-
+-  cinfo->mem->max_memory_to_use = (long)this->maxMemory * 1048576L;
+-
+-  src->input_file = file;
+-  /* Refuse to load images larger than the specified size. */
+-  src->max_pixels = this->maxPixels;
+-  (*src->start_input) (cinfo, src);
+-  if (tempc == 'B') {
+-    if (cinfo->X_density && cinfo->Y_density) {
+-      this->xDensity = cinfo->X_density;
+-      this->yDensity = cinfo->Y_density;
+-      this->densityUnits = cinfo->density_unit;
+-    }
+-  }
+-  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr)cinfo);
+-
+-  *width = cinfo->image_width;  *height = cinfo->image_height;
+-  *pixelFormat = cs2pf[cinfo->in_color_space];
+-
+-  pitch = PAD((*width) * tjPixelSize[*pixelFormat], align);
+-  if (
+-#if ULLONG_MAX > SIZE_MAX
+-      (unsigned long long)pitch * (unsigned long long)(*height) >
+-      (unsigned long long)((size_t)-1) ||
+-#endif
+-      (dstBuf = (_JSAMPLE *)malloc(pitch * (*height) *
+-                                   sizeof(_JSAMPLE))) == NULL)
+-    THROW("Memory allocation failure");
+-
+-  if (setjmp(this2->jerr.setjmp_buffer)) {
+-    /* If we get here, the JPEG code has signaled an error. */
+-    retval = -1;  goto bailout;
+-  }
+-
+-  while (cinfo->next_scanline < cinfo->image_height) {
+-    int i, nlines = (*src->get_pixel_rows) (cinfo, src);
+-
+-    for (i = 0; i < nlines; i++) {
+-      _JSAMPLE *dstptr;
+-      int row;
+-
+-      row = cinfo->next_scanline + i;
+-      if (invert) dstptr = &dstBuf[((*height) - row - 1) * pitch];
+-      else dstptr = &dstBuf[row * pitch];
+-      memcpy(dstptr, src->_buffer[i],
+-             (*width) * tjPixelSize[*pixelFormat] * sizeof(_JSAMPLE));
+-    }
+-    cinfo->next_scanline += nlines;
+-  }
+-
+-  (*src->finish_input) (cinfo, src);
+-
+-bailout:
+-  tj3Destroy(handle2);
+-  if (file) fclose(file);
+-  if (retval < 0) { free(dstBuf);  dstBuf = NULL; }
+-  return dstBuf;
+-
+-#else /* BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED) */
+-
+-  static const char ERROR_MSG[] =
+-    "16-bit data precision requires lossless JPEG,\n"
+-    "which was disabled at build time.";
+-  _JSAMPLE *retval = NULL;
+-
+-  GET_TJINSTANCE(handle, NULL)
+-  SNPRINTF(this->errStr, JMSG_LENGTH_MAX, "%s(): %s", FUNCTION_NAME,
+-           ERROR_MSG);
+-  this->isInstanceError = TRUE;  THROWG(ERROR_MSG, NULL)
+-
+-bailout:
+-  return retval;
+-
+-#endif
+-}
+-
+-
+-/* TurboJPEG 3.0+ */
+-DLLEXPORT int GET_NAME(tj3SaveImage, BITS_IN_JSAMPLE)
+-  (tjhandle handle, const char *filename, const _JSAMPLE *buffer, int width,
+-   int pitch, int height, int pixelFormat)
+-{
+-  static const char FUNCTION_NAME[] =
+-    GET_STRING(tj3SaveImage, BITS_IN_JSAMPLE);
+-  int retval = 0;
+-
+-#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
+-
+-  tjhandle handle2 = NULL;
+-  tjinstance *this2;
+-  j_decompress_ptr dinfo = NULL;
+-  djpeg_dest_ptr dst;
+-  FILE *file = NULL;
+-  char *ptr = NULL;
+-  boolean invert;
+-
+-  GET_TJINSTANCE(handle, -1)
+-
+-  if (!filename || !buffer || width < 1 || pitch < 0 || height < 1 ||
+-      pixelFormat < 0 || pixelFormat >= TJ_NUMPF)
+-    THROW("Invalid argument");
+-
+-  /* The instance handle passed to this function is used only for parameter
+-     retrieval.  Create a new temporary instance to avoid interfering with the
+-     libjpeg state of the primary instance. */
+-  if ((handle2 = tj3Init(TJINIT_DECOMPRESS)) == NULL)
+-    return -1;
+-  this2 = (tjinstance *)handle2;
+-  dinfo = &this2->dinfo;
+-
+-#ifdef _MSC_VER
+-  if (fopen_s(&file, filename, "wb") || file == NULL)
+-#else
+-  if ((file = fopen(filename, "wb")) == NULL)
+-#endif
+-    THROW_UNIX("Cannot open output file");
+-
+-  if (setjmp(this2->jerr.setjmp_buffer)) {
+-    /* If we get here, the JPEG code has signaled an error. */
+-    retval = -1;  goto bailout;
+-  }
+-
+-  this2->dinfo.out_color_space = pf2cs[pixelFormat];
+-  dinfo->image_width = width;  dinfo->image_height = height;
+-  dinfo->global_state = DSTATE_READY;
+-  dinfo->scale_num = dinfo->scale_denom = 1;
+-  dinfo->data_precision = BITS_IN_JSAMPLE;
+-
+-  ptr = strrchr(filename, '.');
+-  if (ptr && !strcasecmp(ptr, ".bmp")) {
+-    if ((dst = jinit_write_bmp(dinfo, FALSE, FALSE)) == NULL)
+-      THROW("Could not initialize bitmap writer");
+-    invert = !this->bottomUp;
+-    dinfo->X_density = (UINT16)this->xDensity;
+-    dinfo->Y_density = (UINT16)this->yDensity;
+-    dinfo->density_unit = (UINT8)this->densityUnits;
+-  } else {
+-#if BITS_IN_JSAMPLE == 8
+-    if (this->precision >= 2 && this->precision <= BITS_IN_JSAMPLE)
+-#else
+-    if (this->precision >= BITS_IN_JSAMPLE - 3 &&
+-        this->precision <= BITS_IN_JSAMPLE)
+-#endif
+-      dinfo->data_precision = this->precision;
+-    if ((dst = _jinit_write_ppm(dinfo)) == NULL)
+-      THROW("Could not initialize PPM writer");
+-    invert = this->bottomUp;
+-  }
+-
+-  dinfo->mem->max_memory_to_use = (long)this->maxMemory * 1048576L;
+-
+-  dst->output_file = file;
+-  (*dst->start_output) (dinfo, dst);
+-  (*dinfo->mem->realize_virt_arrays) ((j_common_ptr)dinfo);
+-
+-  if (pitch == 0) pitch = width * tjPixelSize[pixelFormat];
+-
+-  while (dinfo->output_scanline < dinfo->output_height) {
+-    _JSAMPLE *rowptr;
+-
+-    if (invert)
+-      rowptr =
+-        (_JSAMPLE *)&buffer[(height - dinfo->output_scanline - 1) * pitch];
+-    else
+-      rowptr = (_JSAMPLE *)&buffer[dinfo->output_scanline * pitch];
+-    memcpy(dst->_buffer[0], rowptr,
+-           width * tjPixelSize[pixelFormat] * sizeof(_JSAMPLE));
+-    (*dst->put_pixel_rows) (dinfo, dst, 1);
+-    dinfo->output_scanline++;
+-  }
+-
+-  (*dst->finish_output) (dinfo, dst);
+-
+-bailout:
+-  tj3Destroy(handle2);
+-  if (file) fclose(file);
+-  return retval;
+-
+-#else /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
+-
+-  GET_TJINSTANCE(handle, -1)
+-  THROW("16-bit data precision requires lossless JPEG,\n"
+-        "which was disabled at build time.")
+-bailout:
+-  return retval;
+-
+-#endif
+-}
+-
+-
+ #undef _JSAMPLE
+ #undef _JSAMPROW
+ #undef _buffer
+diff --git a/thirdparty/libjpeg-turbo/src/turbojpeg.c b/thirdparty/libjpeg-turbo/src/turbojpeg.c
+index eec8e2a616..8ce446148a 100644
+--- a/thirdparty/libjpeg-turbo/src/turbojpeg.c
++++ b/thirdparty/libjpeg-turbo/src/turbojpeg.c
+@@ -3095,48 +3095,3 @@ bailout:
+   free(sizes);
+   return retval;
+ }
+-
+-
+-/*************************** Packed-Pixel Image I/O **************************/
+-
+-/* tj3LoadImage*() is implemented in turbojpeg-mp.c */
+-
+-/* TurboJPEG 2.0+ */
+-DLLEXPORT unsigned char *tjLoadImage(const char *filename, int *width,
+-                                     int align, int *height,
+-                                     int *pixelFormat, int flags)
+-{
+-  tjhandle handle = NULL;
+-  unsigned char *dstBuf = NULL;
+-
+-  if ((handle = tj3Init(TJINIT_COMPRESS)) == NULL) return NULL;
+-
+-  processFlags(handle, flags, COMPRESS);
+-
+-  dstBuf = tj3LoadImage8(handle, filename, width, align, height, pixelFormat);
+-
+-  tj3Destroy(handle);
+-  return dstBuf;
+-}
+-
+-
+-/* tj3SaveImage*() is implemented in turbojpeg-mp.c */
+-
+-/* TurboJPEG 2.0+ */
+-DLLEXPORT int tjSaveImage(const char *filename, unsigned char *buffer,
+-                          int width, int pitch, int height, int pixelFormat,
+-                          int flags)
+-{
+-  tjhandle handle = NULL;
+-  int retval = -1;
+-
+-  if ((handle = tj3Init(TJINIT_DECOMPRESS)) == NULL) return -1;
+-
+-  processFlags(handle, flags, DECOMPRESS);
+-
+-  retval = tj3SaveImage8(handle, filename, buffer, width, pitch, height,
+-                         pixelFormat);
+-
+-  tj3Destroy(handle);
+-  return retval;
+-}
diff --git a/thirdparty/libjpeg-turbo/src/cderror.h b/thirdparty/libjpeg-turbo/src/cderror.h
new file mode 100644
index 00000000000..cd3e80646ff
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/cderror.h
@@ -0,0 +1,114 @@
+/*
+ * cderror.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * Modified 2009-2017 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2021, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file defines the error and message codes for the cjpeg/djpeg
+ * applications.  These strings are not needed as part of the JPEG library
+ * proper.
+ * Edit this file to add new codes, or to translate the message strings to
+ * some other language.
+ */
+
+/*
+ * To define the enum list of message codes, include this file without
+ * defining macro JMESSAGE.  To create a message string table, include it
+ * again with a suitable JMESSAGE definition (see jerror.c for an example).
+ */
+#ifndef JMESSAGE
+#ifndef CDERROR_H
+#define CDERROR_H
+/* First time through, define the enum list */
+#define JMAKE_ENUM_LIST
+#else
+/* Repeated inclusions of this file are no-ops unless JMESSAGE is defined */
+#define JMESSAGE(code, string)
+#endif /* CDERROR_H */
+#endif /* JMESSAGE */
+
+#ifdef JMAKE_ENUM_LIST
+
+typedef enum {
+
+#define JMESSAGE(code, string)  code,
+
+#endif /* JMAKE_ENUM_LIST */
+
+JMESSAGE(JMSG_FIRSTADDONCODE = 1000, NULL) /* Must be first entry! */
+
+JMESSAGE(JERR_BMP_BADCMAP, "Unsupported BMP colormap format")
+JMESSAGE(JERR_BMP_BADDEPTH, "Only 8-, 24-, and 32-bit BMP files are supported")
+JMESSAGE(JERR_BMP_BADHEADER, "Invalid BMP file: bad header length")
+JMESSAGE(JERR_BMP_BADPLANES, "Invalid BMP file: biPlanes not equal to 1")
+JMESSAGE(JERR_BMP_COLORSPACE, "BMP output must be grayscale or RGB")
+JMESSAGE(JERR_BMP_COMPRESSED, "Sorry, compressed BMPs not yet supported")
+JMESSAGE(JERR_BMP_EMPTY, "Empty BMP image")
+JMESSAGE(JERR_BMP_NOT, "Not a BMP file - does not start with BM")
+JMESSAGE(JERR_BMP_OUTOFRANGE, "Numeric value out of range in BMP file")
+JMESSAGE(JTRC_BMP, "%ux%u %d-bit BMP image")
+JMESSAGE(JTRC_BMP_MAPPED, "%ux%u 8-bit colormapped BMP image")
+JMESSAGE(JTRC_BMP_OS2, "%ux%u %d-bit OS2 BMP image")
+JMESSAGE(JTRC_BMP_OS2_MAPPED, "%ux%u 8-bit colormapped OS2 BMP image")
+
+JMESSAGE(JERR_GIF_BUG, "GIF output got confused")
+JMESSAGE(JERR_GIF_CODESIZE, "Bogus GIF codesize %d")
+JMESSAGE(JERR_GIF_COLORSPACE, "GIF output must be grayscale or RGB")
+JMESSAGE(JERR_GIF_EMPTY, "Empty GIF image")
+JMESSAGE(JERR_GIF_IMAGENOTFOUND, "Too few images in GIF file")
+JMESSAGE(JERR_GIF_NOT, "Not a GIF file")
+JMESSAGE(JTRC_GIF, "%ux%ux%d GIF image")
+JMESSAGE(JTRC_GIF_BADVERSION,
+         "Warning: unexpected GIF version number '%c%c%c'")
+JMESSAGE(JTRC_GIF_EXTENSION, "Ignoring GIF extension block of type 0x%02x")
+JMESSAGE(JTRC_GIF_NONSQUARE, "Caution: nonsquare pixels in input")
+JMESSAGE(JWRN_GIF_BADDATA, "Corrupt data in GIF file")
+JMESSAGE(JWRN_GIF_CHAR, "Bogus char 0x%02x in GIF file, ignoring")
+JMESSAGE(JWRN_GIF_ENDCODE, "Premature end of GIF image")
+JMESSAGE(JWRN_GIF_NOMOREDATA, "Ran out of GIF bits")
+
+JMESSAGE(JERR_PPM_COLORSPACE, "PPM output must be grayscale or RGB")
+JMESSAGE(JERR_PPM_NONNUMERIC, "Nonnumeric data in PPM file")
+JMESSAGE(JERR_PPM_NOT, "Not a PPM/PGM file")
+JMESSAGE(JERR_PPM_OUTOFRANGE, "Numeric value out of range in PPM file")
+JMESSAGE(JTRC_PGM, "%ux%u PGM image (maximum color value = %u)")
+JMESSAGE(JTRC_PGM_TEXT, "%ux%u text PGM image (maximum color value = %u)")
+JMESSAGE(JTRC_PPM, "%ux%u PPM image (maximum color value = %u)")
+JMESSAGE(JTRC_PPM_TEXT, "%ux%u text PPM image (maximum color value = %u)")
+
+JMESSAGE(JERR_TGA_BADCMAP, "Unsupported Targa colormap format")
+JMESSAGE(JERR_TGA_BADPARMS, "Invalid or unsupported Targa file")
+JMESSAGE(JERR_TGA_COLORSPACE, "Targa output must be grayscale or RGB")
+JMESSAGE(JTRC_TGA, "%ux%u RGB Targa image")
+JMESSAGE(JTRC_TGA_GRAY, "%ux%u grayscale Targa image")
+JMESSAGE(JTRC_TGA_MAPPED, "%ux%u colormapped Targa image")
+JMESSAGE(JERR_TGA_NOTCOMP, "Targa support was not compiled")
+
+JMESSAGE(JERR_BAD_CMAP_FILE,
+         "Color map file is invalid or of unsupported format")
+JMESSAGE(JERR_TOO_MANY_COLORS,
+         "Output file format cannot handle %d colormap entries")
+JMESSAGE(JERR_UNGETC_FAILED, "ungetc failed")
+#ifdef TARGA_SUPPORTED
+JMESSAGE(JERR_UNKNOWN_FORMAT,
+         "Unrecognized input file format --- perhaps you need -targa")
+#else
+JMESSAGE(JERR_UNKNOWN_FORMAT, "Unrecognized input file format")
+#endif
+JMESSAGE(JERR_UNSUPPORTED_FORMAT, "Unsupported output file format")
+
+#ifdef JMAKE_ENUM_LIST
+
+  JMSG_LASTADDONCODE
+} ADDON_MESSAGE_CODE;
+
+#undef JMAKE_ENUM_LIST
+#endif /* JMAKE_ENUM_LIST */
+
+/* Zap JMESSAGE macro so that future re-inclusions do nothing by default */
+#undef JMESSAGE
diff --git a/thirdparty/libjpeg-turbo/src/cdjpeg.h b/thirdparty/libjpeg-turbo/src/cdjpeg.h
new file mode 100644
index 00000000000..dbe54a0922e
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/cdjpeg.h
@@ -0,0 +1,175 @@
+/*
+ * cdjpeg.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * Modified 2019 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2017, 2019, 2021-2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains common declarations for the sample applications
+ * cjpeg and djpeg.  It is NOT used by the core JPEG library.
+ */
+
+#define JPEG_CJPEG_DJPEG        /* define proper options in jconfig.h */
+#define JPEG_INTERNAL_OPTIONS   /* cjpeg.c,djpeg.c need to see xxx_SUPPORTED */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"             /* get library error codes too */
+#include "cderror.h"            /* get application-specific error codes */
+
+
+/*
+ * Object interface for cjpeg's source file decoding modules
+ */
+
+typedef struct cjpeg_source_struct *cjpeg_source_ptr;
+
+struct cjpeg_source_struct {
+  void (*start_input) (j_compress_ptr cinfo, cjpeg_source_ptr sinfo);
+  JDIMENSION (*get_pixel_rows) (j_compress_ptr cinfo, cjpeg_source_ptr sinfo);
+  void (*finish_input) (j_compress_ptr cinfo, cjpeg_source_ptr sinfo);
+
+  FILE *input_file;
+
+  JSAMPARRAY buffer;
+  J12SAMPARRAY buffer12;
+#ifdef C_LOSSLESS_SUPPORTED
+  J16SAMPARRAY buffer16;
+#endif
+  JDIMENSION buffer_height;
+  JDIMENSION max_pixels;
+};
+
+
+/*
+ * Object interface for djpeg's output file encoding modules
+ */
+
+typedef struct djpeg_dest_struct *djpeg_dest_ptr;
+
+struct djpeg_dest_struct {
+  /* start_output is called after jpeg_start_decompress finishes.
+   * The color map will be ready at this time, if one is needed.
+   */
+  void (*start_output) (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo);
+  /* Emit the specified number of pixel rows from the buffer. */
+  void (*put_pixel_rows) (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo,
+                          JDIMENSION rows_supplied);
+  /* Finish up at the end of the image. */
+  void (*finish_output) (j_decompress_ptr cinfo, djpeg_dest_ptr dinfo);
+  /* Re-calculate buffer dimensions based on output dimensions (for use with
+     partial image decompression.)  If this is NULL, then the output format
+     does not support partial image decompression (BMP, in particular, cannot
+     support partial decompression because it uses an inversion buffer to write
+     the image in bottom-up order.) */
+  void (*calc_buffer_dimensions) (j_decompress_ptr cinfo,
+                                  djpeg_dest_ptr dinfo);
+
+
+  /* Target file spec; filled in by djpeg.c after object is created. */
+  FILE *output_file;
+
+  /* Output pixel-row buffer.  Created by module init or start_output.
+   * Width is cinfo->output_width * cinfo->output_components;
+   * height is buffer_height.
+   */
+  JSAMPARRAY buffer;
+  J12SAMPARRAY buffer12;
+#ifdef D_LOSSLESS_SUPPORTED
+  J16SAMPARRAY buffer16;
+#endif
+  JDIMENSION buffer_height;
+};
+
+
+/*
+ * cjpeg/djpeg may need to perform extra passes to convert to or from
+ * the source/destination file format.  The JPEG library does not know
+ * about these passes, but we'd like them to be counted by the progress
+ * monitor.  We use an expanded progress monitor object to hold the
+ * additional pass count.
+ */
+
+struct cdjpeg_progress_mgr {
+  struct jpeg_progress_mgr pub; /* fields known to JPEG library */
+  int completed_extra_passes;   /* extra passes completed */
+  int total_extra_passes;       /* total extra */
+  JDIMENSION max_scans;         /* abort if the number of scans exceeds this
+                                   value and the value is non-zero */
+  boolean report;               /* whether or not to report progress */
+  /* last printed percentage stored here to avoid multiple printouts */
+  int percent_done;
+};
+
+typedef struct cdjpeg_progress_mgr *cd_progress_ptr;
+
+
+/* Module selection routines for I/O modules. */
+
+EXTERN(cjpeg_source_ptr) jinit_read_bmp(j_compress_ptr cinfo,
+                                        boolean use_inversion_array);
+EXTERN(djpeg_dest_ptr) jinit_write_bmp(j_decompress_ptr cinfo, boolean is_os2,
+                                       boolean use_inversion_array);
+EXTERN(cjpeg_source_ptr) jinit_read_gif(j_compress_ptr cinfo);
+EXTERN(djpeg_dest_ptr) jinit_write_gif(j_decompress_ptr cinfo, boolean is_lzw);
+EXTERN(djpeg_dest_ptr) j12init_write_gif(j_decompress_ptr cinfo,
+                                         boolean is_lzw);
+EXTERN(cjpeg_source_ptr) jinit_read_ppm(j_compress_ptr cinfo);
+EXTERN(cjpeg_source_ptr) j12init_read_ppm(j_compress_ptr cinfo);
+#ifdef C_LOSSLESS_SUPPORTED
+EXTERN(cjpeg_source_ptr) j16init_read_ppm(j_compress_ptr cinfo);
+#endif
+EXTERN(djpeg_dest_ptr) jinit_write_ppm(j_decompress_ptr cinfo);
+EXTERN(djpeg_dest_ptr) j12init_write_ppm(j_decompress_ptr cinfo);
+#ifdef D_LOSSLESS_SUPPORTED
+EXTERN(djpeg_dest_ptr) j16init_write_ppm(j_decompress_ptr cinfo);
+#endif
+EXTERN(cjpeg_source_ptr) jinit_read_targa(j_compress_ptr cinfo);
+EXTERN(djpeg_dest_ptr) jinit_write_targa(j_decompress_ptr cinfo);
+
+/* cjpeg support routines (in rdswitch.c) */
+
+EXTERN(boolean) read_quant_tables(j_compress_ptr cinfo, char *filename,
+                                  boolean force_baseline);
+EXTERN(boolean) read_scan_script(j_compress_ptr cinfo, char *filename);
+EXTERN(boolean) set_quality_ratings(j_compress_ptr cinfo, char *arg,
+                                    boolean force_baseline);
+EXTERN(boolean) set_quant_slots(j_compress_ptr cinfo, char *arg);
+EXTERN(boolean) set_sample_factors(j_compress_ptr cinfo, char *arg);
+
+/* djpeg support routines (in rdcolmap.c) */
+
+EXTERN(void) read_color_map(j_decompress_ptr cinfo, FILE *infile);
+EXTERN(void) read_color_map_12(j_decompress_ptr cinfo, FILE *infile);
+
+/* common support routines (in cdjpeg.c) */
+
+EXTERN(void) start_progress_monitor(j_common_ptr cinfo,
+                                    cd_progress_ptr progress);
+EXTERN(void) end_progress_monitor(j_common_ptr cinfo);
+EXTERN(boolean) keymatch(char *arg, const char *keyword, int minchars);
+EXTERN(FILE *) read_stdin(void);
+EXTERN(FILE *) write_stdout(void);
+
+/* miscellaneous useful macros */
+
+#ifdef DONT_USE_B_MODE          /* define mode parameters for fopen() */
+#define READ_BINARY     "r"
+#define WRITE_BINARY    "w"
+#else
+#define READ_BINARY     "rb"
+#define WRITE_BINARY    "wb"
+#endif
+
+#ifndef EXIT_FAILURE            /* define exit() codes if not provided */
+#define EXIT_FAILURE  1
+#endif
+#ifndef EXIT_SUCCESS
+#define EXIT_SUCCESS  0
+#endif
+#ifndef EXIT_WARNING
+#define EXIT_WARNING  2
+#endif
diff --git a/thirdparty/libjpeg-turbo/src/cmyk.h b/thirdparty/libjpeg-turbo/src/cmyk.h
new file mode 100644
index 00000000000..4127b14d4ed
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/cmyk.h
@@ -0,0 +1,61 @@
+/*
+ * cmyk.h
+ *
+ * Copyright (C) 2017-2018, 2022, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains convenience functions for performing quick & dirty
+ * CMYK<->RGB conversion.  This algorithm is suitable for testing purposes
+ * only.  Properly converting between CMYK and RGB requires a color management
+ * system.
+ */
+
+#ifndef CMYK_H
+#define CMYK_H
+
+#include <jinclude.h>
+#define JPEG_INTERNALS
+#include <jpeglib.h>
+#include "jsamplecomp.h"
+
+
+/* Fully reversible */
+
+INLINE
+LOCAL(void)
+rgb_to_cmyk(int maxval, _JSAMPLE r, _JSAMPLE g, _JSAMPLE b,
+            _JSAMPLE *c, _JSAMPLE *m, _JSAMPLE *y, _JSAMPLE *k)
+{
+  double ctmp = 1.0 - ((double)r / (double)maxval);
+  double mtmp = 1.0 - ((double)g / (double)maxval);
+  double ytmp = 1.0 - ((double)b / (double)maxval);
+  double ktmp = MIN(MIN(ctmp, mtmp), ytmp);
+
+  if (ktmp == 1.0) ctmp = mtmp = ytmp = 0.0;
+  else {
+    ctmp = (ctmp - ktmp) / (1.0 - ktmp);
+    mtmp = (mtmp - ktmp) / (1.0 - ktmp);
+    ytmp = (ytmp - ktmp) / (1.0 - ktmp);
+  }
+  *c = (_JSAMPLE)((double)maxval - ctmp * (double)maxval + 0.5);
+  *m = (_JSAMPLE)((double)maxval - mtmp * (double)maxval + 0.5);
+  *y = (_JSAMPLE)((double)maxval - ytmp * (double)maxval + 0.5);
+  *k = (_JSAMPLE)((double)maxval - ktmp * (double)maxval + 0.5);
+}
+
+
+/* Fully reversible only for C/M/Y/K values generated with rgb_to_cmyk() */
+
+INLINE
+LOCAL(void)
+cmyk_to_rgb(int maxval, _JSAMPLE c, _JSAMPLE m, _JSAMPLE y, _JSAMPLE k,
+            _JSAMPLE *r, _JSAMPLE *g, _JSAMPLE *b)
+{
+  *r = (_JSAMPLE)((double)c * (double)k / (double)maxval + 0.5);
+  *g = (_JSAMPLE)((double)m * (double)k / (double)maxval + 0.5);
+  *b = (_JSAMPLE)((double)y * (double)k / (double)maxval + 0.5);
+}
+
+
+#endif /* CMYK_H */
diff --git a/thirdparty/libjpeg-turbo/src/jaricom.c b/thirdparty/libjpeg-turbo/src/jaricom.c
new file mode 100644
index 00000000000..215640cc446
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jaricom.c
@@ -0,0 +1,157 @@
+/*
+ * jaricom.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Developed 1997-2009 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015, 2018, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains probability estimation tables for common use in
+ * arithmetic entropy encoding and decoding routines.
+ *
+ * This data represents Table D.2 in
+ * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994 and Table 24 in
+ * Recommendation ITU-T T.82 (1993) | ISO/IEC 11544:1993.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+/* The following #define specifies the packing of the four components
+ * into the compact JLONG representation.
+ * Note that this formula must match the actual arithmetic encoder
+ * and decoder implementation.  The implementation has to be changed
+ * if this formula is changed.
+ * The current organization is leaned on Markus Kuhn's JBIG
+ * implementation (jbig_tab.c).
+ */
+
+#define V(i, a, b, c, d) \
+  (((JLONG)a << 16) | ((JLONG)c << 8) | ((JLONG)d << 7) | b)
+
+const JLONG jpeg_aritab[113 + 1] = {
+/*
+ * Index, Qe_Value, Next_Index_LPS, Next_Index_MPS, Switch_MPS
+ */
+  V(   0, 0x5a1d,   1,   1, 1 ),
+  V(   1, 0x2586,  14,   2, 0 ),
+  V(   2, 0x1114,  16,   3, 0 ),
+  V(   3, 0x080b,  18,   4, 0 ),
+  V(   4, 0x03d8,  20,   5, 0 ),
+  V(   5, 0x01da,  23,   6, 0 ),
+  V(   6, 0x00e5,  25,   7, 0 ),
+  V(   7, 0x006f,  28,   8, 0 ),
+  V(   8, 0x0036,  30,   9, 0 ),
+  V(   9, 0x001a,  33,  10, 0 ),
+  V(  10, 0x000d,  35,  11, 0 ),
+  V(  11, 0x0006,   9,  12, 0 ),
+  V(  12, 0x0003,  10,  13, 0 ),
+  V(  13, 0x0001,  12,  13, 0 ),
+  V(  14, 0x5a7f,  15,  15, 1 ),
+  V(  15, 0x3f25,  36,  16, 0 ),
+  V(  16, 0x2cf2,  38,  17, 0 ),
+  V(  17, 0x207c,  39,  18, 0 ),
+  V(  18, 0x17b9,  40,  19, 0 ),
+  V(  19, 0x1182,  42,  20, 0 ),
+  V(  20, 0x0cef,  43,  21, 0 ),
+  V(  21, 0x09a1,  45,  22, 0 ),
+  V(  22, 0x072f,  46,  23, 0 ),
+  V(  23, 0x055c,  48,  24, 0 ),
+  V(  24, 0x0406,  49,  25, 0 ),
+  V(  25, 0x0303,  51,  26, 0 ),
+  V(  26, 0x0240,  52,  27, 0 ),
+  V(  27, 0x01b1,  54,  28, 0 ),
+  V(  28, 0x0144,  56,  29, 0 ),
+  V(  29, 0x00f5,  57,  30, 0 ),
+  V(  30, 0x00b7,  59,  31, 0 ),
+  V(  31, 0x008a,  60,  32, 0 ),
+  V(  32, 0x0068,  62,  33, 0 ),
+  V(  33, 0x004e,  63,  34, 0 ),
+  V(  34, 0x003b,  32,  35, 0 ),
+  V(  35, 0x002c,  33,   9, 0 ),
+  V(  36, 0x5ae1,  37,  37, 1 ),
+  V(  37, 0x484c,  64,  38, 0 ),
+  V(  38, 0x3a0d,  65,  39, 0 ),
+  V(  39, 0x2ef1,  67,  40, 0 ),
+  V(  40, 0x261f,  68,  41, 0 ),
+  V(  41, 0x1f33,  69,  42, 0 ),
+  V(  42, 0x19a8,  70,  43, 0 ),
+  V(  43, 0x1518,  72,  44, 0 ),
+  V(  44, 0x1177,  73,  45, 0 ),
+  V(  45, 0x0e74,  74,  46, 0 ),
+  V(  46, 0x0bfb,  75,  47, 0 ),
+  V(  47, 0x09f8,  77,  48, 0 ),
+  V(  48, 0x0861,  78,  49, 0 ),
+  V(  49, 0x0706,  79,  50, 0 ),
+  V(  50, 0x05cd,  48,  51, 0 ),
+  V(  51, 0x04de,  50,  52, 0 ),
+  V(  52, 0x040f,  50,  53, 0 ),
+  V(  53, 0x0363,  51,  54, 0 ),
+  V(  54, 0x02d4,  52,  55, 0 ),
+  V(  55, 0x025c,  53,  56, 0 ),
+  V(  56, 0x01f8,  54,  57, 0 ),
+  V(  57, 0x01a4,  55,  58, 0 ),
+  V(  58, 0x0160,  56,  59, 0 ),
+  V(  59, 0x0125,  57,  60, 0 ),
+  V(  60, 0x00f6,  58,  61, 0 ),
+  V(  61, 0x00cb,  59,  62, 0 ),
+  V(  62, 0x00ab,  61,  63, 0 ),
+  V(  63, 0x008f,  61,  32, 0 ),
+  V(  64, 0x5b12,  65,  65, 1 ),
+  V(  65, 0x4d04,  80,  66, 0 ),
+  V(  66, 0x412c,  81,  67, 0 ),
+  V(  67, 0x37d8,  82,  68, 0 ),
+  V(  68, 0x2fe8,  83,  69, 0 ),
+  V(  69, 0x293c,  84,  70, 0 ),
+  V(  70, 0x2379,  86,  71, 0 ),
+  V(  71, 0x1edf,  87,  72, 0 ),
+  V(  72, 0x1aa9,  87,  73, 0 ),
+  V(  73, 0x174e,  72,  74, 0 ),
+  V(  74, 0x1424,  72,  75, 0 ),
+  V(  75, 0x119c,  74,  76, 0 ),
+  V(  76, 0x0f6b,  74,  77, 0 ),
+  V(  77, 0x0d51,  75,  78, 0 ),
+  V(  78, 0x0bb6,  77,  79, 0 ),
+  V(  79, 0x0a40,  77,  48, 0 ),
+  V(  80, 0x5832,  80,  81, 1 ),
+  V(  81, 0x4d1c,  88,  82, 0 ),
+  V(  82, 0x438e,  89,  83, 0 ),
+  V(  83, 0x3bdd,  90,  84, 0 ),
+  V(  84, 0x34ee,  91,  85, 0 ),
+  V(  85, 0x2eae,  92,  86, 0 ),
+  V(  86, 0x299a,  93,  87, 0 ),
+  V(  87, 0x2516,  86,  71, 0 ),
+  V(  88, 0x5570,  88,  89, 1 ),
+  V(  89, 0x4ca9,  95,  90, 0 ),
+  V(  90, 0x44d9,  96,  91, 0 ),
+  V(  91, 0x3e22,  97,  92, 0 ),
+  V(  92, 0x3824,  99,  93, 0 ),
+  V(  93, 0x32b4,  99,  94, 0 ),
+  V(  94, 0x2e17,  93,  86, 0 ),
+  V(  95, 0x56a8,  95,  96, 1 ),
+  V(  96, 0x4f46, 101,  97, 0 ),
+  V(  97, 0x47e5, 102,  98, 0 ),
+  V(  98, 0x41cf, 103,  99, 0 ),
+  V(  99, 0x3c3d, 104, 100, 0 ),
+  V( 100, 0x375e,  99,  93, 0 ),
+  V( 101, 0x5231, 105, 102, 0 ),
+  V( 102, 0x4c0f, 106, 103, 0 ),
+  V( 103, 0x4639, 107, 104, 0 ),
+  V( 104, 0x415e, 103,  99, 0 ),
+  V( 105, 0x5627, 105, 106, 1 ),
+  V( 106, 0x50e7, 108, 107, 0 ),
+  V( 107, 0x4b85, 109, 103, 0 ),
+  V( 108, 0x5597, 110, 109, 0 ),
+  V( 109, 0x504f, 111, 107, 0 ),
+  V( 110, 0x5a10, 110, 111, 1 ),
+  V( 111, 0x5522, 112, 109, 0 ),
+  V( 112, 0x59eb, 112, 111, 1 ),
+/*
+ * This last entry is used for fixed probability estimate of 0.5
+ * as recommended in Section 10.3 Table 5 of ITU-T Rec. T.851.
+ */
+  V( 113, 0x5a1d, 113, 113, 0 )
+};
diff --git a/thirdparty/libjpeg-turbo/src/jcapimin.c b/thirdparty/libjpeg-turbo/src/jcapimin.c
new file mode 100644
index 00000000000..eb4599fbfad
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jcapimin.c
@@ -0,0 +1,318 @@
+/*
+ * jcapimin.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * Modified 2003-2010 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains application interface code for the compression half
+ * of the JPEG library.  These are the "minimum" API routines that may be
+ * needed in either the normal full-compression case or the transcoding-only
+ * case.
+ *
+ * Most of the routines intended to be called directly by an application
+ * are in this file or in jcapistd.c.  But also see jcparam.c for
+ * parameter-setup helper routines, jcomapi.c for routines shared by
+ * compression and decompression, and jctrans.c for the transcoding case.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jcmaster.h"
+
+
+/*
+ * Initialization of a JPEG compression object.
+ * The error manager must already be set up (in case memory manager fails).
+ */
+
+GLOBAL(void)
+jpeg_CreateCompress(j_compress_ptr cinfo, int version, size_t structsize)
+{
+  int i;
+
+  /* Guard against version mismatches between library and caller. */
+  cinfo->mem = NULL;            /* so jpeg_destroy knows mem mgr not called */
+  if (version != JPEG_LIB_VERSION)
+    ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
+  if (structsize != sizeof(struct jpeg_compress_struct))
+    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE,
+             (int)sizeof(struct jpeg_compress_struct), (int)structsize);
+
+  /* For debugging purposes, we zero the whole master structure.
+   * But the application has already set the err pointer, and may have set
+   * client_data, so we have to save and restore those fields.
+   * Note: if application hasn't set client_data, tools like Purify may
+   * complain here.
+   */
+  {
+    struct jpeg_error_mgr *err = cinfo->err;
+    void *client_data = cinfo->client_data; /* ignore Purify complaint here */
+    memset(cinfo, 0, sizeof(struct jpeg_compress_struct));
+    cinfo->err = err;
+    cinfo->client_data = client_data;
+  }
+  cinfo->is_decompressor = FALSE;
+
+  /* Initialize a memory manager instance for this object */
+  jinit_memory_mgr((j_common_ptr)cinfo);
+
+  /* Zero out pointers to permanent structures. */
+  cinfo->progress = NULL;
+  cinfo->dest = NULL;
+
+  cinfo->comp_info = NULL;
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++) {
+    cinfo->quant_tbl_ptrs[i] = NULL;
+#if JPEG_LIB_VERSION >= 70
+    cinfo->q_scale_factor[i] = 100;
+#endif
+  }
+
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    cinfo->dc_huff_tbl_ptrs[i] = NULL;
+    cinfo->ac_huff_tbl_ptrs[i] = NULL;
+  }
+
+#if JPEG_LIB_VERSION >= 80
+  /* Must do it here for emit_dqt in case jpeg_write_tables is used */
+  cinfo->block_size = DCTSIZE;
+  cinfo->natural_order = jpeg_natural_order;
+  cinfo->lim_Se = DCTSIZE2 - 1;
+#endif
+
+  cinfo->script_space = NULL;
+
+  cinfo->input_gamma = 1.0;     /* in case application forgets */
+
+  cinfo->data_precision = BITS_IN_JSAMPLE;
+
+  /* OK, I'm ready */
+  cinfo->global_state = CSTATE_START;
+
+  /* The master struct is used to store extension parameters, so we allocate it
+   * here.
+   */
+  cinfo->master = (struct jpeg_comp_master *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
+                                  sizeof(my_comp_master));
+  memset(cinfo->master, 0, sizeof(my_comp_master));
+}
+
+
+/*
+ * Destruction of a JPEG compression object
+ */
+
+GLOBAL(void)
+jpeg_destroy_compress(j_compress_ptr cinfo)
+{
+  jpeg_destroy((j_common_ptr)cinfo); /* use common routine */
+}
+
+
+/*
+ * Abort processing of a JPEG compression operation,
+ * but don't destroy the object itself.
+ */
+
+GLOBAL(void)
+jpeg_abort_compress(j_compress_ptr cinfo)
+{
+  jpeg_abort((j_common_ptr)cinfo); /* use common routine */
+}
+
+
+/*
+ * Forcibly suppress or un-suppress all quantization and Huffman tables.
+ * Marks all currently defined tables as already written (if suppress)
+ * or not written (if !suppress).  This will control whether they get emitted
+ * by a subsequent jpeg_start_compress call.
+ *
+ * This routine is exported for use by applications that want to produce
+ * abbreviated JPEG datastreams.  It logically belongs in jcparam.c, but
+ * since it is called by jpeg_start_compress, we put it here --- otherwise
+ * jcparam.o would be linked whether the application used it or not.
+ */
+
+GLOBAL(void)
+jpeg_suppress_tables(j_compress_ptr cinfo, boolean suppress)
+{
+  int i;
+  JQUANT_TBL *qtbl;
+  JHUFF_TBL *htbl;
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++) {
+    if ((qtbl = cinfo->quant_tbl_ptrs[i]) != NULL)
+      qtbl->sent_table = suppress;
+  }
+
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    if ((htbl = cinfo->dc_huff_tbl_ptrs[i]) != NULL)
+      htbl->sent_table = suppress;
+    if ((htbl = cinfo->ac_huff_tbl_ptrs[i]) != NULL)
+      htbl->sent_table = suppress;
+  }
+}
+
+
+/*
+ * Finish JPEG compression.
+ *
+ * If a multipass operating mode was selected, this may do a great deal of
+ * work including most of the actual output.
+ */
+
+GLOBAL(void)
+jpeg_finish_compress(j_compress_ptr cinfo)
+{
+  JDIMENSION iMCU_row;
+
+  if (cinfo->global_state == CSTATE_SCANNING ||
+      cinfo->global_state == CSTATE_RAW_OK) {
+    /* Terminate first pass */
+    if (cinfo->next_scanline < cinfo->image_height)
+      ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
+    (*cinfo->master->finish_pass) (cinfo);
+  } else if (cinfo->global_state != CSTATE_WRCOEFS)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Perform any remaining passes */
+  while (!cinfo->master->is_last_pass) {
+    (*cinfo->master->prepare_for_pass) (cinfo);
+    for (iMCU_row = 0; iMCU_row < cinfo->total_iMCU_rows; iMCU_row++) {
+      if (cinfo->progress != NULL) {
+        cinfo->progress->pass_counter = (long)iMCU_row;
+        cinfo->progress->pass_limit = (long)cinfo->total_iMCU_rows;
+        (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo);
+      }
+      /* We bypass the main controller and invoke coef controller directly;
+       * all work is being done from the coefficient buffer.
+       */
+      if (cinfo->data_precision <= 8) {
+        if (!(*cinfo->coef->compress_data) (cinfo, (JSAMPIMAGE)NULL))
+          ERREXIT(cinfo, JERR_CANT_SUSPEND);
+      } else if (cinfo->data_precision <= 12) {
+        if (!(*cinfo->coef->compress_data_12) (cinfo, (J12SAMPIMAGE)NULL))
+          ERREXIT(cinfo, JERR_CANT_SUSPEND);
+      } else {
+#ifdef C_LOSSLESS_SUPPORTED
+        if (!(*cinfo->coef->compress_data_16) (cinfo, (J16SAMPIMAGE)NULL))
+          ERREXIT(cinfo, JERR_CANT_SUSPEND);
+#else
+        ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+#endif
+      }
+    }
+    (*cinfo->master->finish_pass) (cinfo);
+  }
+  /* Write EOI, do final cleanup */
+  (*cinfo->marker->write_file_trailer) (cinfo);
+  (*cinfo->dest->term_destination) (cinfo);
+  /* We can use jpeg_abort to release memory and reset global_state */
+  jpeg_abort((j_common_ptr)cinfo);
+}
+
+
+/*
+ * Write a special marker.
+ * This is only recommended for writing COM or APPn markers.
+ * Must be called after jpeg_start_compress() and before
+ * first call to jpeg_write_scanlines() or jpeg_write_raw_data().
+ */
+
+GLOBAL(void)
+jpeg_write_marker(j_compress_ptr cinfo, int marker, const JOCTET *dataptr,
+                  unsigned int datalen)
+{
+  void (*write_marker_byte) (j_compress_ptr info, int val);
+
+  if (cinfo->next_scanline != 0 ||
+      (cinfo->global_state != CSTATE_SCANNING &&
+       cinfo->global_state != CSTATE_RAW_OK &&
+       cinfo->global_state != CSTATE_WRCOEFS))
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
+  write_marker_byte = cinfo->marker->write_marker_byte; /* copy for speed */
+  while (datalen--) {
+    (*write_marker_byte) (cinfo, *dataptr);
+    dataptr++;
+  }
+}
+
+/* Same, but piecemeal. */
+
+GLOBAL(void)
+jpeg_write_m_header(j_compress_ptr cinfo, int marker, unsigned int datalen)
+{
+  if (cinfo->next_scanline != 0 ||
+      (cinfo->global_state != CSTATE_SCANNING &&
+       cinfo->global_state != CSTATE_RAW_OK &&
+       cinfo->global_state != CSTATE_WRCOEFS))
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  (*cinfo->marker->write_marker_header) (cinfo, marker, datalen);
+}
+
+GLOBAL(void)
+jpeg_write_m_byte(j_compress_ptr cinfo, int val)
+{
+  (*cinfo->marker->write_marker_byte) (cinfo, val);
+}
+
+
+/*
+ * Alternate compression function: just write an abbreviated table file.
+ * Before calling this, all parameters and a data destination must be set up.
+ *
+ * To produce a pair of files containing abbreviated tables and abbreviated
+ * image data, one would proceed as follows:
+ *
+ *              initialize JPEG object
+ *              set JPEG parameters
+ *              set destination to table file
+ *              jpeg_write_tables(cinfo);
+ *              set destination to image file
+ *              jpeg_start_compress(cinfo, FALSE);
+ *              write data...
+ *              jpeg_finish_compress(cinfo);
+ *
+ * jpeg_write_tables has the side effect of marking all tables written
+ * (same as jpeg_suppress_tables(..., TRUE)).  Thus a subsequent start_compress
+ * will not re-emit the tables unless it is passed write_all_tables=TRUE.
+ */
+
+GLOBAL(void)
+jpeg_write_tables(j_compress_ptr cinfo)
+{
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* (Re)initialize error mgr and destination modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr)cinfo);
+  (*cinfo->dest->init_destination) (cinfo);
+  /* Initialize the marker writer ... bit of a crock to do it here. */
+  jinit_marker_writer(cinfo);
+  /* Write them tables! */
+  (*cinfo->marker->write_tables_only) (cinfo);
+  /* And clean up. */
+  (*cinfo->dest->term_destination) (cinfo);
+  /*
+   * In library releases up through v6a, we called jpeg_abort() here to free
+   * any working memory allocated by the destination manager and marker
+   * writer.  Some applications had a problem with that: they allocated space
+   * of their own from the library memory manager, and didn't want it to go
+   * away during write_tables.  So now we do nothing.  This will cause a
+   * memory leak if an app calls write_tables repeatedly without doing a full
+   * compression cycle or otherwise resetting the JPEG object.  However, that
+   * seems less bad than unexpectedly freeing memory in the normal case.
+   * An app that prefers the old behavior can call jpeg_abort for itself after
+   * each call to jpeg_write_tables().
+   */
+}
diff --git a/thirdparty/libjpeg-turbo/src/jcapistd.c b/thirdparty/libjpeg-turbo/src/jcapistd.c
new file mode 100644
index 00000000000..2226094ba65
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jcapistd.c
@@ -0,0 +1,200 @@
+/*
+ * jcapistd.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains application interface code for the compression half
+ * of the JPEG library.  These are the "standard" API routines that are
+ * used in the normal full-compression case.  They are not used by a
+ * transcoding-only application.  Note that if an application links in
+ * jpeg_start_compress, it will end up linking in the entire compressor.
+ * We thus must separate this file from jcapimin.c to avoid linking the
+ * whole compression library into a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jsamplecomp.h"
+
+
+#if BITS_IN_JSAMPLE == 8
+
+/*
+ * Compression initialization.
+ * Before calling this, all parameters and a data destination must be set up.
+ *
+ * We require a write_all_tables parameter as a failsafe check when writing
+ * multiple datastreams from the same compression object.  Since prior runs
+ * will have left all the tables marked sent_table=TRUE, a subsequent run
+ * would emit an abbreviated stream (no tables) by default.  This may be what
+ * is wanted, but for safety's sake it should not be the default behavior:
+ * programmers should have to make a deliberate choice to emit abbreviated
+ * images.  Therefore the documentation and examples should encourage people
+ * to pass write_all_tables=TRUE; then it will take active thought to do the
+ * wrong thing.
+ */
+
+GLOBAL(void)
+jpeg_start_compress(j_compress_ptr cinfo, boolean write_all_tables)
+{
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (write_all_tables)
+    jpeg_suppress_tables(cinfo, FALSE); /* mark all tables to be written */
+
+  /* (Re)initialize error mgr and destination modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr)cinfo);
+  (*cinfo->dest->init_destination) (cinfo);
+  /* Perform master selection of active modules */
+  jinit_compress_master(cinfo);
+  /* Set up for the first pass */
+  (*cinfo->master->prepare_for_pass) (cinfo);
+  /* Ready for application to drive first pass through _jpeg_write_scanlines
+   * or _jpeg_write_raw_data.
+   */
+  cinfo->next_scanline = 0;
+  cinfo->global_state = (cinfo->raw_data_in ? CSTATE_RAW_OK : CSTATE_SCANNING);
+}
+
+#endif
+
+
+/*
+ * Write some scanlines of data to the JPEG compressor.
+ *
+ * The return value will be the number of lines actually written.
+ * This should be less than the supplied num_lines only in case that
+ * the data destination module has requested suspension of the compressor,
+ * or if more than image_height scanlines are passed in.
+ *
+ * Note: we warn about excess calls to _jpeg_write_scanlines() since
+ * this likely signals an application programmer error.  However,
+ * excess scanlines passed in the last valid call are *silently* ignored,
+ * so that the application need not adjust num_lines for end-of-image
+ * when using a multiple-scanline buffer.
+ */
+
+GLOBAL(JDIMENSION)
+_jpeg_write_scanlines(j_compress_ptr cinfo, _JSAMPARRAY scanlines,
+                      JDIMENSION num_lines)
+{
+#if BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)
+  JDIMENSION row_ctr, rows_left;
+
+#ifdef C_LOSSLESS_SUPPORTED
+  if (cinfo->master->lossless) {
+#if BITS_IN_JSAMPLE == 8
+    if (cinfo->data_precision > BITS_IN_JSAMPLE || cinfo->data_precision < 2)
+#else
+    if (cinfo->data_precision > BITS_IN_JSAMPLE ||
+        cinfo->data_precision < BITS_IN_JSAMPLE - 3)
+#endif
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  } else
+#endif
+  {
+    if (cinfo->data_precision != BITS_IN_JSAMPLE)
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  }
+
+  if (cinfo->global_state != CSTATE_SCANNING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->next_scanline >= cinfo->image_height)
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long)cinfo->next_scanline;
+    cinfo->progress->pass_limit = (long)cinfo->image_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo);
+  }
+
+  /* Give master control module another chance if this is first call to
+   * _jpeg_write_scanlines.  This lets output of the frame/scan headers be
+   * delayed so that application can write COM, etc, markers between
+   * jpeg_start_compress and _jpeg_write_scanlines.
+   */
+  if (cinfo->master->call_pass_startup)
+    (*cinfo->master->pass_startup) (cinfo);
+
+  /* Ignore any extra scanlines at bottom of image. */
+  rows_left = cinfo->image_height - cinfo->next_scanline;
+  if (num_lines > rows_left)
+    num_lines = rows_left;
+
+  row_ctr = 0;
+  (*cinfo->main->_process_data) (cinfo, scanlines, &row_ctr, num_lines);
+  cinfo->next_scanline += row_ctr;
+  return row_ctr;
+#else
+  ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  return 0;
+#endif
+}
+
+
+#if BITS_IN_JSAMPLE != 16
+
+/*
+ * Alternate entry point to write raw data.
+ * Processes exactly one iMCU row per call, unless suspended.
+ */
+
+GLOBAL(JDIMENSION)
+_jpeg_write_raw_data(j_compress_ptr cinfo, _JSAMPIMAGE data,
+                     JDIMENSION num_lines)
+{
+  JDIMENSION lines_per_iMCU_row;
+
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  if (cinfo->master->lossless)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
+  if (cinfo->global_state != CSTATE_RAW_OK)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->next_scanline >= cinfo->image_height) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;
+  }
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long)cinfo->next_scanline;
+    cinfo->progress->pass_limit = (long)cinfo->image_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo);
+  }
+
+  /* Give master control module another chance if this is first call to
+   * _jpeg_write_raw_data.  This lets output of the frame/scan headers be
+   * delayed so that application can write COM, etc, markers between
+   * jpeg_start_compress and _jpeg_write_raw_data.
+   */
+  if (cinfo->master->call_pass_startup)
+    (*cinfo->master->pass_startup) (cinfo);
+
+  /* Verify that at least one iMCU row has been passed. */
+  lines_per_iMCU_row = cinfo->max_v_samp_factor * DCTSIZE;
+  if (num_lines < lines_per_iMCU_row)
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* Directly compress the row. */
+  if (!(*cinfo->coef->_compress_data) (cinfo, data)) {
+    /* If compressor did not consume the whole row, suspend processing. */
+    return 0;
+  }
+
+  /* OK, we processed one iMCU row. */
+  cinfo->next_scanline += lines_per_iMCU_row;
+  return lines_per_iMCU_row;
+}
+
+#endif /* BITS_IN_JSAMPLE != 16 */
diff --git a/thirdparty/libjpeg-turbo/src/jcarith.c b/thirdparty/libjpeg-turbo/src/jcarith.c
new file mode 100644
index 00000000000..b1720521bf5
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jcarith.c
@@ -0,0 +1,932 @@
+/*
+ * jcarith.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Developed 1997-2009 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015, 2018, 2021-2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains portable arithmetic entropy encoding routines for JPEG
+ * (implementing Recommendation ITU-T T.81 | ISO/IEC 10918-1).
+ *
+ * Both sequential and progressive modes are supported in this single module.
+ *
+ * Suspension is not currently supported in this module.
+ *
+ * NOTE: All referenced figures are from
+ * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/* Expanded entropy encoder object for arithmetic encoding. */
+
+typedef struct {
+  struct jpeg_entropy_encoder pub; /* public fields */
+
+  JLONG c; /* C register, base of coding interval, layout as in sec. D.1.3 */
+  JLONG a;               /* A register, normalized size of coding interval */
+  JLONG sc;        /* counter for stacked 0xFF values which might overflow */
+  JLONG zc;          /* counter for pending 0x00 output values which might *
+                          * be discarded at the end ("Pacman" termination) */
+  int ct;  /* bit shift counter, determines when next byte will be written */
+  int buffer;                /* buffer for most recent output byte != 0xFF */
+
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+  int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */
+
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
+  int next_restart_num;         /* next restart number to write (0-7) */
+
+  /* Pointers to statistics areas (these workspaces have image lifespan) */
+  unsigned char *dc_stats[NUM_ARITH_TBLS];
+  unsigned char *ac_stats[NUM_ARITH_TBLS];
+
+  /* Statistics bin for coding with fixed probability 0.5 */
+  unsigned char fixed_bin[4];
+} arith_entropy_encoder;
+
+typedef arith_entropy_encoder *arith_entropy_ptr;
+
+/* The following two definitions specify the allocation chunk size
+ * for the statistics area.
+ * According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
+ * 49 statistics bins for DC, and 245 statistics bins for AC coding.
+ *
+ * We use a compact representation with 1 byte per statistics bin,
+ * thus the numbers directly represent byte sizes.
+ * This 1 byte per statistics bin contains the meaning of the MPS
+ * (more probable symbol) in the highest bit (mask 0x80), and the
+ * index into the probability estimation state machine table
+ * in the lower bits (mask 0x7F).
+ */
+
+#define DC_STAT_BINS  64
+#define AC_STAT_BINS  256
+
+/* NOTE: Uncomment the following #define if you want to use the
+ * given formula for calculating the AC conditioning parameter Kx
+ * for spectral selection progressive coding in section G.1.3.2
+ * of the spec (Kx = Kmin + SRL (8 + Se - Kmin) 4).
+ * Although the spec and P&M authors claim that this "has proven
+ * to give good results for 8 bit precision samples", I'm not
+ * convinced yet that this is really beneficial.
+ * Early tests gave only very marginal compression enhancements
+ * (a few - around 5 or so - bytes even for very large files),
+ * which would turn out rather negative if we'd suppress the
+ * DAC (Define Arithmetic Conditioning) marker segments for
+ * the default parameters in the future.
+ * Note that currently the marker writing module emits 12-byte
+ * DAC segments for a full-component scan in a color image.
+ * This is not worth worrying about IMHO. However, since the
+ * spec defines the default values to be used if the tables
+ * are omitted (unlike Huffman tables, which are required
+ * anyway), one might optimize this behaviour in the future,
+ * and then it would be disadvantageous to use custom tables if
+ * they don't provide sufficient gain to exceed the DAC size.
+ *
+ * On the other hand, I'd consider it as a reasonable result
+ * that the conditioning has no significant influence on the
+ * compression performance. This means that the basic
+ * statistical model is already rather stable.
+ *
+ * Thus, at the moment, we use the default conditioning values
+ * anyway, and do not use the custom formula.
+ *
+#define CALCULATE_SPECTRAL_CONDITIONING
+ */
+
+/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG.
+ * We assume that int right shift is unsigned if JLONG right shift is,
+ * which should be safe.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define ISHIFT_TEMPS    int ishift_temp;
+#define IRIGHT_SHIFT(x, shft) \
+  ((ishift_temp = (x)) < 0 ? \
+   (ishift_temp >> (shft)) | ((~0) << (16 - (shft))) : \
+   (ishift_temp >> (shft)))
+#else
+#define ISHIFT_TEMPS
+#define IRIGHT_SHIFT(x, shft)   ((x) >> (shft))
+#endif
+
+
+LOCAL(void)
+emit_byte(int val, j_compress_ptr cinfo)
+/* Write next output byte; we do not support suspension in this module. */
+{
+  struct jpeg_destination_mgr *dest = cinfo->dest;
+
+  *dest->next_output_byte++ = (JOCTET)val;
+  if (--dest->free_in_buffer == 0)
+    if (!(*dest->empty_output_buffer) (cinfo))
+      ERREXIT(cinfo, JERR_CANT_SUSPEND);
+}
+
+
+/*
+ * Finish up at the end of an arithmetic-compressed scan.
+ */
+
+METHODDEF(void)
+finish_pass(j_compress_ptr cinfo)
+{
+  arith_entropy_ptr e = (arith_entropy_ptr)cinfo->entropy;
+  JLONG temp;
+
+  /* Section D.1.8: Termination of encoding */
+
+  /* Find the e->c in the coding interval with the largest
+   * number of trailing zero bits */
+  if ((temp = (e->a - 1 + e->c) & 0xFFFF0000UL) < e->c)
+    e->c = temp + 0x8000L;
+  else
+    e->c = temp;
+  /* Send remaining bytes to output */
+  e->c <<= e->ct;
+  if (e->c & 0xF8000000UL) {
+    /* One final overflow has to be handled */
+    if (e->buffer >= 0) {
+      if (e->zc)
+        do emit_byte(0x00, cinfo);
+        while (--e->zc);
+      emit_byte(e->buffer + 1, cinfo);
+      if (e->buffer + 1 == 0xFF)
+        emit_byte(0x00, cinfo);
+    }
+    e->zc += e->sc;  /* carry-over converts stacked 0xFF bytes to 0x00 */
+    e->sc = 0;
+  } else {
+    if (e->buffer == 0)
+      ++e->zc;
+    else if (e->buffer >= 0) {
+      if (e->zc)
+        do emit_byte(0x00, cinfo);
+        while (--e->zc);
+      emit_byte(e->buffer, cinfo);
+    }
+    if (e->sc) {
+      if (e->zc)
+        do emit_byte(0x00, cinfo);
+        while (--e->zc);
+      do {
+        emit_byte(0xFF, cinfo);
+        emit_byte(0x00, cinfo);
+      } while (--e->sc);
+    }
+  }
+  /* Output final bytes only if they are not 0x00 */
+  if (e->c & 0x7FFF800L) {
+    if (e->zc)  /* output final pending zero bytes */
+      do emit_byte(0x00, cinfo);
+      while (--e->zc);
+    emit_byte((e->c >> 19) & 0xFF, cinfo);
+    if (((e->c >> 19) & 0xFF) == 0xFF)
+      emit_byte(0x00, cinfo);
+    if (e->c & 0x7F800L) {
+      emit_byte((e->c >> 11) & 0xFF, cinfo);
+      if (((e->c >> 11) & 0xFF) == 0xFF)
+        emit_byte(0x00, cinfo);
+    }
+  }
+}
+
+
+/*
+ * The core arithmetic encoding routine (common in JPEG and JBIG).
+ * This needs to go as fast as possible.
+ * Machine-dependent optimization facilities
+ * are not utilized in this portable implementation.
+ * However, this code should be fairly efficient and
+ * may be a good base for further optimizations anyway.
+ *
+ * Parameter 'val' to be encoded may be 0 or 1 (binary decision).
+ *
+ * Note: I've added full "Pacman" termination support to the
+ * byte output routines, which is equivalent to the optional
+ * Discard_final_zeros procedure (Figure D.15) in the spec.
+ * Thus, we always produce the shortest possible output
+ * stream compliant to the spec (no trailing zero bytes,
+ * except for FF stuffing).
+ *
+ * I've also introduced a new scheme for accessing
+ * the probability estimation state machine table,
+ * derived from Markus Kuhn's JBIG implementation.
+ */
+
+LOCAL(void)
+arith_encode(j_compress_ptr cinfo, unsigned char *st, int val)
+{
+  register arith_entropy_ptr e = (arith_entropy_ptr)cinfo->entropy;
+  register unsigned char nl, nm;
+  register JLONG qe, temp;
+  register int sv;
+
+  /* Fetch values from our compact representation of Table D.2:
+   * Qe values and probability estimation state machine
+   */
+  sv = *st;
+  qe = jpeg_aritab[sv & 0x7F];  /* => Qe_Value */
+  nl = qe & 0xFF;  qe >>= 8;    /* Next_Index_LPS + Switch_MPS */
+  nm = qe & 0xFF;  qe >>= 8;    /* Next_Index_MPS */
+
+  /* Encode & estimation procedures per sections D.1.4 & D.1.5 */
+  e->a -= qe;
+  if (val != (sv >> 7)) {
+    /* Encode the less probable symbol */
+    if (e->a >= qe) {
+      /* If the interval size (qe) for the less probable symbol (LPS)
+       * is larger than the interval size for the MPS, then exchange
+       * the two symbols for coding efficiency, otherwise code the LPS
+       * as usual: */
+      e->c += e->a;
+      e->a = qe;
+    }
+    *st = (sv & 0x80) ^ nl;     /* Estimate_after_LPS */
+  } else {
+    /* Encode the more probable symbol */
+    if (e->a >= 0x8000L)
+      return;  /* A >= 0x8000 -> ready, no renormalization required */
+    if (e->a < qe) {
+      /* If the interval size (qe) for the less probable symbol (LPS)
+       * is larger than the interval size for the MPS, then exchange
+       * the two symbols for coding efficiency: */
+      e->c += e->a;
+      e->a = qe;
+    }
+    *st = (sv & 0x80) ^ nm;     /* Estimate_after_MPS */
+  }
+
+  /* Renormalization & data output per section D.1.6 */
+  do {
+    e->a <<= 1;
+    e->c <<= 1;
+    if (--e->ct == 0) {
+      /* Another byte is ready for output */
+      temp = e->c >> 19;
+      if (temp > 0xFF) {
+        /* Handle overflow over all stacked 0xFF bytes */
+        if (e->buffer >= 0) {
+          if (e->zc)
+            do emit_byte(0x00, cinfo);
+            while (--e->zc);
+          emit_byte(e->buffer + 1, cinfo);
+          if (e->buffer + 1 == 0xFF)
+            emit_byte(0x00, cinfo);
+        }
+        e->zc += e->sc;  /* carry-over converts stacked 0xFF bytes to 0x00 */
+        e->sc = 0;
+        /* Note: The 3 spacer bits in the C register guarantee
+         * that the new buffer byte can't be 0xFF here
+         * (see page 160 in the P&M JPEG book). */
+        e->buffer = temp & 0xFF;  /* new output byte, might overflow later */
+      } else if (temp == 0xFF) {
+        ++e->sc;  /* stack 0xFF byte (which might overflow later) */
+      } else {
+        /* Output all stacked 0xFF bytes, they will not overflow any more */
+        if (e->buffer == 0)
+          ++e->zc;
+        else if (e->buffer >= 0) {
+          if (e->zc)
+            do emit_byte(0x00, cinfo);
+            while (--e->zc);
+          emit_byte(e->buffer, cinfo);
+        }
+        if (e->sc) {
+          if (e->zc)
+            do emit_byte(0x00, cinfo);
+            while (--e->zc);
+          do {
+            emit_byte(0xFF, cinfo);
+            emit_byte(0x00, cinfo);
+          } while (--e->sc);
+        }
+        e->buffer = temp & 0xFF;  /* new output byte (can still overflow) */
+      }
+      e->c &= 0x7FFFFL;
+      e->ct += 8;
+    }
+  } while (e->a < 0x8000L);
+}
+
+
+/*
+ * Emit a restart marker & resynchronize predictions.
+ */
+
+LOCAL(void)
+emit_restart(j_compress_ptr cinfo, int restart_num)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy;
+  int ci;
+  jpeg_component_info *compptr;
+
+  finish_pass(cinfo);
+
+  emit_byte(0xFF, cinfo);
+  emit_byte(JPEG_RST0 + restart_num, cinfo);
+
+  /* Re-initialize statistics areas */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* DC needs no table for refinement scan */
+    if (cinfo->progressive_mode == 0 || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
+      memset(entropy->dc_stats[compptr->dc_tbl_no], 0, DC_STAT_BINS);
+      /* Reset DC predictions to 0 */
+      entropy->last_dc_val[ci] = 0;
+      entropy->dc_context[ci] = 0;
+    }
+    /* AC needs no table when not present */
+    if (cinfo->progressive_mode == 0 || cinfo->Se) {
+      memset(entropy->ac_stats[compptr->ac_tbl_no], 0, AC_STAT_BINS);
+    }
+  }
+
+  /* Reset arithmetic encoding variables */
+  entropy->c = 0;
+  entropy->a = 0x10000L;
+  entropy->sc = 0;
+  entropy->zc = 0;
+  entropy->ct = 11;
+  entropy->buffer = -1;  /* empty */
+}
+
+
+/*
+ * MCU encoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int blkn, ci, tbl;
+  int v, v2, m;
+  ISHIFT_TEMPS
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
+
+    /* Compute the DC value after the required point transform by Al.
+     * This is simply an arithmetic right shift.
+     */
+    m = IRIGHT_SHIFT((int)((*block)[0]), cinfo->Al);
+
+    /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */
+
+    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
+    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
+
+    /* Figure F.4: Encode_DC_DIFF */
+    if ((v = m - entropy->last_dc_val[ci]) == 0) {
+      arith_encode(cinfo, st, 0);
+      entropy->dc_context[ci] = 0;      /* zero diff category */
+    } else {
+      entropy->last_dc_val[ci] = m;
+      arith_encode(cinfo, st, 1);
+      /* Figure F.6: Encoding nonzero value v */
+      /* Figure F.7: Encoding the sign of v */
+      if (v > 0) {
+        arith_encode(cinfo, st + 1, 0); /* Table F.4: SS = S0 + 1 */
+        st += 2;                        /* Table F.4: SP = S0 + 2 */
+        entropy->dc_context[ci] = 4;    /* small positive diff category */
+      } else {
+        v = -v;
+        arith_encode(cinfo, st + 1, 1); /* Table F.4: SS = S0 + 1 */
+        st += 3;                        /* Table F.4: SN = S0 + 3 */
+        entropy->dc_context[ci] = 8;    /* small negative diff category */
+      }
+      /* Figure F.8: Encoding the magnitude category of v */
+      m = 0;
+      if (v -= 1) {
+        arith_encode(cinfo, st, 1);
+        m = 1;
+        v2 = v;
+        st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
+        while (v2 >>= 1) {
+          arith_encode(cinfo, st, 1);
+          m <<= 1;
+          st += 1;
+        }
+      }
+      arith_encode(cinfo, st, 0);
+      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
+      if (m < (int)((1L << cinfo->arith_dc_L[tbl]) >> 1))
+        entropy->dc_context[ci] = 0;    /* zero diff category */
+      else if (m > (int)((1L << cinfo->arith_dc_U[tbl]) >> 1))
+        entropy->dc_context[ci] += 8;   /* large diff category */
+      /* Figure F.9: Encoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+        arith_encode(cinfo, st, (m & v) ? 1 : 0);
+    }
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int tbl, k, ke;
+  int v, v2, m;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  /* Encode the MCU data block */
+  block = MCU_data[0];
+  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
+
+  /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */
+
+  /* Establish EOB (end-of-block) index */
+  for (ke = cinfo->Se; ke > 0; ke--)
+    /* We must apply the point transform by Al.  For AC coefficients this
+     * is an integer division with rounding towards 0.  To do this portably
+     * in C, we shift after obtaining the absolute value.
+     */
+    if ((v = (*block)[jpeg_natural_order[ke]]) >= 0) {
+      if (v >>= cinfo->Al) break;
+    } else {
+      v = -v;
+      if (v >>= cinfo->Al) break;
+    }
+
+  /* Figure F.5: Encode_AC_Coefficients */
+  for (k = cinfo->Ss; k <= ke; k++) {
+    st = entropy->ac_stats[tbl] + 3 * (k - 1);
+    arith_encode(cinfo, st, 0);         /* EOB decision */
+    for (;;) {
+      if ((v = (*block)[jpeg_natural_order[k]]) >= 0) {
+        if (v >>= cinfo->Al) {
+          arith_encode(cinfo, st + 1, 1);
+          arith_encode(cinfo, entropy->fixed_bin, 0);
+          break;
+        }
+      } else {
+        v = -v;
+        if (v >>= cinfo->Al) {
+          arith_encode(cinfo, st + 1, 1);
+          arith_encode(cinfo, entropy->fixed_bin, 1);
+          break;
+        }
+      }
+      arith_encode(cinfo, st + 1, 0);  st += 3;  k++;
+    }
+    st += 2;
+    /* Figure F.8: Encoding the magnitude category of v */
+    m = 0;
+    if (v -= 1) {
+      arith_encode(cinfo, st, 1);
+      m = 1;
+      v2 = v;
+      if (v2 >>= 1) {
+        arith_encode(cinfo, st, 1);
+        m <<= 1;
+        st = entropy->ac_stats[tbl] +
+             (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+        while (v2 >>= 1) {
+          arith_encode(cinfo, st, 1);
+          m <<= 1;
+          st += 1;
+        }
+      }
+    }
+    arith_encode(cinfo, st, 0);
+    /* Figure F.9: Encoding the magnitude bit pattern of v */
+    st += 14;
+    while (m >>= 1)
+      arith_encode(cinfo, st, (m & v) ? 1 : 0);
+  }
+  /* Encode EOB decision only if k <= cinfo->Se */
+  if (k <= cinfo->Se) {
+    st = entropy->ac_stats[tbl] + 3 * (k - 1);
+    arith_encode(cinfo, st, 1);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for DC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy;
+  unsigned char *st;
+  int Al, blkn;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  st = entropy->fixed_bin;      /* use fixed probability estimation */
+  Al = cinfo->Al;
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    /* We simply emit the Al'th bit of the DC coefficient value. */
+    arith_encode(cinfo, st, (MCU_data[blkn][0][0] >> Al) & 1);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int tbl, k, ke, kex;
+  int v;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  /* Encode the MCU data block */
+  block = MCU_data[0];
+  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
+
+  /* Section G.1.3.3: Encoding of AC coefficients */
+
+  /* Establish EOB (end-of-block) index */
+  for (ke = cinfo->Se; ke > 0; ke--)
+    /* We must apply the point transform by Al.  For AC coefficients this
+     * is an integer division with rounding towards 0.  To do this portably
+     * in C, we shift after obtaining the absolute value.
+     */
+    if ((v = (*block)[jpeg_natural_order[ke]]) >= 0) {
+      if (v >>= cinfo->Al) break;
+    } else {
+      v = -v;
+      if (v >>= cinfo->Al) break;
+    }
+
+  /* Establish EOBx (previous stage end-of-block) index */
+  for (kex = ke; kex > 0; kex--)
+    if ((v = (*block)[jpeg_natural_order[kex]]) >= 0) {
+      if (v >>= cinfo->Ah) break;
+    } else {
+      v = -v;
+      if (v >>= cinfo->Ah) break;
+    }
+
+  /* Figure G.10: Encode_AC_Coefficients_SA */
+  for (k = cinfo->Ss; k <= ke; k++) {
+    st = entropy->ac_stats[tbl] + 3 * (k - 1);
+    if (k > kex)
+      arith_encode(cinfo, st, 0);       /* EOB decision */
+    for (;;) {
+      if ((v = (*block)[jpeg_natural_order[k]]) >= 0) {
+        if (v >>= cinfo->Al) {
+          if (v >> 1)                   /* previously nonzero coef */
+            arith_encode(cinfo, st + 2, (v & 1));
+          else {                        /* newly nonzero coef */
+            arith_encode(cinfo, st + 1, 1);
+            arith_encode(cinfo, entropy->fixed_bin, 0);
+          }
+          break;
+        }
+      } else {
+        v = -v;
+        if (v >>= cinfo->Al) {
+          if (v >> 1)                   /* previously nonzero coef */
+            arith_encode(cinfo, st + 2, (v & 1));
+          else {                        /* newly nonzero coef */
+            arith_encode(cinfo, st + 1, 1);
+            arith_encode(cinfo, entropy->fixed_bin, 1);
+          }
+          break;
+        }
+      }
+      arith_encode(cinfo, st + 1, 0);  st += 3;  k++;
+    }
+  }
+  /* Encode EOB decision only if k <= cinfo->Se */
+  if (k <= cinfo->Se) {
+    st = entropy->ac_stats[tbl] + 3 * (k - 1);
+    arith_encode(cinfo, st, 1);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Encode and output one MCU's worth of arithmetic-compressed coefficients.
+ */
+
+METHODDEF(boolean)
+encode_mcu(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy;
+  jpeg_component_info *compptr;
+  JBLOCKROW block;
+  unsigned char *st;
+  int blkn, ci, tbl, k, ke;
+  int v, v2, m;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      emit_restart(cinfo, entropy->next_restart_num);
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+
+    /* Sections F.1.4.1 & F.1.4.4.1: Encoding of DC coefficients */
+
+    tbl = compptr->dc_tbl_no;
+
+    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
+    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
+
+    /* Figure F.4: Encode_DC_DIFF */
+    if ((v = (*block)[0] - entropy->last_dc_val[ci]) == 0) {
+      arith_encode(cinfo, st, 0);
+      entropy->dc_context[ci] = 0;      /* zero diff category */
+    } else {
+      entropy->last_dc_val[ci] = (*block)[0];
+      arith_encode(cinfo, st, 1);
+      /* Figure F.6: Encoding nonzero value v */
+      /* Figure F.7: Encoding the sign of v */
+      if (v > 0) {
+        arith_encode(cinfo, st + 1, 0); /* Table F.4: SS = S0 + 1 */
+        st += 2;                        /* Table F.4: SP = S0 + 2 */
+        entropy->dc_context[ci] = 4;    /* small positive diff category */
+      } else {
+        v = -v;
+        arith_encode(cinfo, st + 1, 1); /* Table F.4: SS = S0 + 1 */
+        st += 3;                        /* Table F.4: SN = S0 + 3 */
+        entropy->dc_context[ci] = 8;    /* small negative diff category */
+      }
+      /* Figure F.8: Encoding the magnitude category of v */
+      m = 0;
+      if (v -= 1) {
+        arith_encode(cinfo, st, 1);
+        m = 1;
+        v2 = v;
+        st = entropy->dc_stats[tbl] + 20; /* Table F.4: X1 = 20 */
+        while (v2 >>= 1) {
+          arith_encode(cinfo, st, 1);
+          m <<= 1;
+          st += 1;
+        }
+      }
+      arith_encode(cinfo, st, 0);
+      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
+      if (m < (int)((1L << cinfo->arith_dc_L[tbl]) >> 1))
+        entropy->dc_context[ci] = 0;    /* zero diff category */
+      else if (m > (int)((1L << cinfo->arith_dc_U[tbl]) >> 1))
+        entropy->dc_context[ci] += 8;   /* large diff category */
+      /* Figure F.9: Encoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+        arith_encode(cinfo, st, (m & v) ? 1 : 0);
+    }
+
+    /* Sections F.1.4.2 & F.1.4.4.2: Encoding of AC coefficients */
+
+    tbl = compptr->ac_tbl_no;
+
+    /* Establish EOB (end-of-block) index */
+    for (ke = DCTSIZE2 - 1; ke > 0; ke--)
+      if ((*block)[jpeg_natural_order[ke]]) break;
+
+    /* Figure F.5: Encode_AC_Coefficients */
+    for (k = 1; k <= ke; k++) {
+      st = entropy->ac_stats[tbl] + 3 * (k - 1);
+      arith_encode(cinfo, st, 0);       /* EOB decision */
+      while ((v = (*block)[jpeg_natural_order[k]]) == 0) {
+        arith_encode(cinfo, st + 1, 0);  st += 3;  k++;
+      }
+      arith_encode(cinfo, st + 1, 1);
+      /* Figure F.6: Encoding nonzero value v */
+      /* Figure F.7: Encoding the sign of v */
+      if (v > 0) {
+        arith_encode(cinfo, entropy->fixed_bin, 0);
+      } else {
+        v = -v;
+        arith_encode(cinfo, entropy->fixed_bin, 1);
+      }
+      st += 2;
+      /* Figure F.8: Encoding the magnitude category of v */
+      m = 0;
+      if (v -= 1) {
+        arith_encode(cinfo, st, 1);
+        m = 1;
+        v2 = v;
+        if (v2 >>= 1) {
+          arith_encode(cinfo, st, 1);
+          m <<= 1;
+          st = entropy->ac_stats[tbl] +
+               (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+          while (v2 >>= 1) {
+            arith_encode(cinfo, st, 1);
+            m <<= 1;
+            st += 1;
+          }
+        }
+      }
+      arith_encode(cinfo, st, 0);
+      /* Figure F.9: Encoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+        arith_encode(cinfo, st, (m & v) ? 1 : 0);
+    }
+    /* Encode EOB decision only if k <= DCTSIZE2 - 1 */
+    if (k <= DCTSIZE2 - 1) {
+      st = entropy->ac_stats[tbl] + 3 * (k - 1);
+      arith_encode(cinfo, st, 1);
+    }
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Initialize for an arithmetic-compressed scan.
+ */
+
+METHODDEF(void)
+start_pass(j_compress_ptr cinfo, boolean gather_statistics)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy;
+  int ci, tbl;
+  jpeg_component_info *compptr;
+
+  if (gather_statistics)
+    /* Make sure to avoid that in the master control logic!
+     * We are fully adaptive here and need no extra
+     * statistics gathering pass!
+     */
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
+  /* We assume jcmaster.c already validated the progressive scan parameters. */
+
+  /* Select execution routines */
+  if (cinfo->progressive_mode) {
+    if (cinfo->Ah == 0) {
+      if (cinfo->Ss == 0)
+        entropy->pub.encode_mcu = encode_mcu_DC_first;
+      else
+        entropy->pub.encode_mcu = encode_mcu_AC_first;
+    } else {
+      if (cinfo->Ss == 0)
+        entropy->pub.encode_mcu = encode_mcu_DC_refine;
+      else
+        entropy->pub.encode_mcu = encode_mcu_AC_refine;
+    }
+  } else
+    entropy->pub.encode_mcu = encode_mcu;
+
+  /* Allocate & initialize requested statistics areas */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* DC needs no table for refinement scan */
+    if (cinfo->progressive_mode == 0 || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
+      tbl = compptr->dc_tbl_no;
+      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
+        ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+      if (entropy->dc_stats[tbl] == NULL)
+        entropy->dc_stats[tbl] = (unsigned char *)(*cinfo->mem->alloc_small)
+          ((j_common_ptr)cinfo, JPOOL_IMAGE, DC_STAT_BINS);
+      memset(entropy->dc_stats[tbl], 0, DC_STAT_BINS);
+      /* Initialize DC predictions to 0 */
+      entropy->last_dc_val[ci] = 0;
+      entropy->dc_context[ci] = 0;
+    }
+    /* AC needs no table when not present */
+    if (cinfo->progressive_mode == 0 || cinfo->Se) {
+      tbl = compptr->ac_tbl_no;
+      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
+        ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+      if (entropy->ac_stats[tbl] == NULL)
+        entropy->ac_stats[tbl] = (unsigned char *)(*cinfo->mem->alloc_small)
+          ((j_common_ptr)cinfo, JPOOL_IMAGE, AC_STAT_BINS);
+      memset(entropy->ac_stats[tbl], 0, AC_STAT_BINS);
+#ifdef CALCULATE_SPECTRAL_CONDITIONING
+      if (cinfo->progressive_mode)
+        /* Section G.1.3.2: Set appropriate arithmetic conditioning value Kx */
+        cinfo->arith_ac_K[tbl] = cinfo->Ss +
+                                 ((8 + cinfo->Se - cinfo->Ss) >> 4);
+#endif
+    }
+  }
+
+  /* Initialize arithmetic encoding variables */
+  entropy->c = 0;
+  entropy->a = 0x10000L;
+  entropy->sc = 0;
+  entropy->zc = 0;
+  entropy->ct = 11;
+  entropy->buffer = -1;  /* empty */
+
+  /* Initialize restart stuff */
+  entropy->restarts_to_go = cinfo->restart_interval;
+  entropy->next_restart_num = 0;
+}
+
+
+/*
+ * Module initialization routine for arithmetic entropy encoding.
+ */
+
+GLOBAL(void)
+jinit_arith_encoder(j_compress_ptr cinfo)
+{
+  arith_entropy_ptr entropy;
+  int i;
+
+  entropy = (arith_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(arith_entropy_encoder));
+  cinfo->entropy = (struct jpeg_entropy_encoder *)entropy;
+  entropy->pub.start_pass = start_pass;
+  entropy->pub.finish_pass = finish_pass;
+
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    entropy->dc_stats[i] = NULL;
+    entropy->ac_stats[i] = NULL;
+  }
+
+  /* Initialize index for fixed probability estimation */
+  entropy->fixed_bin[0] = 113;
+}
diff --git a/thirdparty/libjpeg-turbo/src/jccoefct.c b/thirdparty/libjpeg-turbo/src/jccoefct.c
new file mode 100644
index 00000000000..2a5dde2d07e
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jccoefct.c
@@ -0,0 +1,454 @@
+/*
+ * jccoefct.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains the coefficient buffer controller for compression.
+ * This controller is the top level of the lossy JPEG compressor proper.
+ * The coefficient buffer lies between forward-DCT and entropy encoding steps.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jsamplecomp.h"
+
+
+/* We use a full-image coefficient buffer when doing Huffman optimization,
+ * and also for writing multiple-scan JPEG files.  In all cases, the DCT
+ * step is run during the first pass, and subsequent passes need only read
+ * the buffered coefficients.
+ */
+#ifdef ENTROPY_OPT_SUPPORTED
+#define FULL_COEF_BUFFER_SUPPORTED
+#else
+#ifdef C_MULTISCAN_FILES_SUPPORTED
+#define FULL_COEF_BUFFER_SUPPORTED
+#endif
+#endif
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_coef_controller pub; /* public fields */
+
+  JDIMENSION iMCU_row_num;      /* iMCU row # within image */
+  JDIMENSION mcu_ctr;           /* counts MCUs processed in current row */
+  int MCU_vert_offset;          /* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;    /* number of such rows needed */
+
+  /* For single-pass compression, it's sufficient to buffer just one MCU
+   * (although this may prove a bit slow in practice).  We allocate a
+   * workspace of C_MAX_BLOCKS_IN_MCU coefficient blocks, and reuse it for each
+   * MCU constructed and sent.  In multi-pass modes, this array points to the
+   * current MCU's blocks within the virtual arrays.
+   */
+  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
+
+  /* In multi-pass modes, we need a virtual block array for each component. */
+  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
+} my_coef_controller;
+
+typedef my_coef_controller *my_coef_ptr;
+
+
+/* Forward declarations */
+METHODDEF(boolean) compress_data(j_compress_ptr cinfo, _JSAMPIMAGE input_buf);
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+METHODDEF(boolean) compress_first_pass(j_compress_ptr cinfo,
+                                       _JSAMPIMAGE input_buf);
+METHODDEF(boolean) compress_output(j_compress_ptr cinfo,
+                                   _JSAMPIMAGE input_buf);
+#endif
+
+
+LOCAL(void)
+start_iMCU_row(j_compress_ptr cinfo)
+/* Reset within-iMCU-row counters for a new row */
+{
+  my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
+
+  /* In an interleaved scan, an MCU row is the same as an iMCU row.
+   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
+   * But at the bottom of the image, process only what's left.
+   */
+  if (cinfo->comps_in_scan > 1) {
+    coef->MCU_rows_per_iMCU_row = 1;
+  } else {
+    if (coef->iMCU_row_num < (cinfo->total_iMCU_rows - 1))
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
+    else
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
+  }
+
+  coef->mcu_ctr = 0;
+  coef->MCU_vert_offset = 0;
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_coef(j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
+
+  coef->iMCU_row_num = 0;
+  start_iMCU_row(cinfo);
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+    if (coef->whole_image[0] != NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    coef->pub._compress_data = compress_data;
+    break;
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+  case JBUF_SAVE_AND_PASS:
+    if (coef->whole_image[0] == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    coef->pub._compress_data = compress_first_pass;
+    break;
+  case JBUF_CRANK_DEST:
+    if (coef->whole_image[0] == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    coef->pub._compress_data = compress_output;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+}
+
+
+/*
+ * Process some data in the single-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the image.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf contains a plane for each component in image,
+ * which we index according to the component's SOF position.
+ */
+
+METHODDEF(boolean)
+compress_data(j_compress_ptr cinfo, _JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
+  JDIMENSION MCU_col_num;       /* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int blkn, bi, ci, yindex, yoffset, blockcnt;
+  JDIMENSION ypos, xpos;
+  jpeg_component_info *compptr;
+
+  /* Loop to write as much as one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->mcu_ctr; MCU_col_num <= last_MCU_col;
+         MCU_col_num++) {
+      /* Determine where data comes from in input_buf and do the DCT thing.
+       * Each call on forward_DCT processes a horizontal row of DCT blocks
+       * as wide as an MCU; we rely on having allocated the MCU_buffer[] blocks
+       * sequentially.  Dummy blocks at the right or bottom edge are filled in
+       * specially.  The data in them does not matter for image reconstruction,
+       * so we fill them with values that will encode to the smallest amount of
+       * data, viz: all zeroes in the AC entries, DC entries equal to previous
+       * block's DC value.  (Thanks to Thomas Kinsman for this idea.)
+       */
+      blkn = 0;
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+        compptr = cinfo->cur_comp_info[ci];
+        blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width :
+                                                  compptr->last_col_width;
+        xpos = MCU_col_num * compptr->MCU_sample_width;
+        ypos = yoffset * DCTSIZE; /* ypos == (yoffset+yindex) * DCTSIZE */
+        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+          if (coef->iMCU_row_num < last_iMCU_row ||
+              yoffset + yindex < compptr->last_row_height) {
+            (*cinfo->fdct->_forward_DCT) (cinfo, compptr,
+                                          input_buf[compptr->component_index],
+                                          coef->MCU_buffer[blkn],
+                                          ypos, xpos, (JDIMENSION)blockcnt);
+            if (blockcnt < compptr->MCU_width) {
+              /* Create some dummy blocks at the right edge of the image. */
+              jzero_far((void *)coef->MCU_buffer[blkn + blockcnt],
+                        (compptr->MCU_width - blockcnt) * sizeof(JBLOCK));
+              for (bi = blockcnt; bi < compptr->MCU_width; bi++) {
+                coef->MCU_buffer[blkn + bi][0][0] =
+                  coef->MCU_buffer[blkn + bi - 1][0][0];
+              }
+            }
+          } else {
+            /* Create a row of dummy blocks at the bottom of the image. */
+            jzero_far((void *)coef->MCU_buffer[blkn],
+                      compptr->MCU_width * sizeof(JBLOCK));
+            for (bi = 0; bi < compptr->MCU_width; bi++) {
+              coef->MCU_buffer[blkn + bi][0][0] =
+                coef->MCU_buffer[blkn - 1][0][0];
+            }
+          }
+          blkn += compptr->MCU_width;
+          ypos += DCTSIZE;
+        }
+      }
+      /* Try to write the MCU.  In event of a suspension failure, we will
+       * re-DCT the MCU on restart (a bit inefficient, could be fixed...)
+       */
+      if (!(*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
+        /* Suspension forced; update state counters and exit */
+        coef->MCU_vert_offset = yoffset;
+        coef->mcu_ctr = MCU_col_num;
+        return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->mcu_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+
+/*
+ * Process some data in the first pass of a multi-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the image.
+ * This amount of data is read from the source buffer, DCT'd and quantized,
+ * and saved into the virtual arrays.  We also generate suitable dummy blocks
+ * as needed at the right and lower edges.  (The dummy blocks are constructed
+ * in the virtual arrays, which have been padded appropriately.)  This makes
+ * it possible for subsequent passes not to worry about real vs. dummy blocks.
+ *
+ * We must also emit the data to the entropy encoder.  This is conveniently
+ * done by calling compress_output() after we've loaded the current strip
+ * of the virtual arrays.
+ *
+ * NB: input_buf contains a plane for each component in image.  All
+ * components are DCT'd and loaded into the virtual arrays in this pass.
+ * However, it may be that only a subset of the components are emitted to
+ * the entropy encoder during this first pass; be careful about looking
+ * at the scan-dependent variables (MCU dimensions, etc).
+ */
+
+METHODDEF(boolean)
+compress_first_pass(j_compress_ptr cinfo, _JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION blocks_across, MCUs_across, MCUindex;
+  int bi, ci, h_samp_factor, block_row, block_rows, ndummy;
+  JCOEF lastDC;
+  jpeg_component_info *compptr;
+  JBLOCKARRAY buffer;
+  JBLOCKROW thisblockrow, lastblockrow;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Align the virtual buffer for this component. */
+    buffer = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr)cinfo, coef->whole_image[ci],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION)compptr->v_samp_factor, TRUE);
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (coef->iMCU_row_num < last_iMCU_row)
+      block_rows = compptr->v_samp_factor;
+    else {
+      /* NB: can't use last_row_height here, since may not be set! */
+      block_rows = (int)(compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor;
+    }
+    blocks_across = compptr->width_in_blocks;
+    h_samp_factor = compptr->h_samp_factor;
+    /* Count number of dummy blocks to be added at the right margin. */
+    ndummy = (int)(blocks_across % h_samp_factor);
+    if (ndummy > 0)
+      ndummy = h_samp_factor - ndummy;
+    /* Perform DCT for all non-dummy blocks in this iMCU row.  Each call
+     * on forward_DCT processes a complete horizontal row of DCT blocks.
+     */
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      thisblockrow = buffer[block_row];
+      (*cinfo->fdct->_forward_DCT) (cinfo, compptr,
+                                    input_buf[ci], thisblockrow,
+                                    (JDIMENSION)(block_row * DCTSIZE),
+                                    (JDIMENSION)0, blocks_across);
+      if (ndummy > 0) {
+        /* Create dummy blocks at the right edge of the image. */
+        thisblockrow += blocks_across; /* => first dummy block */
+        jzero_far((void *)thisblockrow, ndummy * sizeof(JBLOCK));
+        lastDC = thisblockrow[-1][0];
+        for (bi = 0; bi < ndummy; bi++) {
+          thisblockrow[bi][0] = lastDC;
+        }
+      }
+    }
+    /* If at end of image, create dummy block rows as needed.
+     * The tricky part here is that within each MCU, we want the DC values
+     * of the dummy blocks to match the last real block's DC value.
+     * This squeezes a few more bytes out of the resulting file...
+     */
+    if (coef->iMCU_row_num == last_iMCU_row) {
+      blocks_across += ndummy;  /* include lower right corner */
+      MCUs_across = blocks_across / h_samp_factor;
+      for (block_row = block_rows; block_row < compptr->v_samp_factor;
+           block_row++) {
+        thisblockrow = buffer[block_row];
+        lastblockrow = buffer[block_row - 1];
+        jzero_far((void *)thisblockrow,
+                  (size_t)(blocks_across * sizeof(JBLOCK)));
+        for (MCUindex = 0; MCUindex < MCUs_across; MCUindex++) {
+          lastDC = lastblockrow[h_samp_factor - 1][0];
+          for (bi = 0; bi < h_samp_factor; bi++) {
+            thisblockrow[bi][0] = lastDC;
+          }
+          thisblockrow += h_samp_factor; /* advance to next MCU in row */
+          lastblockrow += h_samp_factor;
+        }
+      }
+    }
+  }
+  /* NB: compress_output will increment iMCU_row_num if successful.
+   * A suspension return will result in redoing all the work above next time.
+   */
+
+  /* Emit data to the entropy encoder, sharing code with subsequent passes */
+  return compress_output(cinfo, input_buf);
+}
+
+
+/*
+ * Process some data in subsequent passes of a multi-pass case.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the scan.
+ * The data is obtained from the virtual arrays and fed to the entropy coder.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf is ignored; it is likely to be a NULL pointer.
+ */
+
+METHODDEF(boolean)
+compress_output(j_compress_ptr cinfo, _JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
+  JDIMENSION MCU_col_num;       /* index of current MCU within row */
+  int blkn, ci, xindex, yindex, yoffset;
+  JDIMENSION start_col;
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan.
+   * NB: during first pass, this is safe only because the buffers will
+   * already be aligned properly, so jmemmgr.c won't need to do any I/O.
+   */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr)cinfo, coef->whole_image[compptr->component_index],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION)compptr->v_samp_factor, FALSE);
+  }
+
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row;
+         MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkn = 0;                 /* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+        compptr = cinfo->cur_comp_info[ci];
+        start_col = MCU_col_num * compptr->MCU_width;
+        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+          buffer_ptr = buffer[ci][yindex + yoffset] + start_col;
+          for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+            coef->MCU_buffer[blkn++] = buffer_ptr++;
+          }
+        }
+      }
+      /* Try to write the MCU. */
+      if (!(*cinfo->entropy->encode_mcu) (cinfo, coef->MCU_buffer)) {
+        /* Suspension forced; update state counters and exit */
+        coef->MCU_vert_offset = yoffset;
+        coef->mcu_ctr = MCU_col_num;
+        return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->mcu_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+#endif /* FULL_COEF_BUFFER_SUPPORTED */
+
+
+/*
+ * Initialize coefficient buffer controller.
+ */
+
+GLOBAL(void)
+_jinit_c_coef_controller(j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  my_coef_ptr coef;
+
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  coef = (my_coef_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(my_coef_controller));
+  cinfo->coef = (struct jpeg_c_coef_controller *)coef;
+  coef->pub.start_pass = start_pass_coef;
+
+  /* Create the coefficient buffer. */
+  if (need_full_buffer) {
+#ifdef FULL_COEF_BUFFER_SUPPORTED
+    /* Allocate a full-image virtual array for each component, */
+    /* padded to a multiple of samp_factor DCT blocks in each direction. */
+    int ci;
+    jpeg_component_info *compptr;
+
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+         ci++, compptr++) {
+      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
+        ((j_common_ptr)cinfo, JPOOL_IMAGE, FALSE,
+         (JDIMENSION)jround_up((long)compptr->width_in_blocks,
+                               (long)compptr->h_samp_factor),
+         (JDIMENSION)jround_up((long)compptr->height_in_blocks,
+                               (long)compptr->v_samp_factor),
+         (JDIMENSION)compptr->v_samp_factor);
+    }
+#else
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif
+  } else {
+    /* We only need a single-MCU buffer. */
+    JBLOCKROW buffer;
+    int i;
+
+    buffer = (JBLOCKROW)
+      (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                  C_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK));
+    for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) {
+      coef->MCU_buffer[i] = buffer + i;
+    }
+    coef->whole_image[0] = NULL; /* flag for no virtual arrays */
+  }
+}
diff --git a/thirdparty/libjpeg-turbo/src/jccolext.c b/thirdparty/libjpeg-turbo/src/jccolext.c
new file mode 100644
index 00000000000..8eba36c4dff
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jccolext.c
@@ -0,0 +1,152 @@
+/*
+ * jccolext.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009-2012, 2015, 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains input colorspace conversion routines.
+ */
+
+
+/* This file is included by jccolor.c */
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ *
+ * Note that we change from the application's interleaved-pixel format
+ * to our internal noninterleaved, one-plane-per-component format.
+ * The input buffer is therefore three times as wide as the output buffer.
+ *
+ * A starting row offset is provided only for the output buffer.  The caller
+ * can easily adjust the passed input_buf value to accommodate any row
+ * offset required on that side.
+ */
+
+INLINE
+LOCAL(void)
+rgb_ycc_convert_internal(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                         _JSAMPIMAGE output_buf, JDIMENSION output_row,
+                         int num_rows)
+{
+#if BITS_IN_JSAMPLE != 16
+  my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
+  register int r, g, b;
+  register JLONG *ctab = cconvert->rgb_ycc_tab;
+  register _JSAMPROW inptr;
+  register _JSAMPROW outptr0, outptr1, outptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr0 = output_buf[0][output_row];
+    outptr1 = output_buf[1][output_row];
+    outptr2 = output_buf[2][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      r = RANGE_LIMIT(inptr[RGB_RED]);
+      g = RANGE_LIMIT(inptr[RGB_GREEN]);
+      b = RANGE_LIMIT(inptr[RGB_BLUE]);
+      inptr += RGB_PIXELSIZE;
+      /* If the inputs are 0.._MAXJSAMPLE, the outputs of these equations
+       * must be too; we do not need an explicit range-limiting operation.
+       * Hence the value being shifted is never negative, and we don't
+       * need the general RIGHT_SHIFT macro.
+       */
+      /* Y */
+      outptr0[col] = (_JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
+                                 ctab[b + B_Y_OFF]) >> SCALEBITS);
+      /* Cb */
+      outptr1[col] = (_JSAMPLE)((ctab[r + R_CB_OFF] + ctab[g + G_CB_OFF] +
+                                 ctab[b + B_CB_OFF]) >> SCALEBITS);
+      /* Cr */
+      outptr2[col] = (_JSAMPLE)((ctab[r + R_CR_OFF] + ctab[g + G_CR_OFF] +
+                                 ctab[b + B_CR_OFF]) >> SCALEBITS);
+    }
+  }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+}
+
+
+/**************** Cases other than RGB -> YCbCr **************/
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles RGB->grayscale conversion, which is the same
+ * as the RGB->Y portion of RGB->YCbCr.
+ * We assume rgb_ycc_start has been called (we only use the Y tables).
+ */
+
+INLINE
+LOCAL(void)
+rgb_gray_convert_internal(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                          _JSAMPIMAGE output_buf, JDIMENSION output_row,
+                          int num_rows)
+{
+#if BITS_IN_JSAMPLE != 16
+  my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
+  register int r, g, b;
+  register JLONG *ctab = cconvert->rgb_ycc_tab;
+  register _JSAMPROW inptr;
+  register _JSAMPROW outptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr = output_buf[0][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      r = RANGE_LIMIT(inptr[RGB_RED]);
+      g = RANGE_LIMIT(inptr[RGB_GREEN]);
+      b = RANGE_LIMIT(inptr[RGB_BLUE]);
+      inptr += RGB_PIXELSIZE;
+      /* Y */
+      outptr[col] = (_JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
+                                ctab[b + B_Y_OFF]) >> SCALEBITS);
+    }
+  }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles extended RGB->plain RGB conversion
+ */
+
+INLINE
+LOCAL(void)
+rgb_rgb_convert_internal(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                         _JSAMPIMAGE output_buf, JDIMENSION output_row,
+                         int num_rows)
+{
+  register _JSAMPROW inptr;
+  register _JSAMPROW outptr0, outptr1, outptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr0 = output_buf[0][output_row];
+    outptr1 = output_buf[1][output_row];
+    outptr2 = output_buf[2][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      outptr0[col] = inptr[RGB_RED];
+      outptr1[col] = inptr[RGB_GREEN];
+      outptr2[col] = inptr[RGB_BLUE];
+      inptr += RGB_PIXELSIZE;
+    }
+  }
+}
diff --git a/thirdparty/libjpeg-turbo/src/jccolor.c b/thirdparty/libjpeg-turbo/src/jccolor.c
new file mode 100644
index 00000000000..c19f6ff4e34
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jccolor.c
@@ -0,0 +1,755 @@
+/*
+ * jccolor.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2009-2012, 2015, 2022, 2024, D. R. Commander.
+ * Copyright (C) 2014, MIPS Technologies, Inc., California.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains input colorspace conversion routines.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jsimd.h"
+#include "jsamplecomp.h"
+
+
+#if BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_converter pub; /* public fields */
+
+#if BITS_IN_JSAMPLE != 16
+  /* Private state for RGB->YCC conversion */
+  JLONG *rgb_ycc_tab;           /* => table for RGB to YCbCr conversion */
+#endif
+} my_color_converter;
+
+typedef my_color_converter *my_cconvert_ptr;
+
+
+/**************** RGB -> YCbCr conversion: most common case **************/
+
+/*
+ * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
+ * normalized to the range 0.._MAXJSAMPLE rather than -0.5 .. 0.5.
+ * The conversion equations to be implemented are therefore
+ *      Y  =  0.29900 * R + 0.58700 * G + 0.11400 * B
+ *      Cb = -0.16874 * R - 0.33126 * G + 0.50000 * B  + _CENTERJSAMPLE
+ *      Cr =  0.50000 * R - 0.41869 * G - 0.08131 * B  + _CENTERJSAMPLE
+ * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
+ * Note: older versions of the IJG code used a zero offset of _MAXJSAMPLE/2,
+ * rather than _CENTERJSAMPLE, for Cb and Cr.  This gave equal positive and
+ * negative swings for Cb/Cr, but meant that grayscale values (Cb=Cr=0)
+ * were not represented exactly.  Now we sacrifice exact representation of
+ * maximum red and maximum blue in order to get exact grayscales.
+ *
+ * To avoid floating-point arithmetic, we represent the fractional constants
+ * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
+ * the products by 2^16, with appropriate rounding, to get the correct answer.
+ *
+ * For even more speed, we avoid doing any multiplications in the inner loop
+ * by precalculating the constants times R,G,B for all possible values.
+ * For 8-bit samples this is very reasonable (only 256 entries per table);
+ * for 12-bit samples it is still acceptable.  It's not very reasonable for
+ * 16-bit samples, but if you want lossless storage you shouldn't be changing
+ * colorspace anyway.
+ * The _CENTERJSAMPLE offsets and the rounding fudge-factor of 0.5 are included
+ * in the tables to save adding them separately in the inner loop.
+ */
+
+#define SCALEBITS       16      /* speediest right-shift on some machines */
+#define CBCR_OFFSET     ((JLONG)_CENTERJSAMPLE << SCALEBITS)
+#define ONE_HALF        ((JLONG)1 << (SCALEBITS - 1))
+#define FIX(x)          ((JLONG)((x) * (1L << SCALEBITS) + 0.5))
+
+/* We allocate one big table and divide it up into eight parts, instead of
+ * doing eight alloc_small requests.  This lets us use a single table base
+ * address, which can be held in a register in the inner loops on many
+ * machines (more than can hold all eight addresses, anyway).
+ */
+
+#define R_Y_OFF         0                       /* offset to R => Y section */
+#define G_Y_OFF         (1 * (_MAXJSAMPLE + 1)) /* offset to G => Y section */
+#define B_Y_OFF         (2 * (_MAXJSAMPLE + 1)) /* etc. */
+#define R_CB_OFF        (3 * (_MAXJSAMPLE + 1))
+#define G_CB_OFF        (4 * (_MAXJSAMPLE + 1))
+#define B_CB_OFF        (5 * (_MAXJSAMPLE + 1))
+#define R_CR_OFF        B_CB_OFF                /* B=>Cb, R=>Cr are the same */
+#define G_CR_OFF        (6 * (_MAXJSAMPLE + 1))
+#define B_CR_OFF        (7 * (_MAXJSAMPLE + 1))
+#define TABLE_SIZE      (8 * (_MAXJSAMPLE + 1))
+
+/* 12-bit samples use a 16-bit data type, so it is possible to pass
+ * out-of-range sample values (< 0 or > 4095) to jpeg_write_scanlines().
+ * Thus, we mask the incoming 12-bit samples to guard against overrunning
+ * or underrunning the conversion tables.
+ */
+
+#if BITS_IN_JSAMPLE == 12
+#define RANGE_LIMIT(value)  ((value) & 0xFFF)
+#else
+#define RANGE_LIMIT(value)  (value)
+#endif
+
+
+/* Include inline routines for colorspace extensions */
+
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+
+#define RGB_RED  EXT_RGB_RED
+#define RGB_GREEN  EXT_RGB_GREEN
+#define RGB_BLUE  EXT_RGB_BLUE
+#define RGB_PIXELSIZE  EXT_RGB_PIXELSIZE
+#define rgb_ycc_convert_internal  extrgb_ycc_convert_internal
+#define rgb_gray_convert_internal  extrgb_gray_convert_internal
+#define rgb_rgb_convert_internal  extrgb_rgb_convert_internal
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef rgb_ycc_convert_internal
+#undef rgb_gray_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED  EXT_RGBX_RED
+#define RGB_GREEN  EXT_RGBX_GREEN
+#define RGB_BLUE  EXT_RGBX_BLUE
+#define RGB_PIXELSIZE  EXT_RGBX_PIXELSIZE
+#define rgb_ycc_convert_internal  extrgbx_ycc_convert_internal
+#define rgb_gray_convert_internal  extrgbx_gray_convert_internal
+#define rgb_rgb_convert_internal  extrgbx_rgb_convert_internal
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef rgb_ycc_convert_internal
+#undef rgb_gray_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED  EXT_BGR_RED
+#define RGB_GREEN  EXT_BGR_GREEN
+#define RGB_BLUE  EXT_BGR_BLUE
+#define RGB_PIXELSIZE  EXT_BGR_PIXELSIZE
+#define rgb_ycc_convert_internal  extbgr_ycc_convert_internal
+#define rgb_gray_convert_internal  extbgr_gray_convert_internal
+#define rgb_rgb_convert_internal  extbgr_rgb_convert_internal
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef rgb_ycc_convert_internal
+#undef rgb_gray_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED  EXT_BGRX_RED
+#define RGB_GREEN  EXT_BGRX_GREEN
+#define RGB_BLUE  EXT_BGRX_BLUE
+#define RGB_PIXELSIZE  EXT_BGRX_PIXELSIZE
+#define rgb_ycc_convert_internal  extbgrx_ycc_convert_internal
+#define rgb_gray_convert_internal  extbgrx_gray_convert_internal
+#define rgb_rgb_convert_internal  extbgrx_rgb_convert_internal
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef rgb_ycc_convert_internal
+#undef rgb_gray_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED  EXT_XBGR_RED
+#define RGB_GREEN  EXT_XBGR_GREEN
+#define RGB_BLUE  EXT_XBGR_BLUE
+#define RGB_PIXELSIZE  EXT_XBGR_PIXELSIZE
+#define rgb_ycc_convert_internal  extxbgr_ycc_convert_internal
+#define rgb_gray_convert_internal  extxbgr_gray_convert_internal
+#define rgb_rgb_convert_internal  extxbgr_rgb_convert_internal
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef rgb_ycc_convert_internal
+#undef rgb_gray_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED  EXT_XRGB_RED
+#define RGB_GREEN  EXT_XRGB_GREEN
+#define RGB_BLUE  EXT_XRGB_BLUE
+#define RGB_PIXELSIZE  EXT_XRGB_PIXELSIZE
+#define rgb_ycc_convert_internal  extxrgb_ycc_convert_internal
+#define rgb_gray_convert_internal  extxrgb_gray_convert_internal
+#define rgb_rgb_convert_internal  extxrgb_rgb_convert_internal
+#include "jccolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef rgb_ycc_convert_internal
+#undef rgb_gray_convert_internal
+#undef rgb_rgb_convert_internal
+
+
+/*
+ * Initialize for RGB->YCC colorspace conversion.
+ */
+
+METHODDEF(void)
+rgb_ycc_start(j_compress_ptr cinfo)
+{
+#if BITS_IN_JSAMPLE != 16
+  my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
+  JLONG *rgb_ycc_tab;
+  JLONG i;
+
+  /* Allocate and fill in the conversion tables. */
+  cconvert->rgb_ycc_tab = rgb_ycc_tab = (JLONG *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                (TABLE_SIZE * sizeof(JLONG)));
+
+  for (i = 0; i <= _MAXJSAMPLE; i++) {
+    rgb_ycc_tab[i + R_Y_OFF] = FIX(0.29900) * i;
+    rgb_ycc_tab[i + G_Y_OFF] = FIX(0.58700) * i;
+    rgb_ycc_tab[i + B_Y_OFF] = FIX(0.11400) * i   + ONE_HALF;
+    rgb_ycc_tab[i + R_CB_OFF] = (-FIX(0.16874)) * i;
+    rgb_ycc_tab[i + G_CB_OFF] = (-FIX(0.33126)) * i;
+    /* We use a rounding fudge-factor of 0.5-epsilon for Cb and Cr.
+     * This ensures that the maximum output will round to _MAXJSAMPLE
+     * not _MAXJSAMPLE+1, and thus that we don't have to range-limit.
+     */
+    rgb_ycc_tab[i + B_CB_OFF] = FIX(0.50000) * i  + CBCR_OFFSET + ONE_HALF - 1;
+/*  B=>Cb and R=>Cr tables are the same
+    rgb_ycc_tab[i + R_CR_OFF] = FIX(0.50000) * i  + CBCR_OFFSET + ONE_HALF - 1;
+*/
+    rgb_ycc_tab[i + G_CR_OFF] = (-FIX(0.41869)) * i;
+    rgb_ycc_tab[i + B_CR_OFF] = (-FIX(0.08131)) * i;
+  }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ */
+
+METHODDEF(void)
+rgb_ycc_convert(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                _JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
+{
+  switch (cinfo->in_color_space) {
+  case JCS_EXT_RGB:
+    extrgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                num_rows);
+    break;
+  case JCS_EXT_RGBX:
+  case JCS_EXT_RGBA:
+    extrgbx_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                 num_rows);
+    break;
+  case JCS_EXT_BGR:
+    extbgr_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                num_rows);
+    break;
+  case JCS_EXT_BGRX:
+  case JCS_EXT_BGRA:
+    extbgrx_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                 num_rows);
+    break;
+  case JCS_EXT_XBGR:
+  case JCS_EXT_ABGR:
+    extxbgr_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                 num_rows);
+    break;
+  case JCS_EXT_XRGB:
+  case JCS_EXT_ARGB:
+    extxrgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                 num_rows);
+    break;
+  default:
+    rgb_ycc_convert_internal(cinfo, input_buf, output_buf, output_row,
+                             num_rows);
+    break;
+  }
+}
+
+
+/**************** Cases other than RGB -> YCbCr **************/
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ */
+
+METHODDEF(void)
+rgb_gray_convert(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                 _JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
+{
+  switch (cinfo->in_color_space) {
+  case JCS_EXT_RGB:
+    extrgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                 num_rows);
+    break;
+  case JCS_EXT_RGBX:
+  case JCS_EXT_RGBA:
+    extrgbx_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                  num_rows);
+    break;
+  case JCS_EXT_BGR:
+    extbgr_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                 num_rows);
+    break;
+  case JCS_EXT_BGRX:
+  case JCS_EXT_BGRA:
+    extbgrx_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                  num_rows);
+    break;
+  case JCS_EXT_XBGR:
+  case JCS_EXT_ABGR:
+    extxbgr_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                  num_rows);
+    break;
+  case JCS_EXT_XRGB:
+  case JCS_EXT_ARGB:
+    extxrgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                  num_rows);
+    break;
+  default:
+    rgb_gray_convert_internal(cinfo, input_buf, output_buf, output_row,
+                              num_rows);
+    break;
+  }
+}
+
+
+/*
+ * Extended RGB to plain RGB conversion
+ */
+
+METHODDEF(void)
+rgb_rgb_convert(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                _JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
+{
+  switch (cinfo->in_color_space) {
+  case JCS_EXT_RGB:
+    extrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                num_rows);
+    break;
+  case JCS_EXT_RGBX:
+  case JCS_EXT_RGBA:
+    extrgbx_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                 num_rows);
+    break;
+  case JCS_EXT_BGR:
+    extbgr_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                num_rows);
+    break;
+  case JCS_EXT_BGRX:
+  case JCS_EXT_BGRA:
+    extbgrx_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                 num_rows);
+    break;
+  case JCS_EXT_XBGR:
+  case JCS_EXT_ABGR:
+    extxbgr_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                 num_rows);
+    break;
+  case JCS_EXT_XRGB:
+  case JCS_EXT_ARGB:
+    extxrgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
+                                 num_rows);
+    break;
+  default:
+    rgb_rgb_convert_internal(cinfo, input_buf, output_buf, output_row,
+                             num_rows);
+    break;
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles Adobe-style CMYK->YCCK conversion,
+ * where we convert R=1-C, G=1-M, and B=1-Y to YCbCr using the same
+ * conversion as above, while passing K (black) unchanged.
+ * We assume rgb_ycc_start has been called.
+ */
+
+METHODDEF(void)
+cmyk_ycck_convert(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                  _JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
+{
+#if BITS_IN_JSAMPLE != 16
+  my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
+  register int r, g, b;
+  register JLONG *ctab = cconvert->rgb_ycc_tab;
+  register _JSAMPROW inptr;
+  register _JSAMPROW outptr0, outptr1, outptr2, outptr3;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr0 = output_buf[0][output_row];
+    outptr1 = output_buf[1][output_row];
+    outptr2 = output_buf[2][output_row];
+    outptr3 = output_buf[3][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      r = _MAXJSAMPLE - RANGE_LIMIT(inptr[0]);
+      g = _MAXJSAMPLE - RANGE_LIMIT(inptr[1]);
+      b = _MAXJSAMPLE - RANGE_LIMIT(inptr[2]);
+      /* K passes through as-is */
+      outptr3[col] = inptr[3];
+      inptr += 4;
+      /* If the inputs are 0.._MAXJSAMPLE, the outputs of these equations
+       * must be too; we do not need an explicit range-limiting operation.
+       * Hence the value being shifted is never negative, and we don't
+       * need the general RIGHT_SHIFT macro.
+       */
+      /* Y */
+      outptr0[col] = (_JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
+                                 ctab[b + B_Y_OFF]) >> SCALEBITS);
+      /* Cb */
+      outptr1[col] = (_JSAMPLE)((ctab[r + R_CB_OFF] + ctab[g + G_CB_OFF] +
+                                 ctab[b + B_CB_OFF]) >> SCALEBITS);
+      /* Cr */
+      outptr2[col] = (_JSAMPLE)((ctab[r + R_CR_OFF] + ctab[g + G_CR_OFF] +
+                                 ctab[b + B_CR_OFF]) >> SCALEBITS);
+    }
+  }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles grayscale output with no conversion.
+ * The source can be either plain grayscale or YCbCr (since Y == gray).
+ */
+
+METHODDEF(void)
+grayscale_convert(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                  _JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
+{
+  register _JSAMPROW inptr;
+  register _JSAMPROW outptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->image_width;
+  int instride = cinfo->input_components;
+
+  while (--num_rows >= 0) {
+    inptr = *input_buf++;
+    outptr = output_buf[0][output_row];
+    output_row++;
+    for (col = 0; col < num_cols; col++) {
+      outptr[col] = inptr[0];
+      inptr += instride;
+    }
+  }
+}
+
+
+/*
+ * Convert some rows of samples to the JPEG colorspace.
+ * This version handles multi-component colorspaces without conversion.
+ * We assume input_components == num_components.
+ */
+
+METHODDEF(void)
+null_convert(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+             _JSAMPIMAGE output_buf, JDIMENSION output_row, int num_rows)
+{
+  register _JSAMPROW inptr;
+  register _JSAMPROW outptr, outptr0, outptr1, outptr2, outptr3;
+  register JDIMENSION col;
+  register int ci;
+  int nc = cinfo->num_components;
+  JDIMENSION num_cols = cinfo->image_width;
+
+  if (nc == 3) {
+    while (--num_rows >= 0) {
+      inptr = *input_buf++;
+      outptr0 = output_buf[0][output_row];
+      outptr1 = output_buf[1][output_row];
+      outptr2 = output_buf[2][output_row];
+      output_row++;
+      for (col = 0; col < num_cols; col++) {
+        outptr0[col] = *inptr++;
+        outptr1[col] = *inptr++;
+        outptr2[col] = *inptr++;
+      }
+    }
+  } else if (nc == 4) {
+    while (--num_rows >= 0) {
+      inptr = *input_buf++;
+      outptr0 = output_buf[0][output_row];
+      outptr1 = output_buf[1][output_row];
+      outptr2 = output_buf[2][output_row];
+      outptr3 = output_buf[3][output_row];
+      output_row++;
+      for (col = 0; col < num_cols; col++) {
+        outptr0[col] = *inptr++;
+        outptr1[col] = *inptr++;
+        outptr2[col] = *inptr++;
+        outptr3[col] = *inptr++;
+      }
+    }
+  } else {
+    while (--num_rows >= 0) {
+      /* It seems fastest to make a separate pass for each component. */
+      for (ci = 0; ci < nc; ci++) {
+        inptr = *input_buf;
+        outptr = output_buf[ci][output_row];
+        for (col = 0; col < num_cols; col++) {
+          outptr[col] = inptr[ci];
+          inptr += nc;
+        }
+      }
+      input_buf++;
+      output_row++;
+    }
+  }
+}
+
+
+/*
+ * Empty method for start_pass.
+ */
+
+METHODDEF(void)
+null_method(j_compress_ptr cinfo)
+{
+  /* no work needed */
+}
+
+
+/*
+ * Module initialization routine for input colorspace conversion.
+ */
+
+GLOBAL(void)
+_jinit_color_converter(j_compress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert;
+
+#ifdef C_LOSSLESS_SUPPORTED
+  if (cinfo->master->lossless) {
+#if BITS_IN_JSAMPLE == 8
+    if (cinfo->data_precision > BITS_IN_JSAMPLE || cinfo->data_precision < 2)
+#else
+    if (cinfo->data_precision > BITS_IN_JSAMPLE ||
+        cinfo->data_precision < BITS_IN_JSAMPLE - 3)
+#endif
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  } else
+#endif
+  {
+    if (cinfo->data_precision != BITS_IN_JSAMPLE)
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  }
+
+  cconvert = (my_cconvert_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(my_color_converter));
+  cinfo->cconvert = (struct jpeg_color_converter *)cconvert;
+  /* set start_pass to null method until we find out differently */
+  cconvert->pub.start_pass = null_method;
+
+  /* Make sure input_components agrees with in_color_space */
+  switch (cinfo->in_color_space) {
+  case JCS_GRAYSCALE:
+    if (cinfo->input_components != 1)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  case JCS_RGB:
+  case JCS_EXT_RGB:
+  case JCS_EXT_RGBX:
+  case JCS_EXT_BGR:
+  case JCS_EXT_BGRX:
+  case JCS_EXT_XBGR:
+  case JCS_EXT_XRGB:
+  case JCS_EXT_RGBA:
+  case JCS_EXT_BGRA:
+  case JCS_EXT_ABGR:
+  case JCS_EXT_ARGB:
+    if (cinfo->input_components != rgb_pixelsize[cinfo->in_color_space])
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  case JCS_YCbCr:
+    if (cinfo->input_components != 3)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  case JCS_CMYK:
+  case JCS_YCCK:
+    if (cinfo->input_components != 4)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+
+  default:                      /* JCS_UNKNOWN can be anything */
+    if (cinfo->input_components < 1)
+      ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+    break;
+  }
+
+  /* Check num_components, set conversion method based on requested space.
+   * NOTE: We do not allow any lossy color conversion algorithms in lossless
+   * mode.
+   */
+  switch (cinfo->jpeg_color_space) {
+  case JCS_GRAYSCALE:
+#ifdef C_LOSSLESS_SUPPORTED
+    if (cinfo->master->lossless &&
+        cinfo->in_color_space != cinfo->jpeg_color_space)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+    if (cinfo->num_components != 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_GRAYSCALE)
+      cconvert->pub._color_convert = grayscale_convert;
+    else if (IsExtRGB(cinfo->in_color_space)) {
+#ifdef WITH_SIMD
+      if (jsimd_can_rgb_gray())
+        cconvert->pub._color_convert = jsimd_rgb_gray_convert;
+      else
+#endif
+      {
+        cconvert->pub.start_pass = rgb_ycc_start;
+        cconvert->pub._color_convert = rgb_gray_convert;
+      }
+    } else if (cinfo->in_color_space == JCS_YCbCr)
+      cconvert->pub._color_convert = grayscale_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_RGB:
+#ifdef C_LOSSLESS_SUPPORTED
+    if (cinfo->master->lossless && !IsExtRGB(cinfo->in_color_space))
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (rgb_red[cinfo->in_color_space] == 0 &&
+        rgb_green[cinfo->in_color_space] == 1 &&
+        rgb_blue[cinfo->in_color_space] == 2 &&
+        rgb_pixelsize[cinfo->in_color_space] == 3) {
+#if defined(WITH_SIMD) && defined(__mips__)
+      if (jsimd_c_can_null_convert())
+        cconvert->pub._color_convert = jsimd_c_null_convert;
+      else
+#endif
+        cconvert->pub._color_convert = null_convert;
+    } else if (IsExtRGB(cinfo->in_color_space))
+      cconvert->pub._color_convert = rgb_rgb_convert;
+    else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_YCbCr:
+#ifdef C_LOSSLESS_SUPPORTED
+    if (cinfo->master->lossless &&
+        cinfo->in_color_space != cinfo->jpeg_color_space)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (IsExtRGB(cinfo->in_color_space)) {
+#ifdef WITH_SIMD
+      if (jsimd_can_rgb_ycc())
+        cconvert->pub._color_convert = jsimd_rgb_ycc_convert;
+      else
+#endif
+      {
+        cconvert->pub.start_pass = rgb_ycc_start;
+        cconvert->pub._color_convert = rgb_ycc_convert;
+      }
+    } else if (cinfo->in_color_space == JCS_YCbCr) {
+#if defined(WITH_SIMD) && defined(__mips__)
+      if (jsimd_c_can_null_convert())
+        cconvert->pub._color_convert = jsimd_c_null_convert;
+      else
+#endif
+        cconvert->pub._color_convert = null_convert;
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_CMYK:
+#ifdef C_LOSSLESS_SUPPORTED
+    if (cinfo->master->lossless &&
+        cinfo->in_color_space != cinfo->jpeg_color_space)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_CMYK) {
+#if defined(WITH_SIMD) && defined(__mips__)
+      if (jsimd_c_can_null_convert())
+        cconvert->pub._color_convert = jsimd_c_null_convert;
+      else
+#endif
+        cconvert->pub._color_convert = null_convert;
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_YCCK:
+#ifdef C_LOSSLESS_SUPPORTED
+    if (cinfo->master->lossless &&
+        cinfo->in_color_space != cinfo->jpeg_color_space)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    if (cinfo->in_color_space == JCS_CMYK) {
+      cconvert->pub.start_pass = rgb_ycc_start;
+      cconvert->pub._color_convert = cmyk_ycck_convert;
+    } else if (cinfo->in_color_space == JCS_YCCK) {
+#if defined(WITH_SIMD) && defined(__mips__)
+      if (jsimd_c_can_null_convert())
+        cconvert->pub._color_convert = jsimd_c_null_convert;
+      else
+#endif
+        cconvert->pub._color_convert = null_convert;
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  default:                      /* allow null conversion of JCS_UNKNOWN */
+    if (cinfo->jpeg_color_space != cinfo->in_color_space ||
+        cinfo->num_components != cinfo->input_components)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#if defined(WITH_SIMD) && defined(__mips__)
+    if (jsimd_c_can_null_convert())
+      cconvert->pub._color_convert = jsimd_c_null_convert;
+    else
+#endif
+      cconvert->pub._color_convert = null_convert;
+    break;
+  }
+}
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED) */
diff --git a/thirdparty/libjpeg-turbo/src/jcdctmgr.c b/thirdparty/libjpeg-turbo/src/jcdctmgr.c
new file mode 100644
index 00000000000..dbdbad6a28e
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jcdctmgr.c
@@ -0,0 +1,748 @@
+/*
+ * jcdctmgr.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2011, 2014-2015, 2022, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains the forward-DCT management logic.
+ * This code selects a particular DCT implementation to be used,
+ * and it performs related housekeeping chores including coefficient
+ * quantization.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"               /* Private declarations for DCT subsystem */
+#include "jsimddct.h"
+
+
+/* Private subobject for this module */
+
+typedef void (*forward_DCT_method_ptr) (DCTELEM *data);
+typedef void (*float_DCT_method_ptr) (FAST_FLOAT *data);
+
+typedef void (*convsamp_method_ptr) (_JSAMPARRAY sample_data,
+                                     JDIMENSION start_col,
+                                     DCTELEM *workspace);
+typedef void (*float_convsamp_method_ptr) (_JSAMPARRAY sample_data,
+                                           JDIMENSION start_col,
+                                           FAST_FLOAT *workspace);
+
+typedef void (*quantize_method_ptr) (JCOEFPTR coef_block, DCTELEM *divisors,
+                                     DCTELEM *workspace);
+typedef void (*float_quantize_method_ptr) (JCOEFPTR coef_block,
+                                           FAST_FLOAT *divisors,
+                                           FAST_FLOAT *workspace);
+
+METHODDEF(void) quantize(JCOEFPTR, DCTELEM *, DCTELEM *);
+
+typedef struct {
+  struct jpeg_forward_dct pub;  /* public fields */
+
+  /* Pointer to the DCT routine actually in use */
+  forward_DCT_method_ptr dct;
+  convsamp_method_ptr convsamp;
+  quantize_method_ptr quantize;
+
+  /* The actual post-DCT divisors --- not identical to the quant table
+   * entries, because of scaling (especially for an unnormalized DCT).
+   * Each table is given in normal array order.
+   */
+  DCTELEM *divisors[NUM_QUANT_TBLS];
+
+  /* work area for FDCT subroutine */
+  DCTELEM *workspace;
+
+#ifdef DCT_FLOAT_SUPPORTED
+  /* Same as above for the floating-point case. */
+  float_DCT_method_ptr float_dct;
+  float_convsamp_method_ptr float_convsamp;
+  float_quantize_method_ptr float_quantize;
+  FAST_FLOAT *float_divisors[NUM_QUANT_TBLS];
+  FAST_FLOAT *float_workspace;
+#endif
+} my_fdct_controller;
+
+typedef my_fdct_controller *my_fdct_ptr;
+
+
+#if BITS_IN_JSAMPLE == 8
+
+/*
+ * Find the highest bit in an integer through binary search.
+ */
+
+LOCAL(int)
+flss(UINT16 val)
+{
+  int bit;
+
+  bit = 16;
+
+  if (!val)
+    return 0;
+
+  if (!(val & 0xff00)) {
+    bit -= 8;
+    val <<= 8;
+  }
+  if (!(val & 0xf000)) {
+    bit -= 4;
+    val <<= 4;
+  }
+  if (!(val & 0xc000)) {
+    bit -= 2;
+    val <<= 2;
+  }
+  if (!(val & 0x8000)) {
+    bit -= 1;
+    val <<= 1;
+  }
+
+  return bit;
+}
+
+
+/*
+ * Compute values to do a division using reciprocal.
+ *
+ * This implementation is based on an algorithm described in
+ *   "Optimizing subroutines in assembly language:
+ *   An optimization guide for x86 platforms" (https://agner.org/optimize).
+ * More information about the basic algorithm can be found in
+ * the paper "Integer Division Using Reciprocals" by Robert Alverson.
+ *
+ * The basic idea is to replace x/d by x * d^-1. In order to store
+ * d^-1 with enough precision we shift it left a few places. It turns
+ * out that this algoright gives just enough precision, and also fits
+ * into DCTELEM:
+ *
+ *   b = (the number of significant bits in divisor) - 1
+ *   r = (word size) + b
+ *   f = 2^r / divisor
+ *
+ * f will not be an integer for most cases, so we need to compensate
+ * for the rounding error introduced:
+ *
+ *   no fractional part:
+ *
+ *       result = input >> r
+ *
+ *   fractional part of f < 0.5:
+ *
+ *       round f down to nearest integer
+ *       result = ((input + 1) * f) >> r
+ *
+ *   fractional part of f > 0.5:
+ *
+ *       round f up to nearest integer
+ *       result = (input * f) >> r
+ *
+ * This is the original algorithm that gives truncated results. But we
+ * want properly rounded results, so we replace "input" with
+ * "input + divisor/2".
+ *
+ * In order to allow SIMD implementations we also tweak the values to
+ * allow the same calculation to be made at all times:
+ *
+ *   dctbl[0] = f rounded to nearest integer
+ *   dctbl[1] = divisor / 2 (+ 1 if fractional part of f < 0.5)
+ *   dctbl[2] = 1 << ((word size) * 2 - r)
+ *   dctbl[3] = r - (word size)
+ *
+ * dctbl[2] is for stupid instruction sets where the shift operation
+ * isn't member wise (e.g. MMX).
+ *
+ * The reason dctbl[2] and dctbl[3] reduce the shift with (word size)
+ * is that most SIMD implementations have a "multiply and store top
+ * half" operation.
+ *
+ * Lastly, we store each of the values in their own table instead
+ * of in a consecutive manner, yet again in order to allow SIMD
+ * routines.
+ */
+
+LOCAL(int)
+compute_reciprocal(UINT16 divisor, DCTELEM *dtbl)
+{
+  UDCTELEM2 fq, fr;
+  UDCTELEM c;
+  int b, r;
+
+  if (divisor == 1) {
+    /* divisor == 1 means unquantized, so these reciprocal/correction/shift
+     * values will cause the C quantization algorithm to act like the
+     * identity function.  Since only the C quantization algorithm is used in
+     * these cases, the scale value is irrelevant.
+     */
+    dtbl[DCTSIZE2 * 0] = (DCTELEM)1;                        /* reciprocal */
+    dtbl[DCTSIZE2 * 1] = (DCTELEM)0;                        /* correction */
+    dtbl[DCTSIZE2 * 2] = (DCTELEM)1;                        /* scale */
+    dtbl[DCTSIZE2 * 3] = -(DCTELEM)(sizeof(DCTELEM) * 8);   /* shift */
+    return 0;
+  }
+
+  b = flss(divisor) - 1;
+  r  = sizeof(DCTELEM) * 8 + b;
+
+  fq = ((UDCTELEM2)1 << r) / divisor;
+  fr = ((UDCTELEM2)1 << r) % divisor;
+
+  c = divisor / 2;                      /* for rounding */
+
+  if (fr == 0) {                        /* divisor is power of two */
+    /* fq will be one bit too large to fit in DCTELEM, so adjust */
+    fq >>= 1;
+    r--;
+  } else if (fr <= (divisor / 2U)) {    /* fractional part is < 0.5 */
+    c++;
+  } else {                              /* fractional part is > 0.5 */
+    fq++;
+  }
+
+  dtbl[DCTSIZE2 * 0] = (DCTELEM)fq;     /* reciprocal */
+  dtbl[DCTSIZE2 * 1] = (DCTELEM)c;      /* correction + roundfactor */
+#ifdef WITH_SIMD
+  dtbl[DCTSIZE2 * 2] = (DCTELEM)(1 << (sizeof(DCTELEM) * 8 * 2 - r)); /* scale */
+#else
+  dtbl[DCTSIZE2 * 2] = 1;
+#endif
+  dtbl[DCTSIZE2 * 3] = (DCTELEM)r - sizeof(DCTELEM) * 8; /* shift */
+
+  if (r <= 16) return 0;
+  else return 1;
+}
+
+#endif
+
+
+/*
+ * Initialize for a processing pass.
+ * Verify that all referenced Q-tables are present, and set up
+ * the divisor table for each one.
+ * In the current implementation, DCT of all components is done during
+ * the first pass, even if only some components will be output in the
+ * first scan.  Hence all components should be examined here.
+ */
+
+METHODDEF(void)
+start_pass_fdctmgr(j_compress_ptr cinfo)
+{
+  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
+  int ci, qtblno, i;
+  jpeg_component_info *compptr;
+  JQUANT_TBL *qtbl;
+  DCTELEM *dtbl;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    qtblno = compptr->quant_tbl_no;
+    /* Make sure specified quantization table is present */
+    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
+        cinfo->quant_tbl_ptrs[qtblno] == NULL)
+      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
+    qtbl = cinfo->quant_tbl_ptrs[qtblno];
+    /* Compute divisors for this quant table */
+    /* We may do this more than once for same table, but it's not a big deal */
+    switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+    case JDCT_ISLOW:
+      /* For LL&M IDCT method, divisors are equal to raw quantization
+       * coefficients multiplied by 8 (to counteract scaling).
+       */
+      if (fdct->divisors[qtblno] == NULL) {
+        fdct->divisors[qtblno] = (DCTELEM *)
+          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                      (DCTSIZE2 * 4) * sizeof(DCTELEM));
+      }
+      dtbl = fdct->divisors[qtblno];
+      for (i = 0; i < DCTSIZE2; i++) {
+#if BITS_IN_JSAMPLE == 8
+#ifdef WITH_SIMD
+        if (!compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]) &&
+            fdct->quantize == jsimd_quantize)
+          fdct->quantize = quantize;
+#else
+        compute_reciprocal(qtbl->quantval[i] << 3, &dtbl[i]);
+#endif
+#else
+        dtbl[i] = ((DCTELEM)qtbl->quantval[i]) << 3;
+#endif
+      }
+      break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+    case JDCT_IFAST:
+      {
+        /* For AA&N IDCT method, divisors are equal to quantization
+         * coefficients scaled by scalefactor[row]*scalefactor[col], where
+         *   scalefactor[0] = 1
+         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+         * We apply a further scale factor of 8.
+         */
+#define CONST_BITS  14
+        static const INT16 aanscales[DCTSIZE2] = {
+          /* precomputed values scaled up by 14 bits */
+          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+        };
+        SHIFT_TEMPS
+
+        if (fdct->divisors[qtblno] == NULL) {
+          fdct->divisors[qtblno] = (DCTELEM *)
+            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                        (DCTSIZE2 * 4) * sizeof(DCTELEM));
+        }
+        dtbl = fdct->divisors[qtblno];
+        for (i = 0; i < DCTSIZE2; i++) {
+#if BITS_IN_JSAMPLE == 8
+#ifdef WITH_SIMD
+          if (!compute_reciprocal(
+                DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
+                                      (JLONG)aanscales[i]),
+                        CONST_BITS - 3), &dtbl[i]) &&
+              fdct->quantize == jsimd_quantize)
+            fdct->quantize = quantize;
+#else
+          compute_reciprocal(
+            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
+                                  (JLONG)aanscales[i]),
+                    CONST_BITS-3), &dtbl[i]);
+#endif
+#else
+          dtbl[i] = (DCTELEM)
+            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
+                                  (JLONG)aanscales[i]),
+                    CONST_BITS - 3);
+#endif
+        }
+      }
+      break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+    case JDCT_FLOAT:
+      {
+        /* For float AA&N IDCT method, divisors are equal to quantization
+         * coefficients scaled by scalefactor[row]*scalefactor[col], where
+         *   scalefactor[0] = 1
+         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+         * We apply a further scale factor of 8.
+         * What's actually stored is 1/divisor so that the inner loop can
+         * use a multiplication rather than a division.
+         */
+        FAST_FLOAT *fdtbl;
+        int row, col;
+        static const double aanscalefactor[DCTSIZE] = {
+          1.0, 1.387039845, 1.306562965, 1.175875602,
+          1.0, 0.785694958, 0.541196100, 0.275899379
+        };
+
+        if (fdct->float_divisors[qtblno] == NULL) {
+          fdct->float_divisors[qtblno] = (FAST_FLOAT *)
+            (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                        DCTSIZE2 * sizeof(FAST_FLOAT));
+        }
+        fdtbl = fdct->float_divisors[qtblno];
+        i = 0;
+        for (row = 0; row < DCTSIZE; row++) {
+          for (col = 0; col < DCTSIZE; col++) {
+            fdtbl[i] = (FAST_FLOAT)
+              (1.0 / (((double)qtbl->quantval[i] *
+                       aanscalefactor[row] * aanscalefactor[col] * 8.0)));
+            i++;
+          }
+        }
+      }
+      break;
+#endif
+    default:
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+      break;
+    }
+  }
+}
+
+
+/*
+ * Load data into workspace, applying unsigned->signed conversion.
+ */
+
+METHODDEF(void)
+convsamp(_JSAMPARRAY sample_data, JDIMENSION start_col, DCTELEM *workspace)
+{
+  register DCTELEM *workspaceptr;
+  register _JSAMPROW elemptr;
+  register int elemr;
+
+  workspaceptr = workspace;
+  for (elemr = 0; elemr < DCTSIZE; elemr++) {
+    elemptr = sample_data[elemr] + start_col;
+
+#if DCTSIZE == 8                /* unroll the inner loop */
+    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
+    *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
+#else
+    {
+      register int elemc;
+      for (elemc = DCTSIZE; elemc > 0; elemc--)
+        *workspaceptr++ = (*elemptr++) - _CENTERJSAMPLE;
+    }
+#endif
+  }
+}
+
+
+/*
+ * Quantize/descale the coefficients, and store into coef_blocks[].
+ */
+
+METHODDEF(void)
+quantize(JCOEFPTR coef_block, DCTELEM *divisors, DCTELEM *workspace)
+{
+  int i;
+  DCTELEM temp;
+  JCOEFPTR output_ptr = coef_block;
+
+#if BITS_IN_JSAMPLE == 8
+
+  UDCTELEM recip, corr;
+  int shift;
+  UDCTELEM2 product;
+
+  for (i = 0; i < DCTSIZE2; i++) {
+    temp = workspace[i];
+    recip = divisors[i + DCTSIZE2 * 0];
+    corr =  divisors[i + DCTSIZE2 * 1];
+    shift = divisors[i + DCTSIZE2 * 3];
+
+    if (temp < 0) {
+      temp = -temp;
+      product = (UDCTELEM2)(temp + corr) * recip;
+      product >>= shift + sizeof(DCTELEM) * 8;
+      temp = (DCTELEM)product;
+      temp = -temp;
+    } else {
+      product = (UDCTELEM2)(temp + corr) * recip;
+      product >>= shift + sizeof(DCTELEM) * 8;
+      temp = (DCTELEM)product;
+    }
+    output_ptr[i] = (JCOEF)temp;
+  }
+
+#else
+
+  register DCTELEM qval;
+
+  for (i = 0; i < DCTSIZE2; i++) {
+    qval = divisors[i];
+    temp = workspace[i];
+    /* Divide the coefficient value by qval, ensuring proper rounding.
+     * Since C does not specify the direction of rounding for negative
+     * quotients, we have to force the dividend positive for portability.
+     *
+     * In most files, at least half of the output values will be zero
+     * (at default quantization settings, more like three-quarters...)
+     * so we should ensure that this case is fast.  On many machines,
+     * a comparison is enough cheaper than a divide to make a special test
+     * a win.  Since both inputs will be nonnegative, we need only test
+     * for a < b to discover whether a/b is 0.
+     * If your machine's division is fast enough, define FAST_DIVIDE.
+     */
+#ifdef FAST_DIVIDE
+#define DIVIDE_BY(a, b)  a /= b
+#else
+#define DIVIDE_BY(a, b)  if (a >= b) a /= b;  else a = 0
+#endif
+    if (temp < 0) {
+      temp = -temp;
+      temp += qval >> 1;        /* for rounding */
+      DIVIDE_BY(temp, qval);
+      temp = -temp;
+    } else {
+      temp += qval >> 1;        /* for rounding */
+      DIVIDE_BY(temp, qval);
+    }
+    output_ptr[i] = (JCOEF)temp;
+  }
+
+#endif
+
+}
+
+
+/*
+ * Perform forward DCT on one or more blocks of a component.
+ *
+ * The input samples are taken from the sample_data[] array starting at
+ * position start_row/start_col, and moving to the right for any additional
+ * blocks. The quantized coefficients are returned in coef_blocks[].
+ */
+
+METHODDEF(void)
+forward_DCT(j_compress_ptr cinfo, jpeg_component_info *compptr,
+            _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+            JDIMENSION start_row, JDIMENSION start_col, JDIMENSION num_blocks)
+/* This version is used for integer DCT implementations. */
+{
+  /* This routine is heavily used, so it's worth coding it tightly. */
+  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
+  DCTELEM *divisors = fdct->divisors[compptr->quant_tbl_no];
+  DCTELEM *workspace;
+  JDIMENSION bi;
+
+  /* Make sure the compiler doesn't look up these every pass */
+  forward_DCT_method_ptr do_dct = fdct->dct;
+  convsamp_method_ptr do_convsamp = fdct->convsamp;
+  quantize_method_ptr do_quantize = fdct->quantize;
+  workspace = fdct->workspace;
+
+  sample_data += start_row;     /* fold in the vertical offset once */
+
+  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
+    /* Load data into workspace, applying unsigned->signed conversion */
+    (*do_convsamp) (sample_data, start_col, workspace);
+
+    /* Perform the DCT */
+    (*do_dct) (workspace);
+
+    /* Quantize/descale the coefficients, and store into coef_blocks[] */
+    (*do_quantize) (coef_blocks[bi], divisors, workspace);
+  }
+}
+
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+METHODDEF(void)
+convsamp_float(_JSAMPARRAY sample_data, JDIMENSION start_col,
+               FAST_FLOAT *workspace)
+{
+  register FAST_FLOAT *workspaceptr;
+  register _JSAMPROW elemptr;
+  register int elemr;
+
+  workspaceptr = workspace;
+  for (elemr = 0; elemr < DCTSIZE; elemr++) {
+    elemptr = sample_data[elemr] + start_col;
+#if DCTSIZE == 8                /* unroll the inner loop */
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
+    *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
+#else
+    {
+      register int elemc;
+      for (elemc = DCTSIZE; elemc > 0; elemc--)
+        *workspaceptr++ = (FAST_FLOAT)((*elemptr++) - _CENTERJSAMPLE);
+    }
+#endif
+  }
+}
+
+
+METHODDEF(void)
+quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
+               FAST_FLOAT *workspace)
+{
+  register FAST_FLOAT temp;
+  register int i;
+  register JCOEFPTR output_ptr = coef_block;
+
+  for (i = 0; i < DCTSIZE2; i++) {
+    /* Apply the quantization and scaling factor */
+    temp = workspace[i] * divisors[i];
+
+    /* Round to nearest integer.
+     * Since C does not specify the direction of rounding for negative
+     * quotients, we have to force the dividend positive for portability.
+     * The maximum coefficient size is +-16K (for 12-bit data), so this
+     * code should work for either 16-bit or 32-bit ints.
+     */
+    output_ptr[i] = (JCOEF)((int)(temp + (FAST_FLOAT)16384.5) - 16384);
+  }
+}
+
+
+METHODDEF(void)
+forward_DCT_float(j_compress_ptr cinfo, jpeg_component_info *compptr,
+                  _JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+                  JDIMENSION start_row, JDIMENSION start_col,
+                  JDIMENSION num_blocks)
+/* This version is used for floating-point DCT implementations. */
+{
+  /* This routine is heavily used, so it's worth coding it tightly. */
+  my_fdct_ptr fdct = (my_fdct_ptr)cinfo->fdct;
+  FAST_FLOAT *divisors = fdct->float_divisors[compptr->quant_tbl_no];
+  FAST_FLOAT *workspace;
+  JDIMENSION bi;
+
+
+  /* Make sure the compiler doesn't look up these every pass */
+  float_DCT_method_ptr do_dct = fdct->float_dct;
+  float_convsamp_method_ptr do_convsamp = fdct->float_convsamp;
+  float_quantize_method_ptr do_quantize = fdct->float_quantize;
+  workspace = fdct->float_workspace;
+
+  sample_data += start_row;     /* fold in the vertical offset once */
+
+  for (bi = 0; bi < num_blocks; bi++, start_col += DCTSIZE) {
+    /* Load data into workspace, applying unsigned->signed conversion */
+    (*do_convsamp) (sample_data, start_col, workspace);
+
+    /* Perform the DCT */
+    (*do_dct) (workspace);
+
+    /* Quantize/descale the coefficients, and store into coef_blocks[] */
+    (*do_quantize) (coef_blocks[bi], divisors, workspace);
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
+
+
+/*
+ * Initialize FDCT manager.
+ */
+
+GLOBAL(void)
+_jinit_forward_dct(j_compress_ptr cinfo)
+{
+  my_fdct_ptr fdct;
+  int i;
+
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  fdct = (my_fdct_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(my_fdct_controller));
+  cinfo->fdct = (struct jpeg_forward_dct *)fdct;
+  fdct->pub.start_pass = start_pass_fdctmgr;
+
+  /* First determine the DCT... */
+  switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+  case JDCT_ISLOW:
+    fdct->pub._forward_DCT = forward_DCT;
+#ifdef WITH_SIMD
+    if (jsimd_can_fdct_islow())
+      fdct->dct = jsimd_fdct_islow;
+    else
+#endif
+      fdct->dct = _jpeg_fdct_islow;
+    break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+  case JDCT_IFAST:
+    fdct->pub._forward_DCT = forward_DCT;
+#ifdef WITH_SIMD
+    if (jsimd_can_fdct_ifast())
+      fdct->dct = jsimd_fdct_ifast;
+    else
+#endif
+      fdct->dct = _jpeg_fdct_ifast;
+    break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+  case JDCT_FLOAT:
+    fdct->pub._forward_DCT = forward_DCT_float;
+#ifdef WITH_SIMD
+    if (jsimd_can_fdct_float())
+      fdct->float_dct = jsimd_fdct_float;
+    else
+#endif
+      fdct->float_dct = jpeg_fdct_float;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+    break;
+  }
+
+  /* ...then the supporting stages. */
+  switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+  case JDCT_ISLOW:
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+  case JDCT_IFAST:
+#endif
+#if defined(DCT_ISLOW_SUPPORTED) || defined(DCT_IFAST_SUPPORTED)
+#ifdef WITH_SIMD
+    if (jsimd_can_convsamp())
+      fdct->convsamp = jsimd_convsamp;
+    else
+#endif
+      fdct->convsamp = convsamp;
+#ifdef WITH_SIMD
+    if (jsimd_can_quantize())
+      fdct->quantize = jsimd_quantize;
+    else
+#endif
+      fdct->quantize = quantize;
+    break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+  case JDCT_FLOAT:
+#ifdef WITH_SIMD
+    if (jsimd_can_convsamp_float())
+      fdct->float_convsamp = jsimd_convsamp_float;
+    else
+#endif
+      fdct->float_convsamp = convsamp_float;
+#ifdef WITH_SIMD
+    if (jsimd_can_quantize_float())
+      fdct->float_quantize = jsimd_quantize_float;
+    else
+#endif
+      fdct->float_quantize = quantize_float;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+    break;
+  }
+
+  /* Allocate workspace memory */
+#ifdef DCT_FLOAT_SUPPORTED
+  if (cinfo->dct_method == JDCT_FLOAT)
+    fdct->float_workspace = (FAST_FLOAT *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                  sizeof(FAST_FLOAT) * DCTSIZE2);
+  else
+#endif
+    fdct->workspace = (DCTELEM *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                  sizeof(DCTELEM) * DCTSIZE2);
+
+  /* Mark divisor tables unallocated */
+  for (i = 0; i < NUM_QUANT_TBLS; i++) {
+    fdct->divisors[i] = NULL;
+#ifdef DCT_FLOAT_SUPPORTED
+    fdct->float_divisors[i] = NULL;
+#endif
+  }
+}
diff --git a/thirdparty/libjpeg-turbo/src/jchuff.c b/thirdparty/libjpeg-turbo/src/jchuff.c
new file mode 100644
index 00000000000..8cdd5bd35dc
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jchuff.c
@@ -0,0 +1,1175 @@
+/*
+ * jchuff.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009-2011, 2014-2016, 2018-2024, D. R. Commander.
+ * Copyright (C) 2015, Matthieu Darbois.
+ * Copyright (C) 2018, Matthias Räncker.
+ * Copyright (C) 2020, Arm Limited.
+ * Copyright (C) 2022, Felix Hanau.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains Huffman entropy encoding routines.
+ *
+ * Much of the complexity here has to do with supporting output suspension.
+ * If the data destination module demands suspension, we want to be able to
+ * back up to the start of the current MCU.  To do this, we copy state
+ * variables into local working storage, and update them back to the
+ * permanent JPEG objects only upon successful completion of an MCU.
+ *
+ * NOTE: All referenced figures are from
+ * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#ifdef WITH_SIMD
+#include "jsimd.h"
+#else
+#include "jchuff.h"             /* Declarations shared with jc*huff.c */
+#endif
+#include <limits.h>
+#include "jpeg_nbits.h"
+
+
+/* Expanded entropy encoder object for Huffman encoding.
+ *
+ * The savable_state subrecord contains fields that change within an MCU,
+ * but must not be updated permanently until we complete the MCU.
+ */
+
+#if defined(__x86_64__) && defined(__ILP32__)
+typedef unsigned long long bit_buf_type;
+#else
+typedef size_t bit_buf_type;
+#endif
+
+/* NOTE: The more optimal Huffman encoding algorithm is only used by the
+ * intrinsics implementation of the Arm Neon SIMD extensions, which is why we
+ * retain the old Huffman encoder behavior when using the GAS implementation.
+ */
+#if defined(WITH_SIMD) && !(defined(__arm__) || defined(__aarch64__) || \
+                            defined(_M_ARM) || defined(_M_ARM64))
+typedef unsigned long long simd_bit_buf_type;
+#else
+typedef bit_buf_type simd_bit_buf_type;
+#endif
+
+#if (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 8) || defined(_WIN64) || \
+    (defined(__x86_64__) && defined(__ILP32__))
+#define BIT_BUF_SIZE  64
+#elif (defined(SIZEOF_SIZE_T) && SIZEOF_SIZE_T == 4) || defined(_WIN32)
+#define BIT_BUF_SIZE  32
+#else
+#error Cannot determine word size
+#endif
+#define SIMD_BIT_BUF_SIZE  (sizeof(simd_bit_buf_type) * 8)
+
+typedef struct {
+  union {
+    bit_buf_type c;
+#ifdef WITH_SIMD
+    simd_bit_buf_type simd;
+#endif
+  } put_buffer;                         /* current bit accumulation buffer */
+  int free_bits;                        /* # of bits available in it */
+                                        /* (Neon GAS: # of bits now in it) */
+  int last_dc_val[MAX_COMPS_IN_SCAN];   /* last DC coef for each component */
+} savable_state;
+
+typedef struct {
+  struct jpeg_entropy_encoder pub; /* public fields */
+
+  savable_state saved;          /* Bit buffer & DC state at start of MCU */
+
+  /* These fields are NOT loaded into local working state. */
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
+  int next_restart_num;         /* next restart number to write (0-7) */
+
+  /* Pointers to derived tables (these workspaces have image lifespan) */
+  c_derived_tbl *dc_derived_tbls[NUM_HUFF_TBLS];
+  c_derived_tbl *ac_derived_tbls[NUM_HUFF_TBLS];
+
+#ifdef ENTROPY_OPT_SUPPORTED    /* Statistics tables for optimization */
+  long *dc_count_ptrs[NUM_HUFF_TBLS];
+  long *ac_count_ptrs[NUM_HUFF_TBLS];
+#endif
+
+#ifdef WITH_SIMD
+  int simd;
+#endif
+} huff_entropy_encoder;
+
+typedef huff_entropy_encoder *huff_entropy_ptr;
+
+/* Working state while writing an MCU.
+ * This struct contains all the fields that are needed by subroutines.
+ */
+
+typedef struct {
+  JOCTET *next_output_byte;     /* => next byte to write in buffer */
+  size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
+  savable_state cur;            /* Current bit buffer & DC state */
+  j_compress_ptr cinfo;         /* dump_buffer needs access to this */
+#ifdef WITH_SIMD
+  int simd;
+#endif
+} working_state;
+
+
+/* Forward declarations */
+METHODDEF(boolean) encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data);
+METHODDEF(void) finish_pass_huff(j_compress_ptr cinfo);
+#ifdef ENTROPY_OPT_SUPPORTED
+METHODDEF(boolean) encode_mcu_gather(j_compress_ptr cinfo,
+                                     JBLOCKROW *MCU_data);
+METHODDEF(void) finish_pass_gather(j_compress_ptr cinfo);
+#endif
+
+
+/*
+ * Initialize for a Huffman-compressed scan.
+ * If gather_statistics is TRUE, we do not output anything during the scan,
+ * just count the Huffman symbols used and generate Huffman code tables.
+ */
+
+METHODDEF(void)
+start_pass_huff(j_compress_ptr cinfo, boolean gather_statistics)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
+  int ci, dctbl, actbl;
+  jpeg_component_info *compptr;
+
+  if (gather_statistics) {
+#ifdef ENTROPY_OPT_SUPPORTED
+    entropy->pub.encode_mcu = encode_mcu_gather;
+    entropy->pub.finish_pass = finish_pass_gather;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    entropy->pub.encode_mcu = encode_mcu_huff;
+    entropy->pub.finish_pass = finish_pass_huff;
+  }
+
+#ifdef WITH_SIMD
+  entropy->simd = jsimd_can_huff_encode_one_block();
+#endif
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    dctbl = compptr->dc_tbl_no;
+    actbl = compptr->ac_tbl_no;
+    if (gather_statistics) {
+#ifdef ENTROPY_OPT_SUPPORTED
+      /* Check for invalid table indexes */
+      /* (make_c_derived_tbl does this in the other path) */
+      if (dctbl < 0 || dctbl >= NUM_HUFF_TBLS)
+        ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, dctbl);
+      if (actbl < 0 || actbl >= NUM_HUFF_TBLS)
+        ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, actbl);
+      /* Allocate and zero the statistics tables */
+      /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
+      if (entropy->dc_count_ptrs[dctbl] == NULL)
+        entropy->dc_count_ptrs[dctbl] = (long *)
+          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                      257 * sizeof(long));
+      memset(entropy->dc_count_ptrs[dctbl], 0, 257 * sizeof(long));
+      if (entropy->ac_count_ptrs[actbl] == NULL)
+        entropy->ac_count_ptrs[actbl] = (long *)
+          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                      257 * sizeof(long));
+      memset(entropy->ac_count_ptrs[actbl], 0, 257 * sizeof(long));
+#endif
+    } else {
+      /* Compute derived values for Huffman tables */
+      /* We may do this more than once for a table, but it's not expensive */
+      jpeg_make_c_derived_tbl(cinfo, TRUE, dctbl,
+                              &entropy->dc_derived_tbls[dctbl]);
+      jpeg_make_c_derived_tbl(cinfo, FALSE, actbl,
+                              &entropy->ac_derived_tbls[actbl]);
+    }
+    /* Initialize DC predictions to 0 */
+    entropy->saved.last_dc_val[ci] = 0;
+  }
+
+  /* Initialize bit buffer to empty */
+#ifdef WITH_SIMD
+  if (entropy->simd) {
+    entropy->saved.put_buffer.simd = 0;
+#if defined(__aarch64__) && !defined(NEON_INTRINSICS)
+    entropy->saved.free_bits = 0;
+#else
+    entropy->saved.free_bits = SIMD_BIT_BUF_SIZE;
+#endif
+  } else
+#endif
+  {
+    entropy->saved.put_buffer.c = 0;
+    entropy->saved.free_bits = BIT_BUF_SIZE;
+  }
+
+  /* Initialize restart stuff */
+  entropy->restarts_to_go = cinfo->restart_interval;
+  entropy->next_restart_num = 0;
+}
+
+
+/*
+ * Compute the derived values for a Huffman table.
+ * This routine also performs some validation checks on the table.
+ *
+ * Note this is also used by jcphuff.c and jclhuff.c.
+ */
+
+GLOBAL(void)
+jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC, int tblno,
+                        c_derived_tbl **pdtbl)
+{
+  JHUFF_TBL *htbl;
+  c_derived_tbl *dtbl;
+  int p, i, l, lastp, si, maxsymbol;
+  char huffsize[257];
+  unsigned int huffcode[257];
+  unsigned int code;
+
+  /* Note that huffsize[] and huffcode[] are filled in code-length order,
+   * paralleling the order of the symbols themselves in htbl->huffval[].
+   */
+
+  /* Find the input Huffman table */
+  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+  htbl =
+    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
+  if (htbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+
+  /* Allocate a workspace if we haven't already done so. */
+  if (*pdtbl == NULL)
+    *pdtbl = (c_derived_tbl *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                  sizeof(c_derived_tbl));
+  dtbl = *pdtbl;
+
+  /* Figure C.1: make table of Huffman code length for each symbol */
+
+  p = 0;
+  for (l = 1; l <= 16; l++) {
+    i = (int)htbl->bits[l];
+    if (i < 0 || p + i > 256)   /* protect against table overrun */
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    while (i--)
+      huffsize[p++] = (char)l;
+  }
+  huffsize[p] = 0;
+  lastp = p;
+
+  /* Figure C.2: generate the codes themselves */
+  /* We also validate that the counts represent a legal Huffman code tree. */
+
+  code = 0;
+  si = huffsize[0];
+  p = 0;
+  while (huffsize[p]) {
+    while (((int)huffsize[p]) == si) {
+      huffcode[p++] = code;
+      code++;
+    }
+    /* code is now 1 more than the last code used for codelength si; but
+     * it must still fit in si bits, since no code is allowed to be all ones.
+     */
+    if (((JLONG)code) >= (((JLONG)1) << si))
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    code <<= 1;
+    si++;
+  }
+
+  /* Figure C.3: generate encoding tables */
+  /* These are code and size indexed by symbol value */
+
+  /* Set all codeless symbols to have code length 0;
+   * this lets us detect duplicate VAL entries here, and later
+   * allows emit_bits to detect any attempt to emit such symbols.
+   */
+  memset(dtbl->ehufco, 0, sizeof(dtbl->ehufco));
+  memset(dtbl->ehufsi, 0, sizeof(dtbl->ehufsi));
+
+  /* This is also a convenient place to check for out-of-range and duplicated
+   * VAL entries.  We allow 0..255 for AC symbols but only 0..15 for DC in
+   * lossy mode and 0..16 for DC in lossless mode.  (We could constrain them
+   * further based on data depth and mode, but this seems enough.)
+   */
+  maxsymbol = isDC ? (cinfo->master->lossless ? 16 : 15) : 255;
+
+  for (p = 0; p < lastp; p++) {
+    i = htbl->huffval[p];
+    if (i < 0 || i > maxsymbol || dtbl->ehufsi[i])
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    dtbl->ehufco[i] = huffcode[p];
+    dtbl->ehufsi[i] = huffsize[p];
+  }
+}
+
+
+/* Outputting bytes to the file */
+
+/* Emit a byte, taking 'action' if must suspend. */
+#define emit_byte(state, val, action) { \
+  *(state)->next_output_byte++ = (JOCTET)(val); \
+  if (--(state)->free_in_buffer == 0) \
+    if (!dump_buffer(state)) \
+      { action; } \
+}
+
+
+LOCAL(boolean)
+dump_buffer(working_state *state)
+/* Empty the output buffer; return TRUE if successful, FALSE if must suspend */
+{
+  struct jpeg_destination_mgr *dest = state->cinfo->dest;
+
+  if (!(*dest->empty_output_buffer) (state->cinfo))
+    return FALSE;
+  /* After a successful buffer dump, must reset buffer pointers */
+  state->next_output_byte = dest->next_output_byte;
+  state->free_in_buffer = dest->free_in_buffer;
+  return TRUE;
+}
+
+
+/* Outputting bits to the file */
+
+/* Output byte b and, speculatively, an additional 0 byte.  0xFF must be
+ * encoded as 0xFF 0x00, so the output buffer pointer is advanced by 2 if the
+ * byte is 0xFF.  Otherwise, the output buffer pointer is advanced by 1, and
+ * the speculative 0 byte will be overwritten by the next byte.
+ */
+#define EMIT_BYTE(b) { \
+  buffer[0] = (JOCTET)(b); \
+  buffer[1] = 0; \
+  buffer -= -2 + ((JOCTET)(b) < 0xFF); \
+}
+
+/* Output the entire bit buffer.  If there are no 0xFF bytes in it, then write
+ * directly to the output buffer.  Otherwise, use the EMIT_BYTE() macro to
+ * encode 0xFF as 0xFF 0x00.
+ */
+#if BIT_BUF_SIZE == 64
+
+#define FLUSH() { \
+  if (put_buffer & 0x8080808080808080 & ~(put_buffer + 0x0101010101010101)) { \
+    EMIT_BYTE(put_buffer >> 56) \
+    EMIT_BYTE(put_buffer >> 48) \
+    EMIT_BYTE(put_buffer >> 40) \
+    EMIT_BYTE(put_buffer >> 32) \
+    EMIT_BYTE(put_buffer >> 24) \
+    EMIT_BYTE(put_buffer >> 16) \
+    EMIT_BYTE(put_buffer >>  8) \
+    EMIT_BYTE(put_buffer      ) \
+  } else { \
+    buffer[0] = (JOCTET)(put_buffer >> 56); \
+    buffer[1] = (JOCTET)(put_buffer >> 48); \
+    buffer[2] = (JOCTET)(put_buffer >> 40); \
+    buffer[3] = (JOCTET)(put_buffer >> 32); \
+    buffer[4] = (JOCTET)(put_buffer >> 24); \
+    buffer[5] = (JOCTET)(put_buffer >> 16); \
+    buffer[6] = (JOCTET)(put_buffer >> 8); \
+    buffer[7] = (JOCTET)(put_buffer); \
+    buffer += 8; \
+  } \
+}
+
+#else
+
+#define FLUSH() { \
+  if (put_buffer & 0x80808080 & ~(put_buffer + 0x01010101)) { \
+    EMIT_BYTE(put_buffer >> 24) \
+    EMIT_BYTE(put_buffer >> 16) \
+    EMIT_BYTE(put_buffer >>  8) \
+    EMIT_BYTE(put_buffer      ) \
+  } else { \
+    buffer[0] = (JOCTET)(put_buffer >> 24); \
+    buffer[1] = (JOCTET)(put_buffer >> 16); \
+    buffer[2] = (JOCTET)(put_buffer >> 8); \
+    buffer[3] = (JOCTET)(put_buffer); \
+    buffer += 4; \
+  } \
+}
+
+#endif
+
+/* Fill the bit buffer to capacity with the leading bits from code, then output
+ * the bit buffer and put the remaining bits from code into the bit buffer.
+ */
+#define PUT_AND_FLUSH(code, size) { \
+  put_buffer = (put_buffer << (size + free_bits)) | (code >> -free_bits); \
+  FLUSH() \
+  free_bits += BIT_BUF_SIZE; \
+  put_buffer = code; \
+}
+
+/* Insert code into the bit buffer and output the bit buffer if needed.
+ * NOTE: We can't flush with free_bits == 0, since the left shift in
+ * PUT_AND_FLUSH() would have undefined behavior.
+ */
+#define PUT_BITS(code, size) { \
+  free_bits -= size; \
+  if (free_bits < 0) \
+    PUT_AND_FLUSH(code, size) \
+  else \
+    put_buffer = (put_buffer << size) | code; \
+}
+
+#define PUT_CODE(code, size) { \
+  temp &= (((JLONG)1) << nbits) - 1; \
+  temp |= code << nbits; \
+  nbits += size; \
+  PUT_BITS(temp, nbits) \
+}
+
+
+/* Although it is exceedingly rare, it is possible for a Huffman-encoded
+ * coefficient block to be larger than the 128-byte unencoded block.  For each
+ * of the 64 coefficients, PUT_BITS is invoked twice, and each invocation can
+ * theoretically store 16 bits (for a maximum of 2048 bits or 256 bytes per
+ * encoded block.)  If, for instance, one artificially sets the AC
+ * coefficients to alternating values of 32767 and -32768 (using the JPEG
+ * scanning order-- 1, 8, 16, etc.), then this will produce an encoded block
+ * larger than 200 bytes.
+ */
+#define BUFSIZE  (DCTSIZE2 * 8)
+
+#define LOAD_BUFFER() { \
+  if (state->free_in_buffer < BUFSIZE) { \
+    localbuf = 1; \
+    buffer = _buffer; \
+  } else \
+    buffer = state->next_output_byte; \
+}
+
+#define STORE_BUFFER() { \
+  if (localbuf) { \
+    size_t bytes, bytestocopy; \
+    bytes = buffer - _buffer; \
+    buffer = _buffer; \
+    while (bytes > 0) { \
+      bytestocopy = MIN(bytes, state->free_in_buffer); \
+      memcpy(state->next_output_byte, buffer, bytestocopy); \
+      state->next_output_byte += bytestocopy; \
+      buffer += bytestocopy; \
+      state->free_in_buffer -= bytestocopy; \
+      if (state->free_in_buffer == 0) \
+        if (!dump_buffer(state)) return FALSE; \
+      bytes -= bytestocopy; \
+    } \
+  } else { \
+    state->free_in_buffer -= (buffer - state->next_output_byte); \
+    state->next_output_byte = buffer; \
+  } \
+}
+
+
+LOCAL(boolean)
+flush_bits(working_state *state)
+{
+  JOCTET _buffer[BUFSIZE], *buffer, temp;
+  simd_bit_buf_type put_buffer;  int put_bits;
+  int localbuf = 0;
+
+#ifdef WITH_SIMD
+  if (state->simd) {
+#if defined(__aarch64__) && !defined(NEON_INTRINSICS)
+    put_bits = state->cur.free_bits;
+#else
+    put_bits = SIMD_BIT_BUF_SIZE - state->cur.free_bits;
+#endif
+    put_buffer = state->cur.put_buffer.simd;
+  } else
+#endif
+  {
+    put_bits = BIT_BUF_SIZE - state->cur.free_bits;
+    put_buffer = state->cur.put_buffer.c;
+  }
+
+  LOAD_BUFFER()
+
+  while (put_bits >= 8) {
+    put_bits -= 8;
+    temp = (JOCTET)(put_buffer >> put_bits);
+    EMIT_BYTE(temp)
+  }
+  if (put_bits) {
+    /* fill partial byte with ones */
+    temp = (JOCTET)((put_buffer << (8 - put_bits)) | (0xFF >> put_bits));
+    EMIT_BYTE(temp)
+  }
+
+#ifdef WITH_SIMD
+  if (state->simd) {                    /* and reset bit buffer to empty */
+    state->cur.put_buffer.simd = 0;
+#if defined(__aarch64__) && !defined(NEON_INTRINSICS)
+    state->cur.free_bits = 0;
+#else
+    state->cur.free_bits = SIMD_BIT_BUF_SIZE;
+#endif
+  } else
+#endif
+  {
+    state->cur.put_buffer.c = 0;
+    state->cur.free_bits = BIT_BUF_SIZE;
+  }
+  STORE_BUFFER()
+
+  return TRUE;
+}
+
+
+#ifdef WITH_SIMD
+
+/* Encode a single block's worth of coefficients */
+
+LOCAL(boolean)
+encode_one_block_simd(working_state *state, JCOEFPTR block, int last_dc_val,
+                      c_derived_tbl *dctbl, c_derived_tbl *actbl)
+{
+  JOCTET _buffer[BUFSIZE], *buffer;
+  int localbuf = 0;
+
+#ifdef ZERO_BUFFERS
+  memset(_buffer, 0, sizeof(_buffer));
+#endif
+
+  LOAD_BUFFER()
+
+  buffer = jsimd_huff_encode_one_block(state, buffer, block, last_dc_val,
+                                       dctbl, actbl);
+
+  STORE_BUFFER()
+
+  return TRUE;
+}
+
+#endif
+
+LOCAL(boolean)
+encode_one_block(working_state *state, JCOEFPTR block, int last_dc_val,
+                 c_derived_tbl *dctbl, c_derived_tbl *actbl)
+{
+  int temp, nbits, free_bits;
+  bit_buf_type put_buffer;
+  JOCTET _buffer[BUFSIZE], *buffer;
+  int localbuf = 0;
+  int max_coef_bits = state->cinfo->data_precision + 2;
+
+  free_bits = state->cur.free_bits;
+  put_buffer = state->cur.put_buffer.c;
+  LOAD_BUFFER()
+
+  /* Encode the DC coefficient difference per section F.1.2.1 */
+
+  temp = block[0] - last_dc_val;
+
+  /* This is a well-known technique for obtaining the absolute value without a
+   * branch.  It is derived from an assembly language technique presented in
+   * "How to Optimize for the Pentium Processors", Copyright (c) 1996, 1997 by
+   * Agner Fog.  This code assumes we are on a two's complement machine.
+   */
+  nbits = temp >> (CHAR_BIT * sizeof(int) - 1);
+  temp += nbits;
+  nbits ^= temp;
+
+  /* Find the number of bits needed for the magnitude of the coefficient */
+  nbits = JPEG_NBITS(nbits);
+  /* Check for out-of-range coefficient values.
+   * Since we're encoding a difference, the range limit is twice as much.
+   */
+  if (nbits > max_coef_bits + 1)
+    ERREXIT(state->cinfo, JERR_BAD_DCT_COEF);
+
+  /* Emit the Huffman-coded symbol for the number of bits.
+   * Emit that number of bits of the value, if positive,
+   * or the complement of its magnitude, if negative.
+   */
+  PUT_CODE(dctbl->ehufco[nbits], dctbl->ehufsi[nbits])
+
+  /* Encode the AC coefficients per section F.1.2.2 */
+
+  {
+    int r = 0;                  /* r = run length of zeros */
+
+/* Manually unroll the k loop to eliminate the counter variable.  This
+ * improves performance greatly on systems with a limited number of
+ * registers (such as x86.)
+ */
+#define kloop(jpeg_natural_order_of_k) { \
+  if ((temp = block[jpeg_natural_order_of_k]) == 0) { \
+    r += 16; \
+  } else { \
+    /* Branch-less absolute value, bitwise complement, etc., same as above */ \
+    nbits = temp >> (CHAR_BIT * sizeof(int) - 1); \
+    temp += nbits; \
+    nbits ^= temp; \
+    nbits = JPEG_NBITS_NONZERO(nbits); \
+    /* Check for out-of-range coefficient values */ \
+    if (nbits > max_coef_bits) \
+      ERREXIT(state->cinfo, JERR_BAD_DCT_COEF); \
+    /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
+    while (r >= 16 * 16) { \
+      r -= 16 * 16; \
+      PUT_BITS(actbl->ehufco[0xf0], actbl->ehufsi[0xf0]) \
+    } \
+    /* Emit Huffman symbol for run length / number of bits */ \
+    r += nbits; \
+    PUT_CODE(actbl->ehufco[r], actbl->ehufsi[r]) \
+    r = 0; \
+  } \
+}
+
+    /* One iteration for each value in jpeg_natural_order[] */
+    kloop(1);   kloop(8);   kloop(16);  kloop(9);   kloop(2);   kloop(3);
+    kloop(10);  kloop(17);  kloop(24);  kloop(32);  kloop(25);  kloop(18);
+    kloop(11);  kloop(4);   kloop(5);   kloop(12);  kloop(19);  kloop(26);
+    kloop(33);  kloop(40);  kloop(48);  kloop(41);  kloop(34);  kloop(27);
+    kloop(20);  kloop(13);  kloop(6);   kloop(7);   kloop(14);  kloop(21);
+    kloop(28);  kloop(35);  kloop(42);  kloop(49);  kloop(56);  kloop(57);
+    kloop(50);  kloop(43);  kloop(36);  kloop(29);  kloop(22);  kloop(15);
+    kloop(23);  kloop(30);  kloop(37);  kloop(44);  kloop(51);  kloop(58);
+    kloop(59);  kloop(52);  kloop(45);  kloop(38);  kloop(31);  kloop(39);
+    kloop(46);  kloop(53);  kloop(60);  kloop(61);  kloop(54);  kloop(47);
+    kloop(55);  kloop(62);  kloop(63);
+
+    /* If the last coef(s) were zero, emit an end-of-block code */
+    if (r > 0) {
+      PUT_BITS(actbl->ehufco[0], actbl->ehufsi[0])
+    }
+  }
+
+  state->cur.put_buffer.c = put_buffer;
+  state->cur.free_bits = free_bits;
+  STORE_BUFFER()
+
+  return TRUE;
+}
+
+
+/*
+ * Emit a restart marker & resynchronize predictions.
+ */
+
+LOCAL(boolean)
+emit_restart(working_state *state, int restart_num)
+{
+  int ci;
+
+  if (!flush_bits(state))
+    return FALSE;
+
+  emit_byte(state, 0xFF, return FALSE);
+  emit_byte(state, JPEG_RST0 + restart_num, return FALSE);
+
+  /* Re-initialize DC predictions to 0 */
+  for (ci = 0; ci < state->cinfo->comps_in_scan; ci++)
+    state->cur.last_dc_val[ci] = 0;
+
+  /* The restart counter is not updated until we successfully write the MCU. */
+
+  return TRUE;
+}
+
+
+/*
+ * Encode and output one MCU's worth of Huffman-compressed coefficients.
+ */
+
+METHODDEF(boolean)
+encode_mcu_huff(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
+  working_state state;
+  int blkn, ci;
+  jpeg_component_info *compptr;
+
+  /* Load up working state */
+  state.next_output_byte = cinfo->dest->next_output_byte;
+  state.free_in_buffer = cinfo->dest->free_in_buffer;
+  state.cur = entropy->saved;
+  state.cinfo = cinfo;
+#ifdef WITH_SIMD
+  state.simd = entropy->simd;
+#endif
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (!emit_restart(&state, entropy->next_restart_num))
+        return FALSE;
+  }
+
+  /* Encode the MCU data blocks */
+#ifdef WITH_SIMD
+  if (entropy->simd) {
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      ci = cinfo->MCU_membership[blkn];
+      compptr = cinfo->cur_comp_info[ci];
+      if (!encode_one_block_simd(&state,
+                                 MCU_data[blkn][0], state.cur.last_dc_val[ci],
+                                 entropy->dc_derived_tbls[compptr->dc_tbl_no],
+                                 entropy->ac_derived_tbls[compptr->ac_tbl_no]))
+        return FALSE;
+      /* Update last_dc_val */
+      state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];
+    }
+  } else
+#endif
+  {
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      ci = cinfo->MCU_membership[blkn];
+      compptr = cinfo->cur_comp_info[ci];
+      if (!encode_one_block(&state,
+                            MCU_data[blkn][0], state.cur.last_dc_val[ci],
+                            entropy->dc_derived_tbls[compptr->dc_tbl_no],
+                            entropy->ac_derived_tbls[compptr->ac_tbl_no]))
+        return FALSE;
+      /* Update last_dc_val */
+      state.cur.last_dc_val[ci] = MCU_data[blkn][0][0];
+    }
+  }
+
+  /* Completed MCU, so update state */
+  cinfo->dest->next_output_byte = state.next_output_byte;
+  cinfo->dest->free_in_buffer = state.free_in_buffer;
+  entropy->saved = state.cur;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Finish up at the end of a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+finish_pass_huff(j_compress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
+  working_state state;
+
+  /* Load up working state ... flush_bits needs it */
+  state.next_output_byte = cinfo->dest->next_output_byte;
+  state.free_in_buffer = cinfo->dest->free_in_buffer;
+  state.cur = entropy->saved;
+  state.cinfo = cinfo;
+#ifdef WITH_SIMD
+  state.simd = entropy->simd;
+#endif
+
+  /* Flush out the last data */
+  if (!flush_bits(&state))
+    ERREXIT(cinfo, JERR_CANT_SUSPEND);
+
+  /* Update state */
+  cinfo->dest->next_output_byte = state.next_output_byte;
+  cinfo->dest->free_in_buffer = state.free_in_buffer;
+  entropy->saved = state.cur;
+}
+
+
+/*
+ * Huffman coding optimization.
+ *
+ * We first scan the supplied data and count the number of uses of each symbol
+ * that is to be Huffman-coded. (This process MUST agree with the code above.)
+ * Then we build a Huffman coding tree for the observed counts.
+ * Symbols which are not needed at all for the particular image are not
+ * assigned any code, which saves space in the DHT marker as well as in
+ * the compressed data.
+ */
+
+#ifdef ENTROPY_OPT_SUPPORTED
+
+
+/* Process a single block's worth of coefficients */
+
+LOCAL(void)
+htest_one_block(j_compress_ptr cinfo, JCOEFPTR block, int last_dc_val,
+                long dc_counts[], long ac_counts[])
+{
+  register int temp;
+  register int nbits;
+  register int k, r;
+  int max_coef_bits = cinfo->data_precision + 2;
+
+  /* Encode the DC coefficient difference per section F.1.2.1 */
+
+  temp = block[0] - last_dc_val;
+  if (temp < 0)
+    temp = -temp;
+
+  /* Find the number of bits needed for the magnitude of the coefficient */
+  nbits = 0;
+  while (temp) {
+    nbits++;
+    temp >>= 1;
+  }
+  /* Check for out-of-range coefficient values.
+   * Since we're encoding a difference, the range limit is twice as much.
+   */
+  if (nbits > max_coef_bits + 1)
+    ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+
+  /* Count the Huffman symbol for the number of bits */
+  dc_counts[nbits]++;
+
+  /* Encode the AC coefficients per section F.1.2.2 */
+
+  r = 0;                        /* r = run length of zeros */
+
+  for (k = 1; k < DCTSIZE2; k++) {
+    if ((temp = block[jpeg_natural_order[k]]) == 0) {
+      r++;
+    } else {
+      /* if run length > 15, must emit special run-length-16 codes (0xF0) */
+      while (r > 15) {
+        ac_counts[0xF0]++;
+        r -= 16;
+      }
+
+      /* Find the number of bits needed for the magnitude of the coefficient */
+      if (temp < 0)
+        temp = -temp;
+
+      /* Find the number of bits needed for the magnitude of the coefficient */
+      nbits = 1;                /* there must be at least one 1 bit */
+      while ((temp >>= 1))
+        nbits++;
+      /* Check for out-of-range coefficient values */
+      if (nbits > max_coef_bits)
+        ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+
+      /* Count Huffman symbol for run length / number of bits */
+      ac_counts[(r << 4) + nbits]++;
+
+      r = 0;
+    }
+  }
+
+  /* If the last coef(s) were zero, emit an end-of-block code */
+  if (r > 0)
+    ac_counts[0]++;
+}
+
+
+/*
+ * Trial-encode one MCU's worth of Huffman-compressed coefficients.
+ * No data is actually output, so no suspension return is possible.
+ */
+
+METHODDEF(boolean)
+encode_mcu_gather(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
+  int blkn, ci;
+  jpeg_component_info *compptr;
+
+  /* Take care of restart intervals if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      /* Re-initialize DC predictions to 0 */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++)
+        entropy->saved.last_dc_val[ci] = 0;
+      /* Update restart state */
+      entropy->restarts_to_go = cinfo->restart_interval;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+    htest_one_block(cinfo, MCU_data[blkn][0], entropy->saved.last_dc_val[ci],
+                    entropy->dc_count_ptrs[compptr->dc_tbl_no],
+                    entropy->ac_count_ptrs[compptr->ac_tbl_no]);
+    entropy->saved.last_dc_val[ci] = MCU_data[blkn][0][0];
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Generate the best Huffman code table for the given counts, fill htbl.
+ * Note this is also used by jcphuff.c and jclhuff.c.
+ *
+ * The JPEG standard requires that no symbol be assigned a codeword of all
+ * one bits (so that padding bits added at the end of a compressed segment
+ * can't look like a valid code).  Because of the canonical ordering of
+ * codewords, this just means that there must be an unused slot in the
+ * longest codeword length category.  Annex K (Clause K.2) of
+ * Rec. ITU-T T.81 (1992) | ISO/IEC 10918-1:1994 suggests reserving such a slot
+ * by pretending that symbol 256 is a valid symbol with count 1.  In theory
+ * that's not optimal; giving it count zero but including it in the symbol set
+ * anyway should give a better Huffman code.  But the theoretically better code
+ * actually seems to come out worse in practice, because it produces more
+ * all-ones bytes (which incur stuffed zero bytes in the final file).  In any
+ * case the difference is tiny.
+ *
+ * The JPEG standard requires Huffman codes to be no more than 16 bits long.
+ * If some symbols have a very small but nonzero probability, the Huffman tree
+ * must be adjusted to meet the code length restriction.  We currently use
+ * the adjustment method suggested in JPEG section K.2.  This method is *not*
+ * optimal; it may not choose the best possible limited-length code.  But
+ * typically only very-low-frequency symbols will be given less-than-optimal
+ * lengths, so the code is almost optimal.  Experimental comparisons against
+ * an optimal limited-length-code algorithm indicate that the difference is
+ * microscopic --- usually less than a hundredth of a percent of total size.
+ * So the extra complexity of an optimal algorithm doesn't seem worthwhile.
+ */
+
+GLOBAL(void)
+jpeg_gen_optimal_table(j_compress_ptr cinfo, JHUFF_TBL *htbl, long freq[])
+{
+#define MAX_CLEN  32            /* assumed maximum initial code length */
+  UINT8 bits[MAX_CLEN + 1];     /* bits[k] = # of symbols with code length k */
+  int bit_pos[MAX_CLEN + 1];    /* # of symbols with smaller code length */
+  int codesize[257];            /* codesize[k] = code length of symbol k */
+  int nz_index[257];            /* index of nonzero symbol in the original freq
+                                   array */
+  int others[257];              /* next symbol in current branch of tree */
+  int c1, c2;
+  int p, i, j;
+  int num_nz_symbols;
+  long v, v2;
+
+  /* This algorithm is explained in section K.2 of the JPEG standard */
+
+  memset(bits, 0, sizeof(bits));
+  memset(codesize, 0, sizeof(codesize));
+  for (i = 0; i < 257; i++)
+    others[i] = -1;             /* init links to empty */
+
+  freq[256] = 1;                /* make sure 256 has a nonzero count */
+  /* Including the pseudo-symbol 256 in the Huffman procedure guarantees
+   * that no real symbol is given code-value of all ones, because 256
+   * will be placed last in the largest codeword category.
+   */
+
+  /* Group nonzero frequencies together so we can more easily find the
+   * smallest.
+   */
+  num_nz_symbols = 0;
+  for (i = 0; i < 257; i++) {
+    if (freq[i]) {
+      nz_index[num_nz_symbols] = i;
+      freq[num_nz_symbols] = freq[i];
+      num_nz_symbols++;
+    }
+  }
+
+  /* Huffman's basic algorithm to assign optimal code lengths to symbols */
+
+  for (;;) {
+    /* Find the two smallest nonzero frequencies; set c1, c2 = their symbols */
+    /* In case of ties, take the larger symbol number.  Since we have grouped
+     * the nonzero symbols together, checking for zero symbols is not
+     * necessary.
+     */
+    c1 = -1;
+    c2 = -1;
+    v = 1000000000L;
+    v2 = 1000000000L;
+    for (i = 0; i < num_nz_symbols; i++) {
+      if (freq[i] <= v2) {
+        if (freq[i] <= v) {
+          c2 = c1;
+          v2 = v;
+          v = freq[i];
+          c1 = i;
+        } else {
+          v2 = freq[i];
+          c2 = i;
+        }
+      }
+    }
+
+    /* Done if we've merged everything into one frequency */
+    if (c2 < 0)
+      break;
+
+    /* Else merge the two counts/trees */
+    freq[c1] += freq[c2];
+    /* Set the frequency to a very high value instead of zero, so we don't have
+     * to check for zero values.
+     */
+    freq[c2] = 1000000001L;
+
+    /* Increment the codesize of everything in c1's tree branch */
+    codesize[c1]++;
+    while (others[c1] >= 0) {
+      c1 = others[c1];
+      codesize[c1]++;
+    }
+
+    others[c1] = c2;            /* chain c2 onto c1's tree branch */
+
+    /* Increment the codesize of everything in c2's tree branch */
+    codesize[c2]++;
+    while (others[c2] >= 0) {
+      c2 = others[c2];
+      codesize[c2]++;
+    }
+  }
+
+  /* Now count the number of symbols of each code length */
+  for (i = 0; i < num_nz_symbols; i++) {
+    /* The JPEG standard seems to think that this can't happen, */
+    /* but I'm paranoid... */
+    if (codesize[i] > MAX_CLEN)
+      ERREXIT(cinfo, JERR_HUFF_CLEN_OVERFLOW);
+
+    bits[codesize[i]]++;
+  }
+
+  /* Count the number of symbols with a length smaller than i bits, so we can
+   * construct the symbol table more efficiently.  Note that this includes the
+   * pseudo-symbol 256, but since it is the last symbol, it will not affect the
+   * table.
+   */
+  p = 0;
+  for (i = 1; i <= MAX_CLEN; i++) {
+    bit_pos[i] = p;
+    p += bits[i];
+  }
+
+  /* JPEG doesn't allow symbols with code lengths over 16 bits, so if the pure
+   * Huffman procedure assigned any such lengths, we must adjust the coding.
+   * Here is what Rec. ITU-T T.81 | ISO/IEC 10918-1 says about how this next
+   * bit works: Since symbols are paired for the longest Huffman code, the
+   * symbols are removed from this length category two at a time.  The prefix
+   * for the pair (which is one bit shorter) is allocated to one of the pair;
+   * then, skipping the BITS entry for that prefix length, a code word from the
+   * next shortest nonzero BITS entry is converted into a prefix for two code
+   * words one bit longer.
+   */
+
+  for (i = MAX_CLEN; i > 16; i--) {
+    while (bits[i] > 0) {
+      j = i - 2;                /* find length of new prefix to be used */
+      while (bits[j] == 0)
+        j--;
+
+      bits[i] -= 2;             /* remove two symbols */
+      bits[i - 1]++;            /* one goes in this length */
+      bits[j + 1] += 2;         /* two new symbols in this length */
+      bits[j]--;                /* symbol of this length is now a prefix */
+    }
+  }
+
+  /* Remove the count for the pseudo-symbol 256 from the largest codelength */
+  while (bits[i] == 0)          /* find largest codelength still in use */
+    i--;
+  bits[i]--;
+
+  /* Return final symbol counts (only for lengths 0..16) */
+  memcpy(htbl->bits, bits, sizeof(htbl->bits));
+
+  /* Return a list of the symbols sorted by code length */
+  /* It's not real clear to me why we don't need to consider the codelength
+   * changes made above, but Rec. ITU-T T.81 | ISO/IEC 10918-1 seems to think
+   * this works.
+   */
+  for (i = 0; i < num_nz_symbols - 1; i++) {
+    htbl->huffval[bit_pos[codesize[i]]] = (UINT8)nz_index[i];
+    bit_pos[codesize[i]]++;
+  }
+
+  /* Set sent_table FALSE so updated table will be written to JPEG file. */
+  htbl->sent_table = FALSE;
+}
+
+
+/*
+ * Finish up a statistics-gathering pass and create the new Huffman tables.
+ */
+
+METHODDEF(void)
+finish_pass_gather(j_compress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
+  int ci, dctbl, actbl;
+  jpeg_component_info *compptr;
+  JHUFF_TBL **htblptr;
+  boolean did_dc[NUM_HUFF_TBLS];
+  boolean did_ac[NUM_HUFF_TBLS];
+
+  /* It's important not to apply jpeg_gen_optimal_table more than once
+   * per table, because it clobbers the input frequency counts!
+   */
+  memset(did_dc, 0, sizeof(did_dc));
+  memset(did_ac, 0, sizeof(did_ac));
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    dctbl = compptr->dc_tbl_no;
+    actbl = compptr->ac_tbl_no;
+    if (!did_dc[dctbl]) {
+      htblptr = &cinfo->dc_huff_tbl_ptrs[dctbl];
+      if (*htblptr == NULL)
+        *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);
+      jpeg_gen_optimal_table(cinfo, *htblptr, entropy->dc_count_ptrs[dctbl]);
+      did_dc[dctbl] = TRUE;
+    }
+    if (!did_ac[actbl]) {
+      htblptr = &cinfo->ac_huff_tbl_ptrs[actbl];
+      if (*htblptr == NULL)
+        *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);
+      jpeg_gen_optimal_table(cinfo, *htblptr, entropy->ac_count_ptrs[actbl]);
+      did_ac[actbl] = TRUE;
+    }
+  }
+}
+
+
+#endif /* ENTROPY_OPT_SUPPORTED */
+
+
+/*
+ * Module initialization routine for Huffman entropy encoding.
+ */
+
+GLOBAL(void)
+jinit_huff_encoder(j_compress_ptr cinfo)
+{
+  huff_entropy_ptr entropy;
+  int i;
+
+  entropy = (huff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(huff_entropy_encoder));
+  cinfo->entropy = (struct jpeg_entropy_encoder *)entropy;
+  entropy->pub.start_pass = start_pass_huff;
+
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
+#ifdef ENTROPY_OPT_SUPPORTED
+    entropy->dc_count_ptrs[i] = entropy->ac_count_ptrs[i] = NULL;
+#endif
+  }
+}
diff --git a/thirdparty/libjpeg-turbo/src/jchuff.h b/thirdparty/libjpeg-turbo/src/jchuff.h
new file mode 100644
index 00000000000..21f17b89b09
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jchuff.h
@@ -0,0 +1,44 @@
+/*
+ * jchuff.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains declarations for Huffman entropy encoding routines
+ * that are shared between the sequential encoder (jchuff.c) and the
+ * progressive encoder (jcphuff.c).  No other modules need to see these.
+ */
+
+/* The legal range of a DCT coefficient is
+ *  -1024 .. +1023  for 8-bit data;
+ * -16384 .. +16383 for 12-bit data.
+ * Hence the magnitude should always fit in 10 or 14 bits respectively.
+ */
+
+/* The progressive Huffman encoder uses an unsigned 16-bit data type to store
+ * absolute values of coefficients, because it is possible to inject a
+ * coefficient value of -32768 into the encoder by attempting to transform a
+ * malformed 12-bit JPEG image, and the absolute value of -32768 would overflow
+ * a signed 16-bit integer.
+ */
+typedef unsigned short UJCOEF;
+
+/* Derived data constructed for each Huffman table */
+
+typedef struct {
+  unsigned int ehufco[256];     /* code for each symbol */
+  char ehufsi[256];             /* length of code for each symbol */
+  /* If no code has been allocated for a symbol S, ehufsi[S] contains 0 */
+} c_derived_tbl;
+
+/* Expand a Huffman table definition into the derived format */
+EXTERN(void) jpeg_make_c_derived_tbl(j_compress_ptr cinfo, boolean isDC,
+                                     int tblno, c_derived_tbl **pdtbl);
+
+/* Generate an optimal table definition given the specified counts */
+EXTERN(void) jpeg_gen_optimal_table(j_compress_ptr cinfo, JHUFF_TBL *htbl,
+                                    long freq[]);
diff --git a/thirdparty/libjpeg-turbo/src/jcicc.c b/thirdparty/libjpeg-turbo/src/jcicc.c
new file mode 100644
index 00000000000..11037ff6941
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jcicc.c
@@ -0,0 +1,105 @@
+/*
+ * jcicc.c
+ *
+ * Copyright (C) 1997-1998, Thomas G. Lane, Todd Newman.
+ * Copyright (C) 2017, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file provides code to write International Color Consortium (ICC) device
+ * profiles embedded in JFIF JPEG image files.  The ICC has defined a standard
+ * for including such data in JPEG "APP2" markers.  The code given here does
+ * not know anything about the internal structure of the ICC profile data; it
+ * just knows how to embed the profile data in a JPEG file while writing it.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+
+/*
+ * Since an ICC profile can be larger than the maximum size of a JPEG marker
+ * (64K), we need provisions to split it into multiple markers.  The format
+ * defined by the ICC specifies one or more APP2 markers containing the
+ * following data:
+ *      Identifying string      ASCII "ICC_PROFILE\0"  (12 bytes)
+ *      Marker sequence number  1 for first APP2, 2 for next, etc (1 byte)
+ *      Number of markers       Total number of APP2's used (1 byte)
+ *      Profile data            (remainder of APP2 data)
+ * Decoders should use the marker sequence numbers to reassemble the profile,
+ * rather than assuming that the APP2 markers appear in the correct sequence.
+ */
+
+#define ICC_MARKER  (JPEG_APP0 + 2)     /* JPEG marker code for ICC */
+#define ICC_OVERHEAD_LEN  14            /* size of non-profile data in APP2 */
+#define MAX_BYTES_IN_MARKER  65533      /* maximum data len of a JPEG marker */
+#define MAX_DATA_BYTES_IN_MARKER  (MAX_BYTES_IN_MARKER - ICC_OVERHEAD_LEN)
+
+
+/*
+ * This routine writes the given ICC profile data into a JPEG file.  It *must*
+ * be called AFTER calling jpeg_start_compress() and BEFORE the first call to
+ * jpeg_write_scanlines().  (This ordering ensures that the APP2 marker(s) will
+ * appear after the SOI and JFIF or Adobe markers, but before all else.)
+ */
+
+GLOBAL(void)
+jpeg_write_icc_profile(j_compress_ptr cinfo, const JOCTET *icc_data_ptr,
+                       unsigned int icc_data_len)
+{
+  unsigned int num_markers;     /* total number of markers we'll write */
+  int cur_marker = 1;           /* per spec, counting starts at 1 */
+  unsigned int length;          /* number of bytes to write in this marker */
+
+  if (icc_data_ptr == NULL || icc_data_len == 0)
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+  if (cinfo->global_state < CSTATE_SCANNING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* Calculate the number of markers we'll need, rounding up of course */
+  num_markers = icc_data_len / MAX_DATA_BYTES_IN_MARKER;
+  if (num_markers * MAX_DATA_BYTES_IN_MARKER != icc_data_len)
+    num_markers++;
+
+  while (icc_data_len > 0) {
+    /* length of profile to put in this marker */
+    length = icc_data_len;
+    if (length > MAX_DATA_BYTES_IN_MARKER)
+      length = MAX_DATA_BYTES_IN_MARKER;
+    icc_data_len -= length;
+
+    /* Write the JPEG marker header (APP2 code and marker length) */
+    jpeg_write_m_header(cinfo, ICC_MARKER,
+                        (unsigned int)(length + ICC_OVERHEAD_LEN));
+
+    /* Write the marker identifying string "ICC_PROFILE" (null-terminated).  We
+     * code it in this less-than-transparent way so that the code works even if
+     * the local character set is not ASCII.
+     */
+    jpeg_write_m_byte(cinfo, 0x49);
+    jpeg_write_m_byte(cinfo, 0x43);
+    jpeg_write_m_byte(cinfo, 0x43);
+    jpeg_write_m_byte(cinfo, 0x5F);
+    jpeg_write_m_byte(cinfo, 0x50);
+    jpeg_write_m_byte(cinfo, 0x52);
+    jpeg_write_m_byte(cinfo, 0x4F);
+    jpeg_write_m_byte(cinfo, 0x46);
+    jpeg_write_m_byte(cinfo, 0x49);
+    jpeg_write_m_byte(cinfo, 0x4C);
+    jpeg_write_m_byte(cinfo, 0x45);
+    jpeg_write_m_byte(cinfo, 0x0);
+
+    /* Add the sequencing info */
+    jpeg_write_m_byte(cinfo, cur_marker);
+    jpeg_write_m_byte(cinfo, (int)num_markers);
+
+    /* Add the profile data */
+    while (length--) {
+      jpeg_write_m_byte(cinfo, *icc_data_ptr);
+      icc_data_ptr++;
+    }
+    cur_marker++;
+  }
+}
diff --git a/thirdparty/libjpeg-turbo/src/jcinit.c b/thirdparty/libjpeg-turbo/src/jcinit.c
new file mode 100644
index 00000000000..09ff6b3e43e
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jcinit.c
@@ -0,0 +1,149 @@
+/*
+ * jcinit.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2020, 2022, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains initialization logic for the JPEG compressor.
+ * This routine is in charge of selecting the modules to be executed and
+ * making an initialization call to each one.
+ *
+ * Logically, this code belongs in jcmaster.c.  It's split out because
+ * linking this routine implies linking the entire compression library.
+ * For a transcoding-only application, we want to be able to use jcmaster.c
+ * without linking in the whole library.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jpegapicomp.h"
+
+
+/*
+ * Master selection of compression modules.
+ * This is done once at the start of processing an image.  We determine
+ * which modules will be used and give them appropriate initialization calls.
+ */
+
+GLOBAL(void)
+jinit_compress_master(j_compress_ptr cinfo)
+{
+  /* Initialize master control (includes parameter checking/processing) */
+  jinit_c_master_control(cinfo, FALSE /* full compression */);
+
+  /* Preprocessing */
+  if (!cinfo->raw_data_in) {
+    if (cinfo->data_precision <= 8) {
+      jinit_color_converter(cinfo);
+      jinit_downsampler(cinfo);
+      jinit_c_prep_controller(cinfo, FALSE /* never need full buffer here */);
+    } else if (cinfo->data_precision <= 12) {
+      j12init_color_converter(cinfo);
+      j12init_downsampler(cinfo);
+      j12init_c_prep_controller(cinfo,
+                                FALSE /* never need full buffer here */);
+    } else {
+#ifdef C_LOSSLESS_SUPPORTED
+      j16init_color_converter(cinfo);
+      j16init_downsampler(cinfo);
+      j16init_c_prep_controller(cinfo,
+                                FALSE /* never need full buffer here */);
+#else
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+#endif
+    }
+  }
+
+  if (cinfo->master->lossless) {
+#ifdef C_LOSSLESS_SUPPORTED
+    /* Prediction, sample differencing, and point transform */
+    if (cinfo->data_precision <= 8)
+      jinit_lossless_compressor(cinfo);
+    else if (cinfo->data_precision <= 12)
+      j12init_lossless_compressor(cinfo);
+    else
+      j16init_lossless_compressor(cinfo);
+    /* Entropy encoding: either Huffman or arithmetic coding. */
+    if (cinfo->arith_code) {
+      ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+    } else {
+      jinit_lhuff_encoder(cinfo);
+    }
+
+    /* Need a full-image difference buffer in any multi-pass mode. */
+    if (cinfo->data_precision <= 8)
+      jinit_c_diff_controller(cinfo, (boolean)(cinfo->num_scans > 1 ||
+                                               cinfo->optimize_coding));
+    else if (cinfo->data_precision <= 12)
+      j12init_c_diff_controller(cinfo, (boolean)(cinfo->num_scans > 1 ||
+                                                 cinfo->optimize_coding));
+    else
+      j16init_c_diff_controller(cinfo, (boolean)(cinfo->num_scans > 1 ||
+                                                 cinfo->optimize_coding));
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    /* Forward DCT */
+    if (cinfo->data_precision == 8)
+      jinit_forward_dct(cinfo);
+    else if (cinfo->data_precision == 12)
+      j12init_forward_dct(cinfo);
+    else
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+    /* Entropy encoding: either Huffman or arithmetic coding. */
+    if (cinfo->arith_code) {
+#ifdef C_ARITH_CODING_SUPPORTED
+      jinit_arith_encoder(cinfo);
+#else
+      ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+#endif
+    } else {
+      if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+        jinit_phuff_encoder(cinfo);
+#else
+        ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+      } else
+        jinit_huff_encoder(cinfo);
+    }
+
+    /* Need a full-image coefficient buffer in any multi-pass mode. */
+    if (cinfo->data_precision == 12)
+      j12init_c_coef_controller(cinfo, (boolean)(cinfo->num_scans > 1 ||
+                                                 cinfo->optimize_coding));
+    else
+      jinit_c_coef_controller(cinfo, (boolean)(cinfo->num_scans > 1 ||
+                                               cinfo->optimize_coding));
+  }
+
+  if (cinfo->data_precision <= 8)
+    jinit_c_main_controller(cinfo, FALSE /* never need full buffer here */);
+  else if (cinfo->data_precision <= 12)
+    j12init_c_main_controller(cinfo, FALSE /* never need full buffer here */);
+  else
+#ifdef C_LOSSLESS_SUPPORTED
+    j16init_c_main_controller(cinfo, FALSE /* never need full buffer here */);
+#else
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+#endif
+
+  jinit_marker_writer(cinfo);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr)cinfo);
+
+  /* Write the datastream header (SOI) immediately.
+   * Frame and scan headers are postponed till later.
+   * This lets application insert special markers after the SOI.
+   */
+  (*cinfo->marker->write_file_header) (cinfo);
+}
diff --git a/thirdparty/libjpeg-turbo/src/jcmainct.c b/thirdparty/libjpeg-turbo/src/jcmainct.c
new file mode 100644
index 00000000000..954e94017c9
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jcmainct.c
@@ -0,0 +1,186 @@
+/*
+ * jcmainct.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains the main buffer controller for compression.
+ * The main buffer lies between the pre-processor and the JPEG
+ * compressor proper; it holds downsampled data in the JPEG colorspace.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jsamplecomp.h"
+
+
+#if BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_main_controller pub; /* public fields */
+
+  JDIMENSION cur_iMCU_row;      /* number of current iMCU row */
+  JDIMENSION rowgroup_ctr;      /* counts row groups received in iMCU row */
+  boolean suspended;            /* remember if we suspended output */
+  J_BUF_MODE pass_mode;         /* current operating mode */
+
+  /* If using just a strip buffer, this points to the entire set of buffers
+   * (we allocate one for each component).  In the full-image case, this
+   * points to the currently accessible strips of the virtual arrays.
+   */
+  _JSAMPARRAY buffer[MAX_COMPONENTS];
+} my_main_controller;
+
+typedef my_main_controller *my_main_ptr;
+
+
+/* Forward declarations */
+METHODDEF(void) process_data_simple_main(j_compress_ptr cinfo,
+                                         _JSAMPARRAY input_buf,
+                                         JDIMENSION *in_row_ctr,
+                                         JDIMENSION in_rows_avail);
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_main(j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
+
+  /* Do nothing in raw-data mode. */
+  if (cinfo->raw_data_in)
+    return;
+
+  if (pass_mode != JBUF_PASS_THRU)
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  main_ptr->cur_iMCU_row = 0;   /* initialize counters */
+  main_ptr->rowgroup_ctr = 0;
+  main_ptr->suspended = FALSE;
+  main_ptr->pass_mode = pass_mode;      /* save mode for use by process_data */
+  main_ptr->pub._process_data = process_data_simple_main;
+}
+
+
+/*
+ * Process some data.
+ * This routine handles the simple pass-through mode,
+ * where we have only a strip buffer.
+ */
+
+METHODDEF(void)
+process_data_simple_main(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                         JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail)
+{
+  my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
+  JDIMENSION data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+
+  while (main_ptr->cur_iMCU_row < cinfo->total_iMCU_rows) {
+    /* Read input data if we haven't filled the main buffer yet */
+    if (main_ptr->rowgroup_ctr < data_unit)
+      (*cinfo->prep->_pre_process_data) (cinfo, input_buf, in_row_ctr,
+                                         in_rows_avail, main_ptr->buffer,
+                                         &main_ptr->rowgroup_ctr, data_unit);
+
+    /* If we don't have a full iMCU row buffered, return to application for
+     * more data.  Note that preprocessor will always pad to fill the iMCU row
+     * at the bottom of the image.
+     */
+    if (main_ptr->rowgroup_ctr != data_unit)
+      return;
+
+    /* Send the completed row to the compressor */
+    if (!(*cinfo->coef->_compress_data) (cinfo, main_ptr->buffer)) {
+      /* If compressor did not consume the whole row, then we must need to
+       * suspend processing and return to the application.  In this situation
+       * we pretend we didn't yet consume the last input row; otherwise, if
+       * it happened to be the last row of the image, the application would
+       * think we were done.
+       */
+      if (!main_ptr->suspended) {
+        (*in_row_ctr)--;
+        main_ptr->suspended = TRUE;
+      }
+      return;
+    }
+    /* We did finish the row.  Undo our little suspension hack if a previous
+     * call suspended; then mark the main buffer empty.
+     */
+    if (main_ptr->suspended) {
+      (*in_row_ctr)++;
+      main_ptr->suspended = FALSE;
+    }
+    main_ptr->rowgroup_ctr = 0;
+    main_ptr->cur_iMCU_row++;
+  }
+}
+
+
+/*
+ * Initialize main buffer controller.
+ */
+
+GLOBAL(void)
+_jinit_c_main_controller(j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  my_main_ptr main_ptr;
+  int ci;
+  jpeg_component_info *compptr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+
+#ifdef C_LOSSLESS_SUPPORTED
+  if (cinfo->master->lossless) {
+#if BITS_IN_JSAMPLE == 8
+    if (cinfo->data_precision > BITS_IN_JSAMPLE || cinfo->data_precision < 2)
+#else
+    if (cinfo->data_precision > BITS_IN_JSAMPLE ||
+        cinfo->data_precision < BITS_IN_JSAMPLE - 3)
+#endif
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  } else
+#endif
+  {
+    if (cinfo->data_precision != BITS_IN_JSAMPLE)
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  }
+
+  main_ptr = (my_main_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(my_main_controller));
+  cinfo->main = (struct jpeg_c_main_controller *)main_ptr;
+  main_ptr->pub.start_pass = start_pass_main;
+
+  /* We don't need to create a buffer in raw-data mode. */
+  if (cinfo->raw_data_in)
+    return;
+
+  /* Create the buffer.  It holds downsampled data, so each component
+   * may be of a different size.
+   */
+  if (need_full_buffer) {
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+  } else {
+    /* Allocate a strip buffer for each component */
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+         ci++, compptr++) {
+      main_ptr->buffer[ci] = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
+        ((j_common_ptr)cinfo, JPOOL_IMAGE,
+         compptr->width_in_blocks * data_unit,
+         (JDIMENSION)(compptr->v_samp_factor * data_unit));
+    }
+  }
+}
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED) */
diff --git a/thirdparty/libjpeg-turbo/src/jcmarker.c b/thirdparty/libjpeg-turbo/src/jcmarker.c
new file mode 100644
index 00000000000..a064d4dd9e7
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jcmarker.c
@@ -0,0 +1,670 @@
+/*
+ * jcmarker.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modified 2003-2010 by Guido Vollbeding.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains routines to write JPEG datastream markers.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jpegapicomp.h"
+
+
+typedef enum {                  /* JPEG marker codes */
+  M_SOF0  = 0xc0,
+  M_SOF1  = 0xc1,
+  M_SOF2  = 0xc2,
+  M_SOF3  = 0xc3,
+
+  M_SOF5  = 0xc5,
+  M_SOF6  = 0xc6,
+  M_SOF7  = 0xc7,
+
+  M_JPG   = 0xc8,
+  M_SOF9  = 0xc9,
+  M_SOF10 = 0xca,
+  M_SOF11 = 0xcb,
+
+  M_SOF13 = 0xcd,
+  M_SOF14 = 0xce,
+  M_SOF15 = 0xcf,
+
+  M_DHT   = 0xc4,
+
+  M_DAC   = 0xcc,
+
+  M_RST0  = 0xd0,
+  M_RST1  = 0xd1,
+  M_RST2  = 0xd2,
+  M_RST3  = 0xd3,
+  M_RST4  = 0xd4,
+  M_RST5  = 0xd5,
+  M_RST6  = 0xd6,
+  M_RST7  = 0xd7,
+
+  M_SOI   = 0xd8,
+  M_EOI   = 0xd9,
+  M_SOS   = 0xda,
+  M_DQT   = 0xdb,
+  M_DNL   = 0xdc,
+  M_DRI   = 0xdd,
+  M_DHP   = 0xde,
+  M_EXP   = 0xdf,
+
+  M_APP0  = 0xe0,
+  M_APP1  = 0xe1,
+  M_APP2  = 0xe2,
+  M_APP3  = 0xe3,
+  M_APP4  = 0xe4,
+  M_APP5  = 0xe5,
+  M_APP6  = 0xe6,
+  M_APP7  = 0xe7,
+  M_APP8  = 0xe8,
+  M_APP9  = 0xe9,
+  M_APP10 = 0xea,
+  M_APP11 = 0xeb,
+  M_APP12 = 0xec,
+  M_APP13 = 0xed,
+  M_APP14 = 0xee,
+  M_APP15 = 0xef,
+
+  M_JPG0  = 0xf0,
+  M_JPG13 = 0xfd,
+  M_COM   = 0xfe,
+
+  M_TEM   = 0x01,
+
+  M_ERROR = 0x100
+} JPEG_MARKER;
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_marker_writer pub; /* public fields */
+
+  unsigned int last_restart_interval; /* last DRI value emitted; 0 after SOI */
+} my_marker_writer;
+
+typedef my_marker_writer *my_marker_ptr;
+
+
+/*
+ * Basic output routines.
+ *
+ * Note that we do not support suspension while writing a marker.
+ * Therefore, an application using suspension must ensure that there is
+ * enough buffer space for the initial markers (typ. 600-700 bytes) before
+ * calling jpeg_start_compress, and enough space to write the trailing EOI
+ * (a few bytes) before calling jpeg_finish_compress.  Multipass compression
+ * modes are not supported at all with suspension, so those two are the only
+ * points where markers will be written.
+ */
+
+LOCAL(void)
+emit_byte(j_compress_ptr cinfo, int val)
+/* Emit a byte */
+{
+  struct jpeg_destination_mgr *dest = cinfo->dest;
+
+  *(dest->next_output_byte)++ = (JOCTET)val;
+  if (--dest->free_in_buffer == 0) {
+    if (!(*dest->empty_output_buffer) (cinfo))
+      ERREXIT(cinfo, JERR_CANT_SUSPEND);
+  }
+}
+
+
+LOCAL(void)
+emit_marker(j_compress_ptr cinfo, JPEG_MARKER mark)
+/* Emit a marker code */
+{
+  emit_byte(cinfo, 0xFF);
+  emit_byte(cinfo, (int)mark);
+}
+
+
+LOCAL(void)
+emit_2bytes(j_compress_ptr cinfo, int value)
+/* Emit a 2-byte integer; these are always MSB first in JPEG files */
+{
+  emit_byte(cinfo, (value >> 8) & 0xFF);
+  emit_byte(cinfo, value & 0xFF);
+}
+
+
+/*
+ * Routines to write specific marker types.
+ */
+
+LOCAL(int)
+emit_dqt(j_compress_ptr cinfo, int index)
+/* Emit a DQT marker */
+/* Returns the precision used (0 = 8bits, 1 = 16bits) for baseline checking */
+{
+  JQUANT_TBL *qtbl = cinfo->quant_tbl_ptrs[index];
+  int prec;
+  int i;
+
+  if (qtbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, index);
+
+  prec = 0;
+  for (i = 0; i < DCTSIZE2; i++) {
+    if (qtbl->quantval[i] > 255)
+      prec = 1;
+  }
+
+  if (!qtbl->sent_table) {
+    emit_marker(cinfo, M_DQT);
+
+    emit_2bytes(cinfo, prec ? DCTSIZE2 * 2 + 1 + 2 : DCTSIZE2 + 1 + 2);
+
+    emit_byte(cinfo, index + (prec << 4));
+
+    for (i = 0; i < DCTSIZE2; i++) {
+      /* The table entries must be emitted in zigzag order. */
+      unsigned int qval = qtbl->quantval[jpeg_natural_order[i]];
+      if (prec)
+        emit_byte(cinfo, (int)(qval >> 8));
+      emit_byte(cinfo, (int)(qval & 0xFF));
+    }
+
+    qtbl->sent_table = TRUE;
+  }
+
+  return prec;
+}
+
+
+LOCAL(void)
+emit_dht(j_compress_ptr cinfo, int index, boolean is_ac)
+/* Emit a DHT marker */
+{
+  JHUFF_TBL *htbl;
+  int length, i;
+
+  if (is_ac) {
+    htbl = cinfo->ac_huff_tbl_ptrs[index];
+    index += 0x10;              /* output index has AC bit set */
+  } else {
+    htbl = cinfo->dc_huff_tbl_ptrs[index];
+  }
+
+  if (htbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, index);
+
+  if (!htbl->sent_table) {
+    emit_marker(cinfo, M_DHT);
+
+    length = 0;
+    for (i = 1; i <= 16; i++)
+      length += htbl->bits[i];
+
+    emit_2bytes(cinfo, length + 2 + 1 + 16);
+    emit_byte(cinfo, index);
+
+    for (i = 1; i <= 16; i++)
+      emit_byte(cinfo, htbl->bits[i]);
+
+    for (i = 0; i < length; i++)
+      emit_byte(cinfo, htbl->huffval[i]);
+
+    htbl->sent_table = TRUE;
+  }
+}
+
+
+LOCAL(void)
+emit_dac(j_compress_ptr cinfo)
+/* Emit a DAC marker */
+/* Since the useful info is so small, we want to emit all the tables in */
+/* one DAC marker.  Therefore this routine does its own scan of the table. */
+{
+#ifdef C_ARITH_CODING_SUPPORTED
+  char dc_in_use[NUM_ARITH_TBLS];
+  char ac_in_use[NUM_ARITH_TBLS];
+  int length, i;
+  jpeg_component_info *compptr;
+
+  for (i = 0; i < NUM_ARITH_TBLS; i++)
+    dc_in_use[i] = ac_in_use[i] = 0;
+
+  for (i = 0; i < cinfo->comps_in_scan; i++) {
+    compptr = cinfo->cur_comp_info[i];
+    /* DC needs no table for refinement scan */
+    if (cinfo->Ss == 0 && cinfo->Ah == 0)
+      dc_in_use[compptr->dc_tbl_no] = 1;
+    /* AC needs no table when not present */
+    if (cinfo->Se)
+      ac_in_use[compptr->ac_tbl_no] = 1;
+  }
+
+  length = 0;
+  for (i = 0; i < NUM_ARITH_TBLS; i++)
+    length += dc_in_use[i] + ac_in_use[i];
+
+  if (length) {
+    emit_marker(cinfo, M_DAC);
+
+    emit_2bytes(cinfo, length * 2 + 2);
+
+    for (i = 0; i < NUM_ARITH_TBLS; i++) {
+      if (dc_in_use[i]) {
+        emit_byte(cinfo, i);
+        emit_byte(cinfo, cinfo->arith_dc_L[i] + (cinfo->arith_dc_U[i] << 4));
+      }
+      if (ac_in_use[i]) {
+        emit_byte(cinfo, i + 0x10);
+        emit_byte(cinfo, cinfo->arith_ac_K[i]);
+      }
+    }
+  }
+#endif /* C_ARITH_CODING_SUPPORTED */
+}
+
+
+LOCAL(void)
+emit_dri(j_compress_ptr cinfo)
+/* Emit a DRI marker */
+{
+  emit_marker(cinfo, M_DRI);
+
+  emit_2bytes(cinfo, 4);        /* fixed length */
+
+  emit_2bytes(cinfo, (int)cinfo->restart_interval);
+}
+
+
+LOCAL(void)
+emit_sof(j_compress_ptr cinfo, JPEG_MARKER code)
+/* Emit a SOF marker */
+{
+  int ci;
+  jpeg_component_info *compptr;
+
+  emit_marker(cinfo, code);
+
+  emit_2bytes(cinfo, 3 * cinfo->num_components + 2 + 5 + 1); /* length */
+
+  /* Make sure image isn't bigger than SOF field can handle */
+  if ((long)cinfo->_jpeg_height > 65535L || (long)cinfo->_jpeg_width > 65535L)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int)65535);
+
+  emit_byte(cinfo, cinfo->data_precision);
+  emit_2bytes(cinfo, (int)cinfo->_jpeg_height);
+  emit_2bytes(cinfo, (int)cinfo->_jpeg_width);
+
+  emit_byte(cinfo, cinfo->num_components);
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    emit_byte(cinfo, compptr->component_id);
+    emit_byte(cinfo, (compptr->h_samp_factor << 4) + compptr->v_samp_factor);
+    emit_byte(cinfo, compptr->quant_tbl_no);
+  }
+}
+
+
+LOCAL(void)
+emit_sos(j_compress_ptr cinfo)
+/* Emit a SOS marker */
+{
+  int i, td, ta;
+  jpeg_component_info *compptr;
+
+  emit_marker(cinfo, M_SOS);
+
+  emit_2bytes(cinfo, 2 * cinfo->comps_in_scan + 2 + 1 + 3); /* length */
+
+  emit_byte(cinfo, cinfo->comps_in_scan);
+
+  for (i = 0; i < cinfo->comps_in_scan; i++) {
+    compptr = cinfo->cur_comp_info[i];
+    emit_byte(cinfo, compptr->component_id);
+
+    /* We emit 0 for unused field(s); this is recommended by the P&M text
+     * but does not seem to be specified in the standard.
+     */
+
+    /* DC needs no table for refinement scan */
+    td = cinfo->Ss == 0 && cinfo->Ah == 0 ? compptr->dc_tbl_no : 0;
+    /* AC needs no table when not present */
+    ta = cinfo->Se ? compptr->ac_tbl_no : 0;
+
+    emit_byte(cinfo, (td << 4) + ta);
+  }
+
+  emit_byte(cinfo, cinfo->Ss);
+  emit_byte(cinfo, cinfo->Se);
+  emit_byte(cinfo, (cinfo->Ah << 4) + cinfo->Al);
+}
+
+
+LOCAL(void)
+emit_jfif_app0(j_compress_ptr cinfo)
+/* Emit a JFIF-compliant APP0 marker */
+{
+  /*
+   * Length of APP0 block       (2 bytes)
+   * Block ID                   (4 bytes - ASCII "JFIF")
+   * Zero byte                  (1 byte to terminate the ID string)
+   * Version Major, Minor       (2 bytes - major first)
+   * Units                      (1 byte - 0x00 = none, 0x01 = inch, 0x02 = cm)
+   * Xdpu                       (2 bytes - dots per unit horizontal)
+   * Ydpu                       (2 bytes - dots per unit vertical)
+   * Thumbnail X size           (1 byte)
+   * Thumbnail Y size           (1 byte)
+   */
+
+  emit_marker(cinfo, M_APP0);
+
+  emit_2bytes(cinfo, 2 + 4 + 1 + 2 + 1 + 2 + 2 + 1 + 1); /* length */
+
+  emit_byte(cinfo, 0x4A);       /* Identifier: ASCII "JFIF" */
+  emit_byte(cinfo, 0x46);
+  emit_byte(cinfo, 0x49);
+  emit_byte(cinfo, 0x46);
+  emit_byte(cinfo, 0);
+  emit_byte(cinfo, cinfo->JFIF_major_version); /* Version fields */
+  emit_byte(cinfo, cinfo->JFIF_minor_version);
+  emit_byte(cinfo, cinfo->density_unit); /* Pixel size information */
+  emit_2bytes(cinfo, (int)cinfo->X_density);
+  emit_2bytes(cinfo, (int)cinfo->Y_density);
+  emit_byte(cinfo, 0);          /* No thumbnail image */
+  emit_byte(cinfo, 0);
+}
+
+
+LOCAL(void)
+emit_adobe_app14(j_compress_ptr cinfo)
+/* Emit an Adobe APP14 marker */
+{
+  /*
+   * Length of APP14 block      (2 bytes)
+   * Block ID                   (5 bytes - ASCII "Adobe")
+   * Version Number             (2 bytes - currently 100)
+   * Flags0                     (2 bytes - currently 0)
+   * Flags1                     (2 bytes - currently 0)
+   * Color transform            (1 byte)
+   *
+   * Although Adobe TN 5116 mentions Version = 101, all the Adobe files
+   * now in circulation seem to use Version = 100, so that's what we write.
+   *
+   * We write the color transform byte as 1 if the JPEG color space is
+   * YCbCr, 2 if it's YCCK, 0 otherwise.  Adobe's definition has to do with
+   * whether the encoder performed a transformation, which is pretty useless.
+   */
+
+  emit_marker(cinfo, M_APP14);
+
+  emit_2bytes(cinfo, 2 + 5 + 2 + 2 + 2 + 1); /* length */
+
+  emit_byte(cinfo, 0x41);       /* Identifier: ASCII "Adobe" */
+  emit_byte(cinfo, 0x64);
+  emit_byte(cinfo, 0x6F);
+  emit_byte(cinfo, 0x62);
+  emit_byte(cinfo, 0x65);
+  emit_2bytes(cinfo, 100);      /* Version */
+  emit_2bytes(cinfo, 0);        /* Flags0 */
+  emit_2bytes(cinfo, 0);        /* Flags1 */
+  switch (cinfo->jpeg_color_space) {
+  case JCS_YCbCr:
+    emit_byte(cinfo, 1);        /* Color transform = 1 */
+    break;
+  case JCS_YCCK:
+    emit_byte(cinfo, 2);        /* Color transform = 2 */
+    break;
+  default:
+    emit_byte(cinfo, 0);        /* Color transform = 0 */
+    break;
+  }
+}
+
+
+/*
+ * These routines allow writing an arbitrary marker with parameters.
+ * The only intended use is to emit COM or APPn markers after calling
+ * write_file_header and before calling write_frame_header.
+ * Other uses are not guaranteed to produce desirable results.
+ * Counting the parameter bytes properly is the caller's responsibility.
+ */
+
+METHODDEF(void)
+write_marker_header(j_compress_ptr cinfo, int marker, unsigned int datalen)
+/* Emit an arbitrary marker header */
+{
+  if (datalen > (unsigned int)65533)            /* safety check */
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  emit_marker(cinfo, (JPEG_MARKER)marker);
+
+  emit_2bytes(cinfo, (int)(datalen + 2));       /* total length */
+}
+
+METHODDEF(void)
+write_marker_byte(j_compress_ptr cinfo, int val)
+/* Emit one byte of marker parameters following write_marker_header */
+{
+  emit_byte(cinfo, val);
+}
+
+
+/*
+ * Write datastream header.
+ * This consists of an SOI and optional APPn markers.
+ * We recommend use of the JFIF marker, but not the Adobe marker,
+ * when using YCbCr or grayscale data.  The JFIF marker should NOT
+ * be used for any other JPEG colorspace.  The Adobe marker is helpful
+ * to distinguish RGB, CMYK, and YCCK colorspaces.
+ * Note that an application can write additional header markers after
+ * jpeg_start_compress returns.
+ */
+
+METHODDEF(void)
+write_file_header(j_compress_ptr cinfo)
+{
+  my_marker_ptr marker = (my_marker_ptr)cinfo->marker;
+
+  emit_marker(cinfo, M_SOI);    /* first the SOI */
+
+  /* SOI is defined to reset restart interval to 0 */
+  marker->last_restart_interval = 0;
+
+  if (cinfo->write_JFIF_header) /* next an optional JFIF APP0 */
+    emit_jfif_app0(cinfo);
+  if (cinfo->write_Adobe_marker) /* next an optional Adobe APP14 */
+    emit_adobe_app14(cinfo);
+}
+
+
+/*
+ * Write frame header.
+ * This consists of DQT and SOFn markers.
+ * Note that we do not emit the SOF until we have emitted the DQT(s).
+ * This avoids compatibility problems with incorrect implementations that
+ * try to error-check the quant table numbers as soon as they see the SOF.
+ */
+
+METHODDEF(void)
+write_frame_header(j_compress_ptr cinfo)
+{
+  int ci, prec = 0;
+  boolean is_baseline;
+  jpeg_component_info *compptr;
+
+  if (!cinfo->master->lossless) {
+    /* Emit DQT for each quantization table.
+     * Note that emit_dqt() suppresses any duplicate tables.
+     */
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+         ci++, compptr++) {
+      prec += emit_dqt(cinfo, compptr->quant_tbl_no);
+    }
+    /* now prec is nonzero iff there are any 16-bit quant tables. */
+  }
+
+  /* Check for a non-baseline specification.
+   * Note we assume that Huffman table numbers won't be changed later.
+   */
+  if (cinfo->arith_code || cinfo->progressive_mode ||
+      cinfo->master->lossless || cinfo->data_precision != 8) {
+    is_baseline = FALSE;
+  } else {
+    is_baseline = TRUE;
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+         ci++, compptr++) {
+      if (compptr->dc_tbl_no > 1 || compptr->ac_tbl_no > 1)
+        is_baseline = FALSE;
+    }
+    if (prec && is_baseline) {
+      is_baseline = FALSE;
+      /* If it's baseline except for quantizer size, warn the user */
+      TRACEMS(cinfo, 0, JTRC_16BIT_TABLES);
+    }
+  }
+
+  /* Emit the proper SOF marker */
+  if (cinfo->arith_code) {
+    if (cinfo->progressive_mode)
+      emit_sof(cinfo, M_SOF10); /* SOF code for progressive arithmetic */
+    else
+      emit_sof(cinfo, M_SOF9);  /* SOF code for sequential arithmetic */
+  } else {
+    if (cinfo->progressive_mode)
+      emit_sof(cinfo, M_SOF2);  /* SOF code for progressive Huffman */
+    else if (cinfo->master->lossless)
+      emit_sof(cinfo, M_SOF3);  /* SOF code for lossless Huffman */
+    else if (is_baseline)
+      emit_sof(cinfo, M_SOF0);  /* SOF code for baseline implementation */
+    else
+      emit_sof(cinfo, M_SOF1);  /* SOF code for non-baseline Huffman file */
+  }
+}
+
+
+/*
+ * Write scan header.
+ * This consists of DHT or DAC markers, optional DRI, and SOS.
+ * Compressed data will be written following the SOS.
+ */
+
+METHODDEF(void)
+write_scan_header(j_compress_ptr cinfo)
+{
+  my_marker_ptr marker = (my_marker_ptr)cinfo->marker;
+  int i;
+  jpeg_component_info *compptr;
+
+  if (cinfo->arith_code) {
+    /* Emit arith conditioning info.  We may have some duplication
+     * if the file has multiple scans, but it's so small it's hardly
+     * worth worrying about.
+     */
+    emit_dac(cinfo);
+  } else {
+    /* Emit Huffman tables.
+     * Note that emit_dht() suppresses any duplicate tables.
+     */
+    for (i = 0; i < cinfo->comps_in_scan; i++) {
+      compptr = cinfo->cur_comp_info[i];
+      /* DC needs no table for refinement scan */
+      if ((cinfo->Ss == 0 && cinfo->Ah == 0) || cinfo->master->lossless)
+        emit_dht(cinfo, compptr->dc_tbl_no, FALSE);
+      /* AC needs no table when not present, and lossless mode uses only DC
+         tables. */
+      if (cinfo->Se && !cinfo->master->lossless)
+        emit_dht(cinfo, compptr->ac_tbl_no, TRUE);
+    }
+  }
+
+  /* Emit DRI if required --- note that DRI value could change for each scan.
+   * We avoid wasting space with unnecessary DRIs, however.
+   */
+  if (cinfo->restart_interval != marker->last_restart_interval) {
+    emit_dri(cinfo);
+    marker->last_restart_interval = cinfo->restart_interval;
+  }
+
+  emit_sos(cinfo);
+}
+
+
+/*
+ * Write datastream trailer.
+ */
+
+METHODDEF(void)
+write_file_trailer(j_compress_ptr cinfo)
+{
+  emit_marker(cinfo, M_EOI);
+}
+
+
+/*
+ * Write an abbreviated table-specification datastream.
+ * This consists of SOI, DQT and DHT tables, and EOI.
+ * Any table that is defined and not marked sent_table = TRUE will be
+ * emitted.  Note that all tables will be marked sent_table = TRUE at exit.
+ */
+
+METHODDEF(void)
+write_tables_only(j_compress_ptr cinfo)
+{
+  int i;
+
+  emit_marker(cinfo, M_SOI);
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++) {
+    if (cinfo->quant_tbl_ptrs[i] != NULL)
+      (void)emit_dqt(cinfo, i);
+  }
+
+  if (!cinfo->arith_code) {
+    for (i = 0; i < NUM_HUFF_TBLS; i++) {
+      if (cinfo->dc_huff_tbl_ptrs[i] != NULL)
+        emit_dht(cinfo, i, FALSE);
+      if (cinfo->ac_huff_tbl_ptrs[i] != NULL)
+        emit_dht(cinfo, i, TRUE);
+    }
+  }
+
+  emit_marker(cinfo, M_EOI);
+}
+
+
+/*
+ * Initialize the marker writer module.
+ */
+
+GLOBAL(void)
+jinit_marker_writer(j_compress_ptr cinfo)
+{
+  my_marker_ptr marker;
+
+  /* Create the subobject */
+  marker = (my_marker_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(my_marker_writer));
+  cinfo->marker = (struct jpeg_marker_writer *)marker;
+  /* Initialize method pointers */
+  marker->pub.write_file_header = write_file_header;
+  marker->pub.write_frame_header = write_frame_header;
+  marker->pub.write_scan_header = write_scan_header;
+  marker->pub.write_file_trailer = write_file_trailer;
+  marker->pub.write_tables_only = write_tables_only;
+  marker->pub.write_marker_header = write_marker_header;
+  marker->pub.write_marker_byte = write_marker_byte;
+  /* Initialize private state */
+  marker->last_restart_interval = 0;
+}
diff --git a/thirdparty/libjpeg-turbo/src/jcmaster.c b/thirdparty/libjpeg-turbo/src/jcmaster.c
new file mode 100644
index 00000000000..1dcd252b3c1
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jcmaster.c
@@ -0,0 +1,801 @@
+/*
+ * jcmaster.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2003-2010 by Guido Vollbeding.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, 2016, 2018, 2022-2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains master control logic for the JPEG compressor.
+ * These routines are concerned with parameter validation, initial setup,
+ * and inter-pass control (determining the number of passes and the work
+ * to be done in each pass).
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jpegapicomp.h"
+#include "jcmaster.h"
+
+
+/*
+ * Support routines that do various essential calculations.
+ */
+
+#if JPEG_LIB_VERSION >= 70
+/*
+ * Compute JPEG image dimensions and related values.
+ * NOTE: this is exported for possible use by application.
+ * Hence it mustn't do anything that can't be done twice.
+ */
+
+GLOBAL(void)
+jpeg_calc_jpeg_dimensions(j_compress_ptr cinfo)
+/* Do computations that are needed before master selection phase */
+{
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+
+  /* Hardwire it to "no scaling" */
+  cinfo->jpeg_width = cinfo->image_width;
+  cinfo->jpeg_height = cinfo->image_height;
+  cinfo->min_DCT_h_scaled_size = data_unit;
+  cinfo->min_DCT_v_scaled_size = data_unit;
+}
+#endif
+
+
+LOCAL(boolean)
+using_std_huff_tables(j_compress_ptr cinfo)
+{
+  int i;
+
+  static const UINT8 bits_dc_luminance[17] = {
+    /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0
+  };
+  static const UINT8 val_dc_luminance[] = {
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
+  };
+
+  static const UINT8 bits_dc_chrominance[17] = {
+    /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0
+  };
+  static const UINT8 val_dc_chrominance[] = {
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
+  };
+
+  static const UINT8 bits_ac_luminance[17] = {
+    /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d
+  };
+  static const UINT8 val_ac_luminance[] = {
+    0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
+    0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
+    0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+    0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
+    0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
+    0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+    0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+    0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+    0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+    0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+    0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+    0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+    0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+    0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+    0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+    0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
+    0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
+    0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+    0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
+    0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+    0xf9, 0xfa
+  };
+
+  static const UINT8 bits_ac_chrominance[17] = {
+    /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77
+  };
+  static const UINT8 val_ac_chrominance[] = {
+    0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
+    0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
+    0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+    0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
+    0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
+    0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+    0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
+    0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+    0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+    0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+    0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+    0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+    0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
+    0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
+    0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+    0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
+    0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
+    0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+    0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+    0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+    0xf9, 0xfa
+  };
+
+  if (cinfo->dc_huff_tbl_ptrs[0] == NULL ||
+      cinfo->ac_huff_tbl_ptrs[0] == NULL ||
+      cinfo->dc_huff_tbl_ptrs[1] == NULL ||
+      cinfo->ac_huff_tbl_ptrs[1] == NULL)
+    return FALSE;
+
+  for (i = 2; i < NUM_HUFF_TBLS; i++) {
+    if (cinfo->dc_huff_tbl_ptrs[i] != NULL ||
+        cinfo->ac_huff_tbl_ptrs[i] != NULL)
+      return FALSE;
+  }
+
+  if (memcmp(cinfo->dc_huff_tbl_ptrs[0]->bits, bits_dc_luminance,
+             sizeof(bits_dc_luminance)) ||
+      memcmp(cinfo->dc_huff_tbl_ptrs[0]->huffval, val_dc_luminance,
+             sizeof(val_dc_luminance)) ||
+      memcmp(cinfo->ac_huff_tbl_ptrs[0]->bits, bits_ac_luminance,
+             sizeof(bits_ac_luminance)) ||
+      memcmp(cinfo->ac_huff_tbl_ptrs[0]->huffval, val_ac_luminance,
+             sizeof(val_ac_luminance)) ||
+      memcmp(cinfo->dc_huff_tbl_ptrs[1]->bits, bits_dc_chrominance,
+             sizeof(bits_dc_chrominance)) ||
+      memcmp(cinfo->dc_huff_tbl_ptrs[1]->huffval, val_dc_chrominance,
+             sizeof(val_dc_chrominance)) ||
+      memcmp(cinfo->ac_huff_tbl_ptrs[1]->bits, bits_ac_chrominance,
+             sizeof(bits_ac_chrominance)) ||
+      memcmp(cinfo->ac_huff_tbl_ptrs[1]->huffval, val_ac_chrominance,
+             sizeof(val_ac_chrominance)))
+    return FALSE;
+
+  return TRUE;
+}
+
+
+LOCAL(void)
+initial_setup(j_compress_ptr cinfo, boolean transcode_only)
+/* Do computations that are needed before master selection phase */
+{
+  int ci;
+  jpeg_component_info *compptr;
+  long samplesperrow;
+  JDIMENSION jd_samplesperrow;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+
+#if JPEG_LIB_VERSION >= 70
+#if JPEG_LIB_VERSION >= 80
+  if (!transcode_only)
+#endif
+    jpeg_calc_jpeg_dimensions(cinfo);
+#endif
+
+  /* Sanity check on image dimensions */
+  if (cinfo->_jpeg_height <= 0 || cinfo->_jpeg_width <= 0 ||
+      cinfo->num_components <= 0 || cinfo->input_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
+  /* Make sure image isn't bigger than I can handle */
+  if ((long)cinfo->_jpeg_height > (long)JPEG_MAX_DIMENSION ||
+      (long)cinfo->_jpeg_width > (long)JPEG_MAX_DIMENSION)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int)JPEG_MAX_DIMENSION);
+
+  /* Width of an input scanline must be representable as JDIMENSION. */
+  samplesperrow = (long)cinfo->image_width * (long)cinfo->input_components;
+  jd_samplesperrow = (JDIMENSION)samplesperrow;
+  if ((long)jd_samplesperrow != samplesperrow)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+
+  /* Lossy JPEG images must have 8 or 12 bits per sample.  Lossless JPEG images
+   * can have 2 to 16 bits per sample.
+   */
+#ifdef C_LOSSLESS_SUPPORTED
+  if (cinfo->master->lossless) {
+    if (cinfo->data_precision < 2 || cinfo->data_precision > 16)
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  } else
+#endif
+  {
+    if (cinfo->data_precision != 8 && cinfo->data_precision != 12)
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  }
+
+  /* Check that number of components won't exceed internal array sizes */
+  if (cinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+             MAX_COMPONENTS);
+
+  /* Compute maximum sampling factors; check factor validity */
+  cinfo->max_h_samp_factor = 1;
+  cinfo->max_v_samp_factor = 1;
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (compptr->h_samp_factor <= 0 ||
+        compptr->h_samp_factor > MAX_SAMP_FACTOR ||
+        compptr->v_samp_factor <= 0 ||
+        compptr->v_samp_factor > MAX_SAMP_FACTOR)
+      ERREXIT(cinfo, JERR_BAD_SAMPLING);
+    cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
+                                   compptr->h_samp_factor);
+    cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
+                                   compptr->v_samp_factor);
+  }
+
+  /* Compute dimensions of components */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Fill in the correct component_index value; don't rely on application */
+    compptr->component_index = ci;
+    /* For compression, we never do DCT scaling. */
+#if JPEG_LIB_VERSION >= 70
+    compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = data_unit;
+#else
+    compptr->DCT_scaled_size = data_unit;
+#endif
+    /* Size in data units */
+    compptr->width_in_blocks = (JDIMENSION)
+      jdiv_round_up((long)cinfo->_jpeg_width * (long)compptr->h_samp_factor,
+                    (long)(cinfo->max_h_samp_factor * data_unit));
+    compptr->height_in_blocks = (JDIMENSION)
+      jdiv_round_up((long)cinfo->_jpeg_height * (long)compptr->v_samp_factor,
+                    (long)(cinfo->max_v_samp_factor * data_unit));
+    /* Size in samples */
+    compptr->downsampled_width = (JDIMENSION)
+      jdiv_round_up((long)cinfo->_jpeg_width * (long)compptr->h_samp_factor,
+                    (long)cinfo->max_h_samp_factor);
+    compptr->downsampled_height = (JDIMENSION)
+      jdiv_round_up((long)cinfo->_jpeg_height * (long)compptr->v_samp_factor,
+                    (long)cinfo->max_v_samp_factor);
+    /* Mark component needed (this flag isn't actually used for compression) */
+    compptr->component_needed = TRUE;
+  }
+
+  /* Compute number of fully interleaved MCU rows (number of times that
+   * main controller will call coefficient or difference controller).
+   */
+  cinfo->total_iMCU_rows = (JDIMENSION)
+    jdiv_round_up((long)cinfo->_jpeg_height,
+                  (long)(cinfo->max_v_samp_factor * data_unit));
+}
+
+
+#if defined(C_MULTISCAN_FILES_SUPPORTED) || defined(C_LOSSLESS_SUPPORTED)
+#define NEED_SCAN_SCRIPT
+#endif
+
+#ifdef NEED_SCAN_SCRIPT
+
+LOCAL(void)
+validate_script(j_compress_ptr cinfo)
+/* Verify that the scan script in cinfo->scan_info[] is valid; also
+ * determine whether it uses progressive JPEG, and set cinfo->progressive_mode.
+ */
+{
+  const jpeg_scan_info *scanptr;
+  int scanno, ncomps, ci, coefi, thisi;
+  int Ss, Se, Ah, Al;
+  boolean component_sent[MAX_COMPONENTS];
+#ifdef C_PROGRESSIVE_SUPPORTED
+  int *last_bitpos_ptr;
+  int last_bitpos[MAX_COMPONENTS][DCTSIZE2];
+  /* -1 until that coefficient has been seen; then last Al for it */
+#endif
+
+  if (cinfo->num_scans <= 0)
+    ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, 0);
+
+#ifndef C_MULTISCAN_FILES_SUPPORTED
+  if (cinfo->num_scans > 1)
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+
+  scanptr = cinfo->scan_info;
+  if (scanptr->Ss != 0 && scanptr->Se == 0) {
+#ifdef C_LOSSLESS_SUPPORTED
+    cinfo->master->lossless = TRUE;
+    cinfo->progressive_mode = FALSE;
+    for (ci = 0; ci < cinfo->num_components; ci++)
+      component_sent[ci] = FALSE;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  }
+  /* For sequential JPEG, all scans must have Ss=0, Se=DCTSIZE2-1;
+   * for progressive JPEG, no scan can have this.
+   */
+  else if (scanptr->Ss != 0 || scanptr->Se != DCTSIZE2 - 1) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+    cinfo->progressive_mode = TRUE;
+    cinfo->master->lossless = FALSE;
+    last_bitpos_ptr = &last_bitpos[0][0];
+    for (ci = 0; ci < cinfo->num_components; ci++)
+      for (coefi = 0; coefi < DCTSIZE2; coefi++)
+        *last_bitpos_ptr++ = -1;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    cinfo->progressive_mode = cinfo->master->lossless = FALSE;
+    for (ci = 0; ci < cinfo->num_components; ci++)
+      component_sent[ci] = FALSE;
+  }
+
+  for (scanno = 1; scanno <= cinfo->num_scans; scanptr++, scanno++) {
+    /* Validate component indexes */
+    ncomps = scanptr->comps_in_scan;
+    if (ncomps <= 0 || ncomps > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, ncomps, MAX_COMPS_IN_SCAN);
+    for (ci = 0; ci < ncomps; ci++) {
+      thisi = scanptr->component_index[ci];
+      if (thisi < 0 || thisi >= cinfo->num_components)
+        ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+      /* Components must appear in SOF order within each scan */
+      if (ci > 0 && thisi <= scanptr->component_index[ci - 1])
+        ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+    }
+    /* Validate progression parameters */
+    Ss = scanptr->Ss;
+    Se = scanptr->Se;
+    Ah = scanptr->Ah;
+    Al = scanptr->Al;
+    if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+      /* Rec. ITU-T T.81 | ISO/IEC 10918-1 simply gives the ranges 0..13 for Ah
+       * and Al, but that seems wrong: the upper bound ought to depend on data
+       * precision.  Perhaps they really meant 0..N+1 for N-bit precision.
+       * Here we allow 0..10 for 8-bit data; Al larger than 10 results in
+       * out-of-range reconstructed DC values during the first DC scan,
+       * which might cause problems for some decoders.
+       */
+      int max_Ah_Al = cinfo->data_precision == 12 ? 13 : 10;
+
+      if (Ss < 0 || Ss >= DCTSIZE2 || Se < Ss || Se >= DCTSIZE2 ||
+          Ah < 0 || Ah > max_Ah_Al || Al < 0 || Al > max_Ah_Al)
+        ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      if (Ss == 0) {
+        if (Se != 0)            /* DC and AC together not OK */
+          ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      } else {
+        if (ncomps != 1)        /* AC scans must be for only one component */
+          ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      }
+      for (ci = 0; ci < ncomps; ci++) {
+        last_bitpos_ptr = &last_bitpos[scanptr->component_index[ci]][0];
+        if (Ss != 0 && last_bitpos_ptr[0] < 0) /* AC without prior DC scan */
+          ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+        for (coefi = Ss; coefi <= Se; coefi++) {
+          if (last_bitpos_ptr[coefi] < 0) {
+            /* first scan of this coefficient */
+            if (Ah != 0)
+              ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+          } else {
+            /* not first scan */
+            if (Ah != last_bitpos_ptr[coefi] || Al != Ah - 1)
+              ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+          }
+          last_bitpos_ptr[coefi] = Al;
+        }
+      }
+#endif
+    } else {
+#ifdef C_LOSSLESS_SUPPORTED
+      if (cinfo->master->lossless) {
+        /* The JPEG spec simply gives the range 0..15 for Al (Pt), but that
+         * seems wrong: the upper bound ought to depend on data precision.
+         * Perhaps they really meant 0..N-1 for N-bit precision, which is what
+         * we allow here.  Values greater than or equal to the data precision
+         * will result in a blank image.
+         */
+        if (Ss < 1 || Ss > 7 ||         /* predictor selection value */
+            Se != 0 || Ah != 0 ||
+            Al < 0 || Al >= cinfo->data_precision) /* point transform */
+          ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      } else
+#endif
+      {
+        /* For sequential JPEG, all progression parameters must be these: */
+        if (Ss != 0 || Se != DCTSIZE2 - 1 || Ah != 0 || Al != 0)
+          ERREXIT1(cinfo, JERR_BAD_PROG_SCRIPT, scanno);
+      }
+      /* Make sure components are not sent twice */
+      for (ci = 0; ci < ncomps; ci++) {
+        thisi = scanptr->component_index[ci];
+        if (component_sent[thisi])
+          ERREXIT1(cinfo, JERR_BAD_SCAN_SCRIPT, scanno);
+        component_sent[thisi] = TRUE;
+      }
+    }
+  }
+
+  /* Now verify that everything got sent. */
+  if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+    /* For progressive mode, we only check that at least some DC data
+     * got sent for each component; the spec does not require that all bits
+     * of all coefficients be transmitted.  Would it be wiser to enforce
+     * transmission of all coefficient bits??
+     */
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      if (last_bitpos[ci][0] < 0)
+        ERREXIT(cinfo, JERR_MISSING_DATA);
+    }
+#endif
+  } else {
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      if (!component_sent[ci])
+        ERREXIT(cinfo, JERR_MISSING_DATA);
+    }
+  }
+}
+
+#endif /* NEED_SCAN_SCRIPT */
+
+
+LOCAL(void)
+select_scan_parameters(j_compress_ptr cinfo)
+/* Set up the scan parameters for the current scan */
+{
+  int ci;
+
+#ifdef NEED_SCAN_SCRIPT
+  if (cinfo->scan_info != NULL) {
+    /* Prepare for current scan --- the script is already validated */
+    my_master_ptr master = (my_master_ptr)cinfo->master;
+    const jpeg_scan_info *scanptr = cinfo->scan_info + master->scan_number;
+
+    cinfo->comps_in_scan = scanptr->comps_in_scan;
+    for (ci = 0; ci < scanptr->comps_in_scan; ci++) {
+      cinfo->cur_comp_info[ci] =
+        &cinfo->comp_info[scanptr->component_index[ci]];
+    }
+    cinfo->Ss = scanptr->Ss;
+    cinfo->Se = scanptr->Se;
+    cinfo->Ah = scanptr->Ah;
+    cinfo->Al = scanptr->Al;
+  } else
+#endif
+  {
+    /* Prepare for single sequential-JPEG scan containing all components */
+    if (cinfo->num_components > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+               MAX_COMPS_IN_SCAN);
+    cinfo->comps_in_scan = cinfo->num_components;
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      cinfo->cur_comp_info[ci] = &cinfo->comp_info[ci];
+    }
+    if (!cinfo->master->lossless) {
+      cinfo->Ss = 0;
+      cinfo->Se = DCTSIZE2 - 1;
+      cinfo->Ah = 0;
+      cinfo->Al = 0;
+    }
+  }
+}
+
+
+LOCAL(void)
+per_scan_setup(j_compress_ptr cinfo)
+/* Do computations that are needed before processing a JPEG scan */
+/* cinfo->comps_in_scan and cinfo->cur_comp_info[] are already set */
+{
+  int ci, mcublks, tmp;
+  jpeg_component_info *compptr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+
+  if (cinfo->comps_in_scan == 1) {
+
+    /* Noninterleaved (single-component) scan */
+    compptr = cinfo->cur_comp_info[0];
+
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = compptr->width_in_blocks;
+    cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
+
+    /* For noninterleaved scan, always one block per MCU */
+    compptr->MCU_width = 1;
+    compptr->MCU_height = 1;
+    compptr->MCU_blocks = 1;
+    compptr->MCU_sample_width = data_unit;
+    compptr->last_col_width = 1;
+    /* For noninterleaved scans, it is convenient to define last_row_height
+     * as the number of block rows present in the last iMCU row.
+     */
+    tmp = (int)(compptr->height_in_blocks % compptr->v_samp_factor);
+    if (tmp == 0) tmp = compptr->v_samp_factor;
+    compptr->last_row_height = tmp;
+
+    /* Prepare array describing MCU composition */
+    cinfo->blocks_in_MCU = 1;
+    cinfo->MCU_membership[0] = 0;
+
+  } else {
+
+    /* Interleaved (multi-component) scan */
+    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
+               MAX_COMPS_IN_SCAN);
+
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = (JDIMENSION)
+      jdiv_round_up((long)cinfo->_jpeg_width,
+                    (long)(cinfo->max_h_samp_factor * data_unit));
+    cinfo->MCU_rows_in_scan = (JDIMENSION)
+      jdiv_round_up((long)cinfo->_jpeg_height,
+                    (long)(cinfo->max_v_samp_factor * data_unit));
+
+    cinfo->blocks_in_MCU = 0;
+
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      /* Sampling factors give # of blocks of component in each MCU */
+      compptr->MCU_width = compptr->h_samp_factor;
+      compptr->MCU_height = compptr->v_samp_factor;
+      compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
+      compptr->MCU_sample_width = compptr->MCU_width * data_unit;
+      /* Figure number of non-dummy blocks in last MCU column & row */
+      tmp = (int)(compptr->width_in_blocks % compptr->MCU_width);
+      if (tmp == 0) tmp = compptr->MCU_width;
+      compptr->last_col_width = tmp;
+      tmp = (int)(compptr->height_in_blocks % compptr->MCU_height);
+      if (tmp == 0) tmp = compptr->MCU_height;
+      compptr->last_row_height = tmp;
+      /* Prepare array describing MCU composition */
+      mcublks = compptr->MCU_blocks;
+      if (cinfo->blocks_in_MCU + mcublks > C_MAX_BLOCKS_IN_MCU)
+        ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
+      while (mcublks-- > 0) {
+        cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
+      }
+    }
+
+  }
+
+  /* Convert restart specified in rows to actual MCU count. */
+  /* Note that count must fit in 16 bits, so we provide limiting. */
+  if (cinfo->restart_in_rows > 0) {
+    long nominal = (long)cinfo->restart_in_rows * (long)cinfo->MCUs_per_row;
+    cinfo->restart_interval = (unsigned int)MIN(nominal, 65535L);
+  }
+}
+
+
+/*
+ * Per-pass setup.
+ * This is called at the beginning of each pass.  We determine which modules
+ * will be active during this pass and give them appropriate start_pass calls.
+ * We also set is_last_pass to indicate whether any more passes will be
+ * required.
+ */
+
+METHODDEF(void)
+prepare_for_pass(j_compress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr)cinfo->master;
+
+  switch (master->pass_type) {
+  case main_pass:
+    /* Initial pass: will collect input data, and do either Huffman
+     * optimization or data output for the first scan.
+     */
+    select_scan_parameters(cinfo);
+    per_scan_setup(cinfo);
+    if (!cinfo->raw_data_in) {
+      (*cinfo->cconvert->start_pass) (cinfo);
+      (*cinfo->downsample->start_pass) (cinfo);
+      (*cinfo->prep->start_pass) (cinfo, JBUF_PASS_THRU);
+    }
+    (*cinfo->fdct->start_pass) (cinfo);
+    (*cinfo->entropy->start_pass) (cinfo, cinfo->optimize_coding);
+    (*cinfo->coef->start_pass) (cinfo,
+                                (master->total_passes > 1 ?
+                                 JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
+    (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
+    if (cinfo->optimize_coding) {
+      /* No immediate data output; postpone writing frame/scan headers */
+      master->pub.call_pass_startup = FALSE;
+    } else {
+      /* Will write frame/scan headers at first jpeg_write_scanlines call */
+      master->pub.call_pass_startup = TRUE;
+    }
+    break;
+#ifdef ENTROPY_OPT_SUPPORTED
+  case huff_opt_pass:
+    /* Do Huffman optimization for a scan after the first one. */
+    select_scan_parameters(cinfo);
+    per_scan_setup(cinfo);
+    if (cinfo->Ss != 0 || cinfo->Ah == 0 || cinfo->arith_code ||
+        cinfo->master->lossless) {
+      (*cinfo->entropy->start_pass) (cinfo, TRUE);
+      (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
+      master->pub.call_pass_startup = FALSE;
+      break;
+    }
+    /* Special case: Huffman DC refinement scans need no Huffman table
+     * and therefore we can skip the optimization pass for them.
+     */
+    master->pass_type = output_pass;
+    master->pass_number++;
+#endif
+    FALLTHROUGH                 /*FALLTHROUGH*/
+  case output_pass:
+    /* Do a data-output pass. */
+    /* We need not repeat per-scan setup if prior optimization pass did it. */
+    if (!cinfo->optimize_coding) {
+      select_scan_parameters(cinfo);
+      per_scan_setup(cinfo);
+    }
+    (*cinfo->entropy->start_pass) (cinfo, FALSE);
+    (*cinfo->coef->start_pass) (cinfo, JBUF_CRANK_DEST);
+    /* We emit frame/scan headers now */
+    if (master->scan_number == 0)
+      (*cinfo->marker->write_frame_header) (cinfo);
+    (*cinfo->marker->write_scan_header) (cinfo);
+    master->pub.call_pass_startup = FALSE;
+    break;
+  default:
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+  }
+
+  master->pub.is_last_pass = (master->pass_number == master->total_passes - 1);
+
+  /* Set up progress monitor's pass info if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->completed_passes = master->pass_number;
+    cinfo->progress->total_passes = master->total_passes;
+  }
+}
+
+
+/*
+ * Special start-of-pass hook.
+ * This is called by jpeg_write_scanlines if call_pass_startup is TRUE.
+ * In single-pass processing, we need this hook because we don't want to
+ * write frame/scan headers during jpeg_start_compress; we want to let the
+ * application write COM markers etc. between jpeg_start_compress and the
+ * jpeg_write_scanlines loop.
+ * In multi-pass processing, this routine is not used.
+ */
+
+METHODDEF(void)
+pass_startup(j_compress_ptr cinfo)
+{
+  cinfo->master->call_pass_startup = FALSE; /* reset flag so call only once */
+
+  (*cinfo->marker->write_frame_header) (cinfo);
+  (*cinfo->marker->write_scan_header) (cinfo);
+}
+
+
+/*
+ * Finish up at end of pass.
+ */
+
+METHODDEF(void)
+finish_pass_master(j_compress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr)cinfo->master;
+
+  /* The entropy coder always needs an end-of-pass call,
+   * either to analyze statistics or to flush its output buffer.
+   */
+  (*cinfo->entropy->finish_pass) (cinfo);
+
+  /* Update state for next pass */
+  switch (master->pass_type) {
+  case main_pass:
+    /* next pass is either output of scan 0 (after optimization)
+     * or output of scan 1 (if no optimization).
+     */
+    master->pass_type = output_pass;
+    if (!cinfo->optimize_coding)
+      master->scan_number++;
+    break;
+  case huff_opt_pass:
+    /* next pass is always output of current scan */
+    master->pass_type = output_pass;
+    break;
+  case output_pass:
+    /* next pass is either optimization or output of next scan */
+    if (cinfo->optimize_coding)
+      master->pass_type = huff_opt_pass;
+    master->scan_number++;
+    break;
+  }
+
+  master->pass_number++;
+}
+
+
+/*
+ * Initialize master compression control.
+ */
+
+GLOBAL(void)
+jinit_c_master_control(j_compress_ptr cinfo, boolean transcode_only)
+{
+  my_master_ptr master = (my_master_ptr)cinfo->master;
+  boolean empty_huff_tables = TRUE;
+  int i;
+
+  master->pub.prepare_for_pass = prepare_for_pass;
+  master->pub.pass_startup = pass_startup;
+  master->pub.finish_pass = finish_pass_master;
+  master->pub.is_last_pass = FALSE;
+
+  if (cinfo->scan_info != NULL) {
+#ifdef NEED_SCAN_SCRIPT
+    validate_script(cinfo);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    cinfo->progressive_mode = FALSE;
+    cinfo->num_scans = 1;
+  }
+
+#ifdef C_LOSSLESS_SUPPORTED
+  /* Disable smoothing and subsampling in lossless mode, since those are lossy
+   * algorithms.  Set the JPEG colorspace to the input colorspace.  Disable raw
+   * (downsampled) data input, because it isn't particularly useful without
+   * subsampling and has not been tested in lossless mode.
+   */
+  if (cinfo->master->lossless) {
+    int ci;
+    jpeg_component_info *compptr;
+
+    cinfo->raw_data_in = FALSE;
+    cinfo->smoothing_factor = 0;
+    jpeg_default_colorspace(cinfo);
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+         ci++, compptr++)
+      compptr->h_samp_factor = compptr->v_samp_factor = 1;
+  }
+#endif
+
+  /* Validate parameters, determine derived values */
+  initial_setup(cinfo, transcode_only);
+
+  if (cinfo->arith_code)
+    cinfo->optimize_coding = FALSE;
+  else {
+    if (cinfo->master->lossless ||      /*  TEMPORARY HACK ??? */
+        cinfo->progressive_mode)
+      cinfo->optimize_coding = TRUE; /* assume default tables no good for
+                                        progressive mode or lossless mode */
+    for (i = 0; i < NUM_HUFF_TBLS; i++) {
+      if (cinfo->dc_huff_tbl_ptrs[i] != NULL ||
+          cinfo->ac_huff_tbl_ptrs[i] != NULL) {
+        empty_huff_tables = FALSE;
+        break;
+      }
+    }
+    if (cinfo->data_precision == 12 && !cinfo->optimize_coding &&
+        (empty_huff_tables || using_std_huff_tables(cinfo)))
+      cinfo->optimize_coding = TRUE; /* assume default tables no good for
+                                        12-bit data precision */
+  }
+
+  /* Initialize my private state */
+  if (transcode_only) {
+    /* no main pass in transcoding */
+    if (cinfo->optimize_coding)
+      master->pass_type = huff_opt_pass;
+    else
+      master->pass_type = output_pass;
+  } else {
+    /* for normal compression, first pass is always this type: */
+    master->pass_type = main_pass;
+  }
+  master->scan_number = 0;
+  master->pass_number = 0;
+  if (cinfo->optimize_coding)
+    master->total_passes = cinfo->num_scans * 2;
+  else
+    master->total_passes = cinfo->num_scans;
+
+  master->jpeg_version = PACKAGE_NAME " version " VERSION " (build " BUILD ")";
+}
diff --git a/thirdparty/libjpeg-turbo/src/jcmaster.h b/thirdparty/libjpeg-turbo/src/jcmaster.h
new file mode 100644
index 00000000000..3b13289b691
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jcmaster.h
@@ -0,0 +1,43 @@
+/*
+ * jcmaster.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1995, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2016, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains master control structure for the JPEG compressor.
+ */
+
+/* Private state */
+
+typedef enum {
+  main_pass,                    /* input data, also do first output step */
+  huff_opt_pass,                /* Huffman code optimization pass */
+  output_pass                   /* data output pass */
+} c_pass_type;
+
+typedef struct {
+  struct jpeg_comp_master pub;  /* public fields */
+
+  c_pass_type pass_type;        /* the type of the current pass */
+
+  int pass_number;              /* # of passes completed */
+  int total_passes;             /* total # of passes needed */
+
+  int scan_number;              /* current index in scan_info[] */
+
+  /*
+   * This is here so we can add libjpeg-turbo version/build information to the
+   * global string table without introducing a new global symbol.  Adding this
+   * information to the global string table allows one to examine a binary
+   * object and determine which version of libjpeg-turbo it was built from or
+   * linked against.
+   */
+  const char *jpeg_version;
+
+} my_comp_master;
+
+typedef my_comp_master *my_master_ptr;
diff --git a/thirdparty/libjpeg-turbo/src/jcomapi.c b/thirdparty/libjpeg-turbo/src/jcomapi.c
new file mode 100644
index 00000000000..84f37e172ec
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jcomapi.c
@@ -0,0 +1,110 @@
+/*
+ * jcomapi.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains application interface routines that are used for both
+ * compression and decompression.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+/*
+ * Abort processing of a JPEG compression or decompression operation,
+ * but don't destroy the object itself.
+ *
+ * For this, we merely clean up all the nonpermanent memory pools.
+ * Note that temp files (virtual arrays) are not allowed to belong to
+ * the permanent pool, so we will be able to close all temp files here.
+ * Closing a data source or destination, if necessary, is the application's
+ * responsibility.
+ */
+
+GLOBAL(void)
+jpeg_abort(j_common_ptr cinfo)
+{
+  int pool;
+
+  /* Do nothing if called on a not-initialized or destroyed JPEG object. */
+  if (cinfo->mem == NULL)
+    return;
+
+  /* Releasing pools in reverse order might help avoid fragmentation
+   * with some (brain-damaged) malloc libraries.
+   */
+  for (pool = JPOOL_NUMPOOLS - 1; pool > JPOOL_PERMANENT; pool--) {
+    (*cinfo->mem->free_pool) (cinfo, pool);
+  }
+
+  /* Reset overall state for possible reuse of object */
+  if (cinfo->is_decompressor) {
+    cinfo->global_state = DSTATE_START;
+    /* Try to keep application from accessing now-deleted marker list.
+     * A bit kludgy to do it here, but this is the most central place.
+     */
+    ((j_decompress_ptr)cinfo)->marker_list = NULL;
+    ((j_decompress_ptr)cinfo)->master->marker_list_end = NULL;
+  } else {
+    cinfo->global_state = CSTATE_START;
+  }
+}
+
+
+/*
+ * Destruction of a JPEG object.
+ *
+ * Everything gets deallocated except the master jpeg_compress_struct itself
+ * and the error manager struct.  Both of these are supplied by the application
+ * and must be freed, if necessary, by the application.  (Often they are on
+ * the stack and so don't need to be freed anyway.)
+ * Closing a data source or destination, if necessary, is the application's
+ * responsibility.
+ */
+
+GLOBAL(void)
+jpeg_destroy(j_common_ptr cinfo)
+{
+  /* We need only tell the memory manager to release everything. */
+  /* NB: mem pointer is NULL if memory mgr failed to initialize. */
+  if (cinfo->mem != NULL)
+    (*cinfo->mem->self_destruct) (cinfo);
+  cinfo->mem = NULL;            /* be safe if jpeg_destroy is called twice */
+  cinfo->global_state = 0;      /* mark it destroyed */
+}
+
+
+/*
+ * Convenience routines for allocating quantization and Huffman tables.
+ * (Would jutils.c be a more reasonable place to put these?)
+ */
+
+GLOBAL(JQUANT_TBL *)
+jpeg_alloc_quant_table(j_common_ptr cinfo)
+{
+  JQUANT_TBL *tbl;
+
+  tbl = (JQUANT_TBL *)
+    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, sizeof(JQUANT_TBL));
+  tbl->sent_table = FALSE;      /* make sure this is false in any new table */
+  return tbl;
+}
+
+
+GLOBAL(JHUFF_TBL *)
+jpeg_alloc_huff_table(j_common_ptr cinfo)
+{
+  JHUFF_TBL *tbl;
+
+  tbl = (JHUFF_TBL *)
+    (*cinfo->mem->alloc_small) (cinfo, JPOOL_PERMANENT, sizeof(JHUFF_TBL));
+  tbl->sent_table = FALSE;      /* make sure this is false in any new table */
+  return tbl;
+}
diff --git a/thirdparty/libjpeg-turbo/src/jconfig.h b/thirdparty/libjpeg-turbo/src/jconfig.h
new file mode 100644
index 00000000000..42d9654c0fc
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jconfig.h
@@ -0,0 +1,62 @@
+// Originally generated by libjpeg-turbo's cmake build, then modified to support multiple platforms.
+
+/* Version ID for the JPEG library.
+ * Might be useful for tests like "#if JPEG_LIB_VERSION >= 60".
+ */
+#define JPEG_LIB_VERSION  62
+
+/* libjpeg-turbo version */
+#define LIBJPEG_TURBO_VERSION  3.1.0
+
+/* libjpeg-turbo version in integer form */
+#define LIBJPEG_TURBO_VERSION_NUMBER  3001000
+
+/* Support arithmetic encoding when using 8-bit samples */
+#define C_ARITH_CODING_SUPPORTED 1
+
+/* Support arithmetic decoding when using 8-bit samples */
+#define D_ARITH_CODING_SUPPORTED 1
+
+/* Support in-memory source/destination managers */
+#define MEM_SRCDST_SUPPORTED  1
+
+/* Use accelerated SIMD routines when using 8-bit samples */
+//#define WITH_SIMD 1
+
+/* This version of libjpeg-turbo supports run-time selection of data precision,
+ * so BITS_IN_JSAMPLE is no longer used to specify the data precision at build
+ * time.  However, some downstream software expects the macro to be defined.
+ * Since 12-bit data precision is an opt-in feature that requires explicitly
+ * calling 12-bit-specific libjpeg API functions and using 12-bit-specific data
+ * types, the unmodified portion of the libjpeg API still behaves as if it were
+ * built for 8-bit precision, and JSAMPLE is still literally an 8-bit data
+ * type.  Thus, it is correct to define BITS_IN_JSAMPLE to 8 here.
+ */
+#ifndef BITS_IN_JSAMPLE
+#define BITS_IN_JSAMPLE  8
+#endif
+
+#ifdef _WIN32
+
+#undef RIGHT_SHIFT_IS_UNSIGNED
+
+/* Define "boolean" as unsigned char, not int, per Windows custom */
+#ifndef __RPCNDR_H__            /* don't conflict if rpcndr.h already read */
+typedef unsigned char boolean;
+#endif
+#define HAVE_BOOLEAN            /* prevent jmorecfg.h from redefining it */
+
+/* Define "INT32" as int, not long, per Windows custom */
+#if !(defined(_BASETSD_H_) || defined(_BASETSD_H))   /* don't conflict if basetsd.h already read */
+typedef short INT16;
+typedef signed int INT32;
+#endif
+#define XMD_H                   /* prevent jmorecfg.h from redefining it */
+
+#else
+
+/* Define if your (broken) compiler shifts signed values as if they were
+   unsigned. */
+/* #undef RIGHT_SHIFT_IS_UNSIGNED */
+
+#endif
diff --git a/thirdparty/libjpeg-turbo/src/jconfigint.h b/thirdparty/libjpeg-turbo/src/jconfigint.h
new file mode 100644
index 00000000000..f6171bf8465
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jconfigint.h
@@ -0,0 +1,94 @@
+// Originally generated by libjpeg-turbo's cmake build, then modified to support multiple platforms.
+
+/* libjpeg-turbo build number */
+#define BUILD  "20250317"
+
+/* How to hide global symbols. */
+#ifndef HIDDEN
+	#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
+		#define HIDDEN  __attribute__((visibility("hidden")))
+	#else
+		#define HIDDEN
+	#endif
+#endif
+
+/* Compiler's inline keyword */
+#undef inline
+
+/* How to obtain function inlining. */
+#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__MINGW32__)
+	#define INLINE  __inline__ __attribute__((always_inline))
+#else
+	#define INLINE inline
+#endif
+
+/* How to obtain thread-local storage */
+#if defined(_MSC_VER)
+#define THREAD_LOCAL  __declspec(thread)
+#else
+#define THREAD_LOCAL  __thread
+#endif
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME  "libjpeg-turbo"
+
+/* Version number of package */
+#define VERSION  "3.1.1"
+
+/* The size of `size_t', as computed by sizeof. */
+#define SIZEOF_SIZE_T  8
+
+/* Define if your compiler has __builtin_ctzl() and sizeof(unsigned long) == sizeof(size_t). */
+#if defined(__GNUC__)
+	#define HAVE_BUILTIN_CTZL
+#endif
+
+/* Define to 1 if you have the <intrin.h> header file. */
+/* #undef HAVE_INTRIN_H */
+
+#if defined(_MSC_VER) && defined(HAVE_INTRIN_H)
+#if (SIZEOF_SIZE_T == 8)
+#define HAVE_BITSCANFORWARD64
+#elif (SIZEOF_SIZE_T == 4)
+#define HAVE_BITSCANFORWARD
+#endif
+#endif
+
+#if defined(__has_attribute)
+#if __has_attribute(fallthrough)
+#define FALLTHROUGH  __attribute__((fallthrough));
+#else
+#define FALLTHROUGH
+#endif
+#else
+#define FALLTHROUGH
+#endif
+
+/*
+ * Define BITS_IN_JSAMPLE as either
+ *   8   for 8-bit sample values (the usual setting)
+ *   12  for 12-bit sample values
+ * Only 8 and 12 are legal data precisions for lossy JPEG according to the
+ * JPEG standard, and the IJG code does not support anything else!
+ */
+
+#ifndef BITS_IN_JSAMPLE
+#define BITS_IN_JSAMPLE  8      /* use 8 or 12 */
+#endif
+
+#undef C_ARITH_CODING_SUPPORTED
+#undef D_ARITH_CODING_SUPPORTED
+#undef WITH_SIMD
+
+#if BITS_IN_JSAMPLE == 8
+
+/* Support arithmetic encoding */
+#define C_ARITH_CODING_SUPPORTED 1
+
+/* Support arithmetic decoding */
+#define D_ARITH_CODING_SUPPORTED 1
+
+/* Use accelerated SIMD routines. */
+//#define WITH_SIMD 1
+
+#endif
diff --git a/thirdparty/libjpeg-turbo/src/jcparam.c b/thirdparty/libjpeg-turbo/src/jcparam.c
new file mode 100644
index 00000000000..d74623c207c
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jcparam.c
@@ -0,0 +1,592 @@
+/*
+ * jcparam.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modified 2003-2008 by Guido Vollbeding.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009-2011, 2018, 2023-2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains optional default-setting code for the JPEG compressor.
+ * Applications do not have to use this file, but those that don't use it
+ * must know a lot more about the innards of the JPEG code.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jstdhuff.c"
+
+
+/*
+ * Quantization table setup routines
+ */
+
+GLOBAL(void)
+jpeg_add_quant_table(j_compress_ptr cinfo, int which_tbl,
+                     const unsigned int *basic_table, int scale_factor,
+                     boolean force_baseline)
+/* Define a quantization table equal to the basic_table times
+ * a scale factor (given as a percentage).
+ * If force_baseline is TRUE, the computed quantization table entries
+ * are limited to 1..255 for JPEG baseline compatibility.
+ */
+{
+  JQUANT_TBL **qtblptr;
+  int i;
+  long temp;
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (which_tbl < 0 || which_tbl >= NUM_QUANT_TBLS)
+    ERREXIT1(cinfo, JERR_DQT_INDEX, which_tbl);
+
+  qtblptr = &cinfo->quant_tbl_ptrs[which_tbl];
+
+  if (*qtblptr == NULL)
+    *qtblptr = jpeg_alloc_quant_table((j_common_ptr)cinfo);
+
+  for (i = 0; i < DCTSIZE2; i++) {
+    temp = ((long)basic_table[i] * scale_factor + 50L) / 100L;
+    /* limit the values to the valid range */
+    if (temp <= 0L) temp = 1L;
+    if (temp > 32767L) temp = 32767L; /* max quantizer needed for 12 bits */
+    if (force_baseline && temp > 255L)
+      temp = 255L;              /* limit to baseline range if requested */
+    (*qtblptr)->quantval[i] = (UINT16)temp;
+  }
+
+  /* Initialize sent_table FALSE so table will be written to JPEG file. */
+  (*qtblptr)->sent_table = FALSE;
+}
+
+
+/* These are the sample quantization tables given in Annex K (Clause K.1) of
+ * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994.
+ * The spec says that the values given produce "good" quality, and
+ * when divided by 2, "very good" quality.
+ */
+static const unsigned int std_luminance_quant_tbl[DCTSIZE2] = {
+  16,  11,  10,  16,  24,  40,  51,  61,
+  12,  12,  14,  19,  26,  58,  60,  55,
+  14,  13,  16,  24,  40,  57,  69,  56,
+  14,  17,  22,  29,  51,  87,  80,  62,
+  18,  22,  37,  56,  68, 109, 103,  77,
+  24,  35,  55,  64,  81, 104, 113,  92,
+  49,  64,  78,  87, 103, 121, 120, 101,
+  72,  92,  95,  98, 112, 100, 103,  99
+};
+static const unsigned int std_chrominance_quant_tbl[DCTSIZE2] = {
+  17,  18,  24,  47,  99,  99,  99,  99,
+  18,  21,  26,  66,  99,  99,  99,  99,
+  24,  26,  56,  99,  99,  99,  99,  99,
+  47,  66,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99,
+  99,  99,  99,  99,  99,  99,  99,  99
+};
+
+
+#if JPEG_LIB_VERSION >= 70
+GLOBAL(void)
+jpeg_default_qtables(j_compress_ptr cinfo, boolean force_baseline)
+/* Set or change the 'quality' (quantization) setting, using default tables
+ * and straight percentage-scaling quality scales.
+ * This entry point allows different scalings for luminance and chrominance.
+ */
+{
+  /* Set up two quantization tables using the specified scaling */
+  jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
+                       cinfo->q_scale_factor[0], force_baseline);
+  jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
+                       cinfo->q_scale_factor[1], force_baseline);
+}
+#endif
+
+
+GLOBAL(void)
+jpeg_set_linear_quality(j_compress_ptr cinfo, int scale_factor,
+                        boolean force_baseline)
+/* Set or change the 'quality' (quantization) setting, using default tables
+ * and a straight percentage-scaling quality scale.  In most cases it's better
+ * to use jpeg_set_quality (below); this entry point is provided for
+ * applications that insist on a linear percentage scaling.
+ */
+{
+  /* Set up two quantization tables using the specified scaling */
+  jpeg_add_quant_table(cinfo, 0, std_luminance_quant_tbl,
+                       scale_factor, force_baseline);
+  jpeg_add_quant_table(cinfo, 1, std_chrominance_quant_tbl,
+                       scale_factor, force_baseline);
+}
+
+
+GLOBAL(int)
+jpeg_quality_scaling(int quality)
+/* Convert a user-specified quality rating to a percentage scaling factor
+ * for an underlying quantization table, using our recommended scaling curve.
+ * The input 'quality' factor should be 0 (terrible) to 100 (very good).
+ */
+{
+  /* Safety limit on quality factor.  Convert 0 to 1 to avoid zero divide. */
+  if (quality <= 0) quality = 1;
+  if (quality > 100) quality = 100;
+
+  /* The basic table is used as-is (scaling 100) for a quality of 50.
+   * Qualities 50..100 are converted to scaling percentage 200 - 2*Q;
+   * note that at Q=100 the scaling is 0, which will cause jpeg_add_quant_table
+   * to make all the table entries 1 (hence, minimum quantization loss).
+   * Qualities 1..50 are converted to scaling percentage 5000/Q.
+   */
+  if (quality < 50)
+    quality = 5000 / quality;
+  else
+    quality = 200 - quality * 2;
+
+  return quality;
+}
+
+
+GLOBAL(void)
+jpeg_set_quality(j_compress_ptr cinfo, int quality, boolean force_baseline)
+/* Set or change the 'quality' (quantization) setting, using default tables.
+ * This is the standard quality-adjusting entry point for typical user
+ * interfaces; only those who want detailed control over quantization tables
+ * would use the preceding three routines directly.
+ */
+{
+  /* Convert user 0-100 rating to percentage scaling */
+  quality = jpeg_quality_scaling(quality);
+
+  /* Set up standard quality tables */
+  jpeg_set_linear_quality(cinfo, quality, force_baseline);
+}
+
+
+/*
+ * Default parameter setup for compression.
+ *
+ * Applications that don't choose to use this routine must do their
+ * own setup of all these parameters.  Alternately, you can call this
+ * to establish defaults and then alter parameters selectively.  This
+ * is the recommended approach since, if we add any new parameters,
+ * your code will still work (they'll be set to reasonable defaults).
+ */
+
+GLOBAL(void)
+jpeg_set_defaults(j_compress_ptr cinfo)
+{
+  int i;
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* Allocate comp_info array large enough for maximum component count.
+   * Array is made permanent in case application wants to compress
+   * multiple images at same param settings.
+   */
+  if (cinfo->comp_info == NULL)
+    cinfo->comp_info = (jpeg_component_info *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
+                                  MAX_COMPONENTS * sizeof(jpeg_component_info));
+
+  /* Initialize everything not dependent on the color space */
+
+#if JPEG_LIB_VERSION >= 70
+  cinfo->scale_num = 1;         /* 1:1 scaling */
+  cinfo->scale_denom = 1;
+#endif
+  /* Set up two quantization tables using default quality of 75 */
+  jpeg_set_quality(cinfo, 75, TRUE);
+  /* Set up two Huffman tables */
+  std_huff_tables((j_common_ptr)cinfo);
+
+  /* Initialize default arithmetic coding conditioning */
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    cinfo->arith_dc_L[i] = 0;
+    cinfo->arith_dc_U[i] = 1;
+    cinfo->arith_ac_K[i] = 5;
+  }
+
+  /* Default is no multiple-scan output */
+  cinfo->scan_info = NULL;
+  cinfo->num_scans = 0;
+
+  /* Default is lossy output */
+  cinfo->master->lossless = FALSE;
+
+  /* Expect normal source image, not raw downsampled data */
+  cinfo->raw_data_in = FALSE;
+
+  /* Use Huffman coding, not arithmetic coding, by default */
+  cinfo->arith_code = FALSE;
+
+  /* By default, don't do extra passes to optimize entropy coding */
+  cinfo->optimize_coding = FALSE;
+  /* The standard Huffman tables are only valid for 8-bit data precision.
+   * If the precision is higher, force optimization on so that usable
+   * tables will be computed.  This test can be removed if default tables
+   * are supplied that are valid for the desired precision.
+   */
+  if (cinfo->data_precision == 12)
+    cinfo->optimize_coding = TRUE;
+
+  /* By default, use the simpler non-cosited sampling alignment */
+  cinfo->CCIR601_sampling = FALSE;
+
+#if JPEG_LIB_VERSION >= 70
+  /* By default, apply fancy downsampling */
+  cinfo->do_fancy_downsampling = TRUE;
+#endif
+
+  /* No input smoothing */
+  cinfo->smoothing_factor = 0;
+
+  /* DCT algorithm preference */
+  cinfo->dct_method = JDCT_DEFAULT;
+
+  /* No restart markers */
+  cinfo->restart_interval = 0;
+  cinfo->restart_in_rows = 0;
+
+  /* Fill in default JFIF marker parameters.  Note that whether the marker
+   * will actually be written is determined by jpeg_set_colorspace.
+   *
+   * By default, the library emits JFIF version code 1.01.
+   * An application that wants to emit JFIF 1.02 extension markers should set
+   * JFIF_minor_version to 2.  We could probably get away with just defaulting
+   * to 1.02, but there may still be some decoders in use that will complain
+   * about that; saying 1.01 should minimize compatibility problems.
+   */
+  cinfo->JFIF_major_version = 1; /* Default JFIF version = 1.01 */
+  cinfo->JFIF_minor_version = 1;
+  cinfo->density_unit = 0;      /* Pixel size is unknown by default */
+  cinfo->X_density = 1;         /* Pixel aspect ratio is square by default */
+  cinfo->Y_density = 1;
+
+  /* Choose JPEG colorspace based on input space, set defaults accordingly */
+
+  jpeg_default_colorspace(cinfo);
+}
+
+
+/*
+ * Select an appropriate JPEG colorspace for in_color_space.
+ */
+
+GLOBAL(void)
+jpeg_default_colorspace(j_compress_ptr cinfo)
+{
+  switch (cinfo->in_color_space) {
+  case JCS_GRAYSCALE:
+    jpeg_set_colorspace(cinfo, JCS_GRAYSCALE);
+    break;
+  case JCS_RGB:
+  case JCS_EXT_RGB:
+  case JCS_EXT_RGBX:
+  case JCS_EXT_BGR:
+  case JCS_EXT_BGRX:
+  case JCS_EXT_XBGR:
+  case JCS_EXT_XRGB:
+  case JCS_EXT_RGBA:
+  case JCS_EXT_BGRA:
+  case JCS_EXT_ABGR:
+  case JCS_EXT_ARGB:
+#ifdef C_LOSSLESS_SUPPORTED
+    if (cinfo->master->lossless)
+      jpeg_set_colorspace(cinfo, JCS_RGB);
+    else
+#endif
+      jpeg_set_colorspace(cinfo, JCS_YCbCr);
+    break;
+  case JCS_YCbCr:
+    jpeg_set_colorspace(cinfo, JCS_YCbCr);
+    break;
+  case JCS_CMYK:
+    jpeg_set_colorspace(cinfo, JCS_CMYK); /* By default, no translation */
+    break;
+  case JCS_YCCK:
+    jpeg_set_colorspace(cinfo, JCS_YCCK);
+    break;
+  case JCS_UNKNOWN:
+    jpeg_set_colorspace(cinfo, JCS_UNKNOWN);
+    break;
+  default:
+    ERREXIT(cinfo, JERR_BAD_IN_COLORSPACE);
+  }
+}
+
+
+/*
+ * Set the JPEG colorspace, and choose colorspace-dependent default values.
+ */
+
+GLOBAL(void)
+jpeg_set_colorspace(j_compress_ptr cinfo, J_COLOR_SPACE colorspace)
+{
+  jpeg_component_info *compptr;
+  int ci;
+
+#define SET_COMP(index, id, hsamp, vsamp, quant, dctbl, actbl) \
+  (compptr = &cinfo->comp_info[index], \
+   compptr->component_id = (id), \
+   compptr->h_samp_factor = (hsamp), \
+   compptr->v_samp_factor = (vsamp), \
+   compptr->quant_tbl_no = (quant), \
+   compptr->dc_tbl_no = (dctbl), \
+   compptr->ac_tbl_no = (actbl) )
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* For all colorspaces, we use Q and Huff tables 0 for luminance components,
+   * tables 1 for chrominance components.
+   */
+
+  cinfo->jpeg_color_space = colorspace;
+
+  cinfo->write_JFIF_header = FALSE; /* No marker for non-JFIF colorspaces */
+  cinfo->write_Adobe_marker = FALSE; /* write no Adobe marker by default */
+
+  switch (colorspace) {
+  case JCS_GRAYSCALE:
+    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
+    cinfo->num_components = 1;
+    /* JFIF specifies component ID 1 */
+    SET_COMP(0, 1, 1, 1, 0, 0, 0);
+    break;
+  case JCS_RGB:
+    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag RGB */
+    cinfo->num_components = 3;
+    SET_COMP(0, 0x52 /* 'R' */, 1, 1, 0, 0, 0);
+    SET_COMP(1, 0x47 /* 'G' */, 1, 1, 0, 0, 0);
+    SET_COMP(2, 0x42 /* 'B' */, 1, 1, 0, 0, 0);
+    break;
+  case JCS_YCbCr:
+    cinfo->write_JFIF_header = TRUE; /* Write a JFIF marker */
+    cinfo->num_components = 3;
+    /* JFIF specifies component IDs 1,2,3 */
+    /* We default to 2x2 subsamples of chrominance */
+    SET_COMP(0, 1, 2, 2, 0, 0, 0);
+    SET_COMP(1, 2, 1, 1, 1, 1, 1);
+    SET_COMP(2, 3, 1, 1, 1, 1, 1);
+    break;
+  case JCS_CMYK:
+    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag CMYK */
+    cinfo->num_components = 4;
+    SET_COMP(0, 0x43 /* 'C' */, 1, 1, 0, 0, 0);
+    SET_COMP(1, 0x4D /* 'M' */, 1, 1, 0, 0, 0);
+    SET_COMP(2, 0x59 /* 'Y' */, 1, 1, 0, 0, 0);
+    SET_COMP(3, 0x4B /* 'K' */, 1, 1, 0, 0, 0);
+    break;
+  case JCS_YCCK:
+    cinfo->write_Adobe_marker = TRUE; /* write Adobe marker to flag YCCK */
+    cinfo->num_components = 4;
+    SET_COMP(0, 1, 2, 2, 0, 0, 0);
+    SET_COMP(1, 2, 1, 1, 1, 1, 1);
+    SET_COMP(2, 3, 1, 1, 1, 1, 1);
+    SET_COMP(3, 4, 2, 2, 0, 0, 0);
+    break;
+  case JCS_UNKNOWN:
+    cinfo->num_components = cinfo->input_components;
+    if (cinfo->num_components < 1 || cinfo->num_components > MAX_COMPONENTS)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+               MAX_COMPONENTS);
+    for (ci = 0; ci < cinfo->num_components; ci++) {
+      SET_COMP(ci, ci, 1, 1, 0, 0, 0);
+    }
+    break;
+  default:
+    ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+  }
+}
+
+
+#ifdef C_PROGRESSIVE_SUPPORTED
+
+LOCAL(jpeg_scan_info *)
+fill_a_scan(jpeg_scan_info *scanptr, int ci, int Ss, int Se, int Ah, int Al)
+/* Support routine: generate one scan for specified component */
+{
+  scanptr->comps_in_scan = 1;
+  scanptr->component_index[0] = ci;
+  scanptr->Ss = Ss;
+  scanptr->Se = Se;
+  scanptr->Ah = Ah;
+  scanptr->Al = Al;
+  scanptr++;
+  return scanptr;
+}
+
+LOCAL(jpeg_scan_info *)
+fill_scans(jpeg_scan_info *scanptr, int ncomps, int Ss, int Se, int Ah, int Al)
+/* Support routine: generate one scan for each component */
+{
+  int ci;
+
+  for (ci = 0; ci < ncomps; ci++) {
+    scanptr->comps_in_scan = 1;
+    scanptr->component_index[0] = ci;
+    scanptr->Ss = Ss;
+    scanptr->Se = Se;
+    scanptr->Ah = Ah;
+    scanptr->Al = Al;
+    scanptr++;
+  }
+  return scanptr;
+}
+
+LOCAL(jpeg_scan_info *)
+fill_dc_scans(jpeg_scan_info *scanptr, int ncomps, int Ah, int Al)
+/* Support routine: generate interleaved DC scan if possible, else N scans */
+{
+  int ci;
+
+  if (ncomps <= MAX_COMPS_IN_SCAN) {
+    /* Single interleaved DC scan */
+    scanptr->comps_in_scan = ncomps;
+    for (ci = 0; ci < ncomps; ci++)
+      scanptr->component_index[ci] = ci;
+    scanptr->Ss = scanptr->Se = 0;
+    scanptr->Ah = Ah;
+    scanptr->Al = Al;
+    scanptr++;
+  } else {
+    /* Noninterleaved DC scan for each component */
+    scanptr = fill_scans(scanptr, ncomps, 0, 0, Ah, Al);
+  }
+  return scanptr;
+}
+
+
+/*
+ * Create a recommended progressive-JPEG script.
+ * cinfo->num_components and cinfo->jpeg_color_space must be correct.
+ */
+
+GLOBAL(void)
+jpeg_simple_progression(j_compress_ptr cinfo)
+{
+  int ncomps = cinfo->num_components;
+  int nscans;
+  jpeg_scan_info *scanptr;
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+#ifdef C_LOSSLESS_SUPPORTED
+  if (cinfo->master->lossless) {
+    cinfo->master->lossless = FALSE;
+    jpeg_default_colorspace(cinfo);
+  }
+#endif
+
+  /* Figure space needed for script.  Calculation must match code below! */
+  if (ncomps == 3 && cinfo->jpeg_color_space == JCS_YCbCr) {
+    /* Custom script for YCbCr color images. */
+    nscans = 10;
+  } else {
+    /* All-purpose script for other color spaces. */
+    if (ncomps > MAX_COMPS_IN_SCAN)
+      nscans = 6 * ncomps;      /* 2 DC + 4 AC scans per component */
+    else
+      nscans = 2 + 4 * ncomps;  /* 2 DC scans; 4 AC scans per component */
+  }
+
+  /* Allocate space for script.
+   * We need to put it in the permanent pool in case the application performs
+   * multiple compressions without changing the settings.  To avoid a memory
+   * leak if jpeg_simple_progression is called repeatedly for the same JPEG
+   * object, we try to re-use previously allocated space, and we allocate
+   * enough space to handle YCbCr even if initially asked for grayscale.
+   */
+  if (cinfo->script_space == NULL || cinfo->script_space_size < nscans) {
+    cinfo->script_space_size = MAX(nscans, 10);
+    cinfo->script_space = (jpeg_scan_info *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
+                        cinfo->script_space_size * sizeof(jpeg_scan_info));
+  }
+  scanptr = cinfo->script_space;
+  cinfo->scan_info = scanptr;
+  cinfo->num_scans = nscans;
+
+  if (ncomps == 3 && cinfo->jpeg_color_space == JCS_YCbCr) {
+    /* Custom script for YCbCr color images. */
+    /* Initial DC scan */
+    scanptr = fill_dc_scans(scanptr, ncomps, 0, 1);
+    /* Initial AC scan: get some luma data out in a hurry */
+    scanptr = fill_a_scan(scanptr, 0, 1, 5, 0, 2);
+    /* Chroma data is too small to be worth expending many scans on */
+    scanptr = fill_a_scan(scanptr, 2, 1, 63, 0, 1);
+    scanptr = fill_a_scan(scanptr, 1, 1, 63, 0, 1);
+    /* Complete spectral selection for luma AC */
+    scanptr = fill_a_scan(scanptr, 0, 6, 63, 0, 2);
+    /* Refine next bit of luma AC */
+    scanptr = fill_a_scan(scanptr, 0, 1, 63, 2, 1);
+    /* Finish DC successive approximation */
+    scanptr = fill_dc_scans(scanptr, ncomps, 1, 0);
+    /* Finish AC successive approximation */
+    scanptr = fill_a_scan(scanptr, 2, 1, 63, 1, 0);
+    scanptr = fill_a_scan(scanptr, 1, 1, 63, 1, 0);
+    /* Luma bottom bit comes last since it's usually largest scan */
+    scanptr = fill_a_scan(scanptr, 0, 1, 63, 1, 0);
+  } else {
+    /* All-purpose script for other color spaces. */
+    /* Successive approximation first pass */
+    scanptr = fill_dc_scans(scanptr, ncomps, 0, 1);
+    scanptr = fill_scans(scanptr, ncomps, 1, 5, 0, 2);
+    scanptr = fill_scans(scanptr, ncomps, 6, 63, 0, 2);
+    /* Successive approximation second pass */
+    scanptr = fill_scans(scanptr, ncomps, 1, 63, 2, 1);
+    /* Successive approximation final pass */
+    scanptr = fill_dc_scans(scanptr, ncomps, 1, 0);
+    scanptr = fill_scans(scanptr, ncomps, 1, 63, 1, 0);
+  }
+}
+
+#endif /* C_PROGRESSIVE_SUPPORTED */
+
+
+#ifdef C_LOSSLESS_SUPPORTED
+
+/*
+ * Enable lossless mode.
+ */
+
+GLOBAL(void)
+jpeg_enable_lossless(j_compress_ptr cinfo, int predictor_selection_value,
+                     int point_transform)
+{
+  /* Safety check to ensure start_compress not called yet. */
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  cinfo->master->lossless = TRUE;
+  cinfo->Ss = predictor_selection_value;
+  cinfo->Se = 0;
+  cinfo->Ah = 0;
+  cinfo->Al = point_transform;
+
+  /* The JPEG spec simply gives the range 0..15 for Al (Pt), but that seems
+   * wrong: the upper bound ought to depend on data precision.  Perhaps they
+   * really meant 0..N-1 for N-bit precision, which is what we allow here.
+   * Values greater than or equal to the data precision will result in a blank
+   * image.
+   */
+  if (cinfo->Ss < 1 || cinfo->Ss > 7 ||
+      cinfo->Al < 0 || cinfo->Al >= cinfo->data_precision)
+    ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
+             cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+}
+
+#endif /* C_LOSSLESS_SUPPORTED */
diff --git a/thirdparty/libjpeg-turbo/src/jcphuff.c b/thirdparty/libjpeg-turbo/src/jcphuff.c
new file mode 100644
index 00000000000..58287328d2d
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jcphuff.c
@@ -0,0 +1,1102 @@
+/*
+ * jcphuff.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1995-1997, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2011, 2015, 2018, 2021-2022, 2024, D. R. Commander.
+ * Copyright (C) 2016, 2018, 2022, Matthieu Darbois.
+ * Copyright (C) 2020, Arm Limited.
+ * Copyright (C) 2021, Alex Richardson.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains Huffman entropy encoding routines for progressive JPEG.
+ *
+ * We do not support output suspension in this module, since the library
+ * currently does not allow multiple-scan files to be written with output
+ * suspension.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#ifdef WITH_SIMD
+#include "jsimd.h"
+#else
+#include "jchuff.h"             /* Declarations shared with jc*huff.c */
+#endif
+#include <limits.h>
+
+#ifdef HAVE_INTRIN_H
+#include <intrin.h>
+#ifdef _MSC_VER
+#ifdef HAVE_BITSCANFORWARD64
+#pragma intrinsic(_BitScanForward64)
+#endif
+#ifdef HAVE_BITSCANFORWARD
+#pragma intrinsic(_BitScanForward)
+#endif
+#endif
+#endif
+
+#ifdef C_PROGRESSIVE_SUPPORTED
+
+#include "jpeg_nbits.h"
+
+
+/* Expanded entropy encoder object for progressive Huffman encoding. */
+
+typedef struct {
+  struct jpeg_entropy_encoder pub; /* public fields */
+
+  /* Pointer to routine to prepare data for encode_mcu_AC_first() */
+  void (*AC_first_prepare) (const JCOEF *block,
+                            const int *jpeg_natural_order_start, int Sl,
+                            int Al, UJCOEF *values, size_t *zerobits);
+  /* Pointer to routine to prepare data for encode_mcu_AC_refine() */
+  int (*AC_refine_prepare) (const JCOEF *block,
+                            const int *jpeg_natural_order_start, int Sl,
+                            int Al, UJCOEF *absvalues, size_t *bits);
+
+  /* Mode flag: TRUE for optimization, FALSE for actual data output */
+  boolean gather_statistics;
+
+  /* Bit-level coding status.
+   * next_output_byte/free_in_buffer are local copies of cinfo->dest fields.
+   */
+  JOCTET *next_output_byte;     /* => next byte to write in buffer */
+  size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
+  size_t put_buffer;            /* current bit-accumulation buffer */
+  int put_bits;                 /* # of bits now in it */
+  j_compress_ptr cinfo;         /* link to cinfo (needed for dump_buffer) */
+
+  /* Coding status for DC components */
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+
+  /* Coding status for AC components */
+  int ac_tbl_no;                /* the table number of the single component */
+  unsigned int EOBRUN;          /* run length of EOBs */
+  unsigned int BE;              /* # of buffered correction bits before MCU */
+  char *bit_buffer;             /* buffer for correction bits (1 per char) */
+  /* packing correction bits tightly would save some space but cost time... */
+
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
+  int next_restart_num;         /* next restart number to write (0-7) */
+
+  /* Pointers to derived tables (these workspaces have image lifespan).
+   * Since any one scan codes only DC or only AC, we only need one set
+   * of tables, not one for DC and one for AC.
+   */
+  c_derived_tbl *derived_tbls[NUM_HUFF_TBLS];
+
+  /* Statistics tables for optimization; again, one set is enough */
+  long *count_ptrs[NUM_HUFF_TBLS];
+} phuff_entropy_encoder;
+
+typedef phuff_entropy_encoder *phuff_entropy_ptr;
+
+/* MAX_CORR_BITS is the number of bits the AC refinement correction-bit
+ * buffer can hold.  Larger sizes may slightly improve compression, but
+ * 1000 is already well into the realm of overkill.
+ * The minimum safe size is 64 bits.
+ */
+
+#define MAX_CORR_BITS  1000     /* Max # of correction bits I can buffer */
+
+/* IRIGHT_SHIFT is like RIGHT_SHIFT, but works on int rather than JLONG.
+ * We assume that int right shift is unsigned if JLONG right shift is,
+ * which should be safe.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define ISHIFT_TEMPS    int ishift_temp;
+#define IRIGHT_SHIFT(x, shft) \
+  ((ishift_temp = (x)) < 0 ? \
+   (ishift_temp >> (shft)) | ((~0) << (16 - (shft))) : \
+   (ishift_temp >> (shft)))
+#else
+#define ISHIFT_TEMPS
+#define IRIGHT_SHIFT(x, shft)   ((x) >> (shft))
+#endif
+
+#define PAD(v, p)  ((v + (p) - 1) & (~((p) - 1)))
+
+/* Forward declarations */
+METHODDEF(boolean) encode_mcu_DC_first(j_compress_ptr cinfo,
+                                       JBLOCKROW *MCU_data);
+METHODDEF(void) encode_mcu_AC_first_prepare
+  (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
+   UJCOEF *values, size_t *zerobits);
+METHODDEF(boolean) encode_mcu_AC_first(j_compress_ptr cinfo,
+                                       JBLOCKROW *MCU_data);
+METHODDEF(boolean) encode_mcu_DC_refine(j_compress_ptr cinfo,
+                                        JBLOCKROW *MCU_data);
+METHODDEF(int) encode_mcu_AC_refine_prepare
+  (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
+   UJCOEF *absvalues, size_t *bits);
+METHODDEF(boolean) encode_mcu_AC_refine(j_compress_ptr cinfo,
+                                        JBLOCKROW *MCU_data);
+METHODDEF(void) finish_pass_phuff(j_compress_ptr cinfo);
+METHODDEF(void) finish_pass_gather_phuff(j_compress_ptr cinfo);
+
+
+/* Count bit loop zeroes */
+INLINE
+METHODDEF(int)
+count_zeroes(size_t *x)
+{
+#if defined(HAVE_BUILTIN_CTZL)
+  int result;
+  result = __builtin_ctzl(*x);
+  *x >>= result;
+#elif defined(HAVE_BITSCANFORWARD64)
+  unsigned long result;
+  _BitScanForward64(&result, *x);
+  *x >>= result;
+#elif defined(HAVE_BITSCANFORWARD)
+  unsigned long result;
+  _BitScanForward(&result, *x);
+  *x >>= result;
+#else
+  int result = 0;
+  while ((*x & 1) == 0) {
+    ++result;
+    *x >>= 1;
+  }
+#endif
+  return (int)result;
+}
+
+
+/*
+ * Initialize for a Huffman-compressed scan using progressive JPEG.
+ */
+
+METHODDEF(void)
+start_pass_phuff(j_compress_ptr cinfo, boolean gather_statistics)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
+  boolean is_DC_band;
+  int ci, tbl;
+  jpeg_component_info *compptr;
+
+  entropy->cinfo = cinfo;
+  entropy->gather_statistics = gather_statistics;
+
+  is_DC_band = (cinfo->Ss == 0);
+
+  /* We assume jcmaster.c already validated the scan parameters. */
+
+  /* Select execution routines */
+  if (cinfo->Ah == 0) {
+    if (is_DC_band)
+      entropy->pub.encode_mcu = encode_mcu_DC_first;
+    else
+      entropy->pub.encode_mcu = encode_mcu_AC_first;
+#ifdef WITH_SIMD
+    if (jsimd_can_encode_mcu_AC_first_prepare())
+      entropy->AC_first_prepare = jsimd_encode_mcu_AC_first_prepare;
+    else
+#endif
+      entropy->AC_first_prepare = encode_mcu_AC_first_prepare;
+  } else {
+    if (is_DC_band)
+      entropy->pub.encode_mcu = encode_mcu_DC_refine;
+    else {
+      entropy->pub.encode_mcu = encode_mcu_AC_refine;
+#ifdef WITH_SIMD
+      if (jsimd_can_encode_mcu_AC_refine_prepare())
+        entropy->AC_refine_prepare = jsimd_encode_mcu_AC_refine_prepare;
+      else
+#endif
+        entropy->AC_refine_prepare = encode_mcu_AC_refine_prepare;
+      /* AC refinement needs a correction bit buffer */
+      if (entropy->bit_buffer == NULL)
+        entropy->bit_buffer = (char *)
+          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                      MAX_CORR_BITS * sizeof(char));
+    }
+  }
+  if (gather_statistics)
+    entropy->pub.finish_pass = finish_pass_gather_phuff;
+  else
+    entropy->pub.finish_pass = finish_pass_phuff;
+
+  /* Only DC coefficients may be interleaved, so cinfo->comps_in_scan = 1
+   * for AC coefficients.
+   */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* Initialize DC predictions to 0 */
+    entropy->last_dc_val[ci] = 0;
+    /* Get table index */
+    if (is_DC_band) {
+      if (cinfo->Ah != 0)       /* DC refinement needs no table */
+        continue;
+      tbl = compptr->dc_tbl_no;
+    } else {
+      entropy->ac_tbl_no = tbl = compptr->ac_tbl_no;
+    }
+    if (gather_statistics) {
+      /* Check for invalid table index */
+      /* (make_c_derived_tbl does this in the other path) */
+      if (tbl < 0 || tbl >= NUM_HUFF_TBLS)
+        ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tbl);
+      /* Allocate and zero the statistics tables */
+      /* Note that jpeg_gen_optimal_table expects 257 entries in each table! */
+      if (entropy->count_ptrs[tbl] == NULL)
+        entropy->count_ptrs[tbl] = (long *)
+          (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                      257 * sizeof(long));
+      memset(entropy->count_ptrs[tbl], 0, 257 * sizeof(long));
+    } else {
+      /* Compute derived values for Huffman table */
+      /* We may do this more than once for a table, but it's not expensive */
+      jpeg_make_c_derived_tbl(cinfo, is_DC_band, tbl,
+                              &entropy->derived_tbls[tbl]);
+    }
+  }
+
+  /* Initialize AC stuff */
+  entropy->EOBRUN = 0;
+  entropy->BE = 0;
+
+  /* Initialize bit buffer to empty */
+  entropy->put_buffer = 0;
+  entropy->put_bits = 0;
+
+  /* Initialize restart stuff */
+  entropy->restarts_to_go = cinfo->restart_interval;
+  entropy->next_restart_num = 0;
+}
+
+
+/* Outputting bytes to the file.
+ * NB: these must be called only when actually outputting,
+ * that is, entropy->gather_statistics == FALSE.
+ */
+
+/* Emit a byte */
+#define emit_byte(entropy, val) { \
+  *(entropy)->next_output_byte++ = (JOCTET)(val); \
+  if (--(entropy)->free_in_buffer == 0) \
+    dump_buffer(entropy); \
+}
+
+
+LOCAL(void)
+dump_buffer(phuff_entropy_ptr entropy)
+/* Empty the output buffer; we do not support suspension in this module. */
+{
+  struct jpeg_destination_mgr *dest = entropy->cinfo->dest;
+
+  if (!(*dest->empty_output_buffer) (entropy->cinfo))
+    ERREXIT(entropy->cinfo, JERR_CANT_SUSPEND);
+  /* After a successful buffer dump, must reset buffer pointers */
+  entropy->next_output_byte = dest->next_output_byte;
+  entropy->free_in_buffer = dest->free_in_buffer;
+}
+
+
+/* Outputting bits to the file */
+
+/* Only the right 24 bits of put_buffer are used; the valid bits are
+ * left-justified in this part.  At most 16 bits can be passed to emit_bits
+ * in one call, and we never retain more than 7 bits in put_buffer
+ * between calls, so 24 bits are sufficient.
+ */
+
+LOCAL(void)
+emit_bits(phuff_entropy_ptr entropy, unsigned int code, int size)
+/* Emit some bits, unless we are in gather mode */
+{
+  /* This routine is heavily used, so it's worth coding tightly. */
+  register size_t put_buffer = (size_t)code;
+  register int put_bits = entropy->put_bits;
+
+  /* if size is 0, caller used an invalid Huffman table entry */
+  if (size == 0)
+    ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
+
+  if (entropy->gather_statistics)
+    return;                     /* do nothing if we're only getting stats */
+
+  put_buffer &= (((size_t)1) << size) - 1; /* mask off any extra bits in code */
+
+  put_bits += size;             /* new number of bits in buffer */
+
+  put_buffer <<= 24 - put_bits; /* align incoming bits */
+
+  put_buffer |= entropy->put_buffer; /* and merge with old buffer contents */
+
+  while (put_bits >= 8) {
+    int c = (int)((put_buffer >> 16) & 0xFF);
+
+    emit_byte(entropy, c);
+    if (c == 0xFF) {            /* need to stuff a zero byte? */
+      emit_byte(entropy, 0);
+    }
+    put_buffer <<= 8;
+    put_bits -= 8;
+  }
+
+  entropy->put_buffer = put_buffer; /* update variables */
+  entropy->put_bits = put_bits;
+}
+
+
+LOCAL(void)
+flush_bits(phuff_entropy_ptr entropy)
+{
+  emit_bits(entropy, 0x7F, 7); /* fill any partial byte with ones */
+  entropy->put_buffer = 0;     /* and reset bit-buffer to empty */
+  entropy->put_bits = 0;
+}
+
+
+/*
+ * Emit (or just count) a Huffman symbol.
+ */
+
+LOCAL(void)
+emit_symbol(phuff_entropy_ptr entropy, int tbl_no, int symbol)
+{
+  if (entropy->gather_statistics)
+    entropy->count_ptrs[tbl_no][symbol]++;
+  else {
+    c_derived_tbl *tbl = entropy->derived_tbls[tbl_no];
+    emit_bits(entropy, tbl->ehufco[symbol], tbl->ehufsi[symbol]);
+  }
+}
+
+
+/*
+ * Emit bits from a correction bit buffer.
+ */
+
+LOCAL(void)
+emit_buffered_bits(phuff_entropy_ptr entropy, char *bufstart,
+                   unsigned int nbits)
+{
+  if (entropy->gather_statistics)
+    return;                     /* no real work */
+
+  while (nbits > 0) {
+    emit_bits(entropy, (unsigned int)(*bufstart), 1);
+    bufstart++;
+    nbits--;
+  }
+}
+
+
+/*
+ * Emit any pending EOBRUN symbol.
+ */
+
+LOCAL(void)
+emit_eobrun(phuff_entropy_ptr entropy)
+{
+  register int temp, nbits;
+
+  if (entropy->EOBRUN > 0) {    /* if there is any pending EOBRUN */
+    temp = entropy->EOBRUN;
+    nbits = JPEG_NBITS_NONZERO(temp) - 1;
+    /* safety check: shouldn't happen given limited correction-bit buffer */
+    if (nbits > 14)
+      ERREXIT(entropy->cinfo, JERR_HUFF_MISSING_CODE);
+
+    emit_symbol(entropy, entropy->ac_tbl_no, nbits << 4);
+    if (nbits)
+      emit_bits(entropy, entropy->EOBRUN, nbits);
+
+    entropy->EOBRUN = 0;
+
+    /* Emit any buffered correction bits */
+    emit_buffered_bits(entropy, entropy->bit_buffer, entropy->BE);
+    entropy->BE = 0;
+  }
+}
+
+
+/*
+ * Emit a restart marker & resynchronize predictions.
+ */
+
+LOCAL(void)
+emit_restart(phuff_entropy_ptr entropy, int restart_num)
+{
+  int ci;
+
+  emit_eobrun(entropy);
+
+  if (!entropy->gather_statistics) {
+    flush_bits(entropy);
+    emit_byte(entropy, 0xFF);
+    emit_byte(entropy, JPEG_RST0 + restart_num);
+  }
+
+  if (entropy->cinfo->Ss == 0) {
+    /* Re-initialize DC predictions to 0 */
+    for (ci = 0; ci < entropy->cinfo->comps_in_scan; ci++)
+      entropy->last_dc_val[ci] = 0;
+  } else {
+    /* Re-initialize all AC-related fields to 0 */
+    entropy->EOBRUN = 0;
+    entropy->BE = 0;
+  }
+}
+
+
+/*
+ * MCU encoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
+  register int temp, temp2, temp3;
+  register int nbits;
+  int blkn, ci;
+  int Al = cinfo->Al;
+  JBLOCKROW block;
+  jpeg_component_info *compptr;
+  ISHIFT_TEMPS
+  int max_coef_bits = cinfo->data_precision + 2;
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart(entropy, entropy->next_restart_num);
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+
+    /* Compute the DC value after the required point transform by Al.
+     * This is simply an arithmetic right shift.
+     */
+    temp2 = IRIGHT_SHIFT((int)((*block)[0]), Al);
+
+    /* DC differences are figured on the point-transformed values. */
+    temp = temp2 - entropy->last_dc_val[ci];
+    entropy->last_dc_val[ci] = temp2;
+
+    /* Encode the DC coefficient difference per section G.1.2.1 */
+
+    /* This is a well-known technique for obtaining the absolute value without
+     * a branch.  It is derived from an assembly language technique presented
+     * in "How to Optimize for the Pentium Processors", Copyright (c) 1996,
+     * 1997 by Agner Fog.
+     */
+    temp3 = temp >> (CHAR_BIT * sizeof(int) - 1);
+    temp ^= temp3;
+    temp -= temp3;              /* temp is abs value of input */
+    /* For a negative input, want temp2 = bitwise complement of abs(input) */
+    temp2 = temp ^ temp3;
+
+    /* Find the number of bits needed for the magnitude of the coefficient */
+    nbits = JPEG_NBITS(temp);
+    /* Check for out-of-range coefficient values.
+     * Since we're encoding a difference, the range limit is twice as much.
+     */
+    if (nbits > max_coef_bits + 1)
+      ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+
+    /* Count/emit the Huffman-coded symbol for the number of bits */
+    emit_symbol(entropy, compptr->dc_tbl_no, nbits);
+
+    /* Emit that number of bits of the value, if positive, */
+    /* or the complement of its magnitude, if negative. */
+    if (nbits)                  /* emit_bits rejects calls with size 0 */
+      emit_bits(entropy, (unsigned int)temp2, nbits);
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Data preparation for encode_mcu_AC_first().
+ */
+
+#define COMPUTE_ABSVALUES_AC_FIRST(Sl) { \
+  for (k = 0; k < Sl; k++) { \
+    temp = block[jpeg_natural_order_start[k]]; \
+    if (temp == 0) \
+      continue; \
+    /* We must apply the point transform by Al.  For AC coefficients this \
+     * is an integer division with rounding towards 0.  To do this portably \
+     * in C, we shift after obtaining the absolute value; so the code is \
+     * interwoven with finding the abs value (temp) and output bits (temp2). \
+     */ \
+    temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
+    temp ^= temp2; \
+    temp -= temp2;              /* temp is abs value of input */ \
+    temp >>= Al;                /* apply the point transform */ \
+    /* Watch out for case that nonzero coef is zero after point transform */ \
+    if (temp == 0) \
+      continue; \
+    /* For a negative coef, want temp2 = bitwise complement of abs(coef) */ \
+    temp2 ^= temp; \
+    values[k] = (UJCOEF)temp; \
+    values[k + DCTSIZE2] = (UJCOEF)temp2; \
+    zerobits |= ((size_t)1U) << k; \
+  } \
+}
+
+METHODDEF(void)
+encode_mcu_AC_first_prepare(const JCOEF *block,
+                            const int *jpeg_natural_order_start, int Sl,
+                            int Al, UJCOEF *values, size_t *bits)
+{
+  register int k, temp, temp2;
+  size_t zerobits = 0U;
+  int Sl0 = Sl;
+
+#if SIZEOF_SIZE_T == 4
+  if (Sl0 > 32)
+    Sl0 = 32;
+#endif
+
+  COMPUTE_ABSVALUES_AC_FIRST(Sl0);
+
+  bits[0] = zerobits;
+#if SIZEOF_SIZE_T == 4
+  zerobits = 0U;
+
+  if (Sl > 32) {
+    Sl -= 32;
+    jpeg_natural_order_start += 32;
+    values += 32;
+
+    COMPUTE_ABSVALUES_AC_FIRST(Sl);
+  }
+  bits[1] = zerobits;
+#endif
+}
+
+/*
+ * MCU encoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+#define ENCODE_COEFS_AC_FIRST(label) { \
+  while (zerobits) { \
+    r = count_zeroes(&zerobits); \
+    cvalue += r; \
+label \
+    temp  = cvalue[0]; \
+    temp2 = cvalue[DCTSIZE2]; \
+    \
+    /* if run length > 15, must emit special run-length-16 codes (0xF0) */ \
+    while (r > 15) { \
+      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
+      r -= 16; \
+    } \
+    \
+    /* Find the number of bits needed for the magnitude of the coefficient */ \
+    nbits = JPEG_NBITS_NONZERO(temp);  /* there must be at least one 1 bit */ \
+    /* Check for out-of-range coefficient values */ \
+    if (nbits > max_coef_bits) \
+      ERREXIT(cinfo, JERR_BAD_DCT_COEF); \
+    \
+    /* Count/emit Huffman symbol for run length / number of bits */ \
+    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + nbits); \
+    \
+    /* Emit that number of bits of the value, if positive, */ \
+    /* or the complement of its magnitude, if negative. */ \
+    emit_bits(entropy, (unsigned int)temp2, nbits); \
+    \
+    cvalue++; \
+    zerobits >>= 1; \
+  } \
+}
+
+METHODDEF(boolean)
+encode_mcu_AC_first(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
+  register int temp, temp2;
+  register int nbits, r;
+  int Sl = cinfo->Se - cinfo->Ss + 1;
+  int Al = cinfo->Al;
+  UJCOEF values_unaligned[2 * DCTSIZE2 + 15];
+  UJCOEF *values;
+  const UJCOEF *cvalue;
+  size_t zerobits;
+  size_t bits[8 / SIZEOF_SIZE_T];
+  int max_coef_bits = cinfo->data_precision + 2;
+
+#ifdef ZERO_BUFFERS
+  memset(values_unaligned, 0, sizeof(values_unaligned));
+  memset(bits, 0, sizeof(bits));
+#endif
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart(entropy, entropy->next_restart_num);
+
+#ifdef WITH_SIMD
+  cvalue = values = (UJCOEF *)PAD((JUINTPTR)values_unaligned, 16);
+#else
+  /* Not using SIMD, so alignment is not needed */
+  cvalue = values = values_unaligned;
+#endif
+
+  /* Prepare data */
+  entropy->AC_first_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
+                            Sl, Al, values, bits);
+
+  zerobits = bits[0];
+#if SIZEOF_SIZE_T == 4
+  zerobits |= bits[1];
+#endif
+
+  /* Emit any pending EOBRUN */
+  if (zerobits && (entropy->EOBRUN > 0))
+    emit_eobrun(entropy);
+
+#if SIZEOF_SIZE_T == 4
+  zerobits = bits[0];
+#endif
+
+  /* Encode the AC coefficients per section G.1.2.2, fig. G.3 */
+
+  ENCODE_COEFS_AC_FIRST((void)0;);
+
+#if SIZEOF_SIZE_T == 4
+  zerobits = bits[1];
+  if (zerobits) {
+    int diff = ((values + DCTSIZE2 / 2) - cvalue);
+    r = count_zeroes(&zerobits);
+    r += diff;
+    cvalue += r;
+    goto first_iter_ac_first;
+  }
+
+  ENCODE_COEFS_AC_FIRST(first_iter_ac_first:);
+#endif
+
+  if (cvalue < (values + Sl)) { /* If there are trailing zeroes, */
+    entropy->EOBRUN++;          /* count an EOB */
+    if (entropy->EOBRUN == 0x7FFF)
+      emit_eobrun(entropy);     /* force it out to avoid overflow */
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU encoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component, although the spec
+ * is not very clear on the point.
+ */
+
+METHODDEF(boolean)
+encode_mcu_DC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
+  register int temp;
+  int blkn;
+  int Al = cinfo->Al;
+  JBLOCKROW block;
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart(entropy, entropy->next_restart_num);
+
+  /* Encode the MCU data blocks */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+
+    /* We simply emit the Al'th bit of the DC coefficient value. */
+    temp = (*block)[0];
+    emit_bits(entropy, (unsigned int)(temp >> Al), 1);
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Data preparation for encode_mcu_AC_refine().
+ */
+
+#define COMPUTE_ABSVALUES_AC_REFINE(Sl, koffset) { \
+  /* It is convenient to make a pre-pass to determine the transformed \
+   * coefficients' absolute values and the EOB position. \
+   */ \
+  for (k = 0; k < Sl; k++) { \
+    temp = block[jpeg_natural_order_start[k]]; \
+    /* We must apply the point transform by Al.  For AC coefficients this \
+     * is an integer division with rounding towards 0.  To do this portably \
+     * in C, we shift after obtaining the absolute value. \
+     */ \
+    temp2 = temp >> (CHAR_BIT * sizeof(int) - 1); \
+    temp ^= temp2; \
+    temp -= temp2;              /* temp is abs value of input */ \
+    temp >>= Al;                /* apply the point transform */ \
+    if (temp != 0) { \
+      zerobits |= ((size_t)1U) << k; \
+      signbits |= ((size_t)(temp2 + 1)) << k; \
+    } \
+    absvalues[k] = (UJCOEF)temp; /* save abs value for main pass */ \
+    if (temp == 1) \
+      EOB = k + koffset;        /* EOB = index of last newly-nonzero coef */ \
+  } \
+}
+
+METHODDEF(int)
+encode_mcu_AC_refine_prepare(const JCOEF *block,
+                             const int *jpeg_natural_order_start, int Sl,
+                             int Al, UJCOEF *absvalues, size_t *bits)
+{
+  register int k, temp, temp2;
+  int EOB = 0;
+  size_t zerobits = 0U, signbits = 0U;
+  int Sl0 = Sl;
+
+#if SIZEOF_SIZE_T == 4
+  if (Sl0 > 32)
+    Sl0 = 32;
+#endif
+
+  COMPUTE_ABSVALUES_AC_REFINE(Sl0, 0);
+
+  bits[0] = zerobits;
+#if SIZEOF_SIZE_T == 8
+  bits[1] = signbits;
+#else
+  bits[2] = signbits;
+
+  zerobits = 0U;
+  signbits = 0U;
+
+  if (Sl > 32) {
+    Sl -= 32;
+    jpeg_natural_order_start += 32;
+    absvalues += 32;
+
+    COMPUTE_ABSVALUES_AC_REFINE(Sl, 32);
+  }
+
+  bits[1] = zerobits;
+  bits[3] = signbits;
+#endif
+
+  return EOB;
+}
+
+
+/*
+ * MCU encoding for AC successive approximation refinement scan.
+ */
+
+#define ENCODE_COEFS_AC_REFINE(label) { \
+  while (zerobits) { \
+    idx = count_zeroes(&zerobits); \
+    r += idx; \
+    cabsvalue += idx; \
+    signbits >>= idx; \
+label \
+    /* Emit any required ZRLs, but not if they can be folded into EOB */ \
+    while (r > 15 && (cabsvalue <= EOBPTR)) { \
+      /* emit any pending EOBRUN and the BE correction bits */ \
+      emit_eobrun(entropy); \
+      /* Emit ZRL */ \
+      emit_symbol(entropy, entropy->ac_tbl_no, 0xF0); \
+      r -= 16; \
+      /* Emit buffered correction bits that must be associated with ZRL */ \
+      emit_buffered_bits(entropy, BR_buffer, BR); \
+      BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \
+      BR = 0; \
+    } \
+    \
+    temp = *cabsvalue++; \
+    \
+    /* If the coef was previously nonzero, it only needs a correction bit. \
+     * NOTE: a straight translation of the spec's figure G.7 would suggest \
+     * that we also need to test r > 15.  But if r > 15, we can only get here \
+     * if k > EOB, which implies that this coefficient is not 1. \
+     */ \
+    if (temp > 1) { \
+      /* The correction bit is the next bit of the absolute value. */ \
+      BR_buffer[BR++] = (char)(temp & 1); \
+      signbits >>= 1; \
+      zerobits >>= 1; \
+      continue; \
+    } \
+    \
+    /* Emit any pending EOBRUN and the BE correction bits */ \
+    emit_eobrun(entropy); \
+    \
+    /* Count/emit Huffman symbol for run length / number of bits */ \
+    emit_symbol(entropy, entropy->ac_tbl_no, (r << 4) + 1); \
+    \
+    /* Emit output bit for newly-nonzero coef */ \
+    temp = signbits & 1; /* ((*block)[jpeg_natural_order_start[k]] < 0) ? 0 : 1 */ \
+    emit_bits(entropy, (unsigned int)temp, 1); \
+    \
+    /* Emit buffered correction bits that must be associated with this code */ \
+    emit_buffered_bits(entropy, BR_buffer, BR); \
+    BR_buffer = entropy->bit_buffer; /* BE bits are gone now */ \
+    BR = 0; \
+    r = 0;                      /* reset zero run length */ \
+    signbits >>= 1; \
+    zerobits >>= 1; \
+  } \
+}
+
+METHODDEF(boolean)
+encode_mcu_AC_refine(j_compress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
+  register int temp, r, idx;
+  char *BR_buffer;
+  unsigned int BR;
+  int Sl = cinfo->Se - cinfo->Ss + 1;
+  int Al = cinfo->Al;
+  UJCOEF absvalues_unaligned[DCTSIZE2 + 15];
+  UJCOEF *absvalues;
+  const UJCOEF *cabsvalue, *EOBPTR;
+  size_t zerobits, signbits;
+  size_t bits[16 / SIZEOF_SIZE_T];
+
+#ifdef ZERO_BUFFERS
+  memset(absvalues_unaligned, 0, sizeof(absvalues_unaligned));
+  memset(bits, 0, sizeof(bits));
+#endif
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Emit restart marker if needed */
+  if (cinfo->restart_interval)
+    if (entropy->restarts_to_go == 0)
+      emit_restart(entropy, entropy->next_restart_num);
+
+#ifdef WITH_SIMD
+  cabsvalue = absvalues = (UJCOEF *)PAD((JUINTPTR)absvalues_unaligned, 16);
+#else
+  /* Not using SIMD, so alignment is not needed */
+  cabsvalue = absvalues = absvalues_unaligned;
+#endif
+
+  /* Prepare data */
+  EOBPTR = absvalues +
+    entropy->AC_refine_prepare(MCU_data[0][0], jpeg_natural_order + cinfo->Ss,
+                               Sl, Al, absvalues, bits);
+
+  /* Encode the AC coefficients per section G.1.2.3, fig. G.7 */
+
+  r = 0;                        /* r = run length of zeros */
+  BR = 0;                       /* BR = count of buffered bits added now */
+  BR_buffer = entropy->bit_buffer + entropy->BE; /* Append bits to buffer */
+
+  zerobits = bits[0];
+#if SIZEOF_SIZE_T == 8
+  signbits = bits[1];
+#else
+  signbits = bits[2];
+#endif
+  ENCODE_COEFS_AC_REFINE((void)0;);
+
+#if SIZEOF_SIZE_T == 4
+  zerobits = bits[1];
+  signbits = bits[3];
+
+  if (zerobits) {
+    int diff = ((absvalues + DCTSIZE2 / 2) - cabsvalue);
+    idx = count_zeroes(&zerobits);
+    signbits >>= idx;
+    idx += diff;
+    r += idx;
+    cabsvalue += idx;
+    goto first_iter_ac_refine;
+  }
+
+  ENCODE_COEFS_AC_REFINE(first_iter_ac_refine:);
+#endif
+
+  r |= (int)((absvalues + Sl) - cabsvalue);
+
+  if (r > 0 || BR > 0) {        /* If there are trailing zeroes, */
+    entropy->EOBRUN++;          /* count an EOB */
+    entropy->BE += BR;          /* concat my correction bits to older ones */
+    /* We force out the EOB if we risk either:
+     * 1. overflow of the EOB counter;
+     * 2. overflow of the correction bit buffer during the next MCU.
+     */
+    if (entropy->EOBRUN == 0x7FFF ||
+        entropy->BE > (MAX_CORR_BITS - DCTSIZE2 + 1))
+      emit_eobrun(entropy);
+  }
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+
+  /* Update restart-interval state too */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0) {
+      entropy->restarts_to_go = cinfo->restart_interval;
+      entropy->next_restart_num++;
+      entropy->next_restart_num &= 7;
+    }
+    entropy->restarts_to_go--;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Finish up at the end of a Huffman-compressed progressive scan.
+ */
+
+METHODDEF(void)
+finish_pass_phuff(j_compress_ptr cinfo)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
+
+  entropy->next_output_byte = cinfo->dest->next_output_byte;
+  entropy->free_in_buffer = cinfo->dest->free_in_buffer;
+
+  /* Flush out any buffered data */
+  emit_eobrun(entropy);
+  flush_bits(entropy);
+
+  cinfo->dest->next_output_byte = entropy->next_output_byte;
+  cinfo->dest->free_in_buffer = entropy->free_in_buffer;
+}
+
+
+/*
+ * Finish up a statistics-gathering pass and create the new Huffman tables.
+ */
+
+METHODDEF(void)
+finish_pass_gather_phuff(j_compress_ptr cinfo)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
+  boolean is_DC_band;
+  int ci, tbl;
+  jpeg_component_info *compptr;
+  JHUFF_TBL **htblptr;
+  boolean did[NUM_HUFF_TBLS];
+
+  /* Flush out buffered data (all we care about is counting the EOB symbol) */
+  emit_eobrun(entropy);
+
+  is_DC_band = (cinfo->Ss == 0);
+
+  /* It's important not to apply jpeg_gen_optimal_table more than once
+   * per table, because it clobbers the input frequency counts!
+   */
+  memset(did, 0, sizeof(did));
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    if (is_DC_band) {
+      if (cinfo->Ah != 0)       /* DC refinement needs no table */
+        continue;
+      tbl = compptr->dc_tbl_no;
+    } else {
+      tbl = compptr->ac_tbl_no;
+    }
+    if (!did[tbl]) {
+      if (is_DC_band)
+        htblptr = &cinfo->dc_huff_tbl_ptrs[tbl];
+      else
+        htblptr = &cinfo->ac_huff_tbl_ptrs[tbl];
+      if (*htblptr == NULL)
+        *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);
+      jpeg_gen_optimal_table(cinfo, *htblptr, entropy->count_ptrs[tbl]);
+      did[tbl] = TRUE;
+    }
+  }
+}
+
+
+/*
+ * Module initialization routine for progressive Huffman entropy encoding.
+ */
+
+GLOBAL(void)
+jinit_phuff_encoder(j_compress_ptr cinfo)
+{
+  phuff_entropy_ptr entropy;
+  int i;
+
+  entropy = (phuff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(phuff_entropy_encoder));
+  cinfo->entropy = (struct jpeg_entropy_encoder *)entropy;
+  entropy->pub.start_pass = start_pass_phuff;
+
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    entropy->derived_tbls[i] = NULL;
+    entropy->count_ptrs[i] = NULL;
+  }
+  entropy->bit_buffer = NULL;   /* needed only in AC refinement scan */
+}
+
+#endif /* C_PROGRESSIVE_SUPPORTED */
diff --git a/thirdparty/libjpeg-turbo/src/jcprepct.c b/thirdparty/libjpeg-turbo/src/jcprepct.c
new file mode 100644
index 00000000000..725856d7383
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jcprepct.c
@@ -0,0 +1,378 @@
+/*
+ * jcprepct.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains the compression preprocessing controller.
+ * This controller manages the color conversion, downsampling,
+ * and edge expansion steps.
+ *
+ * Most of the complexity here is associated with buffering input rows
+ * as required by the downsampler.  See the comments at the head of
+ * jcsample.c for the downsampler's needs.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jsamplecomp.h"
+
+
+#if BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)
+
+/* At present, jcsample.c can request context rows only for smoothing.
+ * In the future, we might also need context rows for CCIR601 sampling
+ * or other more-complex downsampling procedures.  The code to support
+ * context rows should be compiled only if needed.
+ */
+#ifdef INPUT_SMOOTHING_SUPPORTED
+#define CONTEXT_ROWS_SUPPORTED
+#endif
+
+
+/*
+ * For the simple (no-context-row) case, we just need to buffer one
+ * row group's worth of pixels for the downsampling step.  At the bottom of
+ * the image, we pad to a full row group by replicating the last pixel row.
+ * The downsampler's last output row is then replicated if needed to pad
+ * out to a full iMCU row.
+ *
+ * When providing context rows, we must buffer three row groups' worth of
+ * pixels.  Three row groups are physically allocated, but the row pointer
+ * arrays are made five row groups high, with the extra pointers above and
+ * below "wrapping around" to point to the last and first real row groups.
+ * This allows the downsampler to access the proper context rows.
+ * At the top and bottom of the image, we create dummy context rows by
+ * copying the first or last real pixel row.  This copying could be avoided
+ * by pointer hacking as is done in jdmainct.c, but it doesn't seem worth the
+ * trouble on the compression side.
+ */
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_prep_controller pub; /* public fields */
+
+  /* Downsampling input buffer.  This buffer holds color-converted data
+   * until we have enough to do a downsample step.
+   */
+  _JSAMPARRAY color_buf[MAX_COMPONENTS];
+
+  JDIMENSION rows_to_go;        /* counts rows remaining in source image */
+  int next_buf_row;             /* index of next row to store in color_buf */
+
+#ifdef CONTEXT_ROWS_SUPPORTED   /* only needed for context case */
+  int this_row_group;           /* starting row index of group to process */
+  int next_buf_stop;            /* downsample when we reach this index */
+#endif
+} my_prep_controller;
+
+typedef my_prep_controller *my_prep_ptr;
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_prep(j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_prep_ptr prep = (my_prep_ptr)cinfo->prep;
+
+  if (pass_mode != JBUF_PASS_THRU)
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  /* Initialize total-height counter for detecting bottom of image */
+  prep->rows_to_go = cinfo->image_height;
+  /* Mark the conversion buffer empty */
+  prep->next_buf_row = 0;
+#ifdef CONTEXT_ROWS_SUPPORTED
+  /* Preset additional state variables for context mode.
+   * These aren't used in non-context mode, so we needn't test which mode.
+   */
+  prep->this_row_group = 0;
+  /* Set next_buf_stop to stop after two row groups have been read in. */
+  prep->next_buf_stop = 2 * cinfo->max_v_samp_factor;
+#endif
+}
+
+
+/*
+ * Expand an image vertically from height input_rows to height output_rows,
+ * by duplicating the bottom row.
+ */
+
+LOCAL(void)
+expand_bottom_edge(_JSAMPARRAY image_data, JDIMENSION num_cols, int input_rows,
+                   int output_rows)
+{
+  register int row;
+
+  for (row = input_rows; row < output_rows; row++) {
+    _jcopy_sample_rows(image_data, input_rows - 1, image_data, row, 1,
+                       num_cols);
+  }
+}
+
+
+/*
+ * Process some data in the simple no-context case.
+ *
+ * Preprocessor output data is counted in "row groups".  A row group
+ * is defined to be v_samp_factor sample rows of each component.
+ * Downsampling will produce this much data from each max_v_samp_factor
+ * input rows.
+ */
+
+METHODDEF(void)
+pre_process_data(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                 JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail,
+                 _JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
+                 JDIMENSION out_row_groups_avail)
+{
+  my_prep_ptr prep = (my_prep_ptr)cinfo->prep;
+  int numrows, ci;
+  JDIMENSION inrows;
+  jpeg_component_info *compptr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+
+  while (*in_row_ctr < in_rows_avail &&
+         *out_row_group_ctr < out_row_groups_avail) {
+    /* Do color conversion to fill the conversion buffer. */
+    inrows = in_rows_avail - *in_row_ctr;
+    numrows = cinfo->max_v_samp_factor - prep->next_buf_row;
+    numrows = (int)MIN((JDIMENSION)numrows, inrows);
+    (*cinfo->cconvert->_color_convert) (cinfo, input_buf + *in_row_ctr,
+                                        prep->color_buf,
+                                        (JDIMENSION)prep->next_buf_row,
+                                        numrows);
+    *in_row_ctr += numrows;
+    prep->next_buf_row += numrows;
+    prep->rows_to_go -= numrows;
+    /* If at bottom of image, pad to fill the conversion buffer. */
+    if (prep->rows_to_go == 0 &&
+        prep->next_buf_row < cinfo->max_v_samp_factor) {
+      for (ci = 0; ci < cinfo->num_components; ci++) {
+        expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
+                           prep->next_buf_row, cinfo->max_v_samp_factor);
+      }
+      prep->next_buf_row = cinfo->max_v_samp_factor;
+    }
+    /* If we've filled the conversion buffer, empty it. */
+    if (prep->next_buf_row == cinfo->max_v_samp_factor) {
+      (*cinfo->downsample->_downsample) (cinfo,
+                                         prep->color_buf, (JDIMENSION)0,
+                                         output_buf, *out_row_group_ctr);
+      prep->next_buf_row = 0;
+      (*out_row_group_ctr)++;
+    }
+    /* If at bottom of image, pad the output to a full iMCU height.
+     * Note we assume the caller is providing a one-iMCU-height output buffer!
+     */
+    if (prep->rows_to_go == 0 && *out_row_group_ctr < out_row_groups_avail) {
+      for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+           ci++, compptr++) {
+        expand_bottom_edge(output_buf[ci],
+                           compptr->width_in_blocks * data_unit,
+                           (int)(*out_row_group_ctr * compptr->v_samp_factor),
+                           (int)(out_row_groups_avail * compptr->v_samp_factor));
+      }
+      *out_row_group_ctr = out_row_groups_avail;
+      break;                    /* can exit outer loop without test */
+    }
+  }
+}
+
+
+#ifdef CONTEXT_ROWS_SUPPORTED
+
+/*
+ * Process some data in the context case.
+ */
+
+METHODDEF(void)
+pre_process_context(j_compress_ptr cinfo, _JSAMPARRAY input_buf,
+                    JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail,
+                    _JSAMPIMAGE output_buf, JDIMENSION *out_row_group_ctr,
+                    JDIMENSION out_row_groups_avail)
+{
+  my_prep_ptr prep = (my_prep_ptr)cinfo->prep;
+  int numrows, ci;
+  int buf_height = cinfo->max_v_samp_factor * 3;
+  JDIMENSION inrows;
+
+  while (*out_row_group_ctr < out_row_groups_avail) {
+    if (*in_row_ctr < in_rows_avail) {
+      /* Do color conversion to fill the conversion buffer. */
+      inrows = in_rows_avail - *in_row_ctr;
+      numrows = prep->next_buf_stop - prep->next_buf_row;
+      numrows = (int)MIN((JDIMENSION)numrows, inrows);
+      (*cinfo->cconvert->_color_convert) (cinfo, input_buf + *in_row_ctr,
+                                          prep->color_buf,
+                                          (JDIMENSION)prep->next_buf_row,
+                                          numrows);
+      /* Pad at top of image, if first time through */
+      if (prep->rows_to_go == cinfo->image_height) {
+        for (ci = 0; ci < cinfo->num_components; ci++) {
+          int row;
+          for (row = 1; row <= cinfo->max_v_samp_factor; row++) {
+            _jcopy_sample_rows(prep->color_buf[ci], 0, prep->color_buf[ci],
+                               -row, 1, cinfo->image_width);
+          }
+        }
+      }
+      *in_row_ctr += numrows;
+      prep->next_buf_row += numrows;
+      prep->rows_to_go -= numrows;
+    } else {
+      /* Return for more data, unless we are at the bottom of the image. */
+      if (prep->rows_to_go != 0)
+        break;
+      /* When at bottom of image, pad to fill the conversion buffer. */
+      if (prep->next_buf_row < prep->next_buf_stop) {
+        for (ci = 0; ci < cinfo->num_components; ci++) {
+          expand_bottom_edge(prep->color_buf[ci], cinfo->image_width,
+                             prep->next_buf_row, prep->next_buf_stop);
+        }
+        prep->next_buf_row = prep->next_buf_stop;
+      }
+    }
+    /* If we've gotten enough data, downsample a row group. */
+    if (prep->next_buf_row == prep->next_buf_stop) {
+      (*cinfo->downsample->_downsample) (cinfo, prep->color_buf,
+                                         (JDIMENSION)prep->this_row_group,
+                                         output_buf, *out_row_group_ctr);
+      (*out_row_group_ctr)++;
+      /* Advance pointers with wraparound as necessary. */
+      prep->this_row_group += cinfo->max_v_samp_factor;
+      if (prep->this_row_group >= buf_height)
+        prep->this_row_group = 0;
+      if (prep->next_buf_row >= buf_height)
+        prep->next_buf_row = 0;
+      prep->next_buf_stop = prep->next_buf_row + cinfo->max_v_samp_factor;
+    }
+  }
+}
+
+
+/*
+ * Create the wrapped-around downsampling input buffer needed for context mode.
+ */
+
+LOCAL(void)
+create_context_buffer(j_compress_ptr cinfo)
+{
+  my_prep_ptr prep = (my_prep_ptr)cinfo->prep;
+  int rgroup_height = cinfo->max_v_samp_factor;
+  int ci, i;
+  jpeg_component_info *compptr;
+  _JSAMPARRAY true_buffer, fake_buffer;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+
+  /* Grab enough space for fake row pointers for all the components;
+   * we need five row groups' worth of pointers for each component.
+   */
+  fake_buffer = (_JSAMPARRAY)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                (cinfo->num_components * 5 * rgroup_height) *
+                                sizeof(_JSAMPROW));
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Allocate the actual buffer space (3 row groups) for this component.
+     * We make the buffer wide enough to allow the downsampler to edge-expand
+     * horizontally within the buffer, if it so chooses.
+     */
+    true_buffer = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
+      ((j_common_ptr)cinfo, JPOOL_IMAGE,
+       (JDIMENSION)(((long)compptr->width_in_blocks * data_unit *
+                     cinfo->max_h_samp_factor) / compptr->h_samp_factor),
+       (JDIMENSION)(3 * rgroup_height));
+    /* Copy true buffer row pointers into the middle of the fake row array */
+    memcpy(fake_buffer + rgroup_height, true_buffer,
+           3 * rgroup_height * sizeof(_JSAMPROW));
+    /* Fill in the above and below wraparound pointers */
+    for (i = 0; i < rgroup_height; i++) {
+      fake_buffer[i] = true_buffer[2 * rgroup_height + i];
+      fake_buffer[4 * rgroup_height + i] = true_buffer[i];
+    }
+    prep->color_buf[ci] = fake_buffer + rgroup_height;
+    fake_buffer += 5 * rgroup_height; /* point to space for next component */
+  }
+}
+
+#endif /* CONTEXT_ROWS_SUPPORTED */
+
+
+/*
+ * Initialize preprocessing controller.
+ */
+
+GLOBAL(void)
+_jinit_c_prep_controller(j_compress_ptr cinfo, boolean need_full_buffer)
+{
+  my_prep_ptr prep;
+  int ci;
+  jpeg_component_info *compptr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+
+#ifdef C_LOSSLESS_SUPPORTED
+  if (cinfo->master->lossless) {
+#if BITS_IN_JSAMPLE == 8
+    if (cinfo->data_precision > BITS_IN_JSAMPLE || cinfo->data_precision < 2)
+#else
+    if (cinfo->data_precision > BITS_IN_JSAMPLE ||
+        cinfo->data_precision < BITS_IN_JSAMPLE - 3)
+#endif
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  } else
+#endif
+  {
+    if (cinfo->data_precision != BITS_IN_JSAMPLE)
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  }
+
+  if (need_full_buffer)         /* safety check */
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  prep = (my_prep_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(my_prep_controller));
+  cinfo->prep = (struct jpeg_c_prep_controller *)prep;
+  prep->pub.start_pass = start_pass_prep;
+
+  /* Allocate the color conversion buffer.
+   * We make the buffer wide enough to allow the downsampler to edge-expand
+   * horizontally within the buffer, if it so chooses.
+   */
+  if (cinfo->downsample->need_context_rows) {
+    /* Set up to provide context rows */
+#ifdef CONTEXT_ROWS_SUPPORTED
+    prep->pub._pre_process_data = pre_process_context;
+    create_context_buffer(cinfo);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    /* No context, just make it tall enough for one row group */
+    prep->pub._pre_process_data = pre_process_data;
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+         ci++, compptr++) {
+      prep->color_buf[ci] = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
+        ((j_common_ptr)cinfo, JPOOL_IMAGE,
+         (JDIMENSION)(((long)compptr->width_in_blocks * data_unit *
+                       cinfo->max_h_samp_factor) / compptr->h_samp_factor),
+         (JDIMENSION)cinfo->max_v_samp_factor);
+    }
+  }
+}
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED) */
diff --git a/thirdparty/libjpeg-turbo/src/jcsample.c b/thirdparty/libjpeg-turbo/src/jcsample.c
new file mode 100644
index 00000000000..ca3bea131f5
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jcsample.c
@@ -0,0 +1,556 @@
+/*
+ * jcsample.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2014, MIPS Technologies, Inc., California.
+ * Copyright (C) 2015, 2019, 2022, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains downsampling routines.
+ *
+ * Downsampling input data is counted in "row groups".  A row group
+ * is defined to be max_v_samp_factor pixel rows of each component,
+ * from which the downsampler produces v_samp_factor sample rows.
+ * A single row group is processed in each call to the downsampler module.
+ *
+ * The downsampler is responsible for edge-expansion of its output data
+ * to fill an integral number of DCT blocks horizontally.  The source buffer
+ * may be modified if it is helpful for this purpose (the source buffer is
+ * allocated wide enough to correspond to the desired output width).
+ * The caller (the prep controller) is responsible for vertical padding.
+ *
+ * The downsampler may request "context rows" by setting need_context_rows
+ * during startup.  In this case, the input arrays will contain at least
+ * one row group's worth of pixels above and below the passed-in data;
+ * the caller will create dummy rows at image top and bottom by replicating
+ * the first or last real pixel row.
+ *
+ * An excellent reference for image resampling is
+ *   Digital Image Warping, George Wolberg, 1990.
+ *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
+ *
+ * The downsampling algorithm used here is a simple average of the source
+ * pixels covered by the output pixel.  The hi-falutin sampling literature
+ * refers to this as a "box filter".  In general the characteristics of a box
+ * filter are not very good, but for the specific cases we normally use (1:1
+ * and 2:1 ratios) the box is equivalent to a "triangle filter" which is not
+ * nearly so bad.  If you intend to use other sampling ratios, you'd be well
+ * advised to improve this code.
+ *
+ * A simple input-smoothing capability is provided.  This is mainly intended
+ * for cleaning up color-dithered GIF input files (if you find it inadequate,
+ * we suggest using an external filtering program such as pnmconvol).  When
+ * enabled, each input pixel P is replaced by a weighted sum of itself and its
+ * eight neighbors.  P's weight is 1-8*SF and each neighbor's weight is SF,
+ * where SF = (smoothing_factor / 1024).
+ * Currently, smoothing is only supported for 2h2v sampling factors.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jsimd.h"
+#include "jsamplecomp.h"
+
+
+#if BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED)
+
+/* Pointer to routine to downsample a single component */
+typedef void (*downsample1_ptr) (j_compress_ptr cinfo,
+                                 jpeg_component_info *compptr,
+                                 _JSAMPARRAY input_data,
+                                 _JSAMPARRAY output_data);
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_downsampler pub;  /* public fields */
+
+  /* Downsampling method pointers, one per component */
+  downsample1_ptr methods[MAX_COMPONENTS];
+} my_downsampler;
+
+typedef my_downsampler *my_downsample_ptr;
+
+
+/*
+ * Initialize for a downsampling pass.
+ */
+
+METHODDEF(void)
+start_pass_downsample(j_compress_ptr cinfo)
+{
+  /* no work for now */
+}
+
+
+/*
+ * Expand a component horizontally from width input_cols to width output_cols,
+ * by duplicating the rightmost samples.
+ */
+
+LOCAL(void)
+expand_right_edge(_JSAMPARRAY image_data, int num_rows, JDIMENSION input_cols,
+                  JDIMENSION output_cols)
+{
+  register _JSAMPROW ptr;
+  register _JSAMPLE pixval;
+  register int count;
+  int row;
+  int numcols = (int)(output_cols - input_cols);
+
+  if (numcols > 0) {
+    for (row = 0; row < num_rows; row++) {
+      ptr = image_data[row] + input_cols;
+      pixval = ptr[-1];
+      for (count = numcols; count > 0; count--)
+        *ptr++ = pixval;
+    }
+  }
+}
+
+
+/*
+ * Do downsampling for a whole row group (all components).
+ *
+ * In this version we simply downsample each component independently.
+ */
+
+METHODDEF(void)
+sep_downsample(j_compress_ptr cinfo, _JSAMPIMAGE input_buf,
+               JDIMENSION in_row_index, _JSAMPIMAGE output_buf,
+               JDIMENSION out_row_group_index)
+{
+  my_downsample_ptr downsample = (my_downsample_ptr)cinfo->downsample;
+  int ci;
+  jpeg_component_info *compptr;
+  _JSAMPARRAY in_ptr, out_ptr;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    in_ptr = input_buf[ci] + in_row_index;
+    out_ptr = output_buf[ci] + (out_row_group_index * compptr->v_samp_factor);
+    (*downsample->methods[ci]) (cinfo, compptr, in_ptr, out_ptr);
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * One row group is processed per call.
+ * This version handles arbitrary integral sampling ratios, without smoothing.
+ * Note that this version is not actually used for customary sampling ratios.
+ */
+
+METHODDEF(void)
+int_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
+               _JSAMPARRAY input_data, _JSAMPARRAY output_data)
+{
+  int inrow, outrow, h_expand, v_expand, numpix, numpix2, h, v;
+  JDIMENSION outcol, outcol_h;  /* outcol_h == outcol*h_expand */
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+  JDIMENSION output_cols = compptr->width_in_blocks * data_unit;
+  _JSAMPROW inptr, outptr;
+  JLONG outvalue;
+
+  h_expand = cinfo->max_h_samp_factor / compptr->h_samp_factor;
+  v_expand = cinfo->max_v_samp_factor / compptr->v_samp_factor;
+  numpix = h_expand * v_expand;
+  numpix2 = numpix / 2;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data, cinfo->max_v_samp_factor, cinfo->image_width,
+                    output_cols * h_expand);
+
+  inrow = 0;
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    outptr = output_data[outrow];
+    for (outcol = 0, outcol_h = 0; outcol < output_cols;
+         outcol++, outcol_h += h_expand) {
+      outvalue = 0;
+      for (v = 0; v < v_expand; v++) {
+        inptr = input_data[inrow + v] + outcol_h;
+        for (h = 0; h < h_expand; h++) {
+          outvalue += (JLONG)(*inptr++);
+        }
+      }
+      *outptr++ = (_JSAMPLE)((outvalue + numpix2) / numpix);
+    }
+    inrow += v_expand;
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the special case of a full-size component,
+ * without smoothing.
+ */
+
+METHODDEF(void)
+fullsize_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
+                    _JSAMPARRAY input_data, _JSAMPARRAY output_data)
+{
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+
+  /* Copy the data */
+  _jcopy_sample_rows(input_data, 0, output_data, 0, cinfo->max_v_samp_factor,
+                     cinfo->image_width);
+  /* Edge-expand */
+  expand_right_edge(output_data, cinfo->max_v_samp_factor, cinfo->image_width,
+                    compptr->width_in_blocks * data_unit);
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the common case of 2:1 horizontal and 1:1 vertical,
+ * without smoothing.
+ *
+ * A note about the "bias" calculations: when rounding fractional values to
+ * integer, we do not want to always round 0.5 up to the next integer.
+ * If we did that, we'd introduce a noticeable bias towards larger values.
+ * Instead, this code is arranged so that 0.5 will be rounded up or down at
+ * alternate pixel locations (a simple ordered dither pattern).
+ */
+
+METHODDEF(void)
+h2v1_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
+                _JSAMPARRAY input_data, _JSAMPARRAY output_data)
+{
+  int outrow;
+  JDIMENSION outcol;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+  JDIMENSION output_cols = compptr->width_in_blocks * data_unit;
+  register _JSAMPROW inptr, outptr;
+  register int bias;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data, cinfo->max_v_samp_factor, cinfo->image_width,
+                    output_cols * 2);
+
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    outptr = output_data[outrow];
+    inptr = input_data[outrow];
+    bias = 0;                   /* bias = 0,1,0,1,... for successive samples */
+    for (outcol = 0; outcol < output_cols; outcol++) {
+      *outptr++ = (_JSAMPLE)((inptr[0] + inptr[1] + bias) >> 1);
+      bias ^= 1;                /* 0=>1, 1=>0 */
+      inptr += 2;
+    }
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+ * without smoothing.
+ */
+
+METHODDEF(void)
+h2v2_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
+                _JSAMPARRAY input_data, _JSAMPARRAY output_data)
+{
+  int inrow, outrow;
+  JDIMENSION outcol;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+  JDIMENSION output_cols = compptr->width_in_blocks * data_unit;
+  register _JSAMPROW inptr0, inptr1, outptr;
+  register int bias;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data, cinfo->max_v_samp_factor, cinfo->image_width,
+                    output_cols * 2);
+
+  inrow = 0;
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    outptr = output_data[outrow];
+    inptr0 = input_data[inrow];
+    inptr1 = input_data[inrow + 1];
+    bias = 1;                   /* bias = 1,2,1,2,... for successive samples */
+    for (outcol = 0; outcol < output_cols; outcol++) {
+      *outptr++ = (_JSAMPLE)
+        ((inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1] + bias) >> 2);
+      bias ^= 3;                /* 1=>2, 2=>1 */
+      inptr0 += 2;  inptr1 += 2;
+    }
+    inrow += 2;
+  }
+}
+
+
+#ifdef INPUT_SMOOTHING_SUPPORTED
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the standard case of 2:1 horizontal and 2:1 vertical,
+ * with smoothing.  One row of context is required.
+ */
+
+METHODDEF(void)
+h2v2_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
+                       _JSAMPARRAY input_data, _JSAMPARRAY output_data)
+{
+  int inrow, outrow;
+  JDIMENSION colctr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+  JDIMENSION output_cols = compptr->width_in_blocks * data_unit;
+  register _JSAMPROW inptr0, inptr1, above_ptr, below_ptr, outptr;
+  JLONG membersum, neighsum, memberscale, neighscale;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
+                    cinfo->image_width, output_cols * 2);
+
+  /* We don't bother to form the individual "smoothed" input pixel values;
+   * we can directly compute the output which is the average of the four
+   * smoothed values.  Each of the four member pixels contributes a fraction
+   * (1-8*SF) to its own smoothed image and a fraction SF to each of the three
+   * other smoothed pixels, therefore a total fraction (1-5*SF)/4 to the final
+   * output.  The four corner-adjacent neighbor pixels contribute a fraction
+   * SF to just one smoothed pixel, or SF/4 to the final output; while the
+   * eight edge-adjacent neighbors contribute SF to each of two smoothed
+   * pixels, or SF/2 overall.  In order to use integer arithmetic, these
+   * factors are scaled by 2^16 = 65536.
+   * Also recall that SF = smoothing_factor / 1024.
+   */
+
+  memberscale = 16384 - cinfo->smoothing_factor * 80; /* scaled (1-5*SF)/4 */
+  neighscale = cinfo->smoothing_factor * 16; /* scaled SF/4 */
+
+  inrow = 0;
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    outptr = output_data[outrow];
+    inptr0 = input_data[inrow];
+    inptr1 = input_data[inrow + 1];
+    above_ptr = input_data[inrow - 1];
+    below_ptr = input_data[inrow + 2];
+
+    /* Special case for first column: pretend column -1 is same as column 0 */
+    membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1];
+    neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] +
+               inptr0[0] + inptr0[2] + inptr1[0] + inptr1[2];
+    neighsum += neighsum;
+    neighsum += above_ptr[0] + above_ptr[2] + below_ptr[0] + below_ptr[2];
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr++ = (_JSAMPLE)((membersum + 32768) >> 16);
+    inptr0 += 2;  inptr1 += 2;  above_ptr += 2;  below_ptr += 2;
+
+    for (colctr = output_cols - 2; colctr > 0; colctr--) {
+      /* sum of pixels directly mapped to this output element */
+      membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1];
+      /* sum of edge-neighbor pixels */
+      neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] +
+                 inptr0[-1] + inptr0[2] + inptr1[-1] + inptr1[2];
+      /* The edge-neighbors count twice as much as corner-neighbors */
+      neighsum += neighsum;
+      /* Add in the corner-neighbors */
+      neighsum += above_ptr[-1] + above_ptr[2] + below_ptr[-1] + below_ptr[2];
+      /* form final output scaled up by 2^16 */
+      membersum = membersum * memberscale + neighsum * neighscale;
+      /* round, descale and output it */
+      *outptr++ = (_JSAMPLE)((membersum + 32768) >> 16);
+      inptr0 += 2;  inptr1 += 2;  above_ptr += 2;  below_ptr += 2;
+    }
+
+    /* Special case for last column */
+    membersum = inptr0[0] + inptr0[1] + inptr1[0] + inptr1[1];
+    neighsum = above_ptr[0] + above_ptr[1] + below_ptr[0] + below_ptr[1] +
+               inptr0[-1] + inptr0[1] + inptr1[-1] + inptr1[1];
+    neighsum += neighsum;
+    neighsum += above_ptr[-1] + above_ptr[1] + below_ptr[-1] + below_ptr[1];
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr = (_JSAMPLE)((membersum + 32768) >> 16);
+
+    inrow += 2;
+  }
+}
+
+
+/*
+ * Downsample pixel values of a single component.
+ * This version handles the special case of a full-size component,
+ * with smoothing.  One row of context is required.
+ */
+
+METHODDEF(void)
+fullsize_smooth_downsample(j_compress_ptr cinfo, jpeg_component_info *compptr,
+                           _JSAMPARRAY input_data, _JSAMPARRAY output_data)
+{
+  int outrow;
+  JDIMENSION colctr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+  JDIMENSION output_cols = compptr->width_in_blocks * data_unit;
+  register _JSAMPROW inptr, above_ptr, below_ptr, outptr;
+  JLONG membersum, neighsum, memberscale, neighscale;
+  int colsum, lastcolsum, nextcolsum;
+
+  /* Expand input data enough to let all the output samples be generated
+   * by the standard loop.  Special-casing padded output would be more
+   * efficient.
+   */
+  expand_right_edge(input_data - 1, cinfo->max_v_samp_factor + 2,
+                    cinfo->image_width, output_cols);
+
+  /* Each of the eight neighbor pixels contributes a fraction SF to the
+   * smoothed pixel, while the main pixel contributes (1-8*SF).  In order
+   * to use integer arithmetic, these factors are multiplied by 2^16 = 65536.
+   * Also recall that SF = smoothing_factor / 1024.
+   */
+
+  memberscale = 65536L - cinfo->smoothing_factor * 512L; /* scaled 1-8*SF */
+  neighscale = cinfo->smoothing_factor * 64; /* scaled SF */
+
+  for (outrow = 0; outrow < compptr->v_samp_factor; outrow++) {
+    outptr = output_data[outrow];
+    inptr = input_data[outrow];
+    above_ptr = input_data[outrow - 1];
+    below_ptr = input_data[outrow + 1];
+
+    /* Special case for first column */
+    colsum = (*above_ptr++) + (*below_ptr++) + inptr[0];
+    membersum = *inptr++;
+    nextcolsum = above_ptr[0] + below_ptr[0] + inptr[0];
+    neighsum = colsum + (colsum - membersum) + nextcolsum;
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr++ = (_JSAMPLE)((membersum + 32768) >> 16);
+    lastcolsum = colsum;  colsum = nextcolsum;
+
+    for (colctr = output_cols - 2; colctr > 0; colctr--) {
+      membersum = *inptr++;
+      above_ptr++;  below_ptr++;
+      nextcolsum = above_ptr[0] + below_ptr[0] + inptr[0];
+      neighsum = lastcolsum + (colsum - membersum) + nextcolsum;
+      membersum = membersum * memberscale + neighsum * neighscale;
+      *outptr++ = (_JSAMPLE)((membersum + 32768) >> 16);
+      lastcolsum = colsum;  colsum = nextcolsum;
+    }
+
+    /* Special case for last column */
+    membersum = *inptr;
+    neighsum = lastcolsum + (colsum - membersum) + colsum;
+    membersum = membersum * memberscale + neighsum * neighscale;
+    *outptr = (_JSAMPLE)((membersum + 32768) >> 16);
+
+  }
+}
+
+#endif /* INPUT_SMOOTHING_SUPPORTED */
+
+
+/*
+ * Module initialization routine for downsampling.
+ * Note that we must select a routine for each component.
+ */
+
+GLOBAL(void)
+_jinit_downsampler(j_compress_ptr cinfo)
+{
+  my_downsample_ptr downsample;
+  int ci;
+  jpeg_component_info *compptr;
+  boolean smoothok = TRUE;
+
+#ifdef C_LOSSLESS_SUPPORTED
+  if (cinfo->master->lossless) {
+#if BITS_IN_JSAMPLE == 8
+    if (cinfo->data_precision > BITS_IN_JSAMPLE || cinfo->data_precision < 2)
+#else
+    if (cinfo->data_precision > BITS_IN_JSAMPLE ||
+        cinfo->data_precision < BITS_IN_JSAMPLE - 3)
+#endif
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  } else
+#endif
+  {
+    if (cinfo->data_precision != BITS_IN_JSAMPLE)
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  }
+
+  downsample = (my_downsample_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(my_downsampler));
+  cinfo->downsample = (struct jpeg_downsampler *)downsample;
+  downsample->pub.start_pass = start_pass_downsample;
+  downsample->pub._downsample = sep_downsample;
+  downsample->pub.need_context_rows = FALSE;
+
+  if (cinfo->CCIR601_sampling)
+    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
+
+  /* Verify we can handle the sampling factors, and set up method pointers */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (compptr->h_samp_factor == cinfo->max_h_samp_factor &&
+        compptr->v_samp_factor == cinfo->max_v_samp_factor) {
+#ifdef INPUT_SMOOTHING_SUPPORTED
+      if (cinfo->smoothing_factor) {
+        downsample->methods[ci] = fullsize_smooth_downsample;
+        downsample->pub.need_context_rows = TRUE;
+      } else
+#endif
+        downsample->methods[ci] = fullsize_downsample;
+    } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
+               compptr->v_samp_factor == cinfo->max_v_samp_factor) {
+      smoothok = FALSE;
+#ifdef WITH_SIMD
+      if (jsimd_can_h2v1_downsample())
+        downsample->methods[ci] = jsimd_h2v1_downsample;
+      else
+#endif
+        downsample->methods[ci] = h2v1_downsample;
+    } else if (compptr->h_samp_factor * 2 == cinfo->max_h_samp_factor &&
+               compptr->v_samp_factor * 2 == cinfo->max_v_samp_factor) {
+#ifdef INPUT_SMOOTHING_SUPPORTED
+      if (cinfo->smoothing_factor) {
+#if defined(WITH_SIMD) && defined(__mips__)
+        if (jsimd_can_h2v2_smooth_downsample())
+          downsample->methods[ci] = jsimd_h2v2_smooth_downsample;
+        else
+#endif
+          downsample->methods[ci] = h2v2_smooth_downsample;
+        downsample->pub.need_context_rows = TRUE;
+      } else
+#endif
+      {
+#ifdef WITH_SIMD
+        if (jsimd_can_h2v2_downsample())
+          downsample->methods[ci] = jsimd_h2v2_downsample;
+        else
+#endif
+          downsample->methods[ci] = h2v2_downsample;
+      }
+    } else if ((cinfo->max_h_samp_factor % compptr->h_samp_factor) == 0 &&
+               (cinfo->max_v_samp_factor % compptr->v_samp_factor) == 0) {
+      smoothok = FALSE;
+      downsample->methods[ci] = int_downsample;
+    } else
+      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
+  }
+
+#ifdef INPUT_SMOOTHING_SUPPORTED
+  if (cinfo->smoothing_factor && !smoothok)
+    TRACEMS(cinfo, 0, JTRC_SMOOTH_NOTIMPL);
+#endif
+}
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(C_LOSSLESS_SUPPORTED) */
diff --git a/thirdparty/libjpeg-turbo/src/jctrans.c b/thirdparty/libjpeg-turbo/src/jctrans.c
new file mode 100644
index 00000000000..ae52e3989ee
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jctrans.c
@@ -0,0 +1,415 @@
+/*
+ * jctrans.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1995-1998, Thomas G. Lane.
+ * Modified 2000-2009 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2020, 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains library routines for transcoding compression,
+ * that is, writing raw DCT coefficient arrays to an output JPEG file.
+ * The routines in jcapimin.c will also be needed by a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jpegapicomp.h"
+
+
+/* Forward declarations */
+LOCAL(void) transencode_master_selection(j_compress_ptr cinfo,
+                                         jvirt_barray_ptr *coef_arrays);
+LOCAL(void) transencode_coef_controller(j_compress_ptr cinfo,
+                                        jvirt_barray_ptr *coef_arrays);
+
+
+/*
+ * Compression initialization for writing raw-coefficient data.
+ * Before calling this, all parameters and a data destination must be set up.
+ * Call jpeg_finish_compress() to actually write the data.
+ *
+ * The number of passed virtual arrays must match cinfo->num_components.
+ * Note that the virtual arrays need not be filled or even realized at
+ * the time write_coefficients is called; indeed, if the virtual arrays
+ * were requested from this compression object's memory manager, they
+ * typically will be realized during this routine and filled afterwards.
+ */
+
+GLOBAL(void)
+jpeg_write_coefficients(j_compress_ptr cinfo, jvirt_barray_ptr *coef_arrays)
+{
+  if (cinfo->master->lossless)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
+  if (cinfo->global_state != CSTATE_START)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Mark all tables to be written */
+  jpeg_suppress_tables(cinfo, FALSE);
+  /* (Re)initialize error mgr and destination modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr)cinfo);
+  (*cinfo->dest->init_destination) (cinfo);
+  /* Perform master selection of active modules */
+  transencode_master_selection(cinfo, coef_arrays);
+  /* Wait for jpeg_finish_compress() call */
+  cinfo->next_scanline = 0;     /* so jpeg_write_marker works */
+  cinfo->global_state = CSTATE_WRCOEFS;
+}
+
+
+/*
+ * Initialize the compression object with default parameters,
+ * then copy from the source object all parameters needed for lossless
+ * transcoding.  Parameters that can be varied without loss (such as
+ * scan script and Huffman optimization) are left in their default states.
+ */
+
+GLOBAL(void)
+jpeg_copy_critical_parameters(j_decompress_ptr srcinfo, j_compress_ptr dstinfo)
+{
+  JQUANT_TBL **qtblptr;
+  jpeg_component_info *incomp, *outcomp;
+  JQUANT_TBL *c_quant, *slot_quant;
+  int tblno, ci, coefi;
+
+  if (srcinfo->master->lossless)
+    ERREXIT(dstinfo, JERR_NOTIMPL);
+
+  /* Safety check to ensure start_compress not called yet. */
+  if (dstinfo->global_state != CSTATE_START)
+    ERREXIT1(dstinfo, JERR_BAD_STATE, dstinfo->global_state);
+  /* Copy fundamental image dimensions */
+  dstinfo->image_width = srcinfo->image_width;
+  dstinfo->image_height = srcinfo->image_height;
+  dstinfo->input_components = srcinfo->num_components;
+  dstinfo->in_color_space = srcinfo->jpeg_color_space;
+#if JPEG_LIB_VERSION >= 70
+  dstinfo->jpeg_width = srcinfo->output_width;
+  dstinfo->jpeg_height = srcinfo->output_height;
+  dstinfo->min_DCT_h_scaled_size = srcinfo->min_DCT_h_scaled_size;
+  dstinfo->min_DCT_v_scaled_size = srcinfo->min_DCT_v_scaled_size;
+#endif
+  /* Initialize all parameters to default values */
+  jpeg_set_defaults(dstinfo);
+  /* jpeg_set_defaults may choose wrong colorspace, eg YCbCr if input is RGB.
+   * Fix it to get the right header markers for the image colorspace.
+   */
+  jpeg_set_colorspace(dstinfo, srcinfo->jpeg_color_space);
+  dstinfo->data_precision = srcinfo->data_precision;
+  dstinfo->CCIR601_sampling = srcinfo->CCIR601_sampling;
+  /* Copy the source's quantization tables. */
+  for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
+    if (srcinfo->quant_tbl_ptrs[tblno] != NULL) {
+      qtblptr = &dstinfo->quant_tbl_ptrs[tblno];
+      if (*qtblptr == NULL)
+        *qtblptr = jpeg_alloc_quant_table((j_common_ptr)dstinfo);
+      memcpy((*qtblptr)->quantval, srcinfo->quant_tbl_ptrs[tblno]->quantval,
+             sizeof((*qtblptr)->quantval));
+      (*qtblptr)->sent_table = FALSE;
+    }
+  }
+  /* Copy the source's per-component info.
+   * Note we assume jpeg_set_defaults has allocated the dest comp_info array.
+   */
+  dstinfo->num_components = srcinfo->num_components;
+  if (dstinfo->num_components < 1 || dstinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(dstinfo, JERR_COMPONENT_COUNT, dstinfo->num_components,
+             MAX_COMPONENTS);
+  for (ci = 0, incomp = srcinfo->comp_info, outcomp = dstinfo->comp_info;
+       ci < dstinfo->num_components; ci++, incomp++, outcomp++) {
+    outcomp->component_id = incomp->component_id;
+    outcomp->h_samp_factor = incomp->h_samp_factor;
+    outcomp->v_samp_factor = incomp->v_samp_factor;
+    outcomp->quant_tbl_no = incomp->quant_tbl_no;
+    /* Make sure saved quantization table for component matches the qtable
+     * slot.  If not, the input file re-used this qtable slot.
+     * IJG encoder currently cannot duplicate this.
+     */
+    tblno = outcomp->quant_tbl_no;
+    if (tblno < 0 || tblno >= NUM_QUANT_TBLS ||
+        srcinfo->quant_tbl_ptrs[tblno] == NULL)
+      ERREXIT1(dstinfo, JERR_NO_QUANT_TABLE, tblno);
+    slot_quant = srcinfo->quant_tbl_ptrs[tblno];
+    c_quant = incomp->quant_table;
+    if (c_quant != NULL) {
+      for (coefi = 0; coefi < DCTSIZE2; coefi++) {
+        if (c_quant->quantval[coefi] != slot_quant->quantval[coefi])
+          ERREXIT1(dstinfo, JERR_MISMATCHED_QUANT_TABLE, tblno);
+      }
+    }
+    /* Note: we do not copy the source's Huffman table assignments;
+     * instead we rely on jpeg_set_colorspace to have made a suitable choice.
+     */
+  }
+  /* Also copy JFIF version and resolution information, if available.
+   * Strictly speaking this isn't "critical" info, but it's nearly
+   * always appropriate to copy it if available.  In particular,
+   * if the application chooses to copy JFIF 1.02 extension markers from
+   * the source file, we need to copy the version to make sure we don't
+   * emit a file that has 1.02 extensions but a claimed version of 1.01.
+   * We will *not*, however, copy version info from mislabeled "2.01" files.
+   */
+  if (srcinfo->saw_JFIF_marker) {
+    if (srcinfo->JFIF_major_version == 1) {
+      dstinfo->JFIF_major_version = srcinfo->JFIF_major_version;
+      dstinfo->JFIF_minor_version = srcinfo->JFIF_minor_version;
+    }
+    dstinfo->density_unit = srcinfo->density_unit;
+    dstinfo->X_density = srcinfo->X_density;
+    dstinfo->Y_density = srcinfo->Y_density;
+  }
+}
+
+
+/*
+ * Master selection of compression modules for transcoding.
+ * This substitutes for jcinit.c's initialization of the full compressor.
+ */
+
+LOCAL(void)
+transencode_master_selection(j_compress_ptr cinfo,
+                             jvirt_barray_ptr *coef_arrays)
+{
+  /* Although we don't actually use input_components for transcoding,
+   * jcmaster.c's initial_setup will complain if input_components is 0.
+   */
+  cinfo->input_components = 1;
+  /* Initialize master control (includes parameter checking/processing) */
+  jinit_c_master_control(cinfo, TRUE /* transcode only */);
+
+  /* Entropy encoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code) {
+#ifdef C_ARITH_CODING_SUPPORTED
+    jinit_arith_encoder(cinfo);
+#else
+    ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+#endif
+  } else {
+    if (cinfo->progressive_mode) {
+#ifdef C_PROGRESSIVE_SUPPORTED
+      jinit_phuff_encoder(cinfo);
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    } else
+      jinit_huff_encoder(cinfo);
+  }
+
+  /* We need a special coefficient buffer controller. */
+  transencode_coef_controller(cinfo, coef_arrays);
+
+  jinit_marker_writer(cinfo);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr)cinfo);
+
+  /* Write the datastream header (SOI, JFIF) immediately.
+   * Frame and scan headers are postponed till later.
+   * This lets application insert special markers after the SOI.
+   */
+  (*cinfo->marker->write_file_header) (cinfo);
+}
+
+
+/*
+ * The rest of this file is a special implementation of the coefficient
+ * buffer controller.  This is similar to jccoefct.c, but it handles only
+ * output from presupplied virtual arrays.  Furthermore, we generate any
+ * dummy padding blocks on-the-fly rather than expecting them to be present
+ * in the arrays.
+ */
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_c_coef_controller pub; /* public fields */
+
+  JDIMENSION iMCU_row_num;      /* iMCU row # within image */
+  JDIMENSION mcu_ctr;           /* counts MCUs processed in current row */
+  int MCU_vert_offset;          /* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;    /* number of such rows needed */
+
+  /* Virtual block array for each component. */
+  jvirt_barray_ptr *whole_image;
+
+  /* Workspace for constructing dummy blocks at right/bottom edges. */
+  JBLOCKROW dummy_buffer[C_MAX_BLOCKS_IN_MCU];
+} my_coef_controller;
+
+typedef my_coef_controller *my_coef_ptr;
+
+
+LOCAL(void)
+start_iMCU_row(j_compress_ptr cinfo)
+/* Reset within-iMCU-row counters for a new row */
+{
+  my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
+
+  /* In an interleaved scan, an MCU row is the same as an iMCU row.
+   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
+   * But at the bottom of the image, process only what's left.
+   */
+  if (cinfo->comps_in_scan > 1) {
+    coef->MCU_rows_per_iMCU_row = 1;
+  } else {
+    if (coef->iMCU_row_num < (cinfo->total_iMCU_rows - 1))
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
+    else
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
+  }
+
+  coef->mcu_ctr = 0;
+  coef->MCU_vert_offset = 0;
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_coef(j_compress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
+
+  if (pass_mode != JBUF_CRANK_DEST)
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  coef->iMCU_row_num = 0;
+  start_iMCU_row(cinfo);
+}
+
+
+/*
+ * Process some data.
+ * We process the equivalent of one fully interleaved MCU row ("iMCU" row)
+ * per call, ie, v_samp_factor block rows for each component in the scan.
+ * The data is obtained from the virtual arrays and fed to the entropy coder.
+ * Returns TRUE if the iMCU row is completed, FALSE if suspended.
+ *
+ * NB: input_buf is ignored; it is likely to be a NULL pointer.
+ */
+
+METHODDEF(boolean)
+compress_output(j_compress_ptr cinfo, JSAMPIMAGE input_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
+  JDIMENSION MCU_col_num;       /* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int blkn, ci, xindex, yindex, yoffset, blockcnt;
+  JDIMENSION start_col;
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
+  JBLOCKROW MCU_buffer[C_MAX_BLOCKS_IN_MCU];
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan. */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr)cinfo, coef->whole_image[compptr->component_index],
+       coef->iMCU_row_num * compptr->v_samp_factor,
+       (JDIMENSION)compptr->v_samp_factor, FALSE);
+  }
+
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->mcu_ctr; MCU_col_num < cinfo->MCUs_per_row;
+         MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkn = 0;                 /* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+        compptr = cinfo->cur_comp_info[ci];
+        start_col = MCU_col_num * compptr->MCU_width;
+        blockcnt = (MCU_col_num < last_MCU_col) ? compptr->MCU_width :
+                                                  compptr->last_col_width;
+        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+          if (coef->iMCU_row_num < last_iMCU_row ||
+              yindex + yoffset < compptr->last_row_height) {
+            /* Fill in pointers to real blocks in this row */
+            buffer_ptr = buffer[ci][yindex + yoffset] + start_col;
+            for (xindex = 0; xindex < blockcnt; xindex++)
+              MCU_buffer[blkn++] = buffer_ptr++;
+          } else {
+            /* At bottom of image, need a whole row of dummy blocks */
+            xindex = 0;
+          }
+          /* Fill in any dummy blocks needed in this row.
+           * Dummy blocks are filled in the same way as in jccoefct.c:
+           * all zeroes in the AC entries, DC entries equal to previous
+           * block's DC value.  The init routine has already zeroed the
+           * AC entries, so we need only set the DC entries correctly.
+           */
+          for (; xindex < compptr->MCU_width; xindex++) {
+            MCU_buffer[blkn] = coef->dummy_buffer[blkn];
+            MCU_buffer[blkn][0][0] = MCU_buffer[blkn - 1][0][0];
+            blkn++;
+          }
+        }
+      }
+      /* Try to write the MCU. */
+      if (!(*cinfo->entropy->encode_mcu) (cinfo, MCU_buffer)) {
+        /* Suspension forced; update state counters and exit */
+        coef->MCU_vert_offset = yoffset;
+        coef->mcu_ctr = MCU_col_num;
+        return FALSE;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->mcu_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  coef->iMCU_row_num++;
+  start_iMCU_row(cinfo);
+  return TRUE;
+}
+
+
+METHODDEF(boolean)
+compress_output_12(j_compress_ptr cinfo, J12SAMPIMAGE input_buf)
+{
+  return compress_output(cinfo, (JSAMPIMAGE)input_buf);
+}
+
+
+/*
+ * Initialize coefficient buffer controller.
+ *
+ * Each passed coefficient array must be the right size for that
+ * coefficient: width_in_blocks wide and height_in_blocks high,
+ * with unitheight at least v_samp_factor.
+ */
+
+LOCAL(void)
+transencode_coef_controller(j_compress_ptr cinfo,
+                            jvirt_barray_ptr *coef_arrays)
+{
+  my_coef_ptr coef;
+  JBLOCKROW buffer;
+  int i;
+
+  coef = (my_coef_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(my_coef_controller));
+  cinfo->coef = (struct jpeg_c_coef_controller *)coef;
+  coef->pub.start_pass = start_pass_coef;
+  coef->pub.compress_data = compress_output;
+  coef->pub.compress_data_12 = compress_output_12;
+
+  /* Save pointer to virtual arrays */
+  coef->whole_image = coef_arrays;
+
+  /* Allocate and pre-zero space for dummy DCT blocks. */
+  buffer = (JBLOCKROW)
+    (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                C_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK));
+  jzero_far((void *)buffer, C_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK));
+  for (i = 0; i < C_MAX_BLOCKS_IN_MCU; i++) {
+    coef->dummy_buffer[i] = buffer + i;
+  }
+}
diff --git a/thirdparty/libjpeg-turbo/src/jdapimin.c b/thirdparty/libjpeg-turbo/src/jdapimin.c
new file mode 100644
index 00000000000..f2419f8adb6
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdapimin.c
@@ -0,0 +1,421 @@
+/*
+ * jdapimin.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2016, 2022, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains application interface code for the decompression half
+ * of the JPEG library.  These are the "minimum" API routines that may be
+ * needed in either the normal full-decompression case or the
+ * transcoding-only case.
+ *
+ * Most of the routines intended to be called directly by an application
+ * are in this file or in jdapistd.c.  But also see jcomapi.c for routines
+ * shared by compression and decompression, and jdtrans.c for the transcoding
+ * case.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdmaster.h"
+
+
+/*
+ * Initialization of a JPEG decompression object.
+ * The error manager must already be set up (in case memory manager fails).
+ */
+
+GLOBAL(void)
+jpeg_CreateDecompress(j_decompress_ptr cinfo, int version, size_t structsize)
+{
+  int i;
+
+  /* Guard against version mismatches between library and caller. */
+  cinfo->mem = NULL;            /* so jpeg_destroy knows mem mgr not called */
+  if (version != JPEG_LIB_VERSION)
+    ERREXIT2(cinfo, JERR_BAD_LIB_VERSION, JPEG_LIB_VERSION, version);
+  if (structsize != sizeof(struct jpeg_decompress_struct))
+    ERREXIT2(cinfo, JERR_BAD_STRUCT_SIZE,
+             (int)sizeof(struct jpeg_decompress_struct), (int)structsize);
+
+  /* For debugging purposes, we zero the whole master structure.
+   * But the application has already set the err pointer, and may have set
+   * client_data, so we have to save and restore those fields.
+   * Note: if application hasn't set client_data, tools like Purify may
+   * complain here.
+   */
+  {
+    struct jpeg_error_mgr *err = cinfo->err;
+    void *client_data = cinfo->client_data; /* ignore Purify complaint here */
+    memset(cinfo, 0, sizeof(struct jpeg_decompress_struct));
+    cinfo->err = err;
+    cinfo->client_data = client_data;
+  }
+  cinfo->is_decompressor = TRUE;
+
+  /* Initialize a memory manager instance for this object */
+  jinit_memory_mgr((j_common_ptr)cinfo);
+
+  /* Zero out pointers to permanent structures. */
+  cinfo->progress = NULL;
+  cinfo->src = NULL;
+
+  for (i = 0; i < NUM_QUANT_TBLS; i++)
+    cinfo->quant_tbl_ptrs[i] = NULL;
+
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    cinfo->dc_huff_tbl_ptrs[i] = NULL;
+    cinfo->ac_huff_tbl_ptrs[i] = NULL;
+  }
+
+  /* Initialize marker processor so application can override methods
+   * for COM, APPn markers before calling jpeg_read_header.
+   */
+  cinfo->marker_list = NULL;
+  jinit_marker_reader(cinfo);
+
+  /* And initialize the overall input controller. */
+  jinit_input_controller(cinfo);
+
+  cinfo->data_precision = BITS_IN_JSAMPLE;
+
+  /* OK, I'm ready */
+  cinfo->global_state = DSTATE_START;
+
+  /* The master struct is used to store extension parameters, so we allocate it
+   * here.
+   */
+  cinfo->master = (struct jpeg_decomp_master *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
+                                sizeof(my_decomp_master));
+  memset(cinfo->master, 0, sizeof(my_decomp_master));
+}
+
+
+/*
+ * Destruction of a JPEG decompression object
+ */
+
+GLOBAL(void)
+jpeg_destroy_decompress(j_decompress_ptr cinfo)
+{
+  jpeg_destroy((j_common_ptr)cinfo); /* use common routine */
+}
+
+
+/*
+ * Abort processing of a JPEG decompression operation,
+ * but don't destroy the object itself.
+ */
+
+GLOBAL(void)
+jpeg_abort_decompress(j_decompress_ptr cinfo)
+{
+  jpeg_abort((j_common_ptr)cinfo); /* use common routine */
+}
+
+
+/*
+ * Set default decompression parameters.
+ */
+
+LOCAL(void)
+default_decompress_parms(j_decompress_ptr cinfo)
+{
+  /* Guess the input colorspace, and set output colorspace accordingly. */
+  /* (Wish JPEG committee had provided a real way to specify this...) */
+  /* Note application may override our guesses. */
+  switch (cinfo->num_components) {
+  case 1:
+    cinfo->jpeg_color_space = JCS_GRAYSCALE;
+    cinfo->out_color_space = JCS_GRAYSCALE;
+    break;
+
+  case 3:
+    if (cinfo->saw_JFIF_marker) {
+      cinfo->jpeg_color_space = JCS_YCbCr; /* JFIF implies YCbCr */
+    } else if (cinfo->saw_Adobe_marker) {
+      switch (cinfo->Adobe_transform) {
+      case 0:
+        cinfo->jpeg_color_space = JCS_RGB;
+        break;
+      case 1:
+        cinfo->jpeg_color_space = JCS_YCbCr;
+        break;
+      default:
+        WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
+        cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
+        break;
+      }
+    } else {
+      /* Saw no special markers, try to guess from the component IDs */
+      int cid0 = cinfo->comp_info[0].component_id;
+      int cid1 = cinfo->comp_info[1].component_id;
+      int cid2 = cinfo->comp_info[2].component_id;
+
+      if (cid0 == 1 && cid1 == 2 && cid2 == 3) {
+#ifdef D_LOSSLESS_SUPPORTED
+        if (cinfo->master->lossless)
+          cinfo->jpeg_color_space = JCS_RGB; /* assume RGB w/out marker */
+        else
+#endif
+          cinfo->jpeg_color_space = JCS_YCbCr; /* assume JFIF w/out marker */
+      } else if (cid0 == 82 && cid1 == 71 && cid2 == 66)
+        cinfo->jpeg_color_space = JCS_RGB; /* ASCII 'R', 'G', 'B' */
+      else {
+        TRACEMS3(cinfo, 1, JTRC_UNKNOWN_IDS, cid0, cid1, cid2);
+#ifdef D_LOSSLESS_SUPPORTED
+        if (cinfo->master->lossless)
+          cinfo->jpeg_color_space = JCS_RGB; /* assume it's RGB */
+        else
+#endif
+          cinfo->jpeg_color_space = JCS_YCbCr; /* assume it's YCbCr */
+      }
+    }
+    /* Always guess RGB is proper output colorspace. */
+    cinfo->out_color_space = JCS_RGB;
+    break;
+
+  case 4:
+    if (cinfo->saw_Adobe_marker) {
+      switch (cinfo->Adobe_transform) {
+      case 0:
+        cinfo->jpeg_color_space = JCS_CMYK;
+        break;
+      case 2:
+        cinfo->jpeg_color_space = JCS_YCCK;
+        break;
+      default:
+        WARNMS1(cinfo, JWRN_ADOBE_XFORM, cinfo->Adobe_transform);
+        cinfo->jpeg_color_space = JCS_YCCK; /* assume it's YCCK */
+        break;
+      }
+    } else {
+      /* No special markers, assume straight CMYK. */
+      cinfo->jpeg_color_space = JCS_CMYK;
+    }
+    cinfo->out_color_space = JCS_CMYK;
+    break;
+
+  default:
+    cinfo->jpeg_color_space = JCS_UNKNOWN;
+    cinfo->out_color_space = JCS_UNKNOWN;
+    break;
+  }
+
+  /* Set defaults for other decompression parameters. */
+  cinfo->scale_num = 1;         /* 1:1 scaling */
+  cinfo->scale_denom = 1;
+  cinfo->output_gamma = 1.0;
+  cinfo->buffered_image = FALSE;
+  cinfo->raw_data_out = FALSE;
+  cinfo->dct_method = JDCT_DEFAULT;
+  cinfo->do_fancy_upsampling = TRUE;
+  cinfo->do_block_smoothing = TRUE;
+  cinfo->quantize_colors = FALSE;
+  /* We set these in case application only sets quantize_colors. */
+  cinfo->dither_mode = JDITHER_FS;
+#ifdef QUANT_2PASS_SUPPORTED
+  cinfo->two_pass_quantize = TRUE;
+#else
+  cinfo->two_pass_quantize = FALSE;
+#endif
+  cinfo->desired_number_of_colors = 256;
+  cinfo->colormap = NULL;
+  /* Initialize for no mode change in buffered-image mode. */
+  cinfo->enable_1pass_quant = FALSE;
+  cinfo->enable_external_quant = FALSE;
+  cinfo->enable_2pass_quant = FALSE;
+}
+
+
+/*
+ * Decompression startup: read start of JPEG datastream to see what's there.
+ * Need only initialize JPEG object and supply a data source before calling.
+ *
+ * This routine will read as far as the first SOS marker (ie, actual start of
+ * compressed data), and will save all tables and parameters in the JPEG
+ * object.  It will also initialize the decompression parameters to default
+ * values, and finally return JPEG_HEADER_OK.  On return, the application may
+ * adjust the decompression parameters and then call jpeg_start_decompress.
+ * (Or, if the application only wanted to determine the image parameters,
+ * the data need not be decompressed.  In that case, call jpeg_abort or
+ * jpeg_destroy to release any temporary space.)
+ * If an abbreviated (tables only) datastream is presented, the routine will
+ * return JPEG_HEADER_TABLES_ONLY upon reaching EOI.  The application may then
+ * re-use the JPEG object to read the abbreviated image datastream(s).
+ * It is unnecessary (but OK) to call jpeg_abort in this case.
+ * The JPEG_SUSPENDED return code only occurs if the data source module
+ * requests suspension of the decompressor.  In this case the application
+ * should load more source data and then re-call jpeg_read_header to resume
+ * processing.
+ * If a non-suspending data source is used and require_image is TRUE, then the
+ * return code need not be inspected since only JPEG_HEADER_OK is possible.
+ *
+ * This routine is now just a front end to jpeg_consume_input, with some
+ * extra error checking.
+ */
+
+GLOBAL(int)
+jpeg_read_header(j_decompress_ptr cinfo, boolean require_image)
+{
+  int retcode;
+
+  if (cinfo->global_state != DSTATE_START &&
+      cinfo->global_state != DSTATE_INHEADER)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  retcode = jpeg_consume_input(cinfo);
+
+  switch (retcode) {
+  case JPEG_REACHED_SOS:
+    retcode = JPEG_HEADER_OK;
+    break;
+  case JPEG_REACHED_EOI:
+    if (require_image)          /* Complain if application wanted an image */
+      ERREXIT(cinfo, JERR_NO_IMAGE);
+    /* Reset to start state; it would be safer to require the application to
+     * call jpeg_abort, but we can't change it now for compatibility reasons.
+     * A side effect is to free any temporary memory (there shouldn't be any).
+     */
+    jpeg_abort((j_common_ptr)cinfo); /* sets state = DSTATE_START */
+    retcode = JPEG_HEADER_TABLES_ONLY;
+    break;
+  case JPEG_SUSPENDED:
+    /* no work */
+    break;
+  }
+
+  return retcode;
+}
+
+
+/*
+ * Consume data in advance of what the decompressor requires.
+ * This can be called at any time once the decompressor object has
+ * been created and a data source has been set up.
+ *
+ * This routine is essentially a state machine that handles a couple
+ * of critical state-transition actions, namely initial setup and
+ * transition from header scanning to ready-for-start_decompress.
+ * All the actual input is done via the input controller's consume_input
+ * method.
+ */
+
+GLOBAL(int)
+jpeg_consume_input(j_decompress_ptr cinfo)
+{
+  int retcode = JPEG_SUSPENDED;
+
+  /* NB: every possible DSTATE value should be listed in this switch */
+  switch (cinfo->global_state) {
+  case DSTATE_START:
+    /* Start-of-datastream actions: reset appropriate modules */
+    (*cinfo->inputctl->reset_input_controller) (cinfo);
+    /* Initialize application's data source module */
+    (*cinfo->src->init_source) (cinfo);
+    cinfo->global_state = DSTATE_INHEADER;
+    FALLTHROUGH                 /*FALLTHROUGH*/
+  case DSTATE_INHEADER:
+    retcode = (*cinfo->inputctl->consume_input) (cinfo);
+    if (retcode == JPEG_REACHED_SOS) { /* Found SOS, prepare to decompress */
+      /* Set up default parameters based on header data */
+      default_decompress_parms(cinfo);
+      /* Set global state: ready for start_decompress */
+      cinfo->global_state = DSTATE_READY;
+    }
+    break;
+  case DSTATE_READY:
+    /* Can't advance past first SOS until start_decompress is called */
+    retcode = JPEG_REACHED_SOS;
+    break;
+  case DSTATE_PRELOAD:
+  case DSTATE_PRESCAN:
+  case DSTATE_SCANNING:
+  case DSTATE_RAW_OK:
+  case DSTATE_BUFIMAGE:
+  case DSTATE_BUFPOST:
+  case DSTATE_STOPPING:
+    retcode = (*cinfo->inputctl->consume_input) (cinfo);
+    break;
+  default:
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  return retcode;
+}
+
+
+/*
+ * Have we finished reading the input file?
+ */
+
+GLOBAL(boolean)
+jpeg_input_complete(j_decompress_ptr cinfo)
+{
+  /* Check for valid jpeg object */
+  if (cinfo->global_state < DSTATE_START ||
+      cinfo->global_state > DSTATE_STOPPING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return cinfo->inputctl->eoi_reached;
+}
+
+
+/*
+ * Is there more than one scan?
+ */
+
+GLOBAL(boolean)
+jpeg_has_multiple_scans(j_decompress_ptr cinfo)
+{
+  /* Only valid after jpeg_read_header completes */
+  if (cinfo->global_state < DSTATE_READY ||
+      cinfo->global_state > DSTATE_STOPPING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return cinfo->inputctl->has_multiple_scans;
+}
+
+
+/*
+ * Finish JPEG decompression.
+ *
+ * This will normally just verify the file trailer and release temp storage.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_finish_decompress(j_decompress_ptr cinfo)
+{
+  if ((cinfo->global_state == DSTATE_SCANNING ||
+       cinfo->global_state == DSTATE_RAW_OK) && !cinfo->buffered_image) {
+    /* Terminate final pass of non-buffered mode */
+    if (cinfo->output_scanline < cinfo->output_height)
+      ERREXIT(cinfo, JERR_TOO_LITTLE_DATA);
+    (*cinfo->master->finish_output_pass) (cinfo);
+    cinfo->global_state = DSTATE_STOPPING;
+  } else if (cinfo->global_state == DSTATE_BUFIMAGE) {
+    /* Finishing after a buffered-image operation */
+    cinfo->global_state = DSTATE_STOPPING;
+  } else if (cinfo->global_state != DSTATE_STOPPING) {
+    /* STOPPING = repeat call after a suspension, anything else is error */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  /* Read until EOI */
+  while (!cinfo->inputctl->eoi_reached) {
+    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
+      return FALSE;             /* Suspend, come back later */
+  }
+  /* Do final cleanup */
+  (*cinfo->src->term_source) (cinfo);
+  /* We can use jpeg_abort to release memory and reset global_state */
+  jpeg_abort((j_common_ptr)cinfo);
+  return TRUE;
+}
diff --git a/thirdparty/libjpeg-turbo/src/jdapistd.c b/thirdparty/libjpeg-turbo/src/jdapistd.c
new file mode 100644
index 00000000000..d0e5c0e5b9f
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdapistd.c
@@ -0,0 +1,764 @@
+/*
+ * jdapistd.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, 2015-2020, 2022-2024, D. R. Commander.
+ * Copyright (C) 2015, Google, Inc.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains application interface code for the decompression half
+ * of the JPEG library.  These are the "standard" API routines that are
+ * used in the normal full-decompression case.  They are not used by a
+ * transcoding-only application.  Note that if an application links in
+ * jpeg_start_decompress, it will end up linking in the entire decompressor.
+ * We thus must separate this file from jdapimin.c to avoid linking the
+ * whole decompression library into a transcoder.
+ */
+
+#include "jinclude.h"
+#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
+#include "jdmainct.h"
+#include "jdcoefct.h"
+#else
+#define JPEG_INTERNALS
+#include "jpeglib.h"
+#endif
+#include "jdmaster.h"
+#include "jdmerge.h"
+#include "jdsample.h"
+#include "jmemsys.h"
+
+#if BITS_IN_JSAMPLE == 8
+
+/* Forward declarations */
+LOCAL(boolean) output_pass_setup(j_decompress_ptr cinfo);
+
+
+/*
+ * Decompression initialization.
+ * jpeg_read_header must be completed before calling this.
+ *
+ * If a multipass operating mode was selected, this will do all but the
+ * last pass, and thus may take a great deal of time.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_start_decompress(j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state == DSTATE_READY) {
+    /* First call: initialize master control, select active modules */
+    jinit_master_decompress(cinfo);
+    if (cinfo->buffered_image) {
+      /* No more work here; expecting jpeg_start_output next */
+      cinfo->global_state = DSTATE_BUFIMAGE;
+      return TRUE;
+    }
+    cinfo->global_state = DSTATE_PRELOAD;
+  }
+  if (cinfo->global_state == DSTATE_PRELOAD) {
+    /* If file has multiple scans, absorb them all into the coef buffer */
+    if (cinfo->inputctl->has_multiple_scans) {
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+      for (;;) {
+        int retcode;
+        /* Call progress monitor hook if present */
+        if (cinfo->progress != NULL)
+          (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo);
+        /* Absorb some more input */
+        retcode = (*cinfo->inputctl->consume_input) (cinfo);
+        if (retcode == JPEG_SUSPENDED)
+          return FALSE;
+        if (retcode == JPEG_REACHED_EOI)
+          break;
+        /* Advance progress counter if appropriate */
+        if (cinfo->progress != NULL &&
+            (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+          if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+            /* jdmaster underestimated number of scans; ratchet up one scan */
+            cinfo->progress->pass_limit += (long)cinfo->total_iMCU_rows;
+          }
+        }
+      }
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+    }
+    cinfo->output_scan_number = cinfo->input_scan_number;
+  } else if (cinfo->global_state != DSTATE_PRESCAN)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Perform any dummy output passes, and set up for the final pass */
+  return output_pass_setup(cinfo);
+}
+
+
+/*
+ * Set up for an output pass, and perform any dummy pass(es) needed.
+ * Common subroutine for jpeg_start_decompress and jpeg_start_output.
+ * Entry: global_state = DSTATE_PRESCAN only if previously suspended.
+ * Exit: If done, returns TRUE and sets global_state for proper output mode.
+ *       If suspended, returns FALSE and sets global_state = DSTATE_PRESCAN.
+ */
+
+LOCAL(boolean)
+output_pass_setup(j_decompress_ptr cinfo)
+{
+  if (cinfo->global_state != DSTATE_PRESCAN) {
+    /* First call: do pass setup */
+    (*cinfo->master->prepare_for_output_pass) (cinfo);
+    cinfo->output_scanline = 0;
+    cinfo->global_state = DSTATE_PRESCAN;
+  }
+  /* Loop over any required dummy passes */
+  while (cinfo->master->is_dummy_pass) {
+#ifdef QUANT_2PASS_SUPPORTED
+    /* Crank through the dummy pass */
+    while (cinfo->output_scanline < cinfo->output_height) {
+      JDIMENSION last_scanline;
+      /* Call progress monitor hook if present */
+      if (cinfo->progress != NULL) {
+        cinfo->progress->pass_counter = (long)cinfo->output_scanline;
+        cinfo->progress->pass_limit = (long)cinfo->output_height;
+        (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo);
+      }
+      /* Process some data */
+      last_scanline = cinfo->output_scanline;
+      if (cinfo->data_precision <= 8)
+        (*cinfo->main->process_data) (cinfo, (JSAMPARRAY)NULL,
+                                      &cinfo->output_scanline, (JDIMENSION)0);
+      else if (cinfo->data_precision <= 12)
+        (*cinfo->main->process_data_12) (cinfo, (J12SAMPARRAY)NULL,
+                                         &cinfo->output_scanline,
+                                         (JDIMENSION)0);
+#ifdef D_LOSSLESS_SUPPORTED
+      else
+        (*cinfo->main->process_data_16) (cinfo, (J16SAMPARRAY)NULL,
+                                         &cinfo->output_scanline,
+                                         (JDIMENSION)0);
+#endif
+      if (cinfo->output_scanline == last_scanline)
+        return FALSE;           /* No progress made, must suspend */
+    }
+    /* Finish up dummy pass, and set up for another one */
+    (*cinfo->master->finish_output_pass) (cinfo);
+    (*cinfo->master->prepare_for_output_pass) (cinfo);
+    cinfo->output_scanline = 0;
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* QUANT_2PASS_SUPPORTED */
+  }
+  /* Ready for application to drive output pass through
+   * _jpeg_read_scanlines or _jpeg_read_raw_data.
+   */
+  cinfo->global_state = cinfo->raw_data_out ? DSTATE_RAW_OK : DSTATE_SCANNING;
+  return TRUE;
+}
+
+#endif /* BITS_IN_JSAMPLE == 8 */
+
+
+#if BITS_IN_JSAMPLE != 16
+
+/*
+ * Enable partial scanline decompression
+ *
+ * Must be called after jpeg_start_decompress() and before any calls to
+ * _jpeg_read_scanlines() or _jpeg_skip_scanlines().
+ *
+ * Refer to libjpeg.txt for more information.
+ */
+
+GLOBAL(void)
+_jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
+                    JDIMENSION *width)
+{
+  int ci, align, orig_downsampled_width;
+  JDIMENSION input_xoffset;
+  boolean reinit_upsampler = FALSE;
+  jpeg_component_info *compptr;
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+  my_master_ptr master = (my_master_ptr)cinfo->master;
+#endif
+
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  if (cinfo->master->lossless)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
+  if ((cinfo->global_state != DSTATE_SCANNING &&
+       cinfo->global_state != DSTATE_BUFIMAGE) || cinfo->output_scanline != 0)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (!xoffset || !width)
+    ERREXIT(cinfo, JERR_BAD_CROP_SPEC);
+
+  /* xoffset and width must fall within the output image dimensions. */
+  if (*width == 0 ||
+      (unsigned long long)(*xoffset) + *width > cinfo->output_width)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+
+  /* No need to do anything if the caller wants the entire width. */
+  if (*width == cinfo->output_width)
+    return;
+
+  /* Ensuring the proper alignment of xoffset is tricky.  At minimum, it
+   * must align with an MCU boundary, because:
+   *
+   *   (1) The IDCT is performed in blocks, and it is not feasible to modify
+   *       the algorithm so that it can transform partial blocks.
+   *   (2) Because of the SIMD extensions, any input buffer passed to the
+   *       upsampling and color conversion routines must be aligned to the
+   *       SIMD word size (for instance, 128-bit in the case of SSE2.)  The
+   *       easiest way to accomplish this without copying data is to ensure
+   *       that upsampling and color conversion begin at the start of the
+   *       first MCU column that will be inverse transformed.
+   *
+   * In practice, we actually impose a stricter alignment requirement.  We
+   * require that xoffset be a multiple of the maximum MCU column width of all
+   * of the components (the "iMCU column width.")  This is to simplify the
+   * single-pass decompression case, allowing us to use the same MCU column
+   * width for all of the components.
+   */
+  if (cinfo->comps_in_scan == 1 && cinfo->num_components == 1)
+    align = cinfo->_min_DCT_scaled_size;
+  else
+    align = cinfo->_min_DCT_scaled_size * cinfo->max_h_samp_factor;
+
+  /* Adjust xoffset to the nearest iMCU boundary <= the requested value */
+  input_xoffset = *xoffset;
+  *xoffset = (input_xoffset / align) * align;
+
+  /* Adjust the width so that the right edge of the output image is as
+   * requested (only the left edge is altered.)  It is important that calling
+   * programs check this value after this function returns, so that they can
+   * allocate an output buffer with the appropriate size.
+   */
+  *width = *width + input_xoffset - *xoffset;
+  cinfo->output_width = *width;
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+  if (master->using_merged_upsample && cinfo->max_v_samp_factor == 2) {
+    my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
+    upsample->out_row_width =
+      cinfo->output_width * cinfo->out_color_components;
+  }
+#endif
+
+  /* Set the first and last iMCU columns that we must decompress.  These values
+   * will be used in single-scan decompressions.
+   */
+  cinfo->master->first_iMCU_col = (JDIMENSION)(long)(*xoffset) / (long)align;
+  cinfo->master->last_iMCU_col =
+    (JDIMENSION)jdiv_round_up((long)(*xoffset + cinfo->output_width),
+                              (long)align) - 1;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    int hsf = (cinfo->comps_in_scan == 1 && cinfo->num_components == 1) ?
+              1 : compptr->h_samp_factor;
+
+    /* Set downsampled_width to the new output width. */
+    orig_downsampled_width = compptr->downsampled_width;
+    compptr->downsampled_width =
+      (JDIMENSION)jdiv_round_up((long)cinfo->output_width *
+                                (long)(compptr->h_samp_factor *
+                                       compptr->_DCT_scaled_size),
+                                (long)(cinfo->max_h_samp_factor *
+                                       cinfo->_min_DCT_scaled_size));
+    if (compptr->downsampled_width < 2 && orig_downsampled_width >= 2)
+      reinit_upsampler = TRUE;
+
+    /* Set the first and last iMCU columns that we must decompress.  These
+     * values will be used in multi-scan decompressions.
+     */
+    cinfo->master->first_MCU_col[ci] =
+      (JDIMENSION)(long)(*xoffset * hsf) / (long)align;
+    cinfo->master->last_MCU_col[ci] =
+      (JDIMENSION)jdiv_round_up((long)((*xoffset + cinfo->output_width) * hsf),
+                                (long)align) - 1;
+  }
+
+  if (reinit_upsampler) {
+    cinfo->master->jinit_upsampler_no_alloc = TRUE;
+    _jinit_upsampler(cinfo);
+    cinfo->master->jinit_upsampler_no_alloc = FALSE;
+  }
+}
+
+#endif /* BITS_IN_JSAMPLE != 16 */
+
+
+/*
+ * Read some scanlines of data from the JPEG decompressor.
+ *
+ * The return value will be the number of lines actually read.
+ * This may be less than the number requested in several cases,
+ * including bottom of image, data source suspension, and operating
+ * modes that emit multiple scanlines at a time.
+ *
+ * Note: we warn about excess calls to _jpeg_read_scanlines() since
+ * this likely signals an application programmer error.  However,
+ * an oversize buffer (max_lines > scanlines remaining) is not an error.
+ */
+
+GLOBAL(JDIMENSION)
+_jpeg_read_scanlines(j_decompress_ptr cinfo, _JSAMPARRAY scanlines,
+                     JDIMENSION max_lines)
+{
+#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
+  JDIMENSION row_ctr;
+
+#ifdef D_LOSSLESS_SUPPORTED
+  if (cinfo->master->lossless) {
+#if BITS_IN_JSAMPLE == 8
+    if (cinfo->data_precision > BITS_IN_JSAMPLE || cinfo->data_precision < 2)
+#else
+    if (cinfo->data_precision > BITS_IN_JSAMPLE ||
+        cinfo->data_precision < BITS_IN_JSAMPLE - 3)
+#endif
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  } else
+#endif
+  {
+    if (cinfo->data_precision != BITS_IN_JSAMPLE)
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  }
+
+  if (cinfo->global_state != DSTATE_SCANNING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->output_scanline >= cinfo->output_height) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;
+  }
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long)cinfo->output_scanline;
+    cinfo->progress->pass_limit = (long)cinfo->output_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo);
+  }
+
+  /* Process some data */
+  row_ctr = 0;
+  (*cinfo->main->_process_data) (cinfo, scanlines, &row_ctr, max_lines);
+  cinfo->output_scanline += row_ctr;
+  return row_ctr;
+#else
+  ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  return 0;
+#endif
+}
+
+
+#if BITS_IN_JSAMPLE != 16
+
+/* Dummy color convert function used by _jpeg_skip_scanlines() */
+LOCAL(void)
+noop_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+             JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
+{
+}
+
+
+/* Dummy quantize function used by _jpeg_skip_scanlines() */
+LOCAL(void)
+noop_quantize(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+              _JSAMPARRAY output_buf, int num_rows)
+{
+}
+
+
+/*
+ * In some cases, it is best to call _jpeg_read_scanlines() and discard the
+ * output, rather than skipping the scanlines, because this allows us to
+ * maintain the internal state of the context-based upsampler.  In these cases,
+ * we set up and tear down a dummy color converter in order to avoid valgrind
+ * errors and to achieve the best possible performance.
+ */
+
+LOCAL(void)
+read_and_discard_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
+{
+  JDIMENSION n;
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+  my_master_ptr master = (my_master_ptr)cinfo->master;
+#endif
+  _JSAMPLE dummy_sample[1] = { 0 };
+  _JSAMPROW dummy_row = dummy_sample;
+  _JSAMPARRAY scanlines = NULL;
+  void (*color_convert) (j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                         JDIMENSION input_row, _JSAMPARRAY output_buf,
+                         int num_rows) = NULL;
+  void (*color_quantize) (j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+                          _JSAMPARRAY output_buf, int num_rows) = NULL;
+
+  if (cinfo->cconvert && cinfo->cconvert->_color_convert) {
+    color_convert = cinfo->cconvert->_color_convert;
+    cinfo->cconvert->_color_convert = noop_convert;
+    /* This just prevents UBSan from complaining about adding 0 to a NULL
+     * pointer.  The pointer isn't actually used.
+     */
+    scanlines = &dummy_row;
+  }
+
+  if (cinfo->cquantize && cinfo->cquantize->_color_quantize) {
+    color_quantize = cinfo->cquantize->_color_quantize;
+    cinfo->cquantize->_color_quantize = noop_quantize;
+  }
+
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+  if (master->using_merged_upsample && cinfo->max_v_samp_factor == 2) {
+    my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
+    scanlines = &upsample->spare_row;
+  }
+#endif
+
+  for (n = 0; n < num_lines; n++)
+    _jpeg_read_scanlines(cinfo, scanlines, 1);
+
+  if (color_convert)
+    cinfo->cconvert->_color_convert = color_convert;
+
+  if (color_quantize)
+    cinfo->cquantize->_color_quantize = color_quantize;
+}
+
+
+/*
+ * Called by _jpeg_skip_scanlines().  This partially skips a decompress block
+ * by incrementing the rowgroup counter.
+ */
+
+LOCAL(void)
+increment_simple_rowgroup_ctr(j_decompress_ptr cinfo, JDIMENSION rows)
+{
+  JDIMENSION rows_left;
+  my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
+  my_master_ptr master = (my_master_ptr)cinfo->master;
+
+  if (master->using_merged_upsample && cinfo->max_v_samp_factor == 2) {
+    read_and_discard_scanlines(cinfo, rows);
+    return;
+  }
+
+  /* Increment the counter to the next row group after the skipped rows. */
+  main_ptr->rowgroup_ctr += rows / cinfo->max_v_samp_factor;
+
+  /* Partially skipping a row group would involve modifying the internal state
+   * of the upsampler, so read the remaining rows into a dummy buffer instead.
+   */
+  rows_left = rows % cinfo->max_v_samp_factor;
+  cinfo->output_scanline += rows - rows_left;
+
+  read_and_discard_scanlines(cinfo, rows_left);
+}
+
+/*
+ * Skips some scanlines of data from the JPEG decompressor.
+ *
+ * The return value will be the number of lines actually skipped.  If skipping
+ * num_lines would move beyond the end of the image, then the actual number of
+ * lines remaining in the image is returned.  Otherwise, the return value will
+ * be equal to num_lines.
+ *
+ * Refer to libjpeg.txt for more information.
+ */
+
+GLOBAL(JDIMENSION)
+_jpeg_skip_scanlines(j_decompress_ptr cinfo, JDIMENSION num_lines)
+{
+  my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
+  my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
+  my_master_ptr master = (my_master_ptr)cinfo->master;
+  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  JDIMENSION i, x;
+  int y;
+  JDIMENSION lines_per_iMCU_row, lines_left_in_iMCU_row, lines_after_iMCU_row;
+  JDIMENSION lines_to_skip, lines_to_read;
+
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  if (cinfo->master->lossless)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
+  /* Two-pass color quantization is not supported. */
+  if (cinfo->quantize_colors && cinfo->two_pass_quantize)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
+  if (cinfo->global_state != DSTATE_SCANNING)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* Do not skip past the bottom of the image. */
+  if ((unsigned long long)cinfo->output_scanline + num_lines >=
+      cinfo->output_height) {
+    num_lines = cinfo->output_height - cinfo->output_scanline;
+    cinfo->output_scanline = cinfo->output_height;
+    (*cinfo->inputctl->finish_input_pass) (cinfo);
+    cinfo->inputctl->eoi_reached = TRUE;
+    return num_lines;
+  }
+
+  if (num_lines == 0)
+    return 0;
+
+  lines_per_iMCU_row = cinfo->_min_DCT_scaled_size * cinfo->max_v_samp_factor;
+  lines_left_in_iMCU_row =
+    (lines_per_iMCU_row - (cinfo->output_scanline % lines_per_iMCU_row)) %
+    lines_per_iMCU_row;
+  lines_after_iMCU_row = num_lines - lines_left_in_iMCU_row;
+
+  /* Skip the lines remaining in the current iMCU row.  When upsampling
+   * requires context rows, we need the previous and next rows in order to read
+   * the current row.  This adds some complexity.
+   */
+  if (cinfo->upsample->need_context_rows) {
+    /* If the skipped lines would not move us past the current iMCU row, we
+     * read the lines and ignore them.  There might be a faster way of doing
+     * this, but we are facing increasing complexity for diminishing returns.
+     * The increasing complexity would be a by-product of meddling with the
+     * state machine used to skip context rows.  Near the end of an iMCU row,
+     * the next iMCU row may have already been entropy-decoded.  In this unique
+     * case, we will read the next iMCU row if we cannot skip past it as well.
+     */
+    if ((num_lines < lines_left_in_iMCU_row + 1) ||
+        (lines_left_in_iMCU_row <= 1 && main_ptr->buffer_full &&
+         lines_after_iMCU_row < lines_per_iMCU_row + 1)) {
+      read_and_discard_scanlines(cinfo, num_lines);
+      return num_lines;
+    }
+
+    /* If the next iMCU row has already been entropy-decoded, make sure that
+     * we do not skip too far.
+     */
+    if (lines_left_in_iMCU_row <= 1 && main_ptr->buffer_full) {
+      cinfo->output_scanline += lines_left_in_iMCU_row + lines_per_iMCU_row;
+      lines_after_iMCU_row -= lines_per_iMCU_row;
+    } else {
+      cinfo->output_scanline += lines_left_in_iMCU_row;
+    }
+
+    /* If we have just completed the first block, adjust the buffer pointers */
+    if (main_ptr->iMCU_row_ctr == 0 ||
+        (main_ptr->iMCU_row_ctr == 1 && lines_left_in_iMCU_row > 2))
+      set_wraparound_pointers(cinfo);
+    main_ptr->buffer_full = FALSE;
+    main_ptr->rowgroup_ctr = 0;
+    main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
+    if (!master->using_merged_upsample) {
+      upsample->next_row_out = cinfo->max_v_samp_factor;
+      upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
+    }
+  }
+
+  /* Skipping is much simpler when context rows are not required. */
+  else {
+    if (num_lines < lines_left_in_iMCU_row) {
+      increment_simple_rowgroup_ctr(cinfo, num_lines);
+      return num_lines;
+    } else {
+      cinfo->output_scanline += lines_left_in_iMCU_row;
+      main_ptr->buffer_full = FALSE;
+      main_ptr->rowgroup_ctr = 0;
+      if (!master->using_merged_upsample) {
+        upsample->next_row_out = cinfo->max_v_samp_factor;
+        upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
+      }
+    }
+  }
+
+  /* Calculate how many full iMCU rows we can skip. */
+  if (cinfo->upsample->need_context_rows)
+    lines_to_skip = ((lines_after_iMCU_row - 1) / lines_per_iMCU_row) *
+                    lines_per_iMCU_row;
+  else
+    lines_to_skip = (lines_after_iMCU_row / lines_per_iMCU_row) *
+                    lines_per_iMCU_row;
+  /* Calculate the number of lines that remain to be skipped after skipping all
+   * of the full iMCU rows that we can.  We will not read these lines unless we
+   * have to.
+   */
+  lines_to_read = lines_after_iMCU_row - lines_to_skip;
+
+  /* For images requiring multiple scans (progressive, non-interleaved, etc.),
+   * all of the entropy decoding occurs in jpeg_start_decompress(), assuming
+   * that the input data source is non-suspending.  This makes skipping easy.
+   */
+  if (cinfo->inputctl->has_multiple_scans || cinfo->buffered_image) {
+    if (cinfo->upsample->need_context_rows) {
+      cinfo->output_scanline += lines_to_skip;
+      cinfo->output_iMCU_row += lines_to_skip / lines_per_iMCU_row;
+      main_ptr->iMCU_row_ctr += lines_to_skip / lines_per_iMCU_row;
+      /* It is complex to properly move to the middle of a context block, so
+       * read the remaining lines instead of skipping them.
+       */
+      read_and_discard_scanlines(cinfo, lines_to_read);
+    } else {
+      cinfo->output_scanline += lines_to_skip;
+      cinfo->output_iMCU_row += lines_to_skip / lines_per_iMCU_row;
+      increment_simple_rowgroup_ctr(cinfo, lines_to_read);
+    }
+    if (!master->using_merged_upsample)
+      upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
+    return num_lines;
+  }
+
+  /* Skip the iMCU rows that we can safely skip. */
+  for (i = 0; i < lines_to_skip; i += lines_per_iMCU_row) {
+    for (y = 0; y < coef->MCU_rows_per_iMCU_row; y++) {
+      for (x = 0; x < cinfo->MCUs_per_row; x++) {
+        /* Calling decode_mcu() with a NULL pointer causes it to discard the
+         * decoded coefficients.  This is ~5% faster for large subsets, but
+         * it's tough to tell a difference for smaller images.
+         */
+        if (!cinfo->entropy->insufficient_data)
+          cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row;
+        (*cinfo->entropy->decode_mcu) (cinfo, NULL);
+      }
+    }
+    cinfo->input_iMCU_row++;
+    cinfo->output_iMCU_row++;
+    if (cinfo->input_iMCU_row < cinfo->total_iMCU_rows)
+      start_iMCU_row(cinfo);
+    else
+      (*cinfo->inputctl->finish_input_pass) (cinfo);
+  }
+  cinfo->output_scanline += lines_to_skip;
+
+  if (cinfo->upsample->need_context_rows) {
+    /* Context-based upsampling keeps track of iMCU rows. */
+    main_ptr->iMCU_row_ctr += lines_to_skip / lines_per_iMCU_row;
+
+    /* It is complex to properly move to the middle of a context block, so
+     * read the remaining lines instead of skipping them.
+     */
+    read_and_discard_scanlines(cinfo, lines_to_read);
+  } else {
+    increment_simple_rowgroup_ctr(cinfo, lines_to_read);
+  }
+
+  /* Since skipping lines involves skipping the upsampling step, the value of
+   * "rows_to_go" will become invalid unless we set it here.  NOTE: This is a
+   * bit odd, since "rows_to_go" seems to be redundantly keeping track of
+   * output_scanline.
+   */
+  if (!master->using_merged_upsample)
+    upsample->rows_to_go = cinfo->output_height - cinfo->output_scanline;
+
+  /* Always skip the requested number of lines. */
+  return num_lines;
+}
+
+/*
+ * Alternate entry point to read raw data.
+ * Processes exactly one iMCU row per call, unless suspended.
+ */
+
+GLOBAL(JDIMENSION)
+_jpeg_read_raw_data(j_decompress_ptr cinfo, _JSAMPIMAGE data,
+                    JDIMENSION max_lines)
+{
+  JDIMENSION lines_per_iMCU_row;
+
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  if (cinfo->master->lossless)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
+  if (cinfo->global_state != DSTATE_RAW_OK)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  if (cinfo->output_scanline >= cinfo->output_height) {
+    WARNMS(cinfo, JWRN_TOO_MUCH_DATA);
+    return 0;
+  }
+
+  /* Call progress monitor hook if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->pass_counter = (long)cinfo->output_scanline;
+    cinfo->progress->pass_limit = (long)cinfo->output_height;
+    (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo);
+  }
+
+  /* Verify that at least one iMCU row can be returned. */
+  lines_per_iMCU_row = cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size;
+  if (max_lines < lines_per_iMCU_row)
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* Decompress directly into user's buffer. */
+  if (!(*cinfo->coef->_decompress_data) (cinfo, data))
+    return 0;                   /* suspension forced, can do nothing more */
+
+  /* OK, we processed one iMCU row. */
+  cinfo->output_scanline += lines_per_iMCU_row;
+  return lines_per_iMCU_row;
+}
+
+#endif /* BITS_IN_JSAMPLE != 16 */
+
+
+#if BITS_IN_JSAMPLE == 8
+
+/* Additional entry points for buffered-image mode. */
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Initialize for an output pass in buffered-image mode.
+ */
+
+GLOBAL(boolean)
+jpeg_start_output(j_decompress_ptr cinfo, int scan_number)
+{
+  if (cinfo->global_state != DSTATE_BUFIMAGE &&
+      cinfo->global_state != DSTATE_PRESCAN)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  /* Limit scan number to valid range */
+  if (scan_number <= 0)
+    scan_number = 1;
+  if (cinfo->inputctl->eoi_reached && scan_number > cinfo->input_scan_number)
+    scan_number = cinfo->input_scan_number;
+  cinfo->output_scan_number = scan_number;
+  /* Perform any dummy output passes, and set up for the real pass */
+  return output_pass_setup(cinfo);
+}
+
+
+/*
+ * Finish up after an output pass in buffered-image mode.
+ *
+ * Returns FALSE if suspended.  The return value need be inspected only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(boolean)
+jpeg_finish_output(j_decompress_ptr cinfo)
+{
+  if ((cinfo->global_state == DSTATE_SCANNING ||
+       cinfo->global_state == DSTATE_RAW_OK) && cinfo->buffered_image) {
+    /* Terminate this pass. */
+    /* We do not require the whole pass to have been completed. */
+    (*cinfo->master->finish_output_pass) (cinfo);
+    cinfo->global_state = DSTATE_BUFPOST;
+  } else if (cinfo->global_state != DSTATE_BUFPOST) {
+    /* BUFPOST = repeat call after a suspension, anything else is error */
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  }
+  /* Read markers looking for SOS or EOI */
+  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
+         !cinfo->inputctl->eoi_reached) {
+    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
+      return FALSE;             /* Suspend, come back later */
+  }
+  cinfo->global_state = DSTATE_BUFIMAGE;
+  return TRUE;
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+
+#endif /* BITS_IN_JSAMPLE == 8 */
diff --git a/thirdparty/libjpeg-turbo/src/jdarith.c b/thirdparty/libjpeg-turbo/src/jdarith.c
new file mode 100644
index 00000000000..21575e80c72
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdarith.c
@@ -0,0 +1,782 @@
+/*
+ * jdarith.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Developed 1997-2015 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015-2020, 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains portable arithmetic entropy encoding routines for JPEG
+ * (implementing Recommendation ITU-T T.81 | ISO/IEC 10918-1).
+ *
+ * Both sequential and progressive modes are supported in this single module.
+ *
+ * Suspension is not currently supported in this module.
+ *
+ * NOTE: All referenced figures are from
+ * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+#define NEG_1  ((unsigned int)-1)
+
+
+/* Expanded entropy decoder object for arithmetic decoding. */
+
+typedef struct {
+  struct jpeg_entropy_decoder pub; /* public fields */
+
+  JLONG c;       /* C register, base of coding interval + input bit buffer */
+  JLONG a;               /* A register, normalized size of coding interval */
+  int ct;     /* bit shift counter, # of bits left in bit buffer part of C */
+                                                         /* init: ct = -16 */
+                                                         /* run: ct = 0..7 */
+                                                         /* error: ct = -1 */
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+  int dc_context[MAX_COMPS_IN_SCAN]; /* context index for DC conditioning */
+
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
+
+  /* Pointers to statistics areas (these workspaces have image lifespan) */
+  unsigned char *dc_stats[NUM_ARITH_TBLS];
+  unsigned char *ac_stats[NUM_ARITH_TBLS];
+
+  /* Statistics bin for coding with fixed probability 0.5 */
+  unsigned char fixed_bin[4];
+} arith_entropy_decoder;
+
+typedef arith_entropy_decoder *arith_entropy_ptr;
+
+/* The following two definitions specify the allocation chunk size
+ * for the statistics area.
+ * According to sections F.1.4.4.1.3 and F.1.4.4.2, we need at least
+ * 49 statistics bins for DC, and 245 statistics bins for AC coding.
+ *
+ * We use a compact representation with 1 byte per statistics bin,
+ * thus the numbers directly represent byte sizes.
+ * This 1 byte per statistics bin contains the meaning of the MPS
+ * (more probable symbol) in the highest bit (mask 0x80), and the
+ * index into the probability estimation state machine table
+ * in the lower bits (mask 0x7F).
+ */
+
+#define DC_STAT_BINS  64
+#define AC_STAT_BINS  256
+
+
+LOCAL(int)
+get_byte(j_decompress_ptr cinfo)
+/* Read next input byte; we do not support suspension in this module. */
+{
+  struct jpeg_source_mgr *src = cinfo->src;
+
+  if (src->bytes_in_buffer == 0)
+    if (!(*src->fill_input_buffer) (cinfo))
+      ERREXIT(cinfo, JERR_CANT_SUSPEND);
+  src->bytes_in_buffer--;
+  return *src->next_input_byte++;
+}
+
+
+/*
+ * The core arithmetic decoding routine (common in JPEG and JBIG).
+ * This needs to go as fast as possible.
+ * Machine-dependent optimization facilities
+ * are not utilized in this portable implementation.
+ * However, this code should be fairly efficient and
+ * may be a good base for further optimizations anyway.
+ *
+ * Return value is 0 or 1 (binary decision).
+ *
+ * Note: I've changed the handling of the code base & bit
+ * buffer register C compared to other implementations
+ * based on the standards layout & procedures.
+ * While it also contains both the actual base of the
+ * coding interval (16 bits) and the next-bits buffer,
+ * the cut-point between these two parts is floating
+ * (instead of fixed) with the bit shift counter CT.
+ * Thus, we also need only one (variable instead of
+ * fixed size) shift for the LPS/MPS decision, and
+ * we can do away with any renormalization update
+ * of C (except for new data insertion, of course).
+ *
+ * I've also introduced a new scheme for accessing
+ * the probability estimation state machine table,
+ * derived from Markus Kuhn's JBIG implementation.
+ */
+
+LOCAL(int)
+arith_decode(j_decompress_ptr cinfo, unsigned char *st)
+{
+  register arith_entropy_ptr e = (arith_entropy_ptr)cinfo->entropy;
+  register unsigned char nl, nm;
+  register JLONG qe, temp;
+  register int sv, data;
+
+  /* Renormalization & data input per section D.2.6 */
+  while (e->a < 0x8000L) {
+    if (--e->ct < 0) {
+      /* Need to fetch next data byte */
+      if (cinfo->unread_marker)
+        data = 0;               /* stuff zero data */
+      else {
+        data = get_byte(cinfo); /* read next input byte */
+        if (data == 0xFF) {     /* zero stuff or marker code */
+          do data = get_byte(cinfo);
+          while (data == 0xFF); /* swallow extra 0xFF bytes */
+          if (data == 0)
+            data = 0xFF;        /* discard stuffed zero byte */
+          else {
+            /* Note: Different from the Huffman decoder, hitting
+             * a marker while processing the compressed data
+             * segment is legal in arithmetic coding.
+             * The convention is to supply zero data
+             * then until decoding is complete.
+             */
+            cinfo->unread_marker = data;
+            data = 0;
+          }
+        }
+      }
+      e->c = (e->c << 8) | data; /* insert data into C register */
+      if ((e->ct += 8) < 0)      /* update bit shift counter */
+        /* Need more initial bytes */
+        if (++e->ct == 0)
+          /* Got 2 initial bytes -> re-init A and exit loop */
+          e->a = 0x8000L; /* => e->a = 0x10000L after loop exit */
+    }
+    e->a <<= 1;
+  }
+
+  /* Fetch values from our compact representation of Table D.2:
+   * Qe values and probability estimation state machine
+   */
+  sv = *st;
+  qe = jpeg_aritab[sv & 0x7F];  /* => Qe_Value */
+  nl = qe & 0xFF;  qe >>= 8;    /* Next_Index_LPS + Switch_MPS */
+  nm = qe & 0xFF;  qe >>= 8;    /* Next_Index_MPS */
+
+  /* Decode & estimation procedures per sections D.2.4 & D.2.5 */
+  temp = e->a - qe;
+  e->a = temp;
+  temp <<= e->ct;
+  if (e->c >= temp) {
+    e->c -= temp;
+    /* Conditional LPS (less probable symbol) exchange */
+    if (e->a < qe) {
+      e->a = qe;
+      *st = (sv & 0x80) ^ nm;   /* Estimate_after_MPS */
+    } else {
+      e->a = qe;
+      *st = (sv & 0x80) ^ nl;   /* Estimate_after_LPS */
+      sv ^= 0x80;               /* Exchange LPS/MPS */
+    }
+  } else if (e->a < 0x8000L) {
+    /* Conditional MPS (more probable symbol) exchange */
+    if (e->a < qe) {
+      *st = (sv & 0x80) ^ nl;   /* Estimate_after_LPS */
+      sv ^= 0x80;               /* Exchange LPS/MPS */
+    } else {
+      *st = (sv & 0x80) ^ nm;   /* Estimate_after_MPS */
+    }
+  }
+
+  return sv >> 7;
+}
+
+
+/*
+ * Check for a restart marker & resynchronize decoder.
+ */
+
+LOCAL(void)
+process_restart(j_decompress_ptr cinfo)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy;
+  int ci;
+  jpeg_component_info *compptr;
+
+  /* Advance past the RSTn marker */
+  if (!(*cinfo->marker->read_restart_marker) (cinfo))
+    ERREXIT(cinfo, JERR_CANT_SUSPEND);
+
+  /* Re-initialize statistics areas */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    if (!cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
+      memset(entropy->dc_stats[compptr->dc_tbl_no], 0, DC_STAT_BINS);
+      /* Reset DC predictions to 0 */
+      entropy->last_dc_val[ci] = 0;
+      entropy->dc_context[ci] = 0;
+    }
+    if (!cinfo->progressive_mode || cinfo->Ss) {
+      memset(entropy->ac_stats[compptr->ac_tbl_no], 0, AC_STAT_BINS);
+    }
+  }
+
+  /* Reset arithmetic decoding variables */
+  entropy->c = 0;
+  entropy->a = 0;
+  entropy->ct = -16;    /* force reading 2 initial bytes to fill C */
+
+  /* Reset restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Arithmetic MCU decoding.
+ * Each of these routines decodes and returns one MCU's worth of
+ * arithmetic-compressed coefficients.
+ * The coefficients are reordered from zigzag order into natural array order,
+ * but are not dequantized.
+ *
+ * The i'th block of the MCU is stored into the block pointed to by
+ * MCU_data[i].  WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
+ */
+
+/*
+ * MCU decoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int blkn, ci, tbl, sign;
+  int v, m;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;   /* if error do nothing */
+
+  /* Outer loop handles each block in the MCU */
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+    ci = cinfo->MCU_membership[blkn];
+    tbl = cinfo->cur_comp_info[ci]->dc_tbl_no;
+
+    /* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */
+
+    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
+    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
+
+    /* Figure F.19: Decode_DC_DIFF */
+    if (arith_decode(cinfo, st) == 0)
+      entropy->dc_context[ci] = 0;
+    else {
+      /* Figure F.21: Decoding nonzero value v */
+      /* Figure F.22: Decoding the sign of v */
+      sign = arith_decode(cinfo, st + 1);
+      st += 2;  st += sign;
+      /* Figure F.23: Decoding the magnitude category of v */
+      if ((m = arith_decode(cinfo, st)) != 0) {
+        st = entropy->dc_stats[tbl] + 20;       /* Table F.4: X1 = 20 */
+        while (arith_decode(cinfo, st)) {
+          if ((m <<= 1) == 0x8000) {
+            WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+            entropy->ct = -1;                   /* magnitude overflow */
+            return TRUE;
+          }
+          st += 1;
+        }
+      }
+      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
+      if (m < (int)((1L << cinfo->arith_dc_L[tbl]) >> 1))
+        entropy->dc_context[ci] = 0;               /* zero diff category */
+      else if (m > (int)((1L << cinfo->arith_dc_U[tbl]) >> 1))
+        entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
+      else
+        entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
+      v = m;
+      /* Figure F.24: Decoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+        if (arith_decode(cinfo, st)) v |= m;
+      v += 1;  if (sign) v = -v;
+      entropy->last_dc_val[ci] = (entropy->last_dc_val[ci] + v) & 0xffff;
+    }
+
+    /* Scale and output the DC coefficient (assumes jpeg_natural_order[0]=0) */
+    (*block)[0] = (JCOEF)LEFT_SHIFT(entropy->last_dc_val[ci], cinfo->Al);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy;
+  JBLOCKROW block;
+  unsigned char *st;
+  int tbl, sign, k;
+  int v, m;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;   /* if error do nothing */
+
+  /* There is always only one block per MCU */
+  block = MCU_data[0];
+  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
+
+  /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */
+
+  /* Figure F.20: Decode_AC_coefficients */
+  for (k = cinfo->Ss; k <= cinfo->Se; k++) {
+    st = entropy->ac_stats[tbl] + 3 * (k - 1);
+    if (arith_decode(cinfo, st)) break;         /* EOB flag */
+    while (arith_decode(cinfo, st + 1) == 0) {
+      st += 3;  k++;
+      if (k > cinfo->Se) {
+        WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+        entropy->ct = -1;                       /* spectral overflow */
+        return TRUE;
+      }
+    }
+    /* Figure F.21: Decoding nonzero value v */
+    /* Figure F.22: Decoding the sign of v */
+    sign = arith_decode(cinfo, entropy->fixed_bin);
+    st += 2;
+    /* Figure F.23: Decoding the magnitude category of v */
+    if ((m = arith_decode(cinfo, st)) != 0) {
+      if (arith_decode(cinfo, st)) {
+        m <<= 1;
+        st = entropy->ac_stats[tbl] +
+             (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+        while (arith_decode(cinfo, st)) {
+          if ((m <<= 1) == 0x8000) {
+            WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+            entropy->ct = -1;                   /* magnitude overflow */
+            return TRUE;
+          }
+          st += 1;
+        }
+      }
+    }
+    v = m;
+    /* Figure F.24: Decoding the magnitude bit pattern of v */
+    st += 14;
+    while (m >>= 1)
+      if (arith_decode(cinfo, st)) v |= m;
+    v += 1;  if (sign) v = -v;
+    /* Scale and output coefficient in natural (dezigzagged) order */
+    (*block)[jpeg_natural_order[k]] = (JCOEF)((unsigned)v << cinfo->Al);
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for DC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy;
+  unsigned char *st;
+  int p1, blkn;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  st = entropy->fixed_bin;      /* use fixed probability estimation */
+  p1 = 1 << cinfo->Al;          /* 1 in the bit position being coded */
+
+  /* Outer loop handles each block in the MCU */
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    /* Encoded data is simply the next bit of the two's-complement DC value */
+    if (arith_decode(cinfo, st))
+      MCU_data[blkn][0][0] |= p1;
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy;
+  JBLOCKROW block;
+  JCOEFPTR thiscoef;
+  unsigned char *st;
+  int tbl, k, kex;
+  int p1, m1;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;   /* if error do nothing */
+
+  /* There is always only one block per MCU */
+  block = MCU_data[0];
+  tbl = cinfo->cur_comp_info[0]->ac_tbl_no;
+
+  p1 = 1 << cinfo->Al;          /* 1 in the bit position being coded */
+  m1 = (NEG_1) << cinfo->Al;    /* -1 in the bit position being coded */
+
+  /* Establish EOBx (previous stage end-of-block) index */
+  for (kex = cinfo->Se; kex > 0; kex--)
+    if ((*block)[jpeg_natural_order[kex]]) break;
+
+  for (k = cinfo->Ss; k <= cinfo->Se; k++) {
+    st = entropy->ac_stats[tbl] + 3 * (k - 1);
+    if (k > kex)
+      if (arith_decode(cinfo, st)) break;       /* EOB flag */
+    for (;;) {
+      thiscoef = *block + jpeg_natural_order[k];
+      if (*thiscoef) {                          /* previously nonzero coef */
+        if (arith_decode(cinfo, st + 2)) {
+          if (*thiscoef < 0)
+            *thiscoef += (JCOEF)m1;
+          else
+            *thiscoef += (JCOEF)p1;
+        }
+        break;
+      }
+      if (arith_decode(cinfo, st + 1)) {        /* newly nonzero coef */
+        if (arith_decode(cinfo, entropy->fixed_bin))
+          *thiscoef = (JCOEF)m1;
+        else
+          *thiscoef = (JCOEF)p1;
+        break;
+      }
+      st += 3;  k++;
+      if (k > cinfo->Se) {
+        WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+        entropy->ct = -1;                       /* spectral overflow */
+        return TRUE;
+      }
+    }
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Decode one MCU's worth of arithmetic-compressed coefficients.
+ */
+
+METHODDEF(boolean)
+decode_mcu(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy;
+  jpeg_component_info *compptr;
+  JBLOCKROW block;
+  unsigned char *st;
+  int blkn, ci, tbl, sign, k;
+  int v, m;
+
+  /* Process restart marker if needed */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      process_restart(cinfo);
+    entropy->restarts_to_go--;
+  }
+
+  if (entropy->ct == -1) return TRUE;   /* if error do nothing */
+
+  /* Outer loop handles each block in the MCU */
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data ? MCU_data[blkn] : NULL;
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+
+    /* Sections F.2.4.1 & F.1.4.4.1: Decoding of DC coefficients */
+
+    tbl = compptr->dc_tbl_no;
+
+    /* Table F.4: Point to statistics bin S0 for DC coefficient coding */
+    st = entropy->dc_stats[tbl] + entropy->dc_context[ci];
+
+    /* Figure F.19: Decode_DC_DIFF */
+    if (arith_decode(cinfo, st) == 0)
+      entropy->dc_context[ci] = 0;
+    else {
+      /* Figure F.21: Decoding nonzero value v */
+      /* Figure F.22: Decoding the sign of v */
+      sign = arith_decode(cinfo, st + 1);
+      st += 2;  st += sign;
+      /* Figure F.23: Decoding the magnitude category of v */
+      if ((m = arith_decode(cinfo, st)) != 0) {
+        st = entropy->dc_stats[tbl] + 20;       /* Table F.4: X1 = 20 */
+        while (arith_decode(cinfo, st)) {
+          if ((m <<= 1) == 0x8000) {
+            WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+            entropy->ct = -1;                   /* magnitude overflow */
+            return TRUE;
+          }
+          st += 1;
+        }
+      }
+      /* Section F.1.4.4.1.2: Establish dc_context conditioning category */
+      if (m < (int)((1L << cinfo->arith_dc_L[tbl]) >> 1))
+        entropy->dc_context[ci] = 0;               /* zero diff category */
+      else if (m > (int)((1L << cinfo->arith_dc_U[tbl]) >> 1))
+        entropy->dc_context[ci] = 12 + (sign * 4); /* large diff category */
+      else
+        entropy->dc_context[ci] = 4 + (sign * 4);  /* small diff category */
+      v = m;
+      /* Figure F.24: Decoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+        if (arith_decode(cinfo, st)) v |= m;
+      v += 1;  if (sign) v = -v;
+      entropy->last_dc_val[ci] = (entropy->last_dc_val[ci] + v) & 0xffff;
+    }
+
+    if (block)
+      (*block)[0] = (JCOEF)entropy->last_dc_val[ci];
+
+    /* Sections F.2.4.2 & F.1.4.4.2: Decoding of AC coefficients */
+
+    tbl = compptr->ac_tbl_no;
+
+    /* Figure F.20: Decode_AC_coefficients */
+    for (k = 1; k <= DCTSIZE2 - 1; k++) {
+      st = entropy->ac_stats[tbl] + 3 * (k - 1);
+      if (arith_decode(cinfo, st)) break;       /* EOB flag */
+      while (arith_decode(cinfo, st + 1) == 0) {
+        st += 3;  k++;
+        if (k > DCTSIZE2 - 1) {
+          WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+          entropy->ct = -1;                     /* spectral overflow */
+          return TRUE;
+        }
+      }
+      /* Figure F.21: Decoding nonzero value v */
+      /* Figure F.22: Decoding the sign of v */
+      sign = arith_decode(cinfo, entropy->fixed_bin);
+      st += 2;
+      /* Figure F.23: Decoding the magnitude category of v */
+      if ((m = arith_decode(cinfo, st)) != 0) {
+        if (arith_decode(cinfo, st)) {
+          m <<= 1;
+          st = entropy->ac_stats[tbl] +
+               (k <= cinfo->arith_ac_K[tbl] ? 189 : 217);
+          while (arith_decode(cinfo, st)) {
+            if ((m <<= 1) == 0x8000) {
+              WARNMS(cinfo, JWRN_ARITH_BAD_CODE);
+              entropy->ct = -1;                 /* magnitude overflow */
+              return TRUE;
+            }
+            st += 1;
+          }
+        }
+      }
+      v = m;
+      /* Figure F.24: Decoding the magnitude bit pattern of v */
+      st += 14;
+      while (m >>= 1)
+        if (arith_decode(cinfo, st)) v |= m;
+      v += 1;  if (sign) v = -v;
+      if (block)
+        (*block)[jpeg_natural_order[k]] = (JCOEF)v;
+    }
+  }
+
+  return TRUE;
+}
+
+
+/*
+ * Initialize for an arithmetic-compressed scan.
+ */
+
+METHODDEF(void)
+start_pass(j_decompress_ptr cinfo)
+{
+  arith_entropy_ptr entropy = (arith_entropy_ptr)cinfo->entropy;
+  int ci, tbl;
+  jpeg_component_info *compptr;
+
+  if (cinfo->progressive_mode) {
+    /* Validate progressive scan parameters */
+    if (cinfo->Ss == 0) {
+      if (cinfo->Se != 0)
+        goto bad;
+    } else {
+      /* need not check Ss/Se < 0 since they came from unsigned bytes */
+      if (cinfo->Se < cinfo->Ss || cinfo->Se > DCTSIZE2 - 1)
+        goto bad;
+      /* AC scans may have only one component */
+      if (cinfo->comps_in_scan != 1)
+        goto bad;
+    }
+    if (cinfo->Ah != 0) {
+      /* Successive approximation refinement scan: must have Al = Ah-1. */
+      if (cinfo->Ah - 1 != cinfo->Al)
+        goto bad;
+    }
+    if (cinfo->Al > 13) {       /* need not check for < 0 */
+bad:
+      ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
+               cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+    }
+    /* Update progression status, and verify that scan order is legal.
+     * Note that inter-scan inconsistencies are treated as warnings
+     * not fatal errors ... not clear if this is right way to behave.
+     */
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      int coefi, cindex = cinfo->cur_comp_info[ci]->component_index;
+      int *coef_bit_ptr = &cinfo->coef_bits[cindex][0];
+      int *prev_coef_bit_ptr =
+        &cinfo->coef_bits[cindex + cinfo->num_components][0];
+      if (cinfo->Ss && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
+        WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
+      for (coefi = MIN(cinfo->Ss, 1); coefi <= MAX(cinfo->Se, 9); coefi++) {
+        if (cinfo->input_scan_number > 1)
+          prev_coef_bit_ptr[coefi] = coef_bit_ptr[coefi];
+        else
+          prev_coef_bit_ptr[coefi] = 0;
+      }
+      for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
+        int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
+        if (cinfo->Ah != expected)
+          WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
+        coef_bit_ptr[coefi] = cinfo->Al;
+      }
+    }
+    /* Select MCU decoding routine */
+    if (cinfo->Ah == 0) {
+      if (cinfo->Ss == 0)
+        entropy->pub.decode_mcu = decode_mcu_DC_first;
+      else
+        entropy->pub.decode_mcu = decode_mcu_AC_first;
+    } else {
+      if (cinfo->Ss == 0)
+        entropy->pub.decode_mcu = decode_mcu_DC_refine;
+      else
+        entropy->pub.decode_mcu = decode_mcu_AC_refine;
+    }
+  } else {
+    /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
+     * This ought to be an error condition, but we make it a warning.
+     */
+    if (cinfo->Ss != 0 || cinfo->Se != DCTSIZE2 - 1 ||
+        cinfo->Ah != 0 || cinfo->Al != 0)
+      WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
+    /* Select MCU decoding routine */
+    entropy->pub.decode_mcu = decode_mcu;
+  }
+
+  /* Allocate & initialize requested statistics areas */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    if (!cinfo->progressive_mode || (cinfo->Ss == 0 && cinfo->Ah == 0)) {
+      tbl = compptr->dc_tbl_no;
+      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
+        ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+      if (entropy->dc_stats[tbl] == NULL)
+        entropy->dc_stats[tbl] = (unsigned char *)(*cinfo->mem->alloc_small)
+          ((j_common_ptr)cinfo, JPOOL_IMAGE, DC_STAT_BINS);
+      memset(entropy->dc_stats[tbl], 0, DC_STAT_BINS);
+      /* Initialize DC predictions to 0 */
+      entropy->last_dc_val[ci] = 0;
+      entropy->dc_context[ci] = 0;
+    }
+    if (!cinfo->progressive_mode || cinfo->Ss) {
+      tbl = compptr->ac_tbl_no;
+      if (tbl < 0 || tbl >= NUM_ARITH_TBLS)
+        ERREXIT1(cinfo, JERR_NO_ARITH_TABLE, tbl);
+      if (entropy->ac_stats[tbl] == NULL)
+        entropy->ac_stats[tbl] = (unsigned char *)(*cinfo->mem->alloc_small)
+          ((j_common_ptr)cinfo, JPOOL_IMAGE, AC_STAT_BINS);
+      memset(entropy->ac_stats[tbl], 0, AC_STAT_BINS);
+    }
+  }
+
+  /* Initialize arithmetic decoding variables */
+  entropy->c = 0;
+  entropy->a = 0;
+  entropy->ct = -16;    /* force reading 2 initial bytes to fill C */
+  entropy->pub.insufficient_data = FALSE;
+
+  /* Initialize restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Module initialization routine for arithmetic entropy decoding.
+ */
+
+GLOBAL(void)
+jinit_arith_decoder(j_decompress_ptr cinfo)
+{
+  arith_entropy_ptr entropy;
+  int i;
+
+  entropy = (arith_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(arith_entropy_decoder));
+  cinfo->entropy = (struct jpeg_entropy_decoder *)entropy;
+  entropy->pub.start_pass = start_pass;
+
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    entropy->dc_stats[i] = NULL;
+    entropy->ac_stats[i] = NULL;
+  }
+
+  /* Initialize index for fixed probability estimation */
+  entropy->fixed_bin[0] = 113;
+
+  if (cinfo->progressive_mode) {
+    /* Create progression status table */
+    int *coef_bit_ptr, ci;
+    cinfo->coef_bits = (int (*)[DCTSIZE2])
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                  cinfo->num_components * 2 * DCTSIZE2 *
+                                  sizeof(int));
+    coef_bit_ptr = &cinfo->coef_bits[0][0];
+    for (ci = 0; ci < cinfo->num_components; ci++)
+      for (i = 0; i < DCTSIZE2; i++)
+        *coef_bit_ptr++ = -1;
+  }
+}
diff --git a/thirdparty/libjpeg-turbo/src/jdatadst-tj.c b/thirdparty/libjpeg-turbo/src/jdatadst-tj.c
new file mode 100644
index 00000000000..270b2c2c3ea
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdatadst-tj.c
@@ -0,0 +1,199 @@
+/*
+ * jdatadst-tj.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2009-2012 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2011, 2014, 2016, 2019, 2022-2023, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains compression data destination routines for the case of
+ * emitting JPEG data to memory or to a file (or any stdio stream).
+ * While these routines are sufficient for most applications,
+ * some will want to use a different destination manager.
+ * IMPORTANT: we assume that fwrite() will correctly transcribe an array of
+ * JOCTETs into 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+void jpeg_mem_dest_tj(j_compress_ptr cinfo, unsigned char **outbuffer,
+                      size_t *outsize, boolean alloc);
+
+
+#define OUTPUT_BUF_SIZE  4096   /* choose an efficiently fwrite'able size */
+
+
+/* Expanded data destination object for memory output */
+
+typedef struct {
+  struct jpeg_destination_mgr pub; /* public fields */
+
+  unsigned char **outbuffer;    /* target buffer */
+  size_t *outsize;
+  unsigned char *newbuffer;     /* newly allocated buffer */
+  JOCTET *buffer;               /* start of buffer */
+  size_t bufsize;
+  boolean alloc;
+} my_mem_destination_mgr;
+
+typedef my_mem_destination_mgr *my_mem_dest_ptr;
+
+
+/*
+ * Initialize destination --- called by jpeg_start_compress
+ * before any data is actually written.
+ */
+
+METHODDEF(void)
+init_mem_destination(j_compress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
+ * Empty the output buffer --- called whenever buffer fills up.
+ *
+ * In typical applications, this should write the entire output buffer
+ * (ignoring the current state of next_output_byte & free_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been dumped.
+ *
+ * In applications that need to be able to suspend compression due to output
+ * overrun, a FALSE return indicates that the buffer cannot be emptied now.
+ * In this situation, the compressor will return to its caller (possibly with
+ * an indication that it has not accepted all the supplied scanlines).  The
+ * application should resume compression after it has made more room in the
+ * output buffer.  Note that there are substantial restrictions on the use of
+ * suspension --- see the documentation.
+ *
+ * When suspending, the compressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_output_byte & free_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point will be regenerated after resumption, so do not
+ * write it out when emptying the buffer externally.
+ */
+
+METHODDEF(boolean)
+empty_mem_output_buffer(j_compress_ptr cinfo)
+{
+  size_t nextsize;
+  JOCTET *nextbuffer;
+  my_mem_dest_ptr dest = (my_mem_dest_ptr)cinfo->dest;
+
+  if (!dest->alloc) ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* Try to allocate new buffer with double size */
+  nextsize = dest->bufsize * 2;
+  nextbuffer = (JOCTET *)MALLOC(nextsize);
+
+  if (nextbuffer == NULL)
+    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
+
+  memcpy(nextbuffer, dest->buffer, dest->bufsize);
+
+  free(dest->newbuffer);
+
+  dest->newbuffer = nextbuffer;
+
+  dest->pub.next_output_byte = nextbuffer + dest->bufsize;
+  dest->pub.free_in_buffer = dest->bufsize;
+
+  dest->buffer = nextbuffer;
+  dest->bufsize = nextsize;
+
+  return TRUE;
+}
+
+
+/*
+ * Terminate destination --- called by jpeg_finish_compress
+ * after all data has been written.  Usually needs to flush buffer.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
+ * application must deal with any cleanup that should happen even
+ * for error exit.
+ */
+
+METHODDEF(void)
+term_mem_destination(j_compress_ptr cinfo)
+{
+  my_mem_dest_ptr dest = (my_mem_dest_ptr)cinfo->dest;
+
+  if (dest->alloc) *dest->outbuffer = dest->buffer;
+  *dest->outsize = dest->bufsize - dest->pub.free_in_buffer;
+}
+
+
+/*
+ * Prepare for output to a memory buffer.
+ * The caller may supply an own initial buffer with appropriate size.
+ * Otherwise, or when the actual data output exceeds the given size,
+ * the library adapts the buffer size as necessary.
+ * The standard library functions malloc/free are used for allocating
+ * larger memory, so the buffer is available to the application after
+ * finishing compression, and then the application is responsible for
+ * freeing the requested memory.
+ */
+
+GLOBAL(void)
+jpeg_mem_dest_tj(j_compress_ptr cinfo, unsigned char **outbuffer,
+                 size_t *outsize, boolean alloc)
+{
+  boolean reused = FALSE;
+  my_mem_dest_ptr dest;
+
+  if (outbuffer == NULL || outsize == NULL)     /* sanity check */
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* The destination object is made permanent so that multiple JPEG images
+   * can be written to the same buffer without re-executing jpeg_mem_dest.
+   */
+  if (cinfo->dest == NULL) {    /* first time for this JPEG object? */
+    cinfo->dest = (struct jpeg_destination_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
+                                  sizeof(my_mem_destination_mgr));
+    dest = (my_mem_dest_ptr)cinfo->dest;
+    dest->newbuffer = NULL;
+    dest->buffer = NULL;
+  } else if (cinfo->dest->init_destination != init_mem_destination) {
+    /* It is unsafe to reuse the existing destination manager unless it was
+     * created by this function.
+     */
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+  }
+
+  dest = (my_mem_dest_ptr)cinfo->dest;
+  dest->pub.init_destination = init_mem_destination;
+  dest->pub.empty_output_buffer = empty_mem_output_buffer;
+  dest->pub.term_destination = term_mem_destination;
+  if (dest->buffer == *outbuffer && *outbuffer != NULL && alloc)
+    reused = TRUE;
+  dest->outbuffer = outbuffer;
+  dest->outsize = outsize;
+  dest->alloc = alloc;
+
+  if (*outbuffer == NULL || *outsize == 0) {
+    if (alloc) {
+      /* Allocate initial buffer */
+      dest->newbuffer = *outbuffer = (unsigned char *)MALLOC(OUTPUT_BUF_SIZE);
+      if (dest->newbuffer == NULL)
+        ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
+      *outsize = OUTPUT_BUF_SIZE;
+    } else
+      ERREXIT(cinfo, JERR_BUFFER_SIZE);
+  }
+
+  dest->pub.next_output_byte = dest->buffer = *outbuffer;
+  if (!reused)
+    dest->bufsize = *outsize;
+  dest->pub.free_in_buffer = dest->bufsize;
+}
diff --git a/thirdparty/libjpeg-turbo/src/jdatadst.c b/thirdparty/libjpeg-turbo/src/jdatadst.c
new file mode 100644
index 00000000000..529f93b4904
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdatadst.c
@@ -0,0 +1,277 @@
+/*
+ * jdatadst.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2009-2012 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2013, 2016, 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains compression data destination routines for the case of
+ * emitting JPEG data to memory or to a file (or any stdio stream).
+ * While these routines are sufficient for most applications,
+ * some will want to use a different destination manager.
+ * IMPORTANT: we assume that fwrite() will correctly transcribe an array of
+ * JOCTETs into 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+
+/* Expanded data destination object for stdio output */
+
+typedef struct {
+  struct jpeg_destination_mgr pub; /* public fields */
+
+  FILE *outfile;                /* target stream */
+  JOCTET *buffer;               /* start of buffer */
+} my_destination_mgr;
+
+typedef my_destination_mgr *my_dest_ptr;
+
+#define OUTPUT_BUF_SIZE  4096   /* choose an efficiently fwrite'able size */
+
+
+/* Expanded data destination object for memory output */
+
+typedef struct {
+  struct jpeg_destination_mgr pub; /* public fields */
+
+  unsigned char **outbuffer;    /* target buffer */
+  unsigned long *outsize;
+  unsigned char *newbuffer;     /* newly allocated buffer */
+  JOCTET *buffer;               /* start of buffer */
+  size_t bufsize;
+} my_mem_destination_mgr;
+
+typedef my_mem_destination_mgr *my_mem_dest_ptr;
+
+
+/*
+ * Initialize destination --- called by jpeg_start_compress
+ * before any data is actually written.
+ */
+
+METHODDEF(void)
+init_destination(j_compress_ptr cinfo)
+{
+  my_dest_ptr dest = (my_dest_ptr)cinfo->dest;
+
+  /* Allocate the output buffer --- it will be released when done with image */
+  dest->buffer = (JOCTET *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                OUTPUT_BUF_SIZE * sizeof(JOCTET));
+
+  dest->pub.next_output_byte = dest->buffer;
+  dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
+}
+
+METHODDEF(void)
+init_mem_destination(j_compress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
+ * Empty the output buffer --- called whenever buffer fills up.
+ *
+ * In typical applications, this should write the entire output buffer
+ * (ignoring the current state of next_output_byte & free_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been dumped.
+ *
+ * In applications that need to be able to suspend compression due to output
+ * overrun, a FALSE return indicates that the buffer cannot be emptied now.
+ * In this situation, the compressor will return to its caller (possibly with
+ * an indication that it has not accepted all the supplied scanlines).  The
+ * application should resume compression after it has made more room in the
+ * output buffer.  Note that there are substantial restrictions on the use of
+ * suspension --- see the documentation.
+ *
+ * When suspending, the compressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_output_byte & free_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point will be regenerated after resumption, so do not
+ * write it out when emptying the buffer externally.
+ */
+
+METHODDEF(boolean)
+empty_output_buffer(j_compress_ptr cinfo)
+{
+  my_dest_ptr dest = (my_dest_ptr)cinfo->dest;
+
+  if (fwrite(dest->buffer, 1, OUTPUT_BUF_SIZE, dest->outfile) !=
+      (size_t)OUTPUT_BUF_SIZE)
+    ERREXIT(cinfo, JERR_FILE_WRITE);
+
+  dest->pub.next_output_byte = dest->buffer;
+  dest->pub.free_in_buffer = OUTPUT_BUF_SIZE;
+
+  return TRUE;
+}
+
+METHODDEF(boolean)
+empty_mem_output_buffer(j_compress_ptr cinfo)
+{
+  size_t nextsize;
+  JOCTET *nextbuffer;
+  my_mem_dest_ptr dest = (my_mem_dest_ptr)cinfo->dest;
+
+  /* Try to allocate new buffer with double size */
+  nextsize = dest->bufsize * 2;
+  nextbuffer = (JOCTET *)malloc(nextsize);
+
+  if (nextbuffer == NULL)
+    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
+
+  memcpy(nextbuffer, dest->buffer, dest->bufsize);
+
+  free(dest->newbuffer);
+
+  dest->newbuffer = nextbuffer;
+
+  dest->pub.next_output_byte = nextbuffer + dest->bufsize;
+  dest->pub.free_in_buffer = dest->bufsize;
+
+  dest->buffer = nextbuffer;
+  dest->bufsize = nextsize;
+
+  return TRUE;
+}
+
+
+/*
+ * Terminate destination --- called by jpeg_finish_compress
+ * after all data has been written.  Usually needs to flush buffer.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
+ * application must deal with any cleanup that should happen even
+ * for error exit.
+ */
+
+METHODDEF(void)
+term_destination(j_compress_ptr cinfo)
+{
+  my_dest_ptr dest = (my_dest_ptr)cinfo->dest;
+  size_t datacount = OUTPUT_BUF_SIZE - dest->pub.free_in_buffer;
+
+  /* Write any data remaining in the buffer */
+  if (datacount > 0) {
+    if (fwrite(dest->buffer, 1, datacount, dest->outfile) != datacount)
+      ERREXIT(cinfo, JERR_FILE_WRITE);
+  }
+  fflush(dest->outfile);
+  /* Make sure we wrote the output file OK */
+  if (ferror(dest->outfile))
+    ERREXIT(cinfo, JERR_FILE_WRITE);
+}
+
+METHODDEF(void)
+term_mem_destination(j_compress_ptr cinfo)
+{
+  my_mem_dest_ptr dest = (my_mem_dest_ptr)cinfo->dest;
+
+  *dest->outbuffer = dest->buffer;
+  *dest->outsize = (unsigned long)(dest->bufsize - dest->pub.free_in_buffer);
+}
+
+
+/*
+ * Prepare for output to a stdio stream.
+ * The caller must have already opened the stream, and is responsible
+ * for closing it after finishing compression.
+ */
+
+GLOBAL(void)
+jpeg_stdio_dest(j_compress_ptr cinfo, FILE *outfile)
+{
+  my_dest_ptr dest;
+
+  /* The destination object is made permanent so that multiple JPEG images
+   * can be written to the same file without re-executing jpeg_stdio_dest.
+   */
+  if (cinfo->dest == NULL) {    /* first time for this JPEG object? */
+    cinfo->dest = (struct jpeg_destination_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
+                                  sizeof(my_destination_mgr));
+  } else if (cinfo->dest->init_destination != init_destination) {
+    /* It is unsafe to reuse the existing destination manager unless it was
+     * created by this function.  Otherwise, there is no guarantee that the
+     * opaque structure is the right size.  Note that we could just create a
+     * new structure, but the old structure would not be freed until
+     * jpeg_destroy_compress() was called.
+     */
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+  }
+
+  dest = (my_dest_ptr)cinfo->dest;
+  dest->pub.init_destination = init_destination;
+  dest->pub.empty_output_buffer = empty_output_buffer;
+  dest->pub.term_destination = term_destination;
+  dest->outfile = outfile;
+}
+
+
+/*
+ * Prepare for output to a memory buffer.
+ * The caller may supply an own initial buffer with appropriate size.
+ * Otherwise, or when the actual data output exceeds the given size,
+ * the library adapts the buffer size as necessary.
+ * The standard library functions malloc/free are used for allocating
+ * larger memory, so the buffer is available to the application after
+ * finishing compression, and then the application is responsible for
+ * freeing the requested memory.
+ * Note:  An initial buffer supplied by the caller is expected to be
+ * managed by the application.  The library does not free such buffer
+ * when allocating a larger buffer.
+ */
+
+GLOBAL(void)
+jpeg_mem_dest(j_compress_ptr cinfo, unsigned char **outbuffer,
+              unsigned long *outsize)
+{
+  my_mem_dest_ptr dest;
+
+  if (outbuffer == NULL || outsize == NULL)     /* sanity check */
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+
+  /* The destination object is made permanent so that multiple JPEG images
+   * can be written to the same buffer without re-executing jpeg_mem_dest.
+   */
+  if (cinfo->dest == NULL) {    /* first time for this JPEG object? */
+    cinfo->dest = (struct jpeg_destination_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
+                                  sizeof(my_mem_destination_mgr));
+  } else if (cinfo->dest->init_destination != init_mem_destination) {
+    /* It is unsafe to reuse the existing destination manager unless it was
+     * created by this function.
+     */
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+  }
+
+  dest = (my_mem_dest_ptr)cinfo->dest;
+  dest->pub.init_destination = init_mem_destination;
+  dest->pub.empty_output_buffer = empty_mem_output_buffer;
+  dest->pub.term_destination = term_mem_destination;
+  dest->outbuffer = outbuffer;
+  dest->outsize = outsize;
+  dest->newbuffer = NULL;
+
+  if (*outbuffer == NULL || *outsize == 0) {
+    /* Allocate initial buffer */
+    dest->newbuffer = *outbuffer = (unsigned char *)malloc(OUTPUT_BUF_SIZE);
+    if (dest->newbuffer == NULL)
+      ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 10);
+    *outsize = OUTPUT_BUF_SIZE;
+  }
+
+  dest->pub.next_output_byte = dest->buffer = *outbuffer;
+  dest->pub.free_in_buffer = dest->bufsize = *outsize;
+}
diff --git a/thirdparty/libjpeg-turbo/src/jdatasrc-tj.c b/thirdparty/libjpeg-turbo/src/jdatasrc-tj.c
new file mode 100644
index 00000000000..a5970b53fe8
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdatasrc-tj.c
@@ -0,0 +1,194 @@
+/*
+ * jdatasrc-tj.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2009-2011 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2011, 2016, 2019, 2023, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains decompression data source routines for the case of
+ * reading JPEG data from memory or from a file (or any stdio stream).
+ * While these routines are sufficient for most applications,
+ * some will want to use a different source manager.
+ * IMPORTANT: we assume that fread() will correctly transcribe an array of
+ * JOCTETs from 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+void jpeg_mem_src_tj(j_decompress_ptr cinfo, const unsigned char *inbuffer,
+                     size_t insize);
+
+
+/*
+ * Initialize source --- called by jpeg_read_header
+ * before any data is actually read.
+ */
+
+METHODDEF(void)
+init_mem_source(j_decompress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
+ * Fill the input buffer --- called whenever buffer is emptied.
+ *
+ * In typical applications, this should read fresh data into the buffer
+ * (ignoring the current state of next_input_byte & bytes_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been reloaded.  It is not necessary to
+ * fill the buffer entirely, only to obtain at least one more byte.
+ *
+ * There is no such thing as an EOF return.  If the end of the file has been
+ * reached, the routine has a choice of ERREXIT() or inserting fake data into
+ * the buffer.  In most cases, generating a warning message and inserting a
+ * fake EOI marker is the best course of action --- this will allow the
+ * decompressor to output however much of the image is there.  However,
+ * the resulting error message is misleading if the real problem is an empty
+ * input file, so we handle that case specially.
+ *
+ * In applications that need to be able to suspend compression due to input
+ * not being available yet, a FALSE return indicates that no more data can be
+ * obtained right now, but more may be forthcoming later.  In this situation,
+ * the decompressor will return to its caller (with an indication of the
+ * number of scanlines it has read, if any).  The application should resume
+ * decompression after it has loaded more data into the input buffer.  Note
+ * that there are substantial restrictions on the use of suspension --- see
+ * the documentation.
+ *
+ * When suspending, the decompressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_input_byte & bytes_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point must be rescanned after resumption, so move it to
+ * the front of the buffer rather than discarding it.
+ */
+
+METHODDEF(boolean)
+fill_mem_input_buffer(j_decompress_ptr cinfo)
+{
+  static const JOCTET mybuffer[4] = {
+    (JOCTET)0xFF, (JOCTET)JPEG_EOI, 0, 0
+  };
+
+  /* The whole JPEG data is expected to reside in the supplied memory
+   * buffer, so any request for more data beyond the given buffer size
+   * is treated as an error.
+   */
+  WARNMS(cinfo, JWRN_JPEG_EOF);
+
+  /* Insert a fake EOI marker */
+
+  cinfo->src->next_input_byte = mybuffer;
+  cinfo->src->bytes_in_buffer = 2;
+
+  return TRUE;
+}
+
+
+/*
+ * Skip data --- used to skip over a potentially large amount of
+ * uninteresting data (such as an APPn marker).
+ *
+ * Writers of suspendable-input applications must note that skip_input_data
+ * is not granted the right to give a suspension return.  If the skip extends
+ * beyond the data currently in the buffer, the buffer can be marked empty so
+ * that the next read will cause a fill_input_buffer call that can suspend.
+ * Arranging for additional bytes to be discarded before reloading the input
+ * buffer is the application writer's problem.
+ */
+
+METHODDEF(void)
+skip_input_data(j_decompress_ptr cinfo, long num_bytes)
+{
+  struct jpeg_source_mgr *src = cinfo->src;
+
+  /* Just a dumb implementation for now.  Could use fseek() except
+   * it doesn't work on pipes.  Not clear that being smart is worth
+   * any trouble anyway --- large skips are infrequent.
+   */
+  if (num_bytes > 0) {
+    while (num_bytes > (long)src->bytes_in_buffer) {
+      num_bytes -= (long)src->bytes_in_buffer;
+      (void)(*src->fill_input_buffer) (cinfo);
+      /* note we assume that fill_input_buffer will never return FALSE,
+       * so suspension need not be handled.
+       */
+    }
+    src->next_input_byte += (size_t)num_bytes;
+    src->bytes_in_buffer -= (size_t)num_bytes;
+  }
+}
+
+
+/*
+ * An additional method that can be provided by data source modules is the
+ * resync_to_restart method for error recovery in the presence of RST markers.
+ * For the moment, this source module just uses the default resync method
+ * provided by the JPEG library.  That method assumes that no backtracking
+ * is possible.
+ */
+
+
+/*
+ * Terminate source --- called by jpeg_finish_decompress
+ * after all data has been read.  Often a no-op.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
+ * application must deal with any cleanup that should happen even
+ * for error exit.
+ */
+
+METHODDEF(void)
+term_source(j_decompress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
+ * Prepare for input from a supplied memory buffer.
+ * The buffer must contain the whole JPEG data.
+ */
+
+GLOBAL(void)
+jpeg_mem_src_tj(j_decompress_ptr cinfo, const unsigned char *inbuffer,
+                size_t insize)
+{
+  struct jpeg_source_mgr *src;
+
+  if (inbuffer == NULL || insize == 0)  /* Treat empty input as fatal error */
+    ERREXIT(cinfo, JERR_INPUT_EMPTY);
+
+  /* The source object is made permanent so that a series of JPEG images
+   * can be read from the same buffer by calling jpeg_mem_src only before
+   * the first one.
+   */
+  if (cinfo->src == NULL) {     /* first time for this JPEG object? */
+    cinfo->src = (struct jpeg_source_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
+                                  sizeof(struct jpeg_source_mgr));
+  } else if (cinfo->src->init_source != init_mem_source) {
+    /* It is unsafe to reuse the existing source manager unless it was created
+     * by this function.
+     */
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+  }
+
+  src = cinfo->src;
+  src->init_source = init_mem_source;
+  src->fill_input_buffer = fill_mem_input_buffer;
+  src->skip_input_data = skip_input_data;
+  src->resync_to_restart = jpeg_resync_to_restart; /* use default method */
+  src->term_source = term_source;
+  src->bytes_in_buffer = insize;
+  src->next_input_byte = (const JOCTET *)inbuffer;
+}
diff --git a/thirdparty/libjpeg-turbo/src/jdatasrc.c b/thirdparty/libjpeg-turbo/src/jdatasrc.c
new file mode 100644
index 00000000000..dc135f43a47
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdatasrc.c
@@ -0,0 +1,289 @@
+/*
+ * jdatasrc.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2009-2011 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2013, 2016, 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains decompression data source routines for the case of
+ * reading JPEG data from memory or from a file (or any stdio stream).
+ * While these routines are sufficient for most applications,
+ * some will want to use a different source manager.
+ * IMPORTANT: we assume that fread() will correctly transcribe an array of
+ * JOCTETs from 8-bit-wide elements on external storage.  If char is wider
+ * than 8 bits on your machine, you may need to do some tweaking.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+
+/* Expanded data source object for stdio input */
+
+typedef struct {
+  struct jpeg_source_mgr pub;   /* public fields */
+
+  FILE *infile;                 /* source stream */
+  JOCTET *buffer;               /* start of buffer */
+  boolean start_of_file;        /* have we gotten any data yet? */
+} my_source_mgr;
+
+typedef my_source_mgr *my_src_ptr;
+
+#define INPUT_BUF_SIZE  4096    /* choose an efficiently fread'able size */
+
+
+/*
+ * Initialize source --- called by jpeg_read_header
+ * before any data is actually read.
+ */
+
+METHODDEF(void)
+init_source(j_decompress_ptr cinfo)
+{
+  my_src_ptr src = (my_src_ptr)cinfo->src;
+
+  /* We reset the empty-input-file flag for each image,
+   * but we don't clear the input buffer.
+   * This is correct behavior for reading a series of images from one source.
+   */
+  src->start_of_file = TRUE;
+}
+
+METHODDEF(void)
+init_mem_source(j_decompress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
+ * Fill the input buffer --- called whenever buffer is emptied.
+ *
+ * In typical applications, this should read fresh data into the buffer
+ * (ignoring the current state of next_input_byte & bytes_in_buffer),
+ * reset the pointer & count to the start of the buffer, and return TRUE
+ * indicating that the buffer has been reloaded.  It is not necessary to
+ * fill the buffer entirely, only to obtain at least one more byte.
+ *
+ * There is no such thing as an EOF return.  If the end of the file has been
+ * reached, the routine has a choice of ERREXIT() or inserting fake data into
+ * the buffer.  In most cases, generating a warning message and inserting a
+ * fake EOI marker is the best course of action --- this will allow the
+ * decompressor to output however much of the image is there.  However,
+ * the resulting error message is misleading if the real problem is an empty
+ * input file, so we handle that case specially.
+ *
+ * In applications that need to be able to suspend compression due to input
+ * not being available yet, a FALSE return indicates that no more data can be
+ * obtained right now, but more may be forthcoming later.  In this situation,
+ * the decompressor will return to its caller (with an indication of the
+ * number of scanlines it has read, if any).  The application should resume
+ * decompression after it has loaded more data into the input buffer.  Note
+ * that there are substantial restrictions on the use of suspension --- see
+ * the documentation.
+ *
+ * When suspending, the decompressor will back up to a convenient restart point
+ * (typically the start of the current MCU). next_input_byte & bytes_in_buffer
+ * indicate where the restart point will be if the current call returns FALSE.
+ * Data beyond this point must be rescanned after resumption, so move it to
+ * the front of the buffer rather than discarding it.
+ */
+
+METHODDEF(boolean)
+fill_input_buffer(j_decompress_ptr cinfo)
+{
+  my_src_ptr src = (my_src_ptr)cinfo->src;
+  size_t nbytes;
+
+  nbytes = fread(src->buffer, 1, INPUT_BUF_SIZE, src->infile);
+
+  if (nbytes <= 0) {
+    if (src->start_of_file)     /* Treat empty input file as fatal error */
+      ERREXIT(cinfo, JERR_INPUT_EMPTY);
+    WARNMS(cinfo, JWRN_JPEG_EOF);
+    /* Insert a fake EOI marker */
+    src->buffer[0] = (JOCTET)0xFF;
+    src->buffer[1] = (JOCTET)JPEG_EOI;
+    nbytes = 2;
+  }
+
+  src->pub.next_input_byte = src->buffer;
+  src->pub.bytes_in_buffer = nbytes;
+  src->start_of_file = FALSE;
+
+  return TRUE;
+}
+
+METHODDEF(boolean)
+fill_mem_input_buffer(j_decompress_ptr cinfo)
+{
+  static const JOCTET mybuffer[4] = {
+    (JOCTET)0xFF, (JOCTET)JPEG_EOI, 0, 0
+  };
+
+  /* The whole JPEG data is expected to reside in the supplied memory
+   * buffer, so any request for more data beyond the given buffer size
+   * is treated as an error.
+   */
+  WARNMS(cinfo, JWRN_JPEG_EOF);
+
+  /* Insert a fake EOI marker */
+
+  cinfo->src->next_input_byte = mybuffer;
+  cinfo->src->bytes_in_buffer = 2;
+
+  return TRUE;
+}
+
+
+/*
+ * Skip data --- used to skip over a potentially large amount of
+ * uninteresting data (such as an APPn marker).
+ *
+ * Writers of suspendable-input applications must note that skip_input_data
+ * is not granted the right to give a suspension return.  If the skip extends
+ * beyond the data currently in the buffer, the buffer can be marked empty so
+ * that the next read will cause a fill_input_buffer call that can suspend.
+ * Arranging for additional bytes to be discarded before reloading the input
+ * buffer is the application writer's problem.
+ */
+
+METHODDEF(void)
+skip_input_data(j_decompress_ptr cinfo, long num_bytes)
+{
+  struct jpeg_source_mgr *src = cinfo->src;
+
+  /* Just a dumb implementation for now.  Could use fseek() except
+   * it doesn't work on pipes.  Not clear that being smart is worth
+   * any trouble anyway --- large skips are infrequent.
+   */
+  if (num_bytes > 0) {
+    while (num_bytes > (long)src->bytes_in_buffer) {
+      num_bytes -= (long)src->bytes_in_buffer;
+      (void)(*src->fill_input_buffer) (cinfo);
+      /* note we assume that fill_input_buffer will never return FALSE,
+       * so suspension need not be handled.
+       */
+    }
+    src->next_input_byte += (size_t)num_bytes;
+    src->bytes_in_buffer -= (size_t)num_bytes;
+  }
+}
+
+
+/*
+ * An additional method that can be provided by data source modules is the
+ * resync_to_restart method for error recovery in the presence of RST markers.
+ * For the moment, this source module just uses the default resync method
+ * provided by the JPEG library.  That method assumes that no backtracking
+ * is possible.
+ */
+
+
+/*
+ * Terminate source --- called by jpeg_finish_decompress
+ * after all data has been read.  Often a no-op.
+ *
+ * NB: *not* called by jpeg_abort or jpeg_destroy; surrounding
+ * application must deal with any cleanup that should happen even
+ * for error exit.
+ */
+
+METHODDEF(void)
+term_source(j_decompress_ptr cinfo)
+{
+  /* no work necessary here */
+}
+
+
+/*
+ * Prepare for input from a stdio stream.
+ * The caller must have already opened the stream, and is responsible
+ * for closing it after finishing decompression.
+ */
+
+GLOBAL(void)
+jpeg_stdio_src(j_decompress_ptr cinfo, FILE *infile)
+{
+  my_src_ptr src;
+
+  /* The source object and input buffer are made permanent so that a series
+   * of JPEG images can be read from the same file by calling jpeg_stdio_src
+   * only before the first one.  (If we discarded the buffer at the end of
+   * one image, we'd likely lose the start of the next one.)
+   */
+  if (cinfo->src == NULL) {     /* first time for this JPEG object? */
+    cinfo->src = (struct jpeg_source_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
+                                  sizeof(my_source_mgr));
+    src = (my_src_ptr)cinfo->src;
+    src->buffer = (JOCTET *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
+                                  INPUT_BUF_SIZE * sizeof(JOCTET));
+  } else if (cinfo->src->init_source != init_source) {
+    /* It is unsafe to reuse the existing source manager unless it was created
+     * by this function.  Otherwise, there is no guarantee that the opaque
+     * structure is the right size.  Note that we could just create a new
+     * structure, but the old structure would not be freed until
+     * jpeg_destroy_decompress() was called.
+     */
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+  }
+
+  src = (my_src_ptr)cinfo->src;
+  src->pub.init_source = init_source;
+  src->pub.fill_input_buffer = fill_input_buffer;
+  src->pub.skip_input_data = skip_input_data;
+  src->pub.resync_to_restart = jpeg_resync_to_restart; /* use default method */
+  src->pub.term_source = term_source;
+  src->infile = infile;
+  src->pub.bytes_in_buffer = 0; /* forces fill_input_buffer on first read */
+  src->pub.next_input_byte = NULL; /* until buffer loaded */
+}
+
+
+/*
+ * Prepare for input from a supplied memory buffer.
+ * The buffer must contain the whole JPEG data.
+ */
+
+GLOBAL(void)
+jpeg_mem_src(j_decompress_ptr cinfo, const unsigned char *inbuffer,
+             unsigned long insize)
+{
+  struct jpeg_source_mgr *src;
+
+  if (inbuffer == NULL || insize == 0)  /* Treat empty input as fatal error */
+    ERREXIT(cinfo, JERR_INPUT_EMPTY);
+
+  /* The source object is made permanent so that a series of JPEG images
+   * can be read from the same buffer by calling jpeg_mem_src only before
+   * the first one.
+   */
+  if (cinfo->src == NULL) {     /* first time for this JPEG object? */
+    cinfo->src = (struct jpeg_source_mgr *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
+                                  sizeof(struct jpeg_source_mgr));
+  } else if (cinfo->src->init_source != init_mem_source) {
+    /* It is unsafe to reuse the existing source manager unless it was created
+     * by this function.
+     */
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+  }
+
+  src = cinfo->src;
+  src->init_source = init_mem_source;
+  src->fill_input_buffer = fill_mem_input_buffer;
+  src->skip_input_data = skip_input_data;
+  src->resync_to_restart = jpeg_resync_to_restart; /* use default method */
+  src->term_source = term_source;
+  src->bytes_in_buffer = (size_t)insize;
+  src->next_input_byte = (const JOCTET *)inbuffer;
+}
diff --git a/thirdparty/libjpeg-turbo/src/jdcoefct.c b/thirdparty/libjpeg-turbo/src/jdcoefct.c
new file mode 100644
index 00000000000..40ce27259ba
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdcoefct.c
@@ -0,0 +1,885 @@
+/*
+ * jdcoefct.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2010, 2015-2016, 2019-2020, 2022-2023, D. R. Commander.
+ * Copyright (C) 2015, 2020, Google, Inc.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains the coefficient buffer controller for decompression.
+ * This controller is the top level of the lossy JPEG decompressor proper.
+ * The coefficient buffer lies between entropy decoding and inverse-DCT steps.
+ *
+ * In buffered-image mode, this controller is the interface between
+ * input-oriented processing and output-oriented processing.
+ * Also, the input side (only) is used when reading a file for transcoding.
+ */
+
+#include "jinclude.h"
+#include "jdcoefct.h"
+#include "jpegapicomp.h"
+#include "jsamplecomp.h"
+
+
+/* Forward declarations */
+METHODDEF(int) decompress_onepass(j_decompress_ptr cinfo,
+                                  _JSAMPIMAGE output_buf);
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+METHODDEF(int) decompress_data(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf);
+#endif
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+LOCAL(boolean) smoothing_ok(j_decompress_ptr cinfo);
+METHODDEF(int) decompress_smooth_data(j_decompress_ptr cinfo,
+                                      _JSAMPIMAGE output_buf);
+#endif
+
+
+/*
+ * Initialize for an input processing pass.
+ */
+
+METHODDEF(void)
+start_input_pass(j_decompress_ptr cinfo)
+{
+  cinfo->input_iMCU_row = 0;
+  start_iMCU_row(cinfo);
+}
+
+
+/*
+ * Initialize for an output processing pass.
+ */
+
+METHODDEF(void)
+start_output_pass(j_decompress_ptr cinfo)
+{
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
+
+  /* If multipass, check to see whether to use block smoothing on this pass */
+  if (coef->pub.coef_arrays != NULL) {
+    if (cinfo->do_block_smoothing && smoothing_ok(cinfo))
+      coef->pub._decompress_data = decompress_smooth_data;
+    else
+      coef->pub._decompress_data = decompress_data;
+  }
+#endif
+  cinfo->output_iMCU_row = 0;
+}
+
+
+/*
+ * Decompress and return some data in the single-pass case.
+ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
+ * Input and output must run in lockstep since we have only a one-MCU buffer.
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ *
+ * NB: output_buf contains a plane for each component in image,
+ * which we index according to the component's SOF position.
+ */
+
+METHODDEF(int)
+decompress_onepass(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
+  JDIMENSION MCU_col_num;       /* index of current MCU within row */
+  JDIMENSION last_MCU_col = cinfo->MCUs_per_row - 1;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  int blkn, ci, xindex, yindex, yoffset, useful_width;
+  _JSAMPARRAY output_ptr;
+  JDIMENSION start_col, output_col;
+  jpeg_component_info *compptr;
+  _inverse_DCT_method_ptr inverse_DCT;
+
+  /* Loop to process as much as one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num <= last_MCU_col;
+         MCU_col_num++) {
+      /* Try to fetch an MCU.  Entropy decoder expects buffer to be zeroed. */
+      jzero_far((void *)coef->MCU_buffer[0],
+                (size_t)(cinfo->blocks_in_MCU * sizeof(JBLOCK)));
+      if (!cinfo->entropy->insufficient_data)
+        cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row;
+      if (!(*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
+        /* Suspension forced; update state counters and exit */
+        coef->MCU_vert_offset = yoffset;
+        coef->MCU_ctr = MCU_col_num;
+        return JPEG_SUSPENDED;
+      }
+
+      /* Only perform the IDCT on blocks that are contained within the desired
+       * cropping region.
+       */
+      if (MCU_col_num >= cinfo->master->first_iMCU_col &&
+          MCU_col_num <= cinfo->master->last_iMCU_col) {
+        /* Determine where data should go in output_buf and do the IDCT thing.
+         * We skip dummy blocks at the right and bottom edges (but blkn gets
+         * incremented past them!).  Note the inner loop relies on having
+         * allocated the MCU_buffer[] blocks sequentially.
+         */
+        blkn = 0;               /* index of current DCT block within MCU */
+        for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+          compptr = cinfo->cur_comp_info[ci];
+          /* Don't bother to IDCT an uninteresting component. */
+          if (!compptr->component_needed) {
+            blkn += compptr->MCU_blocks;
+            continue;
+          }
+          inverse_DCT = cinfo->idct->_inverse_DCT[compptr->component_index];
+          useful_width = (MCU_col_num < last_MCU_col) ?
+                         compptr->MCU_width : compptr->last_col_width;
+          output_ptr = output_buf[compptr->component_index] +
+                       yoffset * compptr->_DCT_scaled_size;
+          start_col = (MCU_col_num - cinfo->master->first_iMCU_col) *
+                      compptr->MCU_sample_width;
+          for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+            if (cinfo->input_iMCU_row < last_iMCU_row ||
+                yoffset + yindex < compptr->last_row_height) {
+              output_col = start_col;
+              for (xindex = 0; xindex < useful_width; xindex++) {
+                (*inverse_DCT) (cinfo, compptr,
+                                (JCOEFPTR)coef->MCU_buffer[blkn + xindex],
+                                output_ptr, output_col);
+                output_col += compptr->_DCT_scaled_size;
+              }
+            }
+            blkn += compptr->MCU_width;
+            output_ptr += compptr->_DCT_scaled_size;
+          }
+        }
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  cinfo->output_iMCU_row++;
+  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
+
+
+/*
+ * Dummy consume-input routine for single-pass operation.
+ */
+
+METHODDEF(int)
+dummy_consume_data(j_decompress_ptr cinfo)
+{
+  return JPEG_SUSPENDED;        /* Always indicate nothing was done */
+}
+
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Consume input data and store it in the full-image coefficient buffer.
+ * We read as much as one fully interleaved MCU row ("iMCU" row) per call,
+ * ie, v_samp_factor block rows for each component in the scan.
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ */
+
+METHODDEF(int)
+consume_data(j_decompress_ptr cinfo)
+{
+  my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
+  JDIMENSION MCU_col_num;       /* index of current MCU within row */
+  int blkn, ci, xindex, yindex, yoffset;
+  JDIMENSION start_col;
+  JBLOCKARRAY buffer[MAX_COMPS_IN_SCAN];
+  JBLOCKROW buffer_ptr;
+  jpeg_component_info *compptr;
+
+  /* Align the virtual buffers for the components used in this scan. */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    buffer[ci] = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr)cinfo, coef->whole_image[compptr->component_index],
+       cinfo->input_iMCU_row * compptr->v_samp_factor,
+       (JDIMENSION)compptr->v_samp_factor, TRUE);
+    /* Note: entropy decoder expects buffer to be zeroed,
+     * but this is handled automatically by the memory manager
+     * because we requested a pre-zeroed array.
+     */
+  }
+
+  /* Loop to process one whole iMCU row */
+  for (yoffset = coef->MCU_vert_offset; yoffset < coef->MCU_rows_per_iMCU_row;
+       yoffset++) {
+    for (MCU_col_num = coef->MCU_ctr; MCU_col_num < cinfo->MCUs_per_row;
+         MCU_col_num++) {
+      /* Construct list of pointers to DCT blocks belonging to this MCU */
+      blkn = 0;                 /* index of current DCT block within MCU */
+      for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+        compptr = cinfo->cur_comp_info[ci];
+        start_col = MCU_col_num * compptr->MCU_width;
+        for (yindex = 0; yindex < compptr->MCU_height; yindex++) {
+          buffer_ptr = buffer[ci][yindex + yoffset] + start_col;
+          for (xindex = 0; xindex < compptr->MCU_width; xindex++) {
+            coef->MCU_buffer[blkn++] = buffer_ptr++;
+          }
+        }
+      }
+      if (!cinfo->entropy->insufficient_data)
+        cinfo->master->last_good_iMCU_row = cinfo->input_iMCU_row;
+      /* Try to fetch the MCU. */
+      if (!(*cinfo->entropy->decode_mcu) (cinfo, coef->MCU_buffer)) {
+        /* Suspension forced; update state counters and exit */
+        coef->MCU_vert_offset = yoffset;
+        coef->MCU_ctr = MCU_col_num;
+        return JPEG_SUSPENDED;
+      }
+    }
+    /* Completed an MCU row, but perhaps not an iMCU row */
+    coef->MCU_ctr = 0;
+  }
+  /* Completed the iMCU row, advance counters for next one */
+  if (++(cinfo->input_iMCU_row) < cinfo->total_iMCU_rows) {
+    start_iMCU_row(cinfo);
+    return JPEG_ROW_COMPLETED;
+  }
+  /* Completed the scan */
+  (*cinfo->inputctl->finish_input_pass) (cinfo);
+  return JPEG_SCAN_COMPLETED;
+}
+
+
+/*
+ * Decompress and return some data in the multi-pass case.
+ * Always attempts to emit one fully interleaved MCU row ("iMCU" row).
+ * Return value is JPEG_ROW_COMPLETED, JPEG_SCAN_COMPLETED, or JPEG_SUSPENDED.
+ *
+ * NB: output_buf contains a plane for each component in image.
+ */
+
+METHODDEF(int)
+decompress_data(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION block_num;
+  int ci, block_row, block_rows;
+  JBLOCKARRAY buffer;
+  JBLOCKROW buffer_ptr;
+  _JSAMPARRAY output_ptr;
+  JDIMENSION output_col;
+  jpeg_component_info *compptr;
+  _inverse_DCT_method_ptr inverse_DCT;
+
+  /* Force some input to be done if we are getting ahead of the input. */
+  while (cinfo->input_scan_number < cinfo->output_scan_number ||
+         (cinfo->input_scan_number == cinfo->output_scan_number &&
+          cinfo->input_iMCU_row <= cinfo->output_iMCU_row)) {
+    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
+      return JPEG_SUSPENDED;
+  }
+
+  /* OK, output from the virtual arrays. */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Don't bother to IDCT an uninteresting component. */
+    if (!compptr->component_needed)
+      continue;
+    /* Align the virtual buffer for this component. */
+    buffer = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr)cinfo, coef->whole_image[ci],
+       cinfo->output_iMCU_row * compptr->v_samp_factor,
+       (JDIMENSION)compptr->v_samp_factor, FALSE);
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (cinfo->output_iMCU_row < last_iMCU_row)
+      block_rows = compptr->v_samp_factor;
+    else {
+      /* NB: can't use last_row_height here; it is input-side-dependent! */
+      block_rows = (int)(compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor;
+    }
+    inverse_DCT = cinfo->idct->_inverse_DCT[ci];
+    output_ptr = output_buf[ci];
+    /* Loop over all DCT blocks to be processed. */
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      buffer_ptr = buffer[block_row] + cinfo->master->first_MCU_col[ci];
+      output_col = 0;
+      for (block_num = cinfo->master->first_MCU_col[ci];
+           block_num <= cinfo->master->last_MCU_col[ci]; block_num++) {
+        (*inverse_DCT) (cinfo, compptr, (JCOEFPTR)buffer_ptr, output_ptr,
+                        output_col);
+        buffer_ptr++;
+        output_col += compptr->_DCT_scaled_size;
+      }
+      output_ptr += compptr->_DCT_scaled_size;
+    }
+  }
+
+  if (++(cinfo->output_iMCU_row) < cinfo->total_iMCU_rows)
+    return JPEG_ROW_COMPLETED;
+  return JPEG_SCAN_COMPLETED;
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+
+
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+
+/*
+ * This code applies interblock smoothing; the first 9 AC coefficients are
+ * estimated from the DC values of a DCT block and its 24 neighboring blocks.
+ * We apply smoothing only for progressive JPEG decoding, and only if
+ * the coefficients it can estimate are not yet known to full precision.
+ */
+
+/* Natural-order array positions of the first 9 zigzag-order coefficients */
+#define Q01_POS  1
+#define Q10_POS  8
+#define Q20_POS  16
+#define Q11_POS  9
+#define Q02_POS  2
+#define Q03_POS  3
+#define Q12_POS  10
+#define Q21_POS  17
+#define Q30_POS  24
+
+/*
+ * Determine whether block smoothing is applicable and safe.
+ * We also latch the current states of the coef_bits[] entries for the
+ * AC coefficients; otherwise, if the input side of the decompressor
+ * advances into a new scan, we might think the coefficients are known
+ * more accurately than they really are.
+ */
+
+LOCAL(boolean)
+smoothing_ok(j_decompress_ptr cinfo)
+{
+  my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
+  boolean smoothing_useful = FALSE;
+  int ci, coefi;
+  jpeg_component_info *compptr;
+  JQUANT_TBL *qtable;
+  int *coef_bits, *prev_coef_bits;
+  int *coef_bits_latch, *prev_coef_bits_latch;
+
+  if (!cinfo->progressive_mode || cinfo->coef_bits == NULL)
+    return FALSE;
+
+  /* Allocate latch area if not already done */
+  if (coef->coef_bits_latch == NULL)
+    coef->coef_bits_latch = (int *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                  cinfo->num_components * 2 *
+                                  (SAVED_COEFS * sizeof(int)));
+  coef_bits_latch = coef->coef_bits_latch;
+  prev_coef_bits_latch =
+    &coef->coef_bits_latch[cinfo->num_components * SAVED_COEFS];
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* All components' quantization values must already be latched. */
+    if ((qtable = compptr->quant_table) == NULL)
+      return FALSE;
+    /* Verify DC & first 9 AC quantizers are nonzero to avoid zero-divide. */
+    if (qtable->quantval[0] == 0 ||
+        qtable->quantval[Q01_POS] == 0 ||
+        qtable->quantval[Q10_POS] == 0 ||
+        qtable->quantval[Q20_POS] == 0 ||
+        qtable->quantval[Q11_POS] == 0 ||
+        qtable->quantval[Q02_POS] == 0 ||
+        qtable->quantval[Q03_POS] == 0 ||
+        qtable->quantval[Q12_POS] == 0 ||
+        qtable->quantval[Q21_POS] == 0 ||
+        qtable->quantval[Q30_POS] == 0)
+      return FALSE;
+    /* DC values must be at least partly known for all components. */
+    coef_bits = cinfo->coef_bits[ci];
+    prev_coef_bits = cinfo->coef_bits[ci + cinfo->num_components];
+    if (coef_bits[0] < 0)
+      return FALSE;
+    coef_bits_latch[0] = coef_bits[0];
+    /* Block smoothing is helpful if some AC coefficients remain inaccurate. */
+    for (coefi = 1; coefi < SAVED_COEFS; coefi++) {
+      if (cinfo->input_scan_number > 1)
+        prev_coef_bits_latch[coefi] = prev_coef_bits[coefi];
+      else
+        prev_coef_bits_latch[coefi] = -1;
+      coef_bits_latch[coefi] = coef_bits[coefi];
+      if (coef_bits[coefi] != 0)
+        smoothing_useful = TRUE;
+    }
+    coef_bits_latch += SAVED_COEFS;
+    prev_coef_bits_latch += SAVED_COEFS;
+  }
+
+  return smoothing_useful;
+}
+
+
+/*
+ * Variant of decompress_data for use when doing block smoothing.
+ */
+
+METHODDEF(int)
+decompress_smooth_data(j_decompress_ptr cinfo, _JSAMPIMAGE output_buf)
+{
+  my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
+  JDIMENSION last_iMCU_row = cinfo->total_iMCU_rows - 1;
+  JDIMENSION block_num, last_block_column;
+  int ci, block_row, block_rows, access_rows, image_block_row,
+    image_block_rows;
+  JBLOCKARRAY buffer;
+  JBLOCKROW buffer_ptr, prev_prev_block_row, prev_block_row;
+  JBLOCKROW next_block_row, next_next_block_row;
+  _JSAMPARRAY output_ptr;
+  JDIMENSION output_col;
+  jpeg_component_info *compptr;
+  _inverse_DCT_method_ptr inverse_DCT;
+  boolean change_dc;
+  JCOEF *workspace;
+  int *coef_bits;
+  JQUANT_TBL *quanttbl;
+  JLONG Q00, Q01, Q02, Q03 = 0, Q10, Q11, Q12 = 0, Q20, Q21 = 0, Q30 = 0, num;
+  int DC01, DC02, DC03, DC04, DC05, DC06, DC07, DC08, DC09, DC10, DC11, DC12,
+      DC13, DC14, DC15, DC16, DC17, DC18, DC19, DC20, DC21, DC22, DC23, DC24,
+      DC25;
+  int Al, pred;
+
+  /* Keep a local variable to avoid looking it up more than once */
+  workspace = coef->workspace;
+
+  /* Force some input to be done if we are getting ahead of the input. */
+  while (cinfo->input_scan_number <= cinfo->output_scan_number &&
+         !cinfo->inputctl->eoi_reached) {
+    if (cinfo->input_scan_number == cinfo->output_scan_number) {
+      /* If input is working on current scan, we ordinarily want it to
+       * have completed the current row.  But if input scan is DC,
+       * we want it to keep two rows ahead so that next two block rows' DC
+       * values are up to date.
+       */
+      JDIMENSION delta = (cinfo->Ss == 0) ? 2 : 0;
+      if (cinfo->input_iMCU_row > cinfo->output_iMCU_row + delta)
+        break;
+    }
+    if ((*cinfo->inputctl->consume_input) (cinfo) == JPEG_SUSPENDED)
+      return JPEG_SUSPENDED;
+  }
+
+  /* OK, output from the virtual arrays. */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Don't bother to IDCT an uninteresting component. */
+    if (!compptr->component_needed)
+      continue;
+    /* Count non-dummy DCT block rows in this iMCU row. */
+    if (cinfo->output_iMCU_row + 1 < last_iMCU_row) {
+      block_rows = compptr->v_samp_factor;
+      access_rows = block_rows * 3; /* this and next two iMCU rows */
+    } else if (cinfo->output_iMCU_row < last_iMCU_row) {
+      block_rows = compptr->v_samp_factor;
+      access_rows = block_rows * 2; /* this and next iMCU row */
+    } else {
+      /* NB: can't use last_row_height here; it is input-side-dependent! */
+      block_rows = (int)(compptr->height_in_blocks % compptr->v_samp_factor);
+      if (block_rows == 0) block_rows = compptr->v_samp_factor;
+      access_rows = block_rows; /* this iMCU row only */
+    }
+    /* Align the virtual buffer for this component. */
+    if (cinfo->output_iMCU_row > 1) {
+      access_rows += 2 * compptr->v_samp_factor; /* prior two iMCU rows too */
+      buffer = (*cinfo->mem->access_virt_barray)
+        ((j_common_ptr)cinfo, coef->whole_image[ci],
+         (cinfo->output_iMCU_row - 2) * compptr->v_samp_factor,
+         (JDIMENSION)access_rows, FALSE);
+      buffer += 2 * compptr->v_samp_factor; /* point to current iMCU row */
+    } else if (cinfo->output_iMCU_row > 0) {
+      access_rows += compptr->v_samp_factor; /* prior iMCU row too */
+      buffer = (*cinfo->mem->access_virt_barray)
+        ((j_common_ptr)cinfo, coef->whole_image[ci],
+         (cinfo->output_iMCU_row - 1) * compptr->v_samp_factor,
+         (JDIMENSION)access_rows, FALSE);
+      buffer += compptr->v_samp_factor; /* point to current iMCU row */
+    } else {
+      buffer = (*cinfo->mem->access_virt_barray)
+        ((j_common_ptr)cinfo, coef->whole_image[ci],
+         (JDIMENSION)0, (JDIMENSION)access_rows, FALSE);
+    }
+    /* Fetch component-dependent info.
+     * If the current scan is incomplete, then we use the component-dependent
+     * info from the previous scan.
+     */
+    if (cinfo->output_iMCU_row > cinfo->master->last_good_iMCU_row)
+      coef_bits =
+        coef->coef_bits_latch + ((ci + cinfo->num_components) * SAVED_COEFS);
+    else
+      coef_bits = coef->coef_bits_latch + (ci * SAVED_COEFS);
+
+    /* We only do DC interpolation if no AC coefficient data is available. */
+    change_dc =
+      coef_bits[1] == -1 && coef_bits[2] == -1 && coef_bits[3] == -1 &&
+      coef_bits[4] == -1 && coef_bits[5] == -1 && coef_bits[6] == -1 &&
+      coef_bits[7] == -1 && coef_bits[8] == -1 && coef_bits[9] == -1;
+
+    quanttbl = compptr->quant_table;
+    Q00 = quanttbl->quantval[0];
+    Q01 = quanttbl->quantval[Q01_POS];
+    Q10 = quanttbl->quantval[Q10_POS];
+    Q20 = quanttbl->quantval[Q20_POS];
+    Q11 = quanttbl->quantval[Q11_POS];
+    Q02 = quanttbl->quantval[Q02_POS];
+    if (change_dc) {
+      Q03 = quanttbl->quantval[Q03_POS];
+      Q12 = quanttbl->quantval[Q12_POS];
+      Q21 = quanttbl->quantval[Q21_POS];
+      Q30 = quanttbl->quantval[Q30_POS];
+    }
+    inverse_DCT = cinfo->idct->_inverse_DCT[ci];
+    output_ptr = output_buf[ci];
+    /* Loop over all DCT blocks to be processed. */
+    image_block_rows = block_rows * cinfo->total_iMCU_rows;
+    for (block_row = 0; block_row < block_rows; block_row++) {
+      image_block_row = cinfo->output_iMCU_row * block_rows + block_row;
+      buffer_ptr = buffer[block_row] + cinfo->master->first_MCU_col[ci];
+
+      if (image_block_row > 0)
+        prev_block_row =
+          buffer[block_row - 1] + cinfo->master->first_MCU_col[ci];
+      else
+        prev_block_row = buffer_ptr;
+
+      if (image_block_row > 1)
+        prev_prev_block_row =
+          buffer[block_row - 2] + cinfo->master->first_MCU_col[ci];
+      else
+        prev_prev_block_row = prev_block_row;
+
+      if (image_block_row < image_block_rows - 1)
+        next_block_row =
+          buffer[block_row + 1] + cinfo->master->first_MCU_col[ci];
+      else
+        next_block_row = buffer_ptr;
+
+      if (image_block_row < image_block_rows - 2)
+        next_next_block_row =
+          buffer[block_row + 2] + cinfo->master->first_MCU_col[ci];
+      else
+        next_next_block_row = next_block_row;
+
+      /* We fetch the surrounding DC values using a sliding-register approach.
+       * Initialize all 25 here so as to do the right thing on narrow pics.
+       */
+      DC01 = DC02 = DC03 = DC04 = DC05 = (int)prev_prev_block_row[0][0];
+      DC06 = DC07 = DC08 = DC09 = DC10 = (int)prev_block_row[0][0];
+      DC11 = DC12 = DC13 = DC14 = DC15 = (int)buffer_ptr[0][0];
+      DC16 = DC17 = DC18 = DC19 = DC20 = (int)next_block_row[0][0];
+      DC21 = DC22 = DC23 = DC24 = DC25 = (int)next_next_block_row[0][0];
+      output_col = 0;
+      last_block_column = compptr->width_in_blocks - 1;
+      for (block_num = cinfo->master->first_MCU_col[ci];
+           block_num <= cinfo->master->last_MCU_col[ci]; block_num++) {
+        /* Fetch current DCT block into workspace so we can modify it. */
+        jcopy_block_row(buffer_ptr, (JBLOCKROW)workspace, (JDIMENSION)1);
+        /* Update DC values */
+        if (block_num == cinfo->master->first_MCU_col[ci] &&
+            block_num < last_block_column) {
+          DC04 = DC05 = (int)prev_prev_block_row[1][0];
+          DC09 = DC10 = (int)prev_block_row[1][0];
+          DC14 = DC15 = (int)buffer_ptr[1][0];
+          DC19 = DC20 = (int)next_block_row[1][0];
+          DC24 = DC25 = (int)next_next_block_row[1][0];
+        }
+        if (block_num + 1 < last_block_column) {
+          DC05 = (int)prev_prev_block_row[2][0];
+          DC10 = (int)prev_block_row[2][0];
+          DC15 = (int)buffer_ptr[2][0];
+          DC20 = (int)next_block_row[2][0];
+          DC25 = (int)next_next_block_row[2][0];
+        }
+        /* If DC interpolation is enabled, compute coefficient estimates using
+         * a Gaussian-like kernel, keeping the averages of the DC values.
+         *
+         * If DC interpolation is disabled, compute coefficient estimates using
+         * an algorithm similar to the one described in Section K.8 of the JPEG
+         * standard, except applied to a 5x5 window rather than a 3x3 window.
+         *
+         * An estimate is applied only if the coefficient is still zero and is
+         * not known to be fully accurate.
+         */
+        /* AC01 */
+        if ((Al = coef_bits[1]) != 0 && workspace[1] == 0) {
+          num = Q00 * (change_dc ?
+                (-DC01 - DC02 + DC04 + DC05 - 3 * DC06 + 13 * DC07 -
+                 13 * DC09 + 3 * DC10 - 3 * DC11 + 38 * DC12 - 38 * DC14 +
+                 3 * DC15 - 3 * DC16 + 13 * DC17 - 13 * DC19 + 3 * DC20 -
+                 DC21 - DC22 + DC24 + DC25) :
+                (-7 * DC11 + 50 * DC12 - 50 * DC14 + 7 * DC15));
+          if (num >= 0) {
+            pred = (int)(((Q01 << 7) + num) / (Q01 << 8));
+            if (Al > 0 && pred >= (1 << Al))
+              pred = (1 << Al) - 1;
+          } else {
+            pred = (int)(((Q01 << 7) - num) / (Q01 << 8));
+            if (Al > 0 && pred >= (1 << Al))
+              pred = (1 << Al) - 1;
+            pred = -pred;
+          }
+          workspace[1] = (JCOEF)pred;
+        }
+        /* AC10 */
+        if ((Al = coef_bits[2]) != 0 && workspace[8] == 0) {
+          num = Q00 * (change_dc ?
+                (-DC01 - 3 * DC02 - 3 * DC03 - 3 * DC04 - DC05 - DC06 +
+                 13 * DC07 + 38 * DC08 + 13 * DC09 - DC10 + DC16 -
+                 13 * DC17 - 38 * DC18 - 13 * DC19 + DC20 + DC21 +
+                 3 * DC22 + 3 * DC23 + 3 * DC24 + DC25) :
+                (-7 * DC03 + 50 * DC08 - 50 * DC18 + 7 * DC23));
+          if (num >= 0) {
+            pred = (int)(((Q10 << 7) + num) / (Q10 << 8));
+            if (Al > 0 && pred >= (1 << Al))
+              pred = (1 << Al) - 1;
+          } else {
+            pred = (int)(((Q10 << 7) - num) / (Q10 << 8));
+            if (Al > 0 && pred >= (1 << Al))
+              pred = (1 << Al) - 1;
+            pred = -pred;
+          }
+          workspace[8] = (JCOEF)pred;
+        }
+        /* AC20 */
+        if ((Al = coef_bits[3]) != 0 && workspace[16] == 0) {
+          num = Q00 * (change_dc ?
+                (DC03 + 2 * DC07 + 7 * DC08 + 2 * DC09 - 5 * DC12 - 14 * DC13 -
+                 5 * DC14 + 2 * DC17 + 7 * DC18 + 2 * DC19 + DC23) :
+                (-DC03 + 13 * DC08 - 24 * DC13 + 13 * DC18 - DC23));
+          if (num >= 0) {
+            pred = (int)(((Q20 << 7) + num) / (Q20 << 8));
+            if (Al > 0 && pred >= (1 << Al))
+              pred = (1 << Al) - 1;
+          } else {
+            pred = (int)(((Q20 << 7) - num) / (Q20 << 8));
+            if (Al > 0 && pred >= (1 << Al))
+              pred = (1 << Al) - 1;
+            pred = -pred;
+          }
+          workspace[16] = (JCOEF)pred;
+        }
+        /* AC11 */
+        if ((Al = coef_bits[4]) != 0 && workspace[9] == 0) {
+          num = Q00 * (change_dc ?
+                (-DC01 + DC05 + 9 * DC07 - 9 * DC09 - 9 * DC17 +
+                 9 * DC19 + DC21 - DC25) :
+                (DC10 + DC16 - 10 * DC17 + 10 * DC19 - DC02 - DC20 + DC22 -
+                 DC24 + DC04 - DC06 + 10 * DC07 - 10 * DC09));
+          if (num >= 0) {
+            pred = (int)(((Q11 << 7) + num) / (Q11 << 8));
+            if (Al > 0 && pred >= (1 << Al))
+              pred = (1 << Al) - 1;
+          } else {
+            pred = (int)(((Q11 << 7) - num) / (Q11 << 8));
+            if (Al > 0 && pred >= (1 << Al))
+              pred = (1 << Al) - 1;
+            pred = -pred;
+          }
+          workspace[9] = (JCOEF)pred;
+        }
+        /* AC02 */
+        if ((Al = coef_bits[5]) != 0 && workspace[2] == 0) {
+          num = Q00 * (change_dc ?
+                (2 * DC07 - 5 * DC08 + 2 * DC09 + DC11 + 7 * DC12 - 14 * DC13 +
+                 7 * DC14 + DC15 + 2 * DC17 - 5 * DC18 + 2 * DC19) :
+                (-DC11 + 13 * DC12 - 24 * DC13 + 13 * DC14 - DC15));
+          if (num >= 0) {
+            pred = (int)(((Q02 << 7) + num) / (Q02 << 8));
+            if (Al > 0 && pred >= (1 << Al))
+              pred = (1 << Al) - 1;
+          } else {
+            pred = (int)(((Q02 << 7) - num) / (Q02 << 8));
+            if (Al > 0 && pred >= (1 << Al))
+              pred = (1 << Al) - 1;
+            pred = -pred;
+          }
+          workspace[2] = (JCOEF)pred;
+        }
+        if (change_dc) {
+          /* AC03 */
+          if ((Al = coef_bits[6]) != 0 && workspace[3] == 0) {
+            num = Q00 * (DC07 - DC09 + 2 * DC12 - 2 * DC14 + DC17 - DC19);
+            if (num >= 0) {
+              pred = (int)(((Q03 << 7) + num) / (Q03 << 8));
+              if (Al > 0 && pred >= (1 << Al))
+                pred = (1 << Al) - 1;
+            } else {
+              pred = (int)(((Q03 << 7) - num) / (Q03 << 8));
+              if (Al > 0 && pred >= (1 << Al))
+                pred = (1 << Al) - 1;
+              pred = -pred;
+            }
+            workspace[3] = (JCOEF)pred;
+          }
+          /* AC12 */
+          if ((Al = coef_bits[7]) != 0 && workspace[10] == 0) {
+            num = Q00 * (DC07 - 3 * DC08 + DC09 - DC17 + 3 * DC18 - DC19);
+            if (num >= 0) {
+              pred = (int)(((Q12 << 7) + num) / (Q12 << 8));
+              if (Al > 0 && pred >= (1 << Al))
+                pred = (1 << Al) - 1;
+            } else {
+              pred = (int)(((Q12 << 7) - num) / (Q12 << 8));
+              if (Al > 0 && pred >= (1 << Al))
+                pred = (1 << Al) - 1;
+              pred = -pred;
+            }
+            workspace[10] = (JCOEF)pred;
+          }
+          /* AC21 */
+          if ((Al = coef_bits[8]) != 0 && workspace[17] == 0) {
+            num = Q00 * (DC07 - DC09 - 3 * DC12 + 3 * DC14 + DC17 - DC19);
+            if (num >= 0) {
+              pred = (int)(((Q21 << 7) + num) / (Q21 << 8));
+              if (Al > 0 && pred >= (1 << Al))
+                pred = (1 << Al) - 1;
+            } else {
+              pred = (int)(((Q21 << 7) - num) / (Q21 << 8));
+              if (Al > 0 && pred >= (1 << Al))
+                pred = (1 << Al) - 1;
+              pred = -pred;
+            }
+            workspace[17] = (JCOEF)pred;
+          }
+          /* AC30 */
+          if ((Al = coef_bits[9]) != 0 && workspace[24] == 0) {
+            num = Q00 * (DC07 + 2 * DC08 + DC09 - DC17 - 2 * DC18 - DC19);
+            if (num >= 0) {
+              pred = (int)(((Q30 << 7) + num) / (Q30 << 8));
+              if (Al > 0 && pred >= (1 << Al))
+                pred = (1 << Al) - 1;
+            } else {
+              pred = (int)(((Q30 << 7) - num) / (Q30 << 8));
+              if (Al > 0 && pred >= (1 << Al))
+                pred = (1 << Al) - 1;
+              pred = -pred;
+            }
+            workspace[24] = (JCOEF)pred;
+          }
+          /* coef_bits[0] is non-negative.  Otherwise this function would not
+           * be called.
+           */
+          num = Q00 *
+                (-2 * DC01 - 6 * DC02 - 8 * DC03 - 6 * DC04 - 2 * DC05 -
+                 6 * DC06 + 6 * DC07 + 42 * DC08 + 6 * DC09 - 6 * DC10 -
+                 8 * DC11 + 42 * DC12 + 152 * DC13 + 42 * DC14 - 8 * DC15 -
+                 6 * DC16 + 6 * DC17 + 42 * DC18 + 6 * DC19 - 6 * DC20 -
+                 2 * DC21 - 6 * DC22 - 8 * DC23 - 6 * DC24 - 2 * DC25);
+          if (num >= 0) {
+            pred = (int)(((Q00 << 7) + num) / (Q00 << 8));
+          } else {
+            pred = (int)(((Q00 << 7) - num) / (Q00 << 8));
+            pred = -pred;
+          }
+          workspace[0] = (JCOEF)pred;
+        }  /* change_dc */
+
+        /* OK, do the IDCT */
+        (*inverse_DCT) (cinfo, compptr, (JCOEFPTR)workspace, output_ptr,
+                        output_col);
+        /* Advance for next column */
+        DC01 = DC02;  DC02 = DC03;  DC03 = DC04;  DC04 = DC05;
+        DC06 = DC07;  DC07 = DC08;  DC08 = DC09;  DC09 = DC10;
+        DC11 = DC12;  DC12 = DC13;  DC13 = DC14;  DC14 = DC15;
+        DC16 = DC17;  DC17 = DC18;  DC18 = DC19;  DC19 = DC20;
+        DC21 = DC22;  DC22 = DC23;  DC23 = DC24;  DC24 = DC25;
+        buffer_ptr++, prev_block_row++, next_block_row++,
+          prev_prev_block_row++, next_next_block_row++;
+        output_col += compptr->_DCT_scaled_size;
+      }
+      output_ptr += compptr->_DCT_scaled_size;
+    }
+  }
+
+  if (++(cinfo->output_iMCU_row) < cinfo->total_iMCU_rows)
+    return JPEG_ROW_COMPLETED;
+  return JPEG_SCAN_COMPLETED;
+}
+
+#endif /* BLOCK_SMOOTHING_SUPPORTED */
+
+
+/*
+ * Initialize coefficient buffer controller.
+ */
+
+GLOBAL(void)
+_jinit_d_coef_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_coef_ptr coef;
+
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  coef = (my_coef_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(my_coef_controller));
+  cinfo->coef = (struct jpeg_d_coef_controller *)coef;
+  coef->pub.start_input_pass = start_input_pass;
+  coef->pub.start_output_pass = start_output_pass;
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  coef->coef_bits_latch = NULL;
+#endif
+
+  /* Create the coefficient buffer. */
+  if (need_full_buffer) {
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+    /* Allocate a full-image virtual array for each component, */
+    /* padded to a multiple of samp_factor DCT blocks in each direction. */
+    /* Note we ask for a pre-zeroed array. */
+    int ci, access_rows;
+    jpeg_component_info *compptr;
+
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+         ci++, compptr++) {
+      access_rows = compptr->v_samp_factor;
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+      /* If block smoothing could be used, need a bigger window */
+      if (cinfo->progressive_mode)
+        access_rows *= 5;
+#endif
+      coef->whole_image[ci] = (*cinfo->mem->request_virt_barray)
+        ((j_common_ptr)cinfo, JPOOL_IMAGE, TRUE,
+         (JDIMENSION)jround_up((long)compptr->width_in_blocks,
+                               (long)compptr->h_samp_factor),
+         (JDIMENSION)jround_up((long)compptr->height_in_blocks,
+                               (long)compptr->v_samp_factor),
+         (JDIMENSION)access_rows);
+    }
+    coef->pub.consume_data = consume_data;
+    coef->pub._decompress_data = decompress_data;
+    coef->pub.coef_arrays = coef->whole_image; /* link to virtual arrays */
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    /* We only need a single-MCU buffer. */
+    JBLOCKROW buffer;
+    int i;
+
+    buffer = (JBLOCKROW)
+      (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                  D_MAX_BLOCKS_IN_MCU * sizeof(JBLOCK));
+    for (i = 0; i < D_MAX_BLOCKS_IN_MCU; i++) {
+      coef->MCU_buffer[i] = buffer + i;
+    }
+    coef->pub.consume_data = dummy_consume_data;
+    coef->pub._decompress_data = decompress_onepass;
+    coef->pub.coef_arrays = NULL; /* flag for no virtual arrays */
+  }
+
+  /* Allocate the workspace buffer */
+  coef->workspace = (JCOEF *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(JCOEF) * DCTSIZE2);
+}
diff --git a/thirdparty/libjpeg-turbo/src/jdcoefct.h b/thirdparty/libjpeg-turbo/src/jdcoefct.h
new file mode 100644
index 00000000000..bbe9e970515
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdcoefct.h
@@ -0,0 +1,88 @@
+/*
+ * jdcoefct.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2020, Google, Inc.
+ * Copyright (C) 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ */
+
+#define JPEG_INTERNALS
+#include "jpeglib.h"
+
+
+#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
+
+/* Block smoothing is only applicable for progressive JPEG, so: */
+#ifndef D_PROGRESSIVE_SUPPORTED
+#undef BLOCK_SMOOTHING_SUPPORTED
+#endif
+
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_d_coef_controller pub; /* public fields */
+
+  /* These variables keep track of the current location of the input side. */
+  /* cinfo->input_iMCU_row is also used for this. */
+  JDIMENSION MCU_ctr;           /* counts MCUs processed in current row */
+  int MCU_vert_offset;          /* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;    /* number of such rows needed */
+
+  /* The output side's location is represented by cinfo->output_iMCU_row. */
+
+  /* In single-pass modes, it's sufficient to buffer just one MCU.
+   * We allocate a workspace of D_MAX_BLOCKS_IN_MCU coefficient blocks,
+   * and let the entropy decoder write into that workspace each time.
+   * In multi-pass modes, this array points to the current MCU's blocks
+   * within the virtual arrays; it is used only by the input side.
+   */
+  JBLOCKROW MCU_buffer[D_MAX_BLOCKS_IN_MCU];
+
+  /* Temporary workspace for one MCU */
+  JCOEF *workspace;
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+  /* In multi-pass modes, we need a virtual block array for each component. */
+  jvirt_barray_ptr whole_image[MAX_COMPONENTS];
+#endif
+
+#ifdef BLOCK_SMOOTHING_SUPPORTED
+  /* When doing block smoothing, we latch coefficient Al values here */
+  int *coef_bits_latch;
+#define SAVED_COEFS  10         /* we save coef_bits[0..9] */
+#endif
+} my_coef_controller;
+
+typedef my_coef_controller *my_coef_ptr;
+
+
+LOCAL(void)
+start_iMCU_row(j_decompress_ptr cinfo)
+/* Reset within-iMCU-row counters for a new row (input side) */
+{
+  my_coef_ptr coef = (my_coef_ptr)cinfo->coef;
+
+  /* In an interleaved scan, an MCU row is the same as an iMCU row.
+   * In a noninterleaved scan, an iMCU row has v_samp_factor MCU rows.
+   * But at the bottom of the image, process only what's left.
+   */
+  if (cinfo->comps_in_scan > 1) {
+    coef->MCU_rows_per_iMCU_row = 1;
+  } else {
+    if (cinfo->input_iMCU_row < (cinfo->total_iMCU_rows - 1))
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->v_samp_factor;
+    else
+      coef->MCU_rows_per_iMCU_row = cinfo->cur_comp_info[0]->last_row_height;
+  }
+
+  coef->MCU_ctr = 0;
+  coef->MCU_vert_offset = 0;
+}
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
diff --git a/thirdparty/libjpeg-turbo/src/jdcol565.c b/thirdparty/libjpeg-turbo/src/jdcol565.c
new file mode 100644
index 00000000000..2172d98fdaa
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdcol565.c
@@ -0,0 +1,392 @@
+/*
+ * jdcol565.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modifications:
+ * Copyright (C) 2013, Linaro Limited.
+ * Copyright (C) 2014-2015, 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains output colorspace conversion routines.
+ */
+
+/* This file is included by jdcolor.c */
+
+
+INLINE
+LOCAL(void)
+ycc_rgb565_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                            JDIMENSION input_row, _JSAMPARRAY output_buf,
+                            int num_rows)
+{
+#if BITS_IN_JSAMPLE != 16
+  my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
+  register int y, cb, cr;
+  register _JSAMPROW outptr;
+  register _JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  /* copy these pointers into registers if possible */
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
+  register int *Crrtab = cconvert->Cr_r_tab;
+  register int *Cbbtab = cconvert->Cb_b_tab;
+  register JLONG *Crgtab = cconvert->Cr_g_tab;
+  register JLONG *Cbgtab = cconvert->Cb_g_tab;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    JLONG rgb;
+    unsigned int r, g, b;
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+
+    if (PACK_NEED_ALIGNMENT(outptr)) {
+      y  = *inptr0++;
+      cb = *inptr1++;
+      cr = *inptr2++;
+      r = range_limit[y + Crrtab[cr]];
+      g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                            SCALEBITS))];
+      b = range_limit[y + Cbbtab[cb]];
+      rgb = PACK_SHORT_565(r, g, b);
+      *(INT16 *)outptr = (INT16)rgb;
+      outptr += 2;
+      num_cols--;
+    }
+    for (col = 0; col < (num_cols >> 1); col++) {
+      y  = *inptr0++;
+      cb = *inptr1++;
+      cr = *inptr2++;
+      r = range_limit[y + Crrtab[cr]];
+      g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                            SCALEBITS))];
+      b = range_limit[y + Cbbtab[cb]];
+      rgb = PACK_SHORT_565(r, g, b);
+
+      y  = *inptr0++;
+      cb = *inptr1++;
+      cr = *inptr2++;
+      r = range_limit[y + Crrtab[cr]];
+      g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                            SCALEBITS))];
+      b = range_limit[y + Cbbtab[cb]];
+      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
+      outptr += 4;
+    }
+    if (num_cols & 1) {
+      y  = *inptr0;
+      cb = *inptr1;
+      cr = *inptr2;
+      r = range_limit[y + Crrtab[cr]];
+      g = range_limit[y + ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                            SCALEBITS))];
+      b = range_limit[y + Cbbtab[cb]];
+      rgb = PACK_SHORT_565(r, g, b);
+      *(INT16 *)outptr = (INT16)rgb;
+    }
+  }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+}
+
+
+INLINE
+LOCAL(void)
+ycc_rgb565D_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                             JDIMENSION input_row, _JSAMPARRAY output_buf,
+                             int num_rows)
+{
+#if BITS_IN_JSAMPLE != 16
+  my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
+  register int y, cb, cr;
+  register _JSAMPROW outptr;
+  register _JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  /* copy these pointers into registers if possible */
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
+  register int *Crrtab = cconvert->Cr_r_tab;
+  register int *Cbbtab = cconvert->Cb_b_tab;
+  register JLONG *Crgtab = cconvert->Cr_g_tab;
+  register JLONG *Cbgtab = cconvert->Cb_g_tab;
+  JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    JLONG rgb;
+    unsigned int r, g, b;
+
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    if (PACK_NEED_ALIGNMENT(outptr)) {
+      y  = *inptr0++;
+      cb = *inptr1++;
+      cr = *inptr2++;
+      r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
+      g = range_limit[DITHER_565_G(y +
+                                   ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                                     SCALEBITS)), d0)];
+      b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)];
+      rgb = PACK_SHORT_565(r, g, b);
+      *(INT16 *)outptr = (INT16)rgb;
+      outptr += 2;
+      num_cols--;
+    }
+    for (col = 0; col < (num_cols >> 1); col++) {
+      y  = *inptr0++;
+      cb = *inptr1++;
+      cr = *inptr2++;
+      r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
+      g = range_limit[DITHER_565_G(y +
+                                   ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                                     SCALEBITS)), d0)];
+      b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)];
+      d0 = DITHER_ROTATE(d0);
+      rgb = PACK_SHORT_565(r, g, b);
+
+      y  = *inptr0++;
+      cb = *inptr1++;
+      cr = *inptr2++;
+      r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
+      g = range_limit[DITHER_565_G(y +
+                                   ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                                     SCALEBITS)), d0)];
+      b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)];
+      d0 = DITHER_ROTATE(d0);
+      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
+      outptr += 4;
+    }
+    if (num_cols & 1) {
+      y  = *inptr0;
+      cb = *inptr1;
+      cr = *inptr2;
+      r = range_limit[DITHER_565_R(y + Crrtab[cr], d0)];
+      g = range_limit[DITHER_565_G(y +
+                                   ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                                     SCALEBITS)), d0)];
+      b = range_limit[DITHER_565_B(y + Cbbtab[cb], d0)];
+      rgb = PACK_SHORT_565(r, g, b);
+      *(INT16 *)outptr = (INT16)rgb;
+    }
+  }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+}
+
+
+INLINE
+LOCAL(void)
+rgb_rgb565_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                            JDIMENSION input_row, _JSAMPARRAY output_buf,
+                            int num_rows)
+{
+  register _JSAMPROW outptr;
+  register _JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    JLONG rgb;
+    unsigned int r, g, b;
+
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    if (PACK_NEED_ALIGNMENT(outptr)) {
+      r = *inptr0++;
+      g = *inptr1++;
+      b = *inptr2++;
+      rgb = PACK_SHORT_565(r, g, b);
+      *(INT16 *)outptr = (INT16)rgb;
+      outptr += 2;
+      num_cols--;
+    }
+    for (col = 0; col < (num_cols >> 1); col++) {
+      r = *inptr0++;
+      g = *inptr1++;
+      b = *inptr2++;
+      rgb = PACK_SHORT_565(r, g, b);
+
+      r = *inptr0++;
+      g = *inptr1++;
+      b = *inptr2++;
+      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
+      outptr += 4;
+    }
+    if (num_cols & 1) {
+      r = *inptr0;
+      g = *inptr1;
+      b = *inptr2;
+      rgb = PACK_SHORT_565(r, g, b);
+      *(INT16 *)outptr = (INT16)rgb;
+    }
+  }
+}
+
+
+INLINE
+LOCAL(void)
+rgb_rgb565D_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                             JDIMENSION input_row, _JSAMPARRAY output_buf,
+                             int num_rows)
+{
+  register _JSAMPROW outptr;
+  register _JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
+  JDIMENSION num_cols = cinfo->output_width;
+  JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    JLONG rgb;
+    unsigned int r, g, b;
+
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    if (PACK_NEED_ALIGNMENT(outptr)) {
+      r = range_limit[DITHER_565_R(*inptr0++, d0)];
+      g = range_limit[DITHER_565_G(*inptr1++, d0)];
+      b = range_limit[DITHER_565_B(*inptr2++, d0)];
+      rgb = PACK_SHORT_565(r, g, b);
+      *(INT16 *)outptr = (INT16)rgb;
+      outptr += 2;
+      num_cols--;
+    }
+    for (col = 0; col < (num_cols >> 1); col++) {
+      r = range_limit[DITHER_565_R(*inptr0++, d0)];
+      g = range_limit[DITHER_565_G(*inptr1++, d0)];
+      b = range_limit[DITHER_565_B(*inptr2++, d0)];
+      d0 = DITHER_ROTATE(d0);
+      rgb = PACK_SHORT_565(r, g, b);
+
+      r = range_limit[DITHER_565_R(*inptr0++, d0)];
+      g = range_limit[DITHER_565_G(*inptr1++, d0)];
+      b = range_limit[DITHER_565_B(*inptr2++, d0)];
+      d0 = DITHER_ROTATE(d0);
+      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
+      outptr += 4;
+    }
+    if (num_cols & 1) {
+      r = range_limit[DITHER_565_R(*inptr0, d0)];
+      g = range_limit[DITHER_565_G(*inptr1, d0)];
+      b = range_limit[DITHER_565_B(*inptr2, d0)];
+      rgb = PACK_SHORT_565(r, g, b);
+      *(INT16 *)outptr = (INT16)rgb;
+    }
+  }
+}
+
+
+INLINE
+LOCAL(void)
+gray_rgb565_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                             JDIMENSION input_row, _JSAMPARRAY output_buf,
+                             int num_rows)
+{
+  register _JSAMPROW inptr, outptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    JLONG rgb;
+    unsigned int g;
+
+    inptr = input_buf[0][input_row++];
+    outptr = *output_buf++;
+    if (PACK_NEED_ALIGNMENT(outptr)) {
+      g = *inptr++;
+      rgb = PACK_SHORT_565(g, g, g);
+      *(INT16 *)outptr = (INT16)rgb;
+      outptr += 2;
+      num_cols--;
+    }
+    for (col = 0; col < (num_cols >> 1); col++) {
+      g = *inptr++;
+      rgb = PACK_SHORT_565(g, g, g);
+      g = *inptr++;
+      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(g, g, g));
+      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
+      outptr += 4;
+    }
+    if (num_cols & 1) {
+      g = *inptr;
+      rgb = PACK_SHORT_565(g, g, g);
+      *(INT16 *)outptr = (INT16)rgb;
+    }
+  }
+}
+
+
+INLINE
+LOCAL(void)
+gray_rgb565D_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                              JDIMENSION input_row, _JSAMPARRAY output_buf,
+                              int num_rows)
+{
+  register _JSAMPROW inptr, outptr;
+  register JDIMENSION col;
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
+  JDIMENSION num_cols = cinfo->output_width;
+  JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
+
+  while (--num_rows >= 0) {
+    JLONG rgb;
+    unsigned int g;
+
+    inptr = input_buf[0][input_row++];
+    outptr = *output_buf++;
+    if (PACK_NEED_ALIGNMENT(outptr)) {
+      g = *inptr++;
+      g = range_limit[DITHER_565_R(g, d0)];
+      rgb = PACK_SHORT_565(g, g, g);
+      *(INT16 *)outptr = (INT16)rgb;
+      outptr += 2;
+      num_cols--;
+    }
+    for (col = 0; col < (num_cols >> 1); col++) {
+      g = *inptr++;
+      g = range_limit[DITHER_565_R(g, d0)];
+      rgb = PACK_SHORT_565(g, g, g);
+      d0 = DITHER_ROTATE(d0);
+
+      g = *inptr++;
+      g = range_limit[DITHER_565_R(g, d0)];
+      rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(g, g, g));
+      d0 = DITHER_ROTATE(d0);
+
+      WRITE_TWO_ALIGNED_PIXELS(outptr, rgb);
+      outptr += 4;
+    }
+    if (num_cols & 1) {
+      g = *inptr;
+      g = range_limit[DITHER_565_R(g, d0)];
+      rgb = PACK_SHORT_565(g, g, g);
+      *(INT16 *)outptr = (INT16)rgb;
+    }
+  }
+}
diff --git a/thirdparty/libjpeg-turbo/src/jdcolext.c b/thirdparty/libjpeg-turbo/src/jdcolext.c
new file mode 100644
index 00000000000..f22e29d7224
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdcolext.c
@@ -0,0 +1,145 @@
+/*
+ * jdcolext.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009, 2011, 2015, 2022-2023, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains output colorspace conversion routines.
+ */
+
+
+/* This file is included by jdcolor.c */
+
+
+/*
+ * Convert some rows of samples to the output colorspace.
+ *
+ * Note that we change from noninterleaved, one-plane-per-component format
+ * to interleaved-pixel format.  The output buffer is therefore three times
+ * as wide as the input buffer.
+ * A starting row offset is provided only for the input buffer.  The caller
+ * can easily adjust the passed output_buf value to accommodate any row
+ * offset required on that side.
+ */
+
+INLINE
+LOCAL(void)
+ycc_rgb_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                         JDIMENSION input_row, _JSAMPARRAY output_buf,
+                         int num_rows)
+{
+#if BITS_IN_JSAMPLE != 16
+  my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
+  register int y, cb, cr;
+  register _JSAMPROW outptr;
+  register _JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  /* copy these pointers into registers if possible */
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
+  register int *Crrtab = cconvert->Cr_r_tab;
+  register int *Cbbtab = cconvert->Cb_b_tab;
+  register JLONG *Crgtab = cconvert->Cr_g_tab;
+  register JLONG *Cbgtab = cconvert->Cb_g_tab;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      y  = inptr0[col];
+      cb = inptr1[col];
+      cr = inptr2[col];
+      /* Range-limiting is essential due to noise introduced by DCT losses. */
+      outptr[RGB_RED] =   range_limit[y + Crrtab[cr]];
+      outptr[RGB_GREEN] = range_limit[y +
+                              ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                                SCALEBITS))];
+      outptr[RGB_BLUE] =  range_limit[y + Cbbtab[cb]];
+      /* Set unused byte to _MAXJSAMPLE so it can be interpreted as an */
+      /* opaque alpha channel value */
+#ifdef RGB_ALPHA
+      outptr[RGB_ALPHA] = _MAXJSAMPLE;
+#endif
+      outptr += RGB_PIXELSIZE;
+    }
+  }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+}
+
+
+/*
+ * Convert grayscale to RGB: just duplicate the graylevel three times.
+ * This is provided to support applications that don't want to cope
+ * with grayscale as a separate case.
+ */
+
+INLINE
+LOCAL(void)
+gray_rgb_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                          JDIMENSION input_row, _JSAMPARRAY output_buf,
+                          int num_rows)
+{
+  register _JSAMPROW inptr, outptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr = input_buf[0][input_row++];
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      outptr[RGB_RED] = outptr[RGB_GREEN] = outptr[RGB_BLUE] = inptr[col];
+      /* Set unused byte to _MAXJSAMPLE so it can be interpreted as an */
+      /* opaque alpha channel value */
+#ifdef RGB_ALPHA
+      outptr[RGB_ALPHA] = _MAXJSAMPLE;
+#endif
+      outptr += RGB_PIXELSIZE;
+    }
+  }
+}
+
+
+/*
+ * Convert RGB to extended RGB: just swap the order of source pixels
+ */
+
+INLINE
+LOCAL(void)
+rgb_rgb_convert_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                         JDIMENSION input_row, _JSAMPARRAY output_buf,
+                         int num_rows)
+{
+  register _JSAMPROW inptr0, inptr1, inptr2;
+  register _JSAMPROW outptr;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      outptr[RGB_RED] = inptr0[col];
+      outptr[RGB_GREEN] = inptr1[col];
+      outptr[RGB_BLUE] = inptr2[col];
+      /* Set unused byte to _MAXJSAMPLE so it can be interpreted as an */
+      /* opaque alpha channel value */
+#ifdef RGB_ALPHA
+      outptr[RGB_ALPHA] = _MAXJSAMPLE;
+#endif
+      outptr += RGB_PIXELSIZE;
+    }
+  }
+}
diff --git a/thirdparty/libjpeg-turbo/src/jdcolor.c b/thirdparty/libjpeg-turbo/src/jdcolor.c
new file mode 100644
index 00000000000..aecbd9dcd36
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdcolor.c
@@ -0,0 +1,946 @@
+/*
+ * jdcolor.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2011 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2009, 2011-2012, 2014-2015, 2022, 2024, D. R. Commander.
+ * Copyright (C) 2013, Linaro Limited.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains output colorspace conversion routines.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jsimd.h"
+#include "jsamplecomp.h"
+
+
+#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_deconverter pub; /* public fields */
+
+#if BITS_IN_JSAMPLE != 16
+  /* Private state for YCC->RGB conversion */
+  int *Cr_r_tab;                /* => table for Cr to R conversion */
+  int *Cb_b_tab;                /* => table for Cb to B conversion */
+  JLONG *Cr_g_tab;              /* => table for Cr to G conversion */
+  JLONG *Cb_g_tab;              /* => table for Cb to G conversion */
+
+  /* Private state for RGB->Y conversion */
+  JLONG *rgb_y_tab;             /* => table for RGB to Y conversion */
+#endif
+} my_color_deconverter;
+
+typedef my_color_deconverter *my_cconvert_ptr;
+
+
+/**************** YCbCr -> RGB conversion: most common case **************/
+/****************   RGB -> Y   conversion: less common case **************/
+
+/*
+ * YCbCr is defined per CCIR 601-1, except that Cb and Cr are
+ * normalized to the range 0.._MAXJSAMPLE rather than -0.5 .. 0.5.
+ * The conversion equations to be implemented are therefore
+ *
+ *      R = Y                + 1.40200 * Cr
+ *      G = Y - 0.34414 * Cb - 0.71414 * Cr
+ *      B = Y + 1.77200 * Cb
+ *
+ *      Y = 0.29900 * R + 0.58700 * G + 0.11400 * B
+ *
+ * where Cb and Cr represent the incoming values less _CENTERJSAMPLE.
+ * (These numbers are derived from TIFF 6.0 section 21, dated 3-June-92.)
+ *
+ * To avoid floating-point arithmetic, we represent the fractional constants
+ * as integers scaled up by 2^16 (about 4 digits precision); we have to divide
+ * the products by 2^16, with appropriate rounding, to get the correct answer.
+ * Notice that Y, being an integral input, does not contribute any fraction
+ * so it need not participate in the rounding.
+ *
+ * For even more speed, we avoid doing any multiplications in the inner loop
+ * by precalculating the constants times Cb and Cr for all possible values.
+ * For 8-bit samples this is very reasonable (only 256 entries per table);
+ * for 12-bit samples it is still acceptable.  It's not very reasonable for
+ * 16-bit samples, but if you want lossless storage you shouldn't be changing
+ * colorspace anyway.
+ * The Cr=>R and Cb=>B values can be rounded to integers in advance; the
+ * values for the G calculation are left scaled up, since we must add them
+ * together before rounding.
+ */
+
+#define SCALEBITS       16      /* speediest right-shift on some machines */
+#define ONE_HALF        ((JLONG)1 << (SCALEBITS - 1))
+#define FIX(x)          ((JLONG)((x) * (1L << SCALEBITS) + 0.5))
+
+/* We allocate one big table for RGB->Y conversion and divide it up into
+ * three parts, instead of doing three alloc_small requests.  This lets us
+ * use a single table base address, which can be held in a register in the
+ * inner loops on many machines (more than can hold all three addresses,
+ * anyway).
+ */
+
+#define R_Y_OFF         0                       /* offset to R => Y section */
+#define G_Y_OFF         (1 * (_MAXJSAMPLE + 1)) /* offset to G => Y section */
+#define B_Y_OFF         (2 * (_MAXJSAMPLE + 1)) /* etc. */
+#define TABLE_SIZE      (3 * (_MAXJSAMPLE + 1))
+
+
+/* Include inline routines for colorspace extensions */
+
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+
+#define RGB_RED  EXT_RGB_RED
+#define RGB_GREEN  EXT_RGB_GREEN
+#define RGB_BLUE  EXT_RGB_BLUE
+#define RGB_PIXELSIZE  EXT_RGB_PIXELSIZE
+#define ycc_rgb_convert_internal  ycc_extrgb_convert_internal
+#define gray_rgb_convert_internal  gray_extrgb_convert_internal
+#define rgb_rgb_convert_internal  rgb_extrgb_convert_internal
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef ycc_rgb_convert_internal
+#undef gray_rgb_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED  EXT_RGBX_RED
+#define RGB_GREEN  EXT_RGBX_GREEN
+#define RGB_BLUE  EXT_RGBX_BLUE
+#define RGB_ALPHA  3
+#define RGB_PIXELSIZE  EXT_RGBX_PIXELSIZE
+#define ycc_rgb_convert_internal  ycc_extrgbx_convert_internal
+#define gray_rgb_convert_internal  gray_extrgbx_convert_internal
+#define rgb_rgb_convert_internal  rgb_extrgbx_convert_internal
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef ycc_rgb_convert_internal
+#undef gray_rgb_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED  EXT_BGR_RED
+#define RGB_GREEN  EXT_BGR_GREEN
+#define RGB_BLUE  EXT_BGR_BLUE
+#define RGB_PIXELSIZE  EXT_BGR_PIXELSIZE
+#define ycc_rgb_convert_internal  ycc_extbgr_convert_internal
+#define gray_rgb_convert_internal  gray_extbgr_convert_internal
+#define rgb_rgb_convert_internal  rgb_extbgr_convert_internal
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef ycc_rgb_convert_internal
+#undef gray_rgb_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED  EXT_BGRX_RED
+#define RGB_GREEN  EXT_BGRX_GREEN
+#define RGB_BLUE  EXT_BGRX_BLUE
+#define RGB_ALPHA  3
+#define RGB_PIXELSIZE  EXT_BGRX_PIXELSIZE
+#define ycc_rgb_convert_internal  ycc_extbgrx_convert_internal
+#define gray_rgb_convert_internal  gray_extbgrx_convert_internal
+#define rgb_rgb_convert_internal  rgb_extbgrx_convert_internal
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef ycc_rgb_convert_internal
+#undef gray_rgb_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED  EXT_XBGR_RED
+#define RGB_GREEN  EXT_XBGR_GREEN
+#define RGB_BLUE  EXT_XBGR_BLUE
+#define RGB_ALPHA  0
+#define RGB_PIXELSIZE  EXT_XBGR_PIXELSIZE
+#define ycc_rgb_convert_internal  ycc_extxbgr_convert_internal
+#define gray_rgb_convert_internal  gray_extxbgr_convert_internal
+#define rgb_rgb_convert_internal  rgb_extxbgr_convert_internal
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef ycc_rgb_convert_internal
+#undef gray_rgb_convert_internal
+#undef rgb_rgb_convert_internal
+
+#define RGB_RED  EXT_XRGB_RED
+#define RGB_GREEN  EXT_XRGB_GREEN
+#define RGB_BLUE  EXT_XRGB_BLUE
+#define RGB_ALPHA  0
+#define RGB_PIXELSIZE  EXT_XRGB_PIXELSIZE
+#define ycc_rgb_convert_internal  ycc_extxrgb_convert_internal
+#define gray_rgb_convert_internal  gray_extxrgb_convert_internal
+#define rgb_rgb_convert_internal  rgb_extxrgb_convert_internal
+#include "jdcolext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef ycc_rgb_convert_internal
+#undef gray_rgb_convert_internal
+#undef rgb_rgb_convert_internal
+
+
+/*
+ * Initialize tables for YCC->RGB colorspace conversion.
+ */
+
+LOCAL(void)
+build_ycc_rgb_table(j_decompress_ptr cinfo)
+{
+#if BITS_IN_JSAMPLE != 16
+  my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
+  int i;
+  JLONG x;
+  SHIFT_TEMPS
+
+  cconvert->Cr_r_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                (_MAXJSAMPLE + 1) * sizeof(int));
+  cconvert->Cb_b_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                (_MAXJSAMPLE + 1) * sizeof(int));
+  cconvert->Cr_g_tab = (JLONG *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                (_MAXJSAMPLE + 1) * sizeof(JLONG));
+  cconvert->Cb_g_tab = (JLONG *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                (_MAXJSAMPLE + 1) * sizeof(JLONG));
+
+  for (i = 0, x = -_CENTERJSAMPLE; i <= _MAXJSAMPLE; i++, x++) {
+    /* i is the actual input pixel value, in the range 0.._MAXJSAMPLE */
+    /* The Cb or Cr value we are thinking of is x = i - _CENTERJSAMPLE */
+    /* Cr=>R value is nearest int to 1.40200 * x */
+    cconvert->Cr_r_tab[i] = (int)
+                    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
+    /* Cb=>B value is nearest int to 1.77200 * x */
+    cconvert->Cb_b_tab[i] = (int)
+                    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
+    /* Cr=>G value is scaled-up -0.71414 * x */
+    cconvert->Cr_g_tab[i] = (-FIX(0.71414)) * x;
+    /* Cb=>G value is scaled-up -0.34414 * x */
+    /* We also add in ONE_HALF so that need not do it in inner loop */
+    cconvert->Cb_g_tab[i] = (-FIX(0.34414)) * x + ONE_HALF;
+  }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+}
+
+
+/*
+ * Convert some rows of samples to the output colorspace.
+ */
+
+METHODDEF(void)
+ycc_rgb_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
+{
+  switch (cinfo->out_color_space) {
+  case JCS_EXT_RGB:
+    ycc_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                num_rows);
+    break;
+  case JCS_EXT_RGBX:
+  case JCS_EXT_RGBA:
+    ycc_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                 num_rows);
+    break;
+  case JCS_EXT_BGR:
+    ycc_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                num_rows);
+    break;
+  case JCS_EXT_BGRX:
+  case JCS_EXT_BGRA:
+    ycc_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                 num_rows);
+    break;
+  case JCS_EXT_XBGR:
+  case JCS_EXT_ABGR:
+    ycc_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                 num_rows);
+    break;
+  case JCS_EXT_XRGB:
+  case JCS_EXT_ARGB:
+    ycc_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                 num_rows);
+    break;
+  default:
+    ycc_rgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                             num_rows);
+    break;
+  }
+}
+
+
+/**************** Cases other than YCbCr -> RGB **************/
+
+
+/*
+ * Initialize for RGB->grayscale colorspace conversion.
+ */
+
+LOCAL(void)
+build_rgb_y_table(j_decompress_ptr cinfo)
+{
+#if BITS_IN_JSAMPLE != 16
+  my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
+  JLONG *rgb_y_tab;
+  JLONG i;
+
+  /* Allocate and fill in the conversion tables. */
+  cconvert->rgb_y_tab = rgb_y_tab = (JLONG *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                (TABLE_SIZE * sizeof(JLONG)));
+
+  for (i = 0; i <= _MAXJSAMPLE; i++) {
+    rgb_y_tab[i + R_Y_OFF] = FIX(0.29900) * i;
+    rgb_y_tab[i + G_Y_OFF] = FIX(0.58700) * i;
+    rgb_y_tab[i + B_Y_OFF] = FIX(0.11400) * i + ONE_HALF;
+  }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+}
+
+
+/*
+ * Convert RGB to grayscale.
+ */
+
+METHODDEF(void)
+rgb_gray_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                 JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
+{
+#if BITS_IN_JSAMPLE != 16
+  my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
+  register int r, g, b;
+  register JLONG *ctab = cconvert->rgb_y_tab;
+  register _JSAMPROW outptr;
+  register _JSAMPROW inptr0, inptr1, inptr2;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      r = inptr0[col];
+      g = inptr1[col];
+      b = inptr2[col];
+      /* Y */
+      outptr[col] = (_JSAMPLE)((ctab[r + R_Y_OFF] + ctab[g + G_Y_OFF] +
+                                ctab[b + B_Y_OFF]) >> SCALEBITS);
+    }
+  }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+}
+
+
+/*
+ * Color conversion for no colorspace change: just copy the data,
+ * converting from separate-planes to interleaved representation.
+ */
+
+METHODDEF(void)
+null_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+             JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
+{
+  register _JSAMPROW inptr, inptr0, inptr1, inptr2, inptr3, outptr;
+  register JDIMENSION col;
+  register int num_components = cinfo->num_components;
+  JDIMENSION num_cols = cinfo->output_width;
+  int ci;
+
+  if (num_components == 3) {
+    while (--num_rows >= 0) {
+      inptr0 = input_buf[0][input_row];
+      inptr1 = input_buf[1][input_row];
+      inptr2 = input_buf[2][input_row];
+      input_row++;
+      outptr = *output_buf++;
+      for (col = 0; col < num_cols; col++) {
+        *outptr++ = inptr0[col];
+        *outptr++ = inptr1[col];
+        *outptr++ = inptr2[col];
+      }
+    }
+  } else if (num_components == 4) {
+    while (--num_rows >= 0) {
+      inptr0 = input_buf[0][input_row];
+      inptr1 = input_buf[1][input_row];
+      inptr2 = input_buf[2][input_row];
+      inptr3 = input_buf[3][input_row];
+      input_row++;
+      outptr = *output_buf++;
+      for (col = 0; col < num_cols; col++) {
+        *outptr++ = inptr0[col];
+        *outptr++ = inptr1[col];
+        *outptr++ = inptr2[col];
+        *outptr++ = inptr3[col];
+      }
+    }
+  } else {
+    while (--num_rows >= 0) {
+      for (ci = 0; ci < num_components; ci++) {
+        inptr = input_buf[ci][input_row];
+        outptr = *output_buf;
+        for (col = 0; col < num_cols; col++) {
+          outptr[ci] = inptr[col];
+          outptr += num_components;
+        }
+      }
+      output_buf++;
+      input_row++;
+    }
+  }
+}
+
+
+/*
+ * Color conversion for grayscale: just copy the data.
+ * This also works for YCbCr -> grayscale conversion, in which
+ * we just copy the Y (luminance) component and ignore chrominance.
+ */
+
+METHODDEF(void)
+grayscale_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                  JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
+{
+  _jcopy_sample_rows(input_buf[0], (int)input_row, output_buf, 0, num_rows,
+                     cinfo->output_width);
+}
+
+
+/*
+ * Convert grayscale to RGB
+ */
+
+METHODDEF(void)
+gray_rgb_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                 JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
+{
+  switch (cinfo->out_color_space) {
+  case JCS_EXT_RGB:
+    gray_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                 num_rows);
+    break;
+  case JCS_EXT_RGBX:
+  case JCS_EXT_RGBA:
+    gray_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                  num_rows);
+    break;
+  case JCS_EXT_BGR:
+    gray_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                 num_rows);
+    break;
+  case JCS_EXT_BGRX:
+  case JCS_EXT_BGRA:
+    gray_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                  num_rows);
+    break;
+  case JCS_EXT_XBGR:
+  case JCS_EXT_ABGR:
+    gray_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                  num_rows);
+    break;
+  case JCS_EXT_XRGB:
+  case JCS_EXT_ARGB:
+    gray_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                  num_rows);
+    break;
+  default:
+    gray_rgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                              num_rows);
+    break;
+  }
+}
+
+
+/*
+ * Convert plain RGB to extended RGB
+ */
+
+METHODDEF(void)
+rgb_rgb_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
+{
+  switch (cinfo->out_color_space) {
+  case JCS_EXT_RGB:
+    rgb_extrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                num_rows);
+    break;
+  case JCS_EXT_RGBX:
+  case JCS_EXT_RGBA:
+    rgb_extrgbx_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                 num_rows);
+    break;
+  case JCS_EXT_BGR:
+    rgb_extbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                num_rows);
+    break;
+  case JCS_EXT_BGRX:
+  case JCS_EXT_BGRA:
+    rgb_extbgrx_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                 num_rows);
+    break;
+  case JCS_EXT_XBGR:
+  case JCS_EXT_ABGR:
+    rgb_extxbgr_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                 num_rows);
+    break;
+  case JCS_EXT_XRGB:
+  case JCS_EXT_ARGB:
+    rgb_extxrgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                                 num_rows);
+    break;
+  default:
+    rgb_rgb_convert_internal(cinfo, input_buf, input_row, output_buf,
+                             num_rows);
+    break;
+  }
+}
+
+
+/*
+ * Adobe-style YCCK->CMYK conversion.
+ * We convert YCbCr to R=1-C, G=1-M, and B=1-Y using the same
+ * conversion as above, while passing K (black) unchanged.
+ * We assume build_ycc_rgb_table has been called.
+ */
+
+METHODDEF(void)
+ycck_cmyk_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                  JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
+{
+#if BITS_IN_JSAMPLE != 16
+  my_cconvert_ptr cconvert = (my_cconvert_ptr)cinfo->cconvert;
+  register int y, cb, cr;
+  register _JSAMPROW outptr;
+  register _JSAMPROW inptr0, inptr1, inptr2, inptr3;
+  register JDIMENSION col;
+  JDIMENSION num_cols = cinfo->output_width;
+  /* copy these pointers into registers if possible */
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
+  register int *Crrtab = cconvert->Cr_r_tab;
+  register int *Cbbtab = cconvert->Cb_b_tab;
+  register JLONG *Crgtab = cconvert->Cr_g_tab;
+  register JLONG *Cbgtab = cconvert->Cb_g_tab;
+  SHIFT_TEMPS
+
+  while (--num_rows >= 0) {
+    inptr0 = input_buf[0][input_row];
+    inptr1 = input_buf[1][input_row];
+    inptr2 = input_buf[2][input_row];
+    inptr3 = input_buf[3][input_row];
+    input_row++;
+    outptr = *output_buf++;
+    for (col = 0; col < num_cols; col++) {
+      y  = inptr0[col];
+      cb = inptr1[col];
+      cr = inptr2[col];
+      /* Range-limiting is essential due to noise introduced by DCT losses. */
+      outptr[0] = range_limit[_MAXJSAMPLE - (y + Crrtab[cr])];  /* red */
+      outptr[1] = range_limit[_MAXJSAMPLE - (y +                /* green */
+                              ((int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr],
+                                                 SCALEBITS)))];
+      outptr[2] = range_limit[_MAXJSAMPLE - (y + Cbbtab[cb])];  /* blue */
+      /* K passes through unchanged */
+      outptr[3] = inptr3[col];
+      outptr += 4;
+    }
+  }
+#else
+  ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+}
+
+
+/*
+ * RGB565 conversion
+ */
+
+#define PACK_SHORT_565_LE(r, g, b) \
+  ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3))
+#define PACK_SHORT_565_BE(r, g, b) \
+  (((r) & 0xF8) | ((g) >> 5) | (((g) << 11) & 0xE000) | (((b) << 5) & 0x1F00))
+
+#define PACK_TWO_PIXELS_LE(l, r)    ((r << 16) | l)
+#define PACK_TWO_PIXELS_BE(l, r)    ((l << 16) | r)
+
+#define PACK_NEED_ALIGNMENT(ptr)    (((size_t)(ptr)) & 3)
+
+#define WRITE_TWO_ALIGNED_PIXELS(addr, pixels)  ((*(int *)(addr)) = pixels)
+
+#define DITHER_565_R(r, dither)  ((r) + ((dither) & 0xFF))
+#define DITHER_565_G(g, dither)  ((g) + (((dither) & 0xFF) >> 1))
+#define DITHER_565_B(b, dither)  ((b) + ((dither) & 0xFF))
+
+
+/* Declarations for ordered dithering
+ *
+ * We use a 4x4 ordered dither array packed into 32 bits.  This array is
+ * sufficient for dithering RGB888 to RGB565.
+ */
+
+#define DITHER_MASK       0x3
+#define DITHER_ROTATE(x)  ((((x) & 0xFF) << 24) | (((x) >> 8) & 0x00FFFFFF))
+static const JLONG dither_matrix[4] = {
+  0x0008020A,
+  0x0C040E06,
+  0x030B0109,
+  0x0F070D05
+};
+
+
+static INLINE boolean is_big_endian(void)
+{
+  int test_value = 1;
+  if (*(char *)&test_value != 1)
+    return TRUE;
+  return FALSE;
+}
+
+
+/* Include inline routines for RGB565 conversion */
+
+#define PACK_SHORT_565  PACK_SHORT_565_LE
+#define PACK_TWO_PIXELS  PACK_TWO_PIXELS_LE
+#define ycc_rgb565_convert_internal  ycc_rgb565_convert_le
+#define ycc_rgb565D_convert_internal  ycc_rgb565D_convert_le
+#define rgb_rgb565_convert_internal  rgb_rgb565_convert_le
+#define rgb_rgb565D_convert_internal  rgb_rgb565D_convert_le
+#define gray_rgb565_convert_internal  gray_rgb565_convert_le
+#define gray_rgb565D_convert_internal  gray_rgb565D_convert_le
+#include "jdcol565.c"
+#undef PACK_SHORT_565
+#undef PACK_TWO_PIXELS
+#undef ycc_rgb565_convert_internal
+#undef ycc_rgb565D_convert_internal
+#undef rgb_rgb565_convert_internal
+#undef rgb_rgb565D_convert_internal
+#undef gray_rgb565_convert_internal
+#undef gray_rgb565D_convert_internal
+
+#define PACK_SHORT_565  PACK_SHORT_565_BE
+#define PACK_TWO_PIXELS  PACK_TWO_PIXELS_BE
+#define ycc_rgb565_convert_internal  ycc_rgb565_convert_be
+#define ycc_rgb565D_convert_internal  ycc_rgb565D_convert_be
+#define rgb_rgb565_convert_internal  rgb_rgb565_convert_be
+#define rgb_rgb565D_convert_internal  rgb_rgb565D_convert_be
+#define gray_rgb565_convert_internal  gray_rgb565_convert_be
+#define gray_rgb565D_convert_internal  gray_rgb565D_convert_be
+#include "jdcol565.c"
+#undef PACK_SHORT_565
+#undef PACK_TWO_PIXELS
+#undef ycc_rgb565_convert_internal
+#undef ycc_rgb565D_convert_internal
+#undef rgb_rgb565_convert_internal
+#undef rgb_rgb565D_convert_internal
+#undef gray_rgb565_convert_internal
+#undef gray_rgb565D_convert_internal
+
+
+METHODDEF(void)
+ycc_rgb565_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                   JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
+{
+  if (is_big_endian())
+    ycc_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
+  else
+    ycc_rgb565_convert_le(cinfo, input_buf, input_row, output_buf, num_rows);
+}
+
+
+METHODDEF(void)
+ycc_rgb565D_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                    JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
+{
+  if (is_big_endian())
+    ycc_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
+  else
+    ycc_rgb565D_convert_le(cinfo, input_buf, input_row, output_buf, num_rows);
+}
+
+
+METHODDEF(void)
+rgb_rgb565_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                   JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
+{
+  if (is_big_endian())
+    rgb_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
+  else
+    rgb_rgb565_convert_le(cinfo, input_buf, input_row, output_buf, num_rows);
+}
+
+
+METHODDEF(void)
+rgb_rgb565D_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                    JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
+{
+  if (is_big_endian())
+    rgb_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
+  else
+    rgb_rgb565D_convert_le(cinfo, input_buf, input_row, output_buf, num_rows);
+}
+
+
+METHODDEF(void)
+gray_rgb565_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                    JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
+{
+  if (is_big_endian())
+    gray_rgb565_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
+  else
+    gray_rgb565_convert_le(cinfo, input_buf, input_row, output_buf, num_rows);
+}
+
+
+METHODDEF(void)
+gray_rgb565D_convert(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                     JDIMENSION input_row, _JSAMPARRAY output_buf, int num_rows)
+{
+  if (is_big_endian())
+    gray_rgb565D_convert_be(cinfo, input_buf, input_row, output_buf, num_rows);
+  else
+    gray_rgb565D_convert_le(cinfo, input_buf, input_row, output_buf, num_rows);
+}
+
+
+/*
+ * Empty method for start_pass.
+ */
+
+METHODDEF(void)
+start_pass_dcolor(j_decompress_ptr cinfo)
+{
+  /* no work needed */
+}
+
+
+/*
+ * Module initialization routine for output colorspace conversion.
+ */
+
+GLOBAL(void)
+_jinit_color_deconverter(j_decompress_ptr cinfo)
+{
+  my_cconvert_ptr cconvert;
+  int ci;
+
+#ifdef D_LOSSLESS_SUPPORTED
+  if (cinfo->master->lossless) {
+#if BITS_IN_JSAMPLE == 8
+    if (cinfo->data_precision > BITS_IN_JSAMPLE || cinfo->data_precision < 2)
+#else
+    if (cinfo->data_precision > BITS_IN_JSAMPLE ||
+        cinfo->data_precision < BITS_IN_JSAMPLE - 3)
+#endif
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  } else
+#endif
+  {
+    if (cinfo->data_precision != BITS_IN_JSAMPLE)
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  }
+
+  cconvert = (my_cconvert_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(my_color_deconverter));
+  cinfo->cconvert = (struct jpeg_color_deconverter *)cconvert;
+  cconvert->pub.start_pass = start_pass_dcolor;
+
+  /* Make sure num_components agrees with jpeg_color_space */
+  switch (cinfo->jpeg_color_space) {
+  case JCS_GRAYSCALE:
+    if (cinfo->num_components != 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  case JCS_RGB:
+  case JCS_YCbCr:
+    if (cinfo->num_components != 3)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  case JCS_CMYK:
+  case JCS_YCCK:
+    if (cinfo->num_components != 4)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+
+  default:                      /* JCS_UNKNOWN can be anything */
+    if (cinfo->num_components < 1)
+      ERREXIT(cinfo, JERR_BAD_J_COLORSPACE);
+    break;
+  }
+
+  /* Set out_color_components and conversion method based on requested space.
+   * Also clear the component_needed flags for any unused components,
+   * so that earlier pipeline stages can avoid useless computation.
+   * NOTE: We do not allow any lossy color conversion algorithms in lossless
+   * mode.
+   */
+
+  switch (cinfo->out_color_space) {
+  case JCS_GRAYSCALE:
+#ifdef D_LOSSLESS_SUPPORTED
+    if (cinfo->master->lossless &&
+        cinfo->jpeg_color_space != cinfo->out_color_space)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+    cinfo->out_color_components = 1;
+    if (cinfo->jpeg_color_space == JCS_GRAYSCALE ||
+        cinfo->jpeg_color_space == JCS_YCbCr) {
+      cconvert->pub._color_convert = grayscale_convert;
+      /* For color->grayscale conversion, only the Y (0) component is needed */
+      for (ci = 1; ci < cinfo->num_components; ci++)
+        cinfo->comp_info[ci].component_needed = FALSE;
+    } else if (cinfo->jpeg_color_space == JCS_RGB) {
+      cconvert->pub._color_convert = rgb_gray_convert;
+      build_rgb_y_table(cinfo);
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_RGB:
+  case JCS_EXT_RGB:
+  case JCS_EXT_RGBX:
+  case JCS_EXT_BGR:
+  case JCS_EXT_BGRX:
+  case JCS_EXT_XBGR:
+  case JCS_EXT_XRGB:
+  case JCS_EXT_RGBA:
+  case JCS_EXT_BGRA:
+  case JCS_EXT_ABGR:
+  case JCS_EXT_ARGB:
+#ifdef D_LOSSLESS_SUPPORTED
+    if (cinfo->master->lossless && cinfo->jpeg_color_space != JCS_RGB)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+    cinfo->out_color_components = rgb_pixelsize[cinfo->out_color_space];
+    if (cinfo->jpeg_color_space == JCS_YCbCr) {
+#ifdef WITH_SIMD
+      if (jsimd_can_ycc_rgb())
+        cconvert->pub._color_convert = jsimd_ycc_rgb_convert;
+      else
+#endif
+      {
+        cconvert->pub._color_convert = ycc_rgb_convert;
+        build_ycc_rgb_table(cinfo);
+      }
+    } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
+      cconvert->pub._color_convert = gray_rgb_convert;
+    } else if (cinfo->jpeg_color_space == JCS_RGB) {
+      if (rgb_red[cinfo->out_color_space] == 0 &&
+          rgb_green[cinfo->out_color_space] == 1 &&
+          rgb_blue[cinfo->out_color_space] == 2 &&
+          rgb_pixelsize[cinfo->out_color_space] == 3)
+        cconvert->pub._color_convert = null_convert;
+      else
+        cconvert->pub._color_convert = rgb_rgb_convert;
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  case JCS_RGB565:
+#ifdef D_LOSSLESS_SUPPORTED
+    if (cinfo->master->lossless)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+    cinfo->out_color_components = 3;
+    if (cinfo->dither_mode == JDITHER_NONE) {
+      if (cinfo->jpeg_color_space == JCS_YCbCr) {
+#ifdef WITH_SIMD
+        if (jsimd_can_ycc_rgb565())
+          cconvert->pub._color_convert = jsimd_ycc_rgb565_convert;
+        else
+#endif
+        {
+          cconvert->pub._color_convert = ycc_rgb565_convert;
+          build_ycc_rgb_table(cinfo);
+        }
+      } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
+        cconvert->pub._color_convert = gray_rgb565_convert;
+      } else if (cinfo->jpeg_color_space == JCS_RGB) {
+        cconvert->pub._color_convert = rgb_rgb565_convert;
+      } else
+        ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    } else {
+      /* only ordered dithering is supported */
+      if (cinfo->jpeg_color_space == JCS_YCbCr) {
+        cconvert->pub._color_convert = ycc_rgb565D_convert;
+        build_ycc_rgb_table(cinfo);
+      } else if (cinfo->jpeg_color_space == JCS_GRAYSCALE) {
+        cconvert->pub._color_convert = gray_rgb565D_convert;
+      } else if (cinfo->jpeg_color_space == JCS_RGB) {
+        cconvert->pub._color_convert = rgb_rgb565D_convert;
+      } else
+        ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    }
+    break;
+
+  case JCS_CMYK:
+#ifdef D_LOSSLESS_SUPPORTED
+    if (cinfo->master->lossless &&
+        cinfo->jpeg_color_space != cinfo->out_color_space)
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+#endif
+    cinfo->out_color_components = 4;
+    if (cinfo->jpeg_color_space == JCS_YCCK) {
+      cconvert->pub._color_convert = ycck_cmyk_convert;
+      build_ycc_rgb_table(cinfo);
+    } else if (cinfo->jpeg_color_space == JCS_CMYK) {
+      cconvert->pub._color_convert = null_convert;
+    } else
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+
+  default:
+    /* Permit null conversion to same output space */
+    if (cinfo->out_color_space == cinfo->jpeg_color_space) {
+      cinfo->out_color_components = cinfo->num_components;
+      cconvert->pub._color_convert = null_convert;
+    } else                      /* unsupported non-null conversion */
+      ERREXIT(cinfo, JERR_CONVERSION_NOTIMPL);
+    break;
+  }
+
+  if (cinfo->quantize_colors)
+    cinfo->output_components = 1; /* single colormapped output component */
+  else
+    cinfo->output_components = cinfo->out_color_components;
+}
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
diff --git a/thirdparty/libjpeg-turbo/src/jdct.h b/thirdparty/libjpeg-turbo/src/jdct.h
new file mode 100644
index 00000000000..0411a79bc0b
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdct.h
@@ -0,0 +1,221 @@
+/*
+ * jdct.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015, 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This include file contains common declarations for the forward and
+ * inverse DCT modules.  These declarations are private to the DCT managers
+ * (jcdctmgr.c, jddctmgr.c) and the individual DCT algorithms.
+ * The individual DCT algorithms are kept in separate files to ease
+ * machine-dependent tuning (e.g., assembly coding).
+ */
+
+#include "jsamplecomp.h"
+
+
+/*
+ * A forward DCT routine is given a pointer to a work area of type DCTELEM[];
+ * the DCT is to be performed in-place in that buffer.  Type DCTELEM is int
+ * for 8-bit samples, JLONG for 12-bit samples.  (NOTE: Floating-point DCT
+ * implementations use an array of type FAST_FLOAT, instead.)
+ * The DCT inputs are expected to be signed (range +-_CENTERJSAMPLE).
+ * The DCT outputs are returned scaled up by a factor of 8; they therefore
+ * have a range of +-8K for 8-bit data, +-128K for 12-bit data.  This
+ * convention improves accuracy in integer implementations and saves some
+ * work in floating-point ones.
+ * Quantization of the output coefficients is done by jcdctmgr.c. This
+ * step requires an unsigned type and also one with twice the bits.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#ifndef WITH_SIMD
+typedef int DCTELEM;            /* 16 or 32 bits is fine */
+typedef unsigned int UDCTELEM;
+typedef unsigned long long UDCTELEM2;
+#else
+typedef short DCTELEM;          /* prefer 16 bit with SIMD for parellelism */
+typedef unsigned short UDCTELEM;
+typedef unsigned int UDCTELEM2;
+#endif
+#else
+typedef JLONG DCTELEM;          /* must have 32 bits */
+typedef unsigned long long UDCTELEM2;
+#endif
+
+
+/*
+ * An inverse DCT routine is given a pointer to the input JBLOCK and a pointer
+ * to an output sample array.  The routine must dequantize the input data as
+ * well as perform the IDCT; for dequantization, it uses the multiplier table
+ * pointed to by compptr->dct_table.  The output data is to be placed into the
+ * sample array starting at a specified column.  (Any row offset needed will
+ * be applied to the array pointer before it is passed to the IDCT code.)
+ * Note that the number of samples emitted by the IDCT routine is
+ * DCT_scaled_size * DCT_scaled_size.
+ */
+
+/* typedef inverse_DCT_method_ptr is declared in jpegint.h */
+
+/*
+ * Each IDCT routine has its own ideas about the best dct_table element type.
+ */
+
+typedef MULTIPLIER ISLOW_MULT_TYPE;  /* short or int, whichever is faster */
+#if BITS_IN_JSAMPLE == 8
+typedef MULTIPLIER IFAST_MULT_TYPE;  /* 16 bits is OK, use short if faster */
+#define IFAST_SCALE_BITS  2          /* fractional bits in scale factors */
+#else
+typedef JLONG IFAST_MULT_TYPE;       /* need 32 bits for scaled quantizers */
+#define IFAST_SCALE_BITS  13         /* fractional bits in scale factors */
+#endif
+typedef FAST_FLOAT FLOAT_MULT_TYPE;  /* preferred floating type */
+
+
+/*
+ * Each IDCT routine is responsible for range-limiting its results and
+ * converting them to unsigned form (0.._MAXJSAMPLE).  The raw outputs could
+ * be quite far out of range if the input data is corrupt, so a bulletproof
+ * range-limiting step is required.  We use a mask-and-table-lookup method
+ * to do the combined operations quickly.  See the comments with
+ * prepare_range_limit_table (in jdmaster.c) for more info.
+ */
+
+#define IDCT_range_limit(cinfo) \
+  ((_JSAMPLE *)((cinfo)->sample_range_limit) + _CENTERJSAMPLE)
+
+#define RANGE_MASK  (_MAXJSAMPLE * 4 + 3) /* 2 bits wider than legal samples */
+
+
+/* Extern declarations for the forward and inverse DCT routines. */
+
+EXTERN(void) _jpeg_fdct_islow(DCTELEM *data);
+EXTERN(void) _jpeg_fdct_ifast(DCTELEM *data);
+EXTERN(void) jpeg_fdct_float(FAST_FLOAT *data);
+
+EXTERN(void) _jpeg_idct_islow(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_ifast(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_float(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_7x7(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            _JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_6x6(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            _JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_5x5(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            _JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_4x4(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            _JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_3x3(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            _JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_2x2(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            _JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_1x1(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            _JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_9x9(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            _JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_10x10(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_11x11(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_12x12(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_13x13(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_14x14(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_15x15(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) _jpeg_idct_16x16(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+
+
+/*
+ * Macros for handling fixed-point arithmetic; these are used by many
+ * but not all of the DCT/IDCT modules.
+ *
+ * All values are expected to be of type JLONG.
+ * Fractional constants are scaled left by CONST_BITS bits.
+ * CONST_BITS is defined within each module using these macros,
+ * and may differ from one module to the next.
+ */
+
+#define ONE          ((JLONG)1)
+#define CONST_SCALE  (ONE << CONST_BITS)
+
+/* Convert a positive real constant to an integer scaled by CONST_SCALE.
+ * Caution: some C compilers fail to reduce "FIX(constant)" at compile time,
+ * thus causing a lot of useless floating-point operations at run time.
+ */
+
+#define FIX(x)  ((JLONG)((x) * CONST_SCALE + 0.5))
+
+/* Descale and correctly round a JLONG value that's scaled by N bits.
+ * We assume RIGHT_SHIFT rounds towards minus infinity, so adding
+ * the fudge factor is correct for either sign of X.
+ */
+
+#define DESCALE(x, n)  RIGHT_SHIFT((x) + (ONE << ((n) - 1)), n)
+
+/* Multiply a JLONG variable by a JLONG constant to yield a JLONG result.
+ * This macro is used only when the two inputs will actually be no more than
+ * 16 bits wide, so that a 16x16->32 bit multiply can be used instead of a
+ * full 32x32 multiply.  This provides a useful speedup on many machines.
+ * Unfortunately there is no way to specify a 16x16->32 multiply portably
+ * in C, but some C compilers will do the right thing if you provide the
+ * correct combination of casts.
+ */
+
+#ifdef SHORTxSHORT_32           /* may work if 'int' is 32 bits */
+#define MULTIPLY16C16(var, const)  (((INT16)(var)) * ((INT16)(const)))
+#endif
+#ifdef SHORTxLCONST_32          /* known to work with Microsoft C 6.0 */
+#define MULTIPLY16C16(var, const)  (((INT16)(var)) * ((JLONG)(const)))
+#endif
+
+#ifndef MULTIPLY16C16           /* default definition */
+#define MULTIPLY16C16(var, const)  ((var) * (const))
+#endif
+
+/* Same except both inputs are variables. */
+
+#ifdef SHORTxSHORT_32           /* may work if 'int' is 32 bits */
+#define MULTIPLY16V16(var1, var2)  (((INT16)(var1)) * ((INT16)(var2)))
+#endif
+
+#ifndef MULTIPLY16V16           /* default definition */
+#define MULTIPLY16V16(var1, var2)  ((var1) * (var2))
+#endif
diff --git a/thirdparty/libjpeg-turbo/src/jddctmgr.c b/thirdparty/libjpeg-turbo/src/jddctmgr.c
new file mode 100644
index 00000000000..0bd8c2b591d
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jddctmgr.c
@@ -0,0 +1,365 @@
+/*
+ * jddctmgr.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * Modified 2002-2010 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2010, 2015, 2022, D. R. Commander.
+ * Copyright (C) 2013, MIPS Technologies, Inc., California.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains the inverse-DCT management logic.
+ * This code selects a particular IDCT implementation to be used,
+ * and it performs related housekeeping chores.  No code in this file
+ * is executed per IDCT step, only during output pass setup.
+ *
+ * Note that the IDCT routines are responsible for performing coefficient
+ * dequantization as well as the IDCT proper.  This module sets up the
+ * dequantization multiplier table needed by the IDCT routine.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"               /* Private declarations for DCT subsystem */
+#include "jsimddct.h"
+#include "jpegapicomp.h"
+
+
+/*
+ * The decompressor input side (jdinput.c) saves away the appropriate
+ * quantization table for each component at the start of the first scan
+ * involving that component.  (This is necessary in order to correctly
+ * decode files that reuse Q-table slots.)
+ * When we are ready to make an output pass, the saved Q-table is converted
+ * to a multiplier table that will actually be used by the IDCT routine.
+ * The multiplier table contents are IDCT-method-dependent.  To support
+ * application changes in IDCT method between scans, we can remake the
+ * multiplier tables if necessary.
+ * In buffered-image mode, the first output pass may occur before any data
+ * has been seen for some components, and thus before their Q-tables have
+ * been saved away.  To handle this case, multiplier tables are preset
+ * to zeroes; the result of the IDCT will be a neutral gray level.
+ */
+
+
+/* Private subobject for this module */
+
+typedef struct {
+  struct jpeg_inverse_dct pub;  /* public fields */
+
+  /* This array contains the IDCT method code that each multiplier table
+   * is currently set up for, or -1 if it's not yet set up.
+   * The actual multiplier tables are pointed to by dct_table in the
+   * per-component comp_info structures.
+   */
+  int cur_method[MAX_COMPONENTS];
+} my_idct_controller;
+
+typedef my_idct_controller *my_idct_ptr;
+
+
+/* Allocated multiplier tables: big enough for any supported variant */
+
+typedef union {
+  ISLOW_MULT_TYPE islow_array[DCTSIZE2];
+#ifdef DCT_IFAST_SUPPORTED
+  IFAST_MULT_TYPE ifast_array[DCTSIZE2];
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+  FLOAT_MULT_TYPE float_array[DCTSIZE2];
+#endif
+} multiplier_table;
+
+
+/* The current scaled-IDCT routines require ISLOW-style multiplier tables,
+ * so be sure to compile that code if either ISLOW or SCALING is requested.
+ */
+#ifdef DCT_ISLOW_SUPPORTED
+#define PROVIDE_ISLOW_TABLES
+#else
+#ifdef IDCT_SCALING_SUPPORTED
+#define PROVIDE_ISLOW_TABLES
+#endif
+#endif
+
+
+/*
+ * Prepare for an output pass.
+ * Here we select the proper IDCT routine for each component and build
+ * a matching multiplier table.
+ */
+
+METHODDEF(void)
+start_pass(j_decompress_ptr cinfo)
+{
+  my_idct_ptr idct = (my_idct_ptr)cinfo->idct;
+  int ci, i;
+  jpeg_component_info *compptr;
+  int method = 0;
+  _inverse_DCT_method_ptr method_ptr = NULL;
+  JQUANT_TBL *qtbl;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Select the proper IDCT routine for this component's scaling */
+    switch (compptr->_DCT_scaled_size) {
+#ifdef IDCT_SCALING_SUPPORTED
+    case 1:
+      method_ptr = _jpeg_idct_1x1;
+      method = JDCT_ISLOW;      /* jidctred uses islow-style table */
+      break;
+    case 2:
+#ifdef WITH_SIMD
+      if (jsimd_can_idct_2x2())
+        method_ptr = jsimd_idct_2x2;
+      else
+#endif
+        method_ptr = _jpeg_idct_2x2;
+      method = JDCT_ISLOW;      /* jidctred uses islow-style table */
+      break;
+    case 3:
+      method_ptr = _jpeg_idct_3x3;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 4:
+#ifdef WITH_SIMD
+      if (jsimd_can_idct_4x4())
+        method_ptr = jsimd_idct_4x4;
+      else
+#endif
+        method_ptr = _jpeg_idct_4x4;
+      method = JDCT_ISLOW;      /* jidctred uses islow-style table */
+      break;
+    case 5:
+      method_ptr = _jpeg_idct_5x5;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 6:
+#if defined(WITH_SIMD) && defined(__mips__)
+      if (jsimd_can_idct_6x6())
+        method_ptr = jsimd_idct_6x6;
+      else
+#endif
+      method_ptr = _jpeg_idct_6x6;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 7:
+      method_ptr = _jpeg_idct_7x7;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+#endif
+    case DCTSIZE:
+      switch (cinfo->dct_method) {
+#ifdef DCT_ISLOW_SUPPORTED
+      case JDCT_ISLOW:
+#ifdef WITH_SIMD
+        if (jsimd_can_idct_islow())
+          method_ptr = jsimd_idct_islow;
+        else
+#endif
+          method_ptr = _jpeg_idct_islow;
+        method = JDCT_ISLOW;
+        break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+      case JDCT_IFAST:
+#ifdef WITH_SIMD
+        if (jsimd_can_idct_ifast())
+          method_ptr = jsimd_idct_ifast;
+        else
+#endif
+          method_ptr = _jpeg_idct_ifast;
+        method = JDCT_IFAST;
+        break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+      case JDCT_FLOAT:
+#ifdef WITH_SIMD
+        if (jsimd_can_idct_float())
+          method_ptr = jsimd_idct_float;
+        else
+#endif
+          method_ptr = _jpeg_idct_float;
+        method = JDCT_FLOAT;
+        break;
+#endif
+      default:
+        ERREXIT(cinfo, JERR_NOT_COMPILED);
+        break;
+      }
+      break;
+#ifdef IDCT_SCALING_SUPPORTED
+    case 9:
+      method_ptr = _jpeg_idct_9x9;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 10:
+      method_ptr = _jpeg_idct_10x10;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 11:
+      method_ptr = _jpeg_idct_11x11;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 12:
+#if defined(WITH_SIMD) && defined(__mips__)
+      if (jsimd_can_idct_12x12())
+        method_ptr = jsimd_idct_12x12;
+      else
+#endif
+      method_ptr = _jpeg_idct_12x12;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 13:
+      method_ptr = _jpeg_idct_13x13;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 14:
+      method_ptr = _jpeg_idct_14x14;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 15:
+      method_ptr = _jpeg_idct_15x15;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+    case 16:
+      method_ptr = _jpeg_idct_16x16;
+      method = JDCT_ISLOW;      /* jidctint uses islow-style table */
+      break;
+#endif
+    default:
+      ERREXIT1(cinfo, JERR_BAD_DCTSIZE, compptr->_DCT_scaled_size);
+      break;
+    }
+    idct->pub._inverse_DCT[ci] = method_ptr;
+    /* Create multiplier table from quant table.
+     * However, we can skip this if the component is uninteresting
+     * or if we already built the table.  Also, if no quant table
+     * has yet been saved for the component, we leave the
+     * multiplier table all-zero; we'll be reading zeroes from the
+     * coefficient controller's buffer anyway.
+     */
+    if (!compptr->component_needed || idct->cur_method[ci] == method)
+      continue;
+    qtbl = compptr->quant_table;
+    if (qtbl == NULL)           /* happens if no data yet for component */
+      continue;
+    idct->cur_method[ci] = method;
+    switch (method) {
+#ifdef PROVIDE_ISLOW_TABLES
+    case JDCT_ISLOW:
+      {
+        /* For LL&M IDCT method, multipliers are equal to raw quantization
+         * coefficients, but are stored as ints to ensure access efficiency.
+         */
+        ISLOW_MULT_TYPE *ismtbl = (ISLOW_MULT_TYPE *)compptr->dct_table;
+        for (i = 0; i < DCTSIZE2; i++) {
+          ismtbl[i] = (ISLOW_MULT_TYPE)qtbl->quantval[i];
+        }
+      }
+      break;
+#endif
+#ifdef DCT_IFAST_SUPPORTED
+    case JDCT_IFAST:
+      {
+        /* For AA&N IDCT method, multipliers are equal to quantization
+         * coefficients scaled by scalefactor[row]*scalefactor[col], where
+         *   scalefactor[0] = 1
+         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+         * For integer operation, the multiplier table is to be scaled by
+         * IFAST_SCALE_BITS.
+         */
+        IFAST_MULT_TYPE *ifmtbl = (IFAST_MULT_TYPE *)compptr->dct_table;
+#define CONST_BITS  14
+        static const INT16 aanscales[DCTSIZE2] = {
+          /* precomputed values scaled up by 14 bits */
+          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+          22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
+          21407, 29692, 27969, 25172, 21407, 16819, 11585,  5906,
+          19266, 26722, 25172, 22654, 19266, 15137, 10426,  5315,
+          16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
+          12873, 17855, 16819, 15137, 12873, 10114,  6967,  3552,
+           8867, 12299, 11585, 10426,  8867,  6967,  4799,  2446,
+           4520,  6270,  5906,  5315,  4520,  3552,  2446,  1247
+        };
+        SHIFT_TEMPS
+
+        for (i = 0; i < DCTSIZE2; i++) {
+          ifmtbl[i] = (IFAST_MULT_TYPE)
+            DESCALE(MULTIPLY16V16((JLONG)qtbl->quantval[i],
+                                  (JLONG)aanscales[i]),
+                    CONST_BITS - IFAST_SCALE_BITS);
+        }
+      }
+      break;
+#endif
+#ifdef DCT_FLOAT_SUPPORTED
+    case JDCT_FLOAT:
+      {
+        /* For float AA&N IDCT method, multipliers are equal to quantization
+         * coefficients scaled by scalefactor[row]*scalefactor[col], where
+         *   scalefactor[0] = 1
+         *   scalefactor[k] = cos(k*PI/16) * sqrt(2)    for k=1..7
+         */
+        FLOAT_MULT_TYPE *fmtbl = (FLOAT_MULT_TYPE *)compptr->dct_table;
+        int row, col;
+        static const double aanscalefactor[DCTSIZE] = {
+          1.0, 1.387039845, 1.306562965, 1.175875602,
+          1.0, 0.785694958, 0.541196100, 0.275899379
+        };
+
+        i = 0;
+        for (row = 0; row < DCTSIZE; row++) {
+          for (col = 0; col < DCTSIZE; col++) {
+            fmtbl[i] = (FLOAT_MULT_TYPE)
+              ((double)qtbl->quantval[i] *
+               aanscalefactor[row] * aanscalefactor[col]);
+            i++;
+          }
+        }
+      }
+      break;
+#endif
+    default:
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+      break;
+    }
+  }
+}
+
+
+/*
+ * Initialize IDCT manager.
+ */
+
+GLOBAL(void)
+_jinit_inverse_dct(j_decompress_ptr cinfo)
+{
+  my_idct_ptr idct;
+  int ci;
+  jpeg_component_info *compptr;
+
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  idct = (my_idct_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(my_idct_controller));
+  cinfo->idct = (struct jpeg_inverse_dct *)idct;
+  idct->pub.start_pass = start_pass;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Allocate and pre-zero a multiplier table for each component */
+    compptr->dct_table =
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                  sizeof(multiplier_table));
+    memset(compptr->dct_table, 0, sizeof(multiplier_table));
+    /* Mark multiplier table not yet set up for any method */
+    idct->cur_method[ci] = -1;
+  }
+}
diff --git a/thirdparty/libjpeg-turbo/src/jdhuff.c b/thirdparty/libjpeg-turbo/src/jdhuff.c
new file mode 100644
index 00000000000..cd8c0847a22
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdhuff.c
@@ -0,0 +1,836 @@
+/*
+ * jdhuff.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009-2011, 2016, 2018-2019, 2022, D. R. Commander.
+ * Copyright (C) 2018, Matthias Räncker.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains Huffman entropy decoding routines.
+ *
+ * Much of the complexity here has to do with supporting input suspension.
+ * If the data source module demands suspension, we want to be able to back
+ * up to the start of the current MCU.  To do this, we copy state variables
+ * into local working storage, and update them back to the permanent
+ * storage only upon successful completion of an MCU.
+ *
+ * NOTE: All referenced figures are from
+ * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdhuff.h"             /* Declarations shared with jd*huff.c */
+#include "jpegapicomp.h"
+#include "jstdhuff.c"
+
+
+/*
+ * Expanded entropy decoder object for Huffman decoding.
+ *
+ * The savable_state subrecord contains fields that change within an MCU,
+ * but must not be updated permanently until we complete the MCU.
+ */
+
+typedef struct {
+  int last_dc_val[MAX_COMPS_IN_SCAN]; /* last DC coef for each component */
+} savable_state;
+
+typedef struct {
+  struct jpeg_entropy_decoder pub; /* public fields */
+
+  /* These fields are loaded into local variables at start of each MCU.
+   * In case of suspension, we exit WITHOUT updating them.
+   */
+  bitread_perm_state bitstate;  /* Bit buffer at start of MCU */
+  savable_state saved;          /* Other state at start of MCU */
+
+  /* These fields are NOT loaded into local working state. */
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
+
+  /* Pointers to derived tables (these workspaces have image lifespan) */
+  d_derived_tbl *dc_derived_tbls[NUM_HUFF_TBLS];
+  d_derived_tbl *ac_derived_tbls[NUM_HUFF_TBLS];
+
+  /* Precalculated info set up by start_pass for use in decode_mcu: */
+
+  /* Pointers to derived tables to be used for each block within an MCU */
+  d_derived_tbl *dc_cur_tbls[D_MAX_BLOCKS_IN_MCU];
+  d_derived_tbl *ac_cur_tbls[D_MAX_BLOCKS_IN_MCU];
+  /* Whether we care about the DC and AC coefficient values for each block */
+  boolean dc_needed[D_MAX_BLOCKS_IN_MCU];
+  boolean ac_needed[D_MAX_BLOCKS_IN_MCU];
+} huff_entropy_decoder;
+
+typedef huff_entropy_decoder *huff_entropy_ptr;
+
+
+/*
+ * Initialize for a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+start_pass_huff_decoder(j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
+  int ci, blkn, dctbl, actbl;
+  d_derived_tbl **pdtbl;
+  jpeg_component_info *compptr;
+
+  /* Check that the scan parameters Ss, Se, Ah/Al are OK for sequential JPEG.
+   * This ought to be an error condition, but we make it a warning because
+   * there are some baseline files out there with all zeroes in these bytes.
+   */
+  if (cinfo->Ss != 0 || cinfo->Se != DCTSIZE2 - 1 ||
+      cinfo->Ah != 0 || cinfo->Al != 0)
+    WARNMS(cinfo, JWRN_NOT_SEQUENTIAL);
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    dctbl = compptr->dc_tbl_no;
+    actbl = compptr->ac_tbl_no;
+    /* Compute derived values for Huffman tables */
+    /* We may do this more than once for a table, but it's not expensive */
+    pdtbl = (d_derived_tbl **)(entropy->dc_derived_tbls) + dctbl;
+    jpeg_make_d_derived_tbl(cinfo, TRUE, dctbl, pdtbl);
+    pdtbl = (d_derived_tbl **)(entropy->ac_derived_tbls) + actbl;
+    jpeg_make_d_derived_tbl(cinfo, FALSE, actbl, pdtbl);
+    /* Initialize DC predictions to 0 */
+    entropy->saved.last_dc_val[ci] = 0;
+  }
+
+  /* Precalculate decoding info for each block in an MCU of this scan */
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    ci = cinfo->MCU_membership[blkn];
+    compptr = cinfo->cur_comp_info[ci];
+    /* Precalculate which table to use for each block */
+    entropy->dc_cur_tbls[blkn] = entropy->dc_derived_tbls[compptr->dc_tbl_no];
+    entropy->ac_cur_tbls[blkn] = entropy->ac_derived_tbls[compptr->ac_tbl_no];
+    /* Decide whether we really care about the coefficient values */
+    if (compptr->component_needed) {
+      entropy->dc_needed[blkn] = TRUE;
+      /* we don't need the ACs if producing a 1/8th-size image */
+      entropy->ac_needed[blkn] = (compptr->_DCT_scaled_size > 1);
+    } else {
+      entropy->dc_needed[blkn] = entropy->ac_needed[blkn] = FALSE;
+    }
+  }
+
+  /* Initialize bitread state variables */
+  entropy->bitstate.bits_left = 0;
+  entropy->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */
+  entropy->pub.insufficient_data = FALSE;
+
+  /* Initialize restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Compute the derived values for a Huffman table.
+ * This routine also performs some validation checks on the table.
+ *
+ * Note this is also used by jdphuff.c and jdlhuff.c.
+ */
+
+GLOBAL(void)
+jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC, int tblno,
+                        d_derived_tbl **pdtbl)
+{
+  JHUFF_TBL *htbl;
+  d_derived_tbl *dtbl;
+  int p, i, l, si, numsymbols;
+  int lookbits, ctr;
+  char huffsize[257];
+  unsigned int huffcode[257];
+  unsigned int code;
+
+  /* Note that huffsize[] and huffcode[] are filled in code-length order,
+   * paralleling the order of the symbols themselves in htbl->huffval[].
+   */
+
+  /* Find the input Huffman table */
+  if (tblno < 0 || tblno >= NUM_HUFF_TBLS)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+  htbl =
+    isDC ? cinfo->dc_huff_tbl_ptrs[tblno] : cinfo->ac_huff_tbl_ptrs[tblno];
+  if (htbl == NULL)
+    ERREXIT1(cinfo, JERR_NO_HUFF_TABLE, tblno);
+
+  /* Allocate a workspace if we haven't already done so. */
+  if (*pdtbl == NULL)
+    *pdtbl = (d_derived_tbl *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                  sizeof(d_derived_tbl));
+  dtbl = *pdtbl;
+  dtbl->pub = htbl;             /* fill in back link */
+
+  /* Figure C.1: make table of Huffman code length for each symbol */
+
+  p = 0;
+  for (l = 1; l <= 16; l++) {
+    i = (int)htbl->bits[l];
+    if (i < 0 || p + i > 256)   /* protect against table overrun */
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    while (i--)
+      huffsize[p++] = (char)l;
+  }
+  huffsize[p] = 0;
+  numsymbols = p;
+
+  /* Figure C.2: generate the codes themselves */
+  /* We also validate that the counts represent a legal Huffman code tree. */
+
+  code = 0;
+  si = huffsize[0];
+  p = 0;
+  while (huffsize[p]) {
+    while (((int)huffsize[p]) == si) {
+      huffcode[p++] = code;
+      code++;
+    }
+    /* code is now 1 more than the last code used for codelength si; but
+     * it must still fit in si bits, since no code is allowed to be all ones.
+     */
+    if (((JLONG)code) >= (((JLONG)1) << si))
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    code <<= 1;
+    si++;
+  }
+
+  /* Figure F.15: generate decoding tables for bit-sequential decoding */
+
+  p = 0;
+  for (l = 1; l <= 16; l++) {
+    if (htbl->bits[l]) {
+      /* valoffset[l] = huffval[] index of 1st symbol of code length l,
+       * minus the minimum code of length l
+       */
+      dtbl->valoffset[l] = (JLONG)p - (JLONG)huffcode[p];
+      p += htbl->bits[l];
+      dtbl->maxcode[l] = huffcode[p - 1]; /* maximum code of length l */
+    } else {
+      dtbl->maxcode[l] = -1;    /* -1 if no codes of this length */
+    }
+  }
+  dtbl->valoffset[17] = 0;
+  dtbl->maxcode[17] = 0xFFFFFL; /* ensures jpeg_huff_decode terminates */
+
+  /* Compute lookahead tables to speed up decoding.
+   * First we set all the table entries to 0, indicating "too long";
+   * then we iterate through the Huffman codes that are short enough and
+   * fill in all the entries that correspond to bit sequences starting
+   * with that code.
+   */
+
+  for (i = 0; i < (1 << HUFF_LOOKAHEAD); i++)
+    dtbl->lookup[i] = (HUFF_LOOKAHEAD + 1) << HUFF_LOOKAHEAD;
+
+  p = 0;
+  for (l = 1; l <= HUFF_LOOKAHEAD; l++) {
+    for (i = 1; i <= (int)htbl->bits[l]; i++, p++) {
+      /* l = current code's length, p = its index in huffcode[] & huffval[]. */
+      /* Generate left-justified code followed by all possible bit sequences */
+      lookbits = huffcode[p] << (HUFF_LOOKAHEAD - l);
+      for (ctr = 1 << (HUFF_LOOKAHEAD - l); ctr > 0; ctr--) {
+        dtbl->lookup[lookbits] = (l << HUFF_LOOKAHEAD) | htbl->huffval[p];
+        lookbits++;
+      }
+    }
+  }
+
+  /* Validate symbols as being reasonable.
+   * For AC tables, we make no check, but accept all byte values 0..255.
+   * For DC tables, we require the symbols to be in range 0..15 in lossy mode
+   * and 0..16 in lossless mode.  (Tighter bounds could be applied depending on
+   * the data depth and mode, but this is sufficient to ensure safe decoding.)
+   */
+  if (isDC) {
+    for (i = 0; i < numsymbols; i++) {
+      int sym = htbl->huffval[i];
+      if (sym < 0 || sym > (cinfo->master->lossless ? 16 : 15))
+        ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+    }
+  }
+}
+
+
+/*
+ * Out-of-line code for bit fetching (shared with jdphuff.c and jdlhuff.c).
+ * See jdhuff.h for info about usage.
+ * Note: current values of get_buffer and bits_left are passed as parameters,
+ * but are returned in the corresponding fields of the state struct.
+ *
+ * On most machines MIN_GET_BITS should be 25 to allow the full 32-bit width
+ * of get_buffer to be used.  (On machines with wider words, an even larger
+ * buffer could be used.)  However, on some machines 32-bit shifts are
+ * quite slow and take time proportional to the number of places shifted.
+ * (This is true with most PC compilers, for instance.)  In this case it may
+ * be a win to set MIN_GET_BITS to the minimum value of 15.  This reduces the
+ * average shift distance at the cost of more calls to jpeg_fill_bit_buffer.
+ */
+
+#ifdef SLOW_SHIFT_32
+#define MIN_GET_BITS  15        /* minimum allowable value */
+#else
+#define MIN_GET_BITS  (BIT_BUF_SIZE - 7)
+#endif
+
+
+GLOBAL(boolean)
+jpeg_fill_bit_buffer(bitread_working_state *state,
+                     register bit_buf_type get_buffer, register int bits_left,
+                     int nbits)
+/* Load up the bit buffer to a depth of at least nbits */
+{
+  /* Copy heavily used state fields into locals (hopefully registers) */
+  register const JOCTET *next_input_byte = state->next_input_byte;
+  register size_t bytes_in_buffer = state->bytes_in_buffer;
+  j_decompress_ptr cinfo = state->cinfo;
+
+  /* Attempt to load at least MIN_GET_BITS bits into get_buffer. */
+  /* (It is assumed that no request will be for more than that many bits.) */
+  /* We fail to do so only if we hit a marker or are forced to suspend. */
+
+  if (cinfo->unread_marker == 0) {      /* cannot advance past a marker */
+    while (bits_left < MIN_GET_BITS) {
+      register int c;
+
+      /* Attempt to read a byte */
+      if (bytes_in_buffer == 0) {
+        if (!(*cinfo->src->fill_input_buffer) (cinfo))
+          return FALSE;
+        next_input_byte = cinfo->src->next_input_byte;
+        bytes_in_buffer = cinfo->src->bytes_in_buffer;
+      }
+      bytes_in_buffer--;
+      c = *next_input_byte++;
+
+      /* If it's 0xFF, check and discard stuffed zero byte */
+      if (c == 0xFF) {
+        /* Loop here to discard any padding FF's on terminating marker,
+         * so that we can save a valid unread_marker value.  NOTE: we will
+         * accept multiple FF's followed by a 0 as meaning a single FF data
+         * byte.  This data pattern is not valid according to the standard.
+         */
+        do {
+          if (bytes_in_buffer == 0) {
+            if (!(*cinfo->src->fill_input_buffer) (cinfo))
+              return FALSE;
+            next_input_byte = cinfo->src->next_input_byte;
+            bytes_in_buffer = cinfo->src->bytes_in_buffer;
+          }
+          bytes_in_buffer--;
+          c = *next_input_byte++;
+        } while (c == 0xFF);
+
+        if (c == 0) {
+          /* Found FF/00, which represents an FF data byte */
+          c = 0xFF;
+        } else {
+          /* Oops, it's actually a marker indicating end of compressed data.
+           * Save the marker code for later use.
+           * Fine point: it might appear that we should save the marker into
+           * bitread working state, not straight into permanent state.  But
+           * once we have hit a marker, we cannot need to suspend within the
+           * current MCU, because we will read no more bytes from the data
+           * source.  So it is OK to update permanent state right away.
+           */
+          cinfo->unread_marker = c;
+          /* See if we need to insert some fake zero bits. */
+          goto no_more_bytes;
+        }
+      }
+
+      /* OK, load c into get_buffer */
+      get_buffer = (get_buffer << 8) | c;
+      bits_left += 8;
+    } /* end while */
+  } else {
+no_more_bytes:
+    /* We get here if we've read the marker that terminates the compressed
+     * data segment.  There should be enough bits in the buffer register
+     * to satisfy the request; if so, no problem.
+     */
+    if (nbits > bits_left) {
+      /* Uh-oh.  Report corrupted data to user and stuff zeroes into
+       * the data stream, so that we can produce some kind of image.
+       * We use a nonvolatile flag to ensure that only one warning message
+       * appears per data segment.
+       */
+      if (!cinfo->entropy->insufficient_data) {
+        WARNMS(cinfo, JWRN_HIT_MARKER);
+        cinfo->entropy->insufficient_data = TRUE;
+      }
+      /* Fill the buffer with zero bits */
+      get_buffer <<= MIN_GET_BITS - bits_left;
+      bits_left = MIN_GET_BITS;
+    }
+  }
+
+  /* Unload the local registers */
+  state->next_input_byte = next_input_byte;
+  state->bytes_in_buffer = bytes_in_buffer;
+  state->get_buffer = get_buffer;
+  state->bits_left = bits_left;
+
+  return TRUE;
+}
+
+
+/* Macro version of the above, which performs much better but does not
+   handle markers.  We have to hand off any blocks with markers to the
+   slower routines. */
+
+#define GET_BYTE { \
+  register int c0, c1; \
+  c0 = *buffer++; \
+  c1 = *buffer; \
+  /* Pre-execute most common case */ \
+  get_buffer = (get_buffer << 8) | c0; \
+  bits_left += 8; \
+  if (c0 == 0xFF) { \
+    /* Pre-execute case of FF/00, which represents an FF data byte */ \
+    buffer++; \
+    if (c1 != 0) { \
+      /* Oops, it's actually a marker indicating end of compressed data. */ \
+      cinfo->unread_marker = c1; \
+      /* Back out pre-execution and fill the buffer with zero bits */ \
+      buffer -= 2; \
+      get_buffer &= ~0xFF; \
+    } \
+  } \
+}
+
+#if SIZEOF_SIZE_T == 8 || defined(_WIN64) || (defined(__x86_64__) && defined(__ILP32__))
+
+/* Pre-fetch 48 bytes, because the holding register is 64-bit */
+#define FILL_BIT_BUFFER_FAST \
+  if (bits_left <= 16) { \
+    GET_BYTE GET_BYTE GET_BYTE GET_BYTE GET_BYTE GET_BYTE \
+  }
+
+#else
+
+/* Pre-fetch 16 bytes, because the holding register is 32-bit */
+#define FILL_BIT_BUFFER_FAST \
+  if (bits_left <= 16) { \
+    GET_BYTE GET_BYTE \
+  }
+
+#endif
+
+
+/*
+ * Out-of-line code for Huffman code decoding.
+ * See jdhuff.h for info about usage.
+ */
+
+GLOBAL(int)
+jpeg_huff_decode(bitread_working_state *state,
+                 register bit_buf_type get_buffer, register int bits_left,
+                 d_derived_tbl *htbl, int min_bits)
+{
+  register int l = min_bits;
+  register JLONG code;
+
+  /* HUFF_DECODE has determined that the code is at least min_bits */
+  /* bits long, so fetch that many bits in one swoop. */
+
+  CHECK_BIT_BUFFER(*state, l, return -1);
+  code = GET_BITS(l);
+
+  /* Collect the rest of the Huffman code one bit at a time. */
+  /* This is per Figure F.16. */
+
+  while (code > htbl->maxcode[l]) {
+    code <<= 1;
+    CHECK_BIT_BUFFER(*state, 1, return -1);
+    code |= GET_BITS(1);
+    l++;
+  }
+
+  /* Unload the local registers */
+  state->get_buffer = get_buffer;
+  state->bits_left = bits_left;
+
+  /* With garbage input we may reach the sentinel value l = 17. */
+
+  if (l > 16) {
+    WARNMS(state->cinfo, JWRN_HUFF_BAD_CODE);
+    return 0;                   /* fake a zero as the safest result */
+  }
+
+  return htbl->pub->huffval[(int)(code + htbl->valoffset[l])];
+}
+
+
+/*
+ * Figure F.12: extend sign bit.
+ * On some machines, a shift and add will be faster than a table lookup.
+ */
+
+#define AVOID_TABLES
+#ifdef AVOID_TABLES
+
+#define NEG_1  ((unsigned int)-1)
+#define HUFF_EXTEND(x, s) \
+  ((x) + ((((x) - (1 << ((s) - 1))) >> 31) & (((NEG_1) << (s)) + 1)))
+
+#else
+
+#define HUFF_EXTEND(x, s) \
+  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
+
+static const int extend_test[16] = {   /* entry n is 2**(n-1) */
+  0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
+  0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000
+};
+
+static const int extend_offset[16] = { /* entry n is (-1 << n) + 1 */
+  0, ((-1) << 1) + 1, ((-1) << 2) + 1, ((-1) << 3) + 1, ((-1) << 4) + 1,
+  ((-1) << 5) + 1, ((-1) << 6) + 1, ((-1) << 7) + 1, ((-1) << 8) + 1,
+  ((-1) << 9) + 1, ((-1) << 10) + 1, ((-1) << 11) + 1, ((-1) << 12) + 1,
+  ((-1) << 13) + 1, ((-1) << 14) + 1, ((-1) << 15) + 1
+};
+
+#endif /* AVOID_TABLES */
+
+
+/*
+ * Check for a restart marker & resynchronize decoder.
+ * Returns FALSE if must suspend.
+ */
+
+LOCAL(boolean)
+process_restart(j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
+  int ci;
+
+  /* Throw away any unused bits remaining in bit buffer; */
+  /* include any full bytes in next_marker's count of discarded bytes */
+  cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8;
+  entropy->bitstate.bits_left = 0;
+
+  /* Advance past the RSTn marker */
+  if (!(*cinfo->marker->read_restart_marker) (cinfo))
+    return FALSE;
+
+  /* Re-initialize DC predictions to 0 */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++)
+    entropy->saved.last_dc_val[ci] = 0;
+
+  /* Reset restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+
+  /* Reset out-of-data flag, unless read_restart_marker left us smack up
+   * against a marker.  In that case we will end up treating the next data
+   * segment as empty, and we can avoid producing bogus output pixels by
+   * leaving the flag set.
+   */
+  if (cinfo->unread_marker == 0)
+    entropy->pub.insufficient_data = FALSE;
+
+  return TRUE;
+}
+
+
+#if defined(__has_feature)
+#if __has_feature(undefined_behavior_sanitizer)
+__attribute__((no_sanitize("signed-integer-overflow"),
+               no_sanitize("unsigned-integer-overflow")))
+#endif
+#endif
+LOCAL(boolean)
+decode_mcu_slow(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
+  BITREAD_STATE_VARS;
+  int blkn;
+  savable_state state;
+  /* Outer loop handles each block in the MCU */
+
+  /* Load up working state */
+  BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+  state = entropy->saved;
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL;
+    d_derived_tbl *dctbl = entropy->dc_cur_tbls[blkn];
+    d_derived_tbl *actbl = entropy->ac_cur_tbls[blkn];
+    register int s, k, r;
+
+    /* Decode a single block's worth of coefficients */
+
+    /* Section F.2.2.1: decode the DC coefficient difference */
+    HUFF_DECODE(s, br_state, dctbl, return FALSE, label1);
+    if (s) {
+      CHECK_BIT_BUFFER(br_state, s, return FALSE);
+      r = GET_BITS(s);
+      s = HUFF_EXTEND(r, s);
+    }
+
+    if (entropy->dc_needed[blkn]) {
+      /* Convert DC difference to actual value, update last_dc_val */
+      int ci = cinfo->MCU_membership[blkn];
+      /* Certain malformed JPEG images produce repeated DC coefficient
+       * differences of 2047 or -2047, which causes state.last_dc_val[ci] to
+       * grow until it overflows or underflows a 32-bit signed integer.  This
+       * behavior is, to the best of our understanding, innocuous, and it is
+       * unclear how to work around it without potentially affecting
+       * performance.  Thus, we (hopefully temporarily) suppress UBSan integer
+       * overflow errors for this function and decode_mcu_fast().
+       */
+      s += state.last_dc_val[ci];
+      state.last_dc_val[ci] = s;
+      if (block) {
+        /* Output the DC coefficient (assumes jpeg_natural_order[0] = 0) */
+        (*block)[0] = (JCOEF)s;
+      }
+    }
+
+    if (entropy->ac_needed[blkn] && block) {
+
+      /* Section F.2.2.2: decode the AC coefficients */
+      /* Since zeroes are skipped, output area must be cleared beforehand */
+      for (k = 1; k < DCTSIZE2; k++) {
+        HUFF_DECODE(s, br_state, actbl, return FALSE, label2);
+
+        r = s >> 4;
+        s &= 15;
+
+        if (s) {
+          k += r;
+          CHECK_BIT_BUFFER(br_state, s, return FALSE);
+          r = GET_BITS(s);
+          s = HUFF_EXTEND(r, s);
+          /* Output coefficient in natural (dezigzagged) order.
+           * Note: the extra entries in jpeg_natural_order[] will save us
+           * if k >= DCTSIZE2, which could happen if the data is corrupted.
+           */
+          (*block)[jpeg_natural_order[k]] = (JCOEF)s;
+        } else {
+          if (r != 15)
+            break;
+          k += 15;
+        }
+      }
+
+    } else {
+
+      /* Section F.2.2.2: decode the AC coefficients */
+      /* In this path we just discard the values */
+      for (k = 1; k < DCTSIZE2; k++) {
+        HUFF_DECODE(s, br_state, actbl, return FALSE, label3);
+
+        r = s >> 4;
+        s &= 15;
+
+        if (s) {
+          k += r;
+          CHECK_BIT_BUFFER(br_state, s, return FALSE);
+          DROP_BITS(s);
+        } else {
+          if (r != 15)
+            break;
+          k += 15;
+        }
+      }
+    }
+  }
+
+  /* Completed MCU, so update state */
+  BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+  entropy->saved = state;
+  return TRUE;
+}
+
+
+#if defined(__has_feature)
+#if __has_feature(undefined_behavior_sanitizer)
+__attribute__((no_sanitize("signed-integer-overflow"),
+               no_sanitize("unsigned-integer-overflow")))
+#endif
+#endif
+LOCAL(boolean)
+decode_mcu_fast(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
+  BITREAD_STATE_VARS;
+  JOCTET *buffer;
+  int blkn;
+  savable_state state;
+  /* Outer loop handles each block in the MCU */
+
+  /* Load up working state */
+  BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+  buffer = (JOCTET *)br_state.next_input_byte;
+  state = entropy->saved;
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    JBLOCKROW block = MCU_data ? MCU_data[blkn] : NULL;
+    d_derived_tbl *dctbl = entropy->dc_cur_tbls[blkn];
+    d_derived_tbl *actbl = entropy->ac_cur_tbls[blkn];
+    register int s, k, r, l;
+
+    HUFF_DECODE_FAST(s, l, dctbl);
+    if (s) {
+      FILL_BIT_BUFFER_FAST
+      r = GET_BITS(s);
+      s = HUFF_EXTEND(r, s);
+    }
+
+    if (entropy->dc_needed[blkn]) {
+      int ci = cinfo->MCU_membership[blkn];
+      /* Refer to the comment in decode_mcu_slow() regarding the supression of
+       * a UBSan integer overflow error in this line of code.
+       */
+      s += state.last_dc_val[ci];
+      state.last_dc_val[ci] = s;
+      if (block)
+        (*block)[0] = (JCOEF)s;
+    }
+
+    if (entropy->ac_needed[blkn] && block) {
+
+      for (k = 1; k < DCTSIZE2; k++) {
+        HUFF_DECODE_FAST(s, l, actbl);
+        r = s >> 4;
+        s &= 15;
+
+        if (s) {
+          k += r;
+          FILL_BIT_BUFFER_FAST
+          r = GET_BITS(s);
+          s = HUFF_EXTEND(r, s);
+          (*block)[jpeg_natural_order[k]] = (JCOEF)s;
+        } else {
+          if (r != 15) break;
+          k += 15;
+        }
+      }
+
+    } else {
+
+      for (k = 1; k < DCTSIZE2; k++) {
+        HUFF_DECODE_FAST(s, l, actbl);
+        r = s >> 4;
+        s &= 15;
+
+        if (s) {
+          k += r;
+          FILL_BIT_BUFFER_FAST
+          DROP_BITS(s);
+        } else {
+          if (r != 15) break;
+          k += 15;
+        }
+      }
+    }
+  }
+
+  if (cinfo->unread_marker != 0) {
+    cinfo->unread_marker = 0;
+    return FALSE;
+  }
+
+  br_state.bytes_in_buffer -= (buffer - br_state.next_input_byte);
+  br_state.next_input_byte = buffer;
+  BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+  entropy->saved = state;
+  return TRUE;
+}
+
+
+/*
+ * Decode and return one MCU's worth of Huffman-compressed coefficients.
+ * The coefficients are reordered from zigzag order into natural array order,
+ * but are not dequantized.
+ *
+ * The i'th block of the MCU is stored into the block pointed to by
+ * MCU_data[i].  WE ASSUME THIS AREA HAS BEEN ZEROED BY THE CALLER.
+ * (Wholesale zeroing is usually a little faster than retail...)
+ *
+ * Returns FALSE if data source requested suspension.  In that case no
+ * changes have been made to permanent state.  (Exception: some output
+ * coefficients may already have been assigned.  This is harmless for
+ * this module, since we'll just re-assign them on the next call.)
+ */
+
+#define BUFSIZE  (DCTSIZE2 * 8)
+
+METHODDEF(boolean)
+decode_mcu(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  huff_entropy_ptr entropy = (huff_entropy_ptr)cinfo->entropy;
+  int usefast = 1;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (!process_restart(cinfo))
+        return FALSE;
+    usefast = 0;
+  }
+
+  if (cinfo->src->bytes_in_buffer < BUFSIZE * (size_t)cinfo->blocks_in_MCU ||
+      cinfo->unread_marker != 0)
+    usefast = 0;
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (!entropy->pub.insufficient_data) {
+
+    if (usefast) {
+      if (!decode_mcu_fast(cinfo, MCU_data)) goto use_slow;
+    } else {
+use_slow:
+      if (!decode_mcu_slow(cinfo, MCU_data)) return FALSE;
+    }
+
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  if (cinfo->restart_interval)
+    entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * Module initialization routine for Huffman entropy decoding.
+ */
+
+GLOBAL(void)
+jinit_huff_decoder(j_decompress_ptr cinfo)
+{
+  huff_entropy_ptr entropy;
+  int i;
+
+  /* Motion JPEG frames typically do not include the Huffman tables if they
+     are the default tables.  Thus, if the tables are not set by the time
+     the Huffman decoder is initialized (usually within the body of
+     jpeg_start_decompress()), we set them to default values. */
+  std_huff_tables((j_common_ptr)cinfo);
+
+  entropy = (huff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(huff_entropy_decoder));
+  cinfo->entropy = (struct jpeg_entropy_decoder *)entropy;
+  entropy->pub.start_pass = start_pass_huff_decoder;
+  entropy->pub.decode_mcu = decode_mcu;
+
+  /* Mark tables unallocated */
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    entropy->dc_derived_tbls[i] = entropy->ac_derived_tbls[i] = NULL;
+  }
+}
diff --git a/thirdparty/libjpeg-turbo/src/jdhuff.h b/thirdparty/libjpeg-turbo/src/jdhuff.h
new file mode 100644
index 00000000000..3eee002c020
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdhuff.h
@@ -0,0 +1,250 @@
+/*
+ * jdhuff.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010-2011, 2015-2016, 2021, D. R. Commander.
+ * Copyright (C) 2018, Matthias Räncker.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains declarations for Huffman entropy decoding routines
+ * that are shared between the sequential decoder (jdhuff.c), the progressive
+ * decoder (jdphuff.c), and the lossless decoder (jdlhuff.c).  No other modules
+ * need to see these.
+ */
+
+#include "jconfigint.h"
+
+
+/* Derived data constructed for each Huffman table */
+
+#define HUFF_LOOKAHEAD  8       /* # of bits of lookahead */
+
+typedef struct {
+  /* Basic tables: (element [0] of each array is unused) */
+  JLONG maxcode[18];            /* largest code of length k (-1 if none) */
+  /* (maxcode[17] is a sentinel to ensure jpeg_huff_decode terminates) */
+  JLONG valoffset[18];          /* huffval[] offset for codes of length k */
+  /* valoffset[k] = huffval[] index of 1st symbol of code length k, less
+   * the smallest code of length k; so given a code of length k, the
+   * corresponding symbol is huffval[code + valoffset[k]]
+   */
+
+  /* Link to public Huffman table (needed only in jpeg_huff_decode) */
+  JHUFF_TBL *pub;
+
+  /* Lookahead table: indexed by the next HUFF_LOOKAHEAD bits of
+   * the input data stream.  If the next Huffman code is no more
+   * than HUFF_LOOKAHEAD bits long, we can obtain its length and
+   * the corresponding symbol directly from this tables.
+   *
+   * The lower 8 bits of each table entry contain the number of
+   * bits in the corresponding Huffman code, or HUFF_LOOKAHEAD + 1
+   * if too long.  The next 8 bits of each entry contain the
+   * symbol.
+   */
+  int lookup[1 << HUFF_LOOKAHEAD];
+} d_derived_tbl;
+
+/* Expand a Huffman table definition into the derived format */
+EXTERN(void) jpeg_make_d_derived_tbl(j_decompress_ptr cinfo, boolean isDC,
+                                     int tblno, d_derived_tbl **pdtbl);
+
+
+/*
+ * Fetching the next N bits from the input stream is a time-critical operation
+ * for the Huffman decoders.  We implement it with a combination of inline
+ * macros and out-of-line subroutines.  Note that N (the number of bits
+ * demanded at one time) never exceeds 15 for JPEG use.
+ *
+ * We read source bytes into get_buffer and dole out bits as needed.
+ * If get_buffer already contains enough bits, they are fetched in-line
+ * by the macros CHECK_BIT_BUFFER and GET_BITS.  When there aren't enough
+ * bits, jpeg_fill_bit_buffer is called; it will attempt to fill get_buffer
+ * as full as possible (not just to the number of bits needed; this
+ * prefetching reduces the overhead cost of calling jpeg_fill_bit_buffer).
+ * Note that jpeg_fill_bit_buffer may return FALSE to indicate suspension.
+ * On TRUE return, jpeg_fill_bit_buffer guarantees that get_buffer contains
+ * at least the requested number of bits --- dummy zeroes are inserted if
+ * necessary.
+ */
+
+#if !defined(_WIN32) && !defined(SIZEOF_SIZE_T)
+#error Cannot determine word size
+#endif
+
+#if SIZEOF_SIZE_T == 8 || defined(_WIN64)
+
+typedef size_t bit_buf_type;            /* type of bit-extraction buffer */
+#define BIT_BUF_SIZE  64                /* size of buffer in bits */
+
+#elif defined(__x86_64__) && defined(__ILP32__)
+
+typedef unsigned long long bit_buf_type; /* type of bit-extraction buffer */
+#define BIT_BUF_SIZE  64                 /* size of buffer in bits */
+
+#else
+
+typedef unsigned long bit_buf_type;     /* type of bit-extraction buffer */
+#define BIT_BUF_SIZE  32                /* size of buffer in bits */
+
+#endif
+
+/* If long is > 32 bits on your machine, and shifting/masking longs is
+ * reasonably fast, making bit_buf_type be long and setting BIT_BUF_SIZE
+ * appropriately should be a win.  Unfortunately we can't define the size
+ * with something like  #define BIT_BUF_SIZE (sizeof(bit_buf_type)*8)
+ * because not all machines measure sizeof in 8-bit bytes.
+ */
+
+typedef struct {                /* Bitreading state saved across MCUs */
+  bit_buf_type get_buffer;      /* current bit-extraction buffer */
+  int bits_left;                /* # of unused bits in it */
+} bitread_perm_state;
+
+typedef struct {                /* Bitreading working state within an MCU */
+  /* Current data source location */
+  /* We need a copy, rather than munging the original, in case of suspension */
+  const JOCTET *next_input_byte; /* => next byte to read from source */
+  size_t bytes_in_buffer;       /* # of bytes remaining in source buffer */
+  /* Bit input buffer --- note these values are kept in register variables,
+   * not in this struct, inside the inner loops.
+   */
+  bit_buf_type get_buffer;      /* current bit-extraction buffer */
+  int bits_left;                /* # of unused bits in it */
+  /* Pointer needed by jpeg_fill_bit_buffer. */
+  j_decompress_ptr cinfo;       /* back link to decompress master record */
+} bitread_working_state;
+
+/* Macros to declare and load/save bitread local variables. */
+#define BITREAD_STATE_VARS \
+  register bit_buf_type get_buffer; \
+  register int bits_left; \
+  bitread_working_state br_state
+
+#define BITREAD_LOAD_STATE(cinfop, permstate) \
+  br_state.cinfo = cinfop; \
+  br_state.next_input_byte = cinfop->src->next_input_byte; \
+  br_state.bytes_in_buffer = cinfop->src->bytes_in_buffer; \
+  get_buffer = permstate.get_buffer; \
+  bits_left = permstate.bits_left;
+
+#define BITREAD_SAVE_STATE(cinfop, permstate) \
+  cinfop->src->next_input_byte = br_state.next_input_byte; \
+  cinfop->src->bytes_in_buffer = br_state.bytes_in_buffer; \
+  permstate.get_buffer = get_buffer; \
+  permstate.bits_left = bits_left
+
+/*
+ * These macros provide the in-line portion of bit fetching.
+ * Use CHECK_BIT_BUFFER to ensure there are N bits in get_buffer
+ * before using GET_BITS, PEEK_BITS, or DROP_BITS.
+ * The variables get_buffer and bits_left are assumed to be locals,
+ * but the state struct might not be (jpeg_huff_decode needs this).
+ *      CHECK_BIT_BUFFER(state, n, action);
+ *              Ensure there are N bits in get_buffer; if suspend, take action.
+ *      val = GET_BITS(n);
+ *              Fetch next N bits.
+ *      val = PEEK_BITS(n);
+ *              Fetch next N bits without removing them from the buffer.
+ *      DROP_BITS(n);
+ *              Discard next N bits.
+ * The value N should be a simple variable, not an expression, because it
+ * is evaluated multiple times.
+ */
+
+#define CHECK_BIT_BUFFER(state, nbits, action) { \
+  if (bits_left < (nbits)) { \
+    if (!jpeg_fill_bit_buffer(&(state), get_buffer, bits_left, nbits)) \
+      { action; } \
+    get_buffer = (state).get_buffer;  bits_left = (state).bits_left; \
+  } \
+}
+
+#define GET_BITS(nbits) \
+  (((int)(get_buffer >> (bits_left -= (nbits)))) & ((1 << (nbits)) - 1))
+
+#define PEEK_BITS(nbits) \
+  (((int)(get_buffer >> (bits_left -  (nbits)))) & ((1 << (nbits)) - 1))
+
+#define DROP_BITS(nbits) \
+  (bits_left -= (nbits))
+
+/* Load up the bit buffer to a depth of at least nbits */
+EXTERN(boolean) jpeg_fill_bit_buffer(bitread_working_state *state,
+                                     register bit_buf_type get_buffer,
+                                     register int bits_left, int nbits);
+
+
+/*
+ * Code for extracting next Huffman-coded symbol from input bit stream.
+ * Again, this is time-critical and we make the main paths be macros.
+ *
+ * We use a lookahead table to process codes of up to HUFF_LOOKAHEAD bits
+ * without looping.  Usually, more than 95% of the Huffman codes will be 8
+ * or fewer bits long.  The few overlength codes are handled with a loop,
+ * which need not be inline code.
+ *
+ * Notes about the HUFF_DECODE macro:
+ * 1. Near the end of the data segment, we may fail to get enough bits
+ *    for a lookahead.  In that case, we do it the hard way.
+ * 2. If the lookahead table contains no entry, the next code must be
+ *    more than HUFF_LOOKAHEAD bits long.
+ * 3. jpeg_huff_decode returns -1 if forced to suspend.
+ */
+
+#define HUFF_DECODE(result, state, htbl, failaction, slowlabel) { \
+  register int nb, look; \
+  if (bits_left < HUFF_LOOKAHEAD) { \
+    if (!jpeg_fill_bit_buffer(&state, get_buffer, bits_left, 0)) \
+      { failaction; } \
+    get_buffer = state.get_buffer;  bits_left = state.bits_left; \
+    if (bits_left < HUFF_LOOKAHEAD) { \
+      nb = 1;  goto slowlabel; \
+    } \
+  } \
+  look = PEEK_BITS(HUFF_LOOKAHEAD); \
+  if ((nb = (htbl->lookup[look] >> HUFF_LOOKAHEAD)) <= HUFF_LOOKAHEAD) { \
+    DROP_BITS(nb); \
+    result = htbl->lookup[look] & ((1 << HUFF_LOOKAHEAD) - 1); \
+  } else { \
+slowlabel: \
+    if ((result = \
+         jpeg_huff_decode(&state, get_buffer, bits_left, htbl, nb)) < 0) \
+      { failaction; } \
+    get_buffer = state.get_buffer;  bits_left = state.bits_left; \
+  } \
+}
+
+#define HUFF_DECODE_FAST(s, nb, htbl) \
+  FILL_BIT_BUFFER_FAST; \
+  s = PEEK_BITS(HUFF_LOOKAHEAD); \
+  s = htbl->lookup[s]; \
+  nb = s >> HUFF_LOOKAHEAD; \
+  /* Pre-execute the common case of nb <= HUFF_LOOKAHEAD */ \
+  DROP_BITS(nb); \
+  s = s & ((1 << HUFF_LOOKAHEAD) - 1); \
+  if (nb > HUFF_LOOKAHEAD) { \
+    /* Equivalent of jpeg_huff_decode() */ \
+    /* Don't use GET_BITS() here because we don't want to modify bits_left */ \
+    s = (get_buffer >> bits_left) & ((1 << (nb)) - 1); \
+    while (s > htbl->maxcode[nb]) { \
+      s <<= 1; \
+      s |= GET_BITS(1); \
+      nb++; \
+    } \
+    if (nb > 16) \
+      s = 0; \
+    else \
+      s = htbl->pub->huffval[(int)(s + htbl->valoffset[nb]) & 0xFF]; \
+  }
+
+/* Out-of-line case for Huffman code fetching */
+EXTERN(int) jpeg_huff_decode(bitread_working_state *state,
+                             register bit_buf_type get_buffer,
+                             register int bits_left, d_derived_tbl *htbl,
+                             int min_bits);
diff --git a/thirdparty/libjpeg-turbo/src/jdicc.c b/thirdparty/libjpeg-turbo/src/jdicc.c
new file mode 100644
index 00000000000..50aa9a96767
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdicc.c
@@ -0,0 +1,167 @@
+/*
+ * jdicc.c
+ *
+ * Copyright (C) 1997-1998, Thomas G. Lane, Todd Newman.
+ * Copyright (C) 2017, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file provides code to read International Color Consortium (ICC) device
+ * profiles embedded in JFIF JPEG image files.  The ICC has defined a standard
+ * for including such data in JPEG "APP2" markers.  The code given here does
+ * not know anything about the internal structure of the ICC profile data; it
+ * just knows how to get the profile data from a JPEG file while reading it.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jerror.h"
+
+
+#define ICC_MARKER  (JPEG_APP0 + 2)     /* JPEG marker code for ICC */
+#define ICC_OVERHEAD_LEN  14            /* size of non-profile data in APP2 */
+
+
+/*
+ * Handy subroutine to test whether a saved marker is an ICC profile marker.
+ */
+
+LOCAL(boolean)
+marker_is_icc(jpeg_saved_marker_ptr marker)
+{
+  return
+    marker->marker == ICC_MARKER &&
+    marker->data_length >= ICC_OVERHEAD_LEN &&
+    /* verify the identifying string */
+    marker->data[0] == 0x49 &&
+    marker->data[1] == 0x43 &&
+    marker->data[2] == 0x43 &&
+    marker->data[3] == 0x5F &&
+    marker->data[4] == 0x50 &&
+    marker->data[5] == 0x52 &&
+    marker->data[6] == 0x4F &&
+    marker->data[7] == 0x46 &&
+    marker->data[8] == 0x49 &&
+    marker->data[9] == 0x4C &&
+    marker->data[10] == 0x45 &&
+    marker->data[11] == 0x0;
+}
+
+
+/*
+ * See if there was an ICC profile in the JPEG file being read; if so,
+ * reassemble and return the profile data.
+ *
+ * TRUE is returned if an ICC profile was found, FALSE if not.  If TRUE is
+ * returned, *icc_data_ptr is set to point to the returned data, and
+ * *icc_data_len is set to its length.
+ *
+ * IMPORTANT: the data at *icc_data_ptr is allocated with malloc() and must be
+ * freed by the caller with free() when the caller no longer needs it.
+ * (Alternatively, we could write this routine to use the IJG library's memory
+ * allocator, so that the data would be freed implicitly when
+ * jpeg_finish_decompress() is called.  But it seems likely that many
+ * applications will prefer to have the data stick around after decompression
+ * finishes.)
+ */
+
+GLOBAL(boolean)
+jpeg_read_icc_profile(j_decompress_ptr cinfo, JOCTET **icc_data_ptr,
+                      unsigned int *icc_data_len)
+{
+  jpeg_saved_marker_ptr marker;
+  int num_markers = 0;
+  int seq_no;
+  JOCTET *icc_data;
+  unsigned int total_length;
+#define MAX_SEQ_NO  255         /* sufficient since marker numbers are bytes */
+  char marker_present[MAX_SEQ_NO + 1];      /* 1 if marker found */
+  unsigned int data_length[MAX_SEQ_NO + 1]; /* size of profile data in marker */
+  unsigned int data_offset[MAX_SEQ_NO + 1]; /* offset for data in marker */
+
+  if (icc_data_ptr == NULL || icc_data_len == NULL)
+    ERREXIT(cinfo, JERR_BUFFER_SIZE);
+  if (cinfo->global_state < DSTATE_READY)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  *icc_data_ptr = NULL;         /* avoid confusion if FALSE return */
+  *icc_data_len = 0;
+
+  /* This first pass over the saved markers discovers whether there are
+   * any ICC markers and verifies the consistency of the marker numbering.
+   */
+
+  for (seq_no = 1; seq_no <= MAX_SEQ_NO; seq_no++)
+    marker_present[seq_no] = 0;
+
+  for (marker = cinfo->marker_list; marker != NULL; marker = marker->next) {
+    if (marker_is_icc(marker)) {
+      if (num_markers == 0)
+        num_markers = marker->data[13];
+      else if (num_markers != marker->data[13]) {
+        WARNMS(cinfo, JWRN_BOGUS_ICC);  /* inconsistent num_markers fields */
+        return FALSE;
+      }
+      seq_no = marker->data[12];
+      if (seq_no <= 0 || seq_no > num_markers) {
+        WARNMS(cinfo, JWRN_BOGUS_ICC);  /* bogus sequence number */
+        return FALSE;
+      }
+      if (marker_present[seq_no]) {
+        WARNMS(cinfo, JWRN_BOGUS_ICC);  /* duplicate sequence numbers */
+        return FALSE;
+      }
+      marker_present[seq_no] = 1;
+      data_length[seq_no] = marker->data_length - ICC_OVERHEAD_LEN;
+    }
+  }
+
+  if (num_markers == 0)
+    return FALSE;
+
+  /* Check for missing markers, count total space needed,
+   * compute offset of each marker's part of the data.
+   */
+
+  total_length = 0;
+  for (seq_no = 1; seq_no <= num_markers; seq_no++) {
+    if (marker_present[seq_no] == 0) {
+      WARNMS(cinfo, JWRN_BOGUS_ICC);  /* missing sequence number */
+      return FALSE;
+    }
+    data_offset[seq_no] = total_length;
+    total_length += data_length[seq_no];
+  }
+
+  if (total_length == 0) {
+    WARNMS(cinfo, JWRN_BOGUS_ICC);  /* found only empty markers? */
+    return FALSE;
+  }
+
+  /* Allocate space for assembled data */
+  icc_data = (JOCTET *)malloc(total_length * sizeof(JOCTET));
+  if (icc_data == NULL)
+    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 11);  /* oops, out of memory */
+
+  /* and fill it in */
+  for (marker = cinfo->marker_list; marker != NULL; marker = marker->next) {
+    if (marker_is_icc(marker)) {
+      JOCTET FAR *src_ptr;
+      JOCTET *dst_ptr;
+      unsigned int length;
+      seq_no = marker->data[12];
+      dst_ptr = icc_data + data_offset[seq_no];
+      src_ptr = marker->data + ICC_OVERHEAD_LEN;
+      length = data_length[seq_no];
+      while (length--) {
+        *dst_ptr++ = *src_ptr++;
+      }
+    }
+  }
+
+  *icc_data_ptr = icc_data;
+  *icc_data_len = total_length;
+
+  return TRUE;
+}
diff --git a/thirdparty/libjpeg-turbo/src/jdinput.c b/thirdparty/libjpeg-turbo/src/jdinput.c
new file mode 100644
index 00000000000..33e3a820fd2
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdinput.c
@@ -0,0 +1,424 @@
+/*
+ * jdinput.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, 2016, 2018, 2022, 2024, D. R. Commander.
+ * Copyright (C) 2015, Google, Inc.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains input control logic for the JPEG decompressor.
+ * These routines are concerned with controlling the decompressor's input
+ * processing (marker reading and coefficient/difference decoding).
+ * The actual input reading is done in jdmarker.c, jdhuff.c, jdphuff.c,
+ * and jdlhuff.c.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jpegapicomp.h"
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_input_controller pub; /* public fields */
+
+  boolean inheaders;            /* TRUE until first SOS is reached */
+} my_input_controller;
+
+typedef my_input_controller *my_inputctl_ptr;
+
+
+/* Forward declarations */
+METHODDEF(int) consume_markers(j_decompress_ptr cinfo);
+
+
+/*
+ * Routines to calculate various quantities related to the size of the image.
+ */
+
+LOCAL(void)
+initial_setup(j_decompress_ptr cinfo)
+/* Called once, when first SOS marker is reached */
+{
+  int ci;
+  jpeg_component_info *compptr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+
+  /* Make sure image isn't bigger than I can handle */
+  if ((long)cinfo->image_height > (long)JPEG_MAX_DIMENSION ||
+      (long)cinfo->image_width > (long)JPEG_MAX_DIMENSION)
+    ERREXIT1(cinfo, JERR_IMAGE_TOO_BIG, (unsigned int)JPEG_MAX_DIMENSION);
+
+  /* Lossy JPEG images must have 8 or 12 bits per sample.  Lossless JPEG images
+   * can have 2 to 16 bits per sample.
+   */
+#ifdef D_LOSSLESS_SUPPORTED
+  if (cinfo->master->lossless) {
+    if (cinfo->data_precision < 2 || cinfo->data_precision > 16)
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  } else
+#endif
+  {
+    if (cinfo->data_precision != 8 && cinfo->data_precision != 12)
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  }
+
+  /* Check that number of components won't exceed internal array sizes */
+  if (cinfo->num_components > MAX_COMPONENTS)
+    ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->num_components,
+             MAX_COMPONENTS);
+
+  /* Compute maximum sampling factors; check factor validity */
+  cinfo->max_h_samp_factor = 1;
+  cinfo->max_v_samp_factor = 1;
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    if (compptr->h_samp_factor <= 0 ||
+        compptr->h_samp_factor > MAX_SAMP_FACTOR ||
+        compptr->v_samp_factor <= 0 ||
+        compptr->v_samp_factor > MAX_SAMP_FACTOR)
+      ERREXIT(cinfo, JERR_BAD_SAMPLING);
+    cinfo->max_h_samp_factor = MAX(cinfo->max_h_samp_factor,
+                                   compptr->h_samp_factor);
+    cinfo->max_v_samp_factor = MAX(cinfo->max_v_samp_factor,
+                                   compptr->v_samp_factor);
+  }
+
+#if JPEG_LIB_VERSION >= 80
+  cinfo->block_size = data_unit;
+  cinfo->natural_order = jpeg_natural_order;
+  cinfo->lim_Se = DCTSIZE2 - 1;
+#endif
+
+  /* We initialize DCT_scaled_size and min_DCT_scaled_size to DCTSIZE in lossy
+   * mode.  In the full decompressor, this will be overridden by jdmaster.c;
+   * but in the transcoder, jdmaster.c is not used, so we must do it here.
+   */
+#if JPEG_LIB_VERSION >= 70
+  cinfo->min_DCT_h_scaled_size = cinfo->min_DCT_v_scaled_size = data_unit;
+#else
+  cinfo->min_DCT_scaled_size = data_unit;
+#endif
+
+  /* Compute dimensions of components */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+#if JPEG_LIB_VERSION >= 70
+    compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = data_unit;
+#else
+    compptr->DCT_scaled_size = data_unit;
+#endif
+    /* Size in data units */
+    compptr->width_in_blocks = (JDIMENSION)
+      jdiv_round_up((long)cinfo->image_width * (long)compptr->h_samp_factor,
+                    (long)(cinfo->max_h_samp_factor * data_unit));
+    compptr->height_in_blocks = (JDIMENSION)
+      jdiv_round_up((long)cinfo->image_height * (long)compptr->v_samp_factor,
+                    (long)(cinfo->max_v_samp_factor * data_unit));
+    /* Set the first and last MCU columns to decompress from multi-scan images.
+     * By default, decompress all of the MCU columns.
+     */
+    cinfo->master->first_MCU_col[ci] = 0;
+    cinfo->master->last_MCU_col[ci] = compptr->width_in_blocks - 1;
+    /* downsampled_width and downsampled_height will also be overridden by
+     * jdmaster.c if we are doing full decompression.  The transcoder library
+     * doesn't use these values, but the calling application might.
+     */
+    /* Size in samples */
+    compptr->downsampled_width = (JDIMENSION)
+      jdiv_round_up((long)cinfo->image_width * (long)compptr->h_samp_factor,
+                    (long)cinfo->max_h_samp_factor);
+    compptr->downsampled_height = (JDIMENSION)
+      jdiv_round_up((long)cinfo->image_height * (long)compptr->v_samp_factor,
+                    (long)cinfo->max_v_samp_factor);
+    /* Mark component needed, until color conversion says otherwise */
+    compptr->component_needed = TRUE;
+    /* Mark no quantization table yet saved for component */
+    compptr->quant_table = NULL;
+  }
+
+  /* Compute number of fully interleaved MCU rows. */
+  cinfo->total_iMCU_rows = (JDIMENSION)
+    jdiv_round_up((long)cinfo->image_height,
+                  (long)(cinfo->max_v_samp_factor * data_unit));
+
+  /* Decide whether file contains multiple scans */
+  if (cinfo->comps_in_scan < cinfo->num_components || cinfo->progressive_mode)
+    cinfo->inputctl->has_multiple_scans = TRUE;
+  else
+    cinfo->inputctl->has_multiple_scans = FALSE;
+}
+
+
+LOCAL(void)
+per_scan_setup(j_decompress_ptr cinfo)
+/* Do computations that are needed before processing a JPEG scan */
+/* cinfo->comps_in_scan and cinfo->cur_comp_info[] were set from SOS marker */
+{
+  int ci, mcublks, tmp;
+  jpeg_component_info *compptr;
+  int data_unit = cinfo->master->lossless ? 1 : DCTSIZE;
+
+  if (cinfo->comps_in_scan == 1) {
+
+    /* Noninterleaved (single-component) scan */
+    compptr = cinfo->cur_comp_info[0];
+
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = compptr->width_in_blocks;
+    cinfo->MCU_rows_in_scan = compptr->height_in_blocks;
+
+    /* For noninterleaved scan, always one data unit per MCU */
+    compptr->MCU_width = 1;
+    compptr->MCU_height = 1;
+    compptr->MCU_blocks = 1;
+    compptr->MCU_sample_width = compptr->_DCT_scaled_size;
+    compptr->last_col_width = 1;
+    /* For noninterleaved scans, it is convenient to define last_row_height
+     * as the number of data unit rows present in the last iMCU row.
+     */
+    tmp = (int)(compptr->height_in_blocks % compptr->v_samp_factor);
+    if (tmp == 0) tmp = compptr->v_samp_factor;
+    compptr->last_row_height = tmp;
+
+    /* Prepare array describing MCU composition */
+    cinfo->blocks_in_MCU = 1;
+    cinfo->MCU_membership[0] = 0;
+
+  } else {
+
+    /* Interleaved (multi-component) scan */
+    if (cinfo->comps_in_scan <= 0 || cinfo->comps_in_scan > MAX_COMPS_IN_SCAN)
+      ERREXIT2(cinfo, JERR_COMPONENT_COUNT, cinfo->comps_in_scan,
+               MAX_COMPS_IN_SCAN);
+
+    /* Overall image size in MCUs */
+    cinfo->MCUs_per_row = (JDIMENSION)
+      jdiv_round_up((long)cinfo->image_width,
+                    (long)(cinfo->max_h_samp_factor * data_unit));
+    cinfo->MCU_rows_in_scan = (JDIMENSION)
+      jdiv_round_up((long)cinfo->image_height,
+                    (long)(cinfo->max_v_samp_factor * data_unit));
+
+    cinfo->blocks_in_MCU = 0;
+
+    for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+      compptr = cinfo->cur_comp_info[ci];
+      /* Sampling factors give # of data units of component in each MCU */
+      compptr->MCU_width = compptr->h_samp_factor;
+      compptr->MCU_height = compptr->v_samp_factor;
+      compptr->MCU_blocks = compptr->MCU_width * compptr->MCU_height;
+      compptr->MCU_sample_width = compptr->MCU_width *
+                                  compptr->_DCT_scaled_size;
+      /* Figure number of non-dummy data units in last MCU column & row */
+      tmp = (int)(compptr->width_in_blocks % compptr->MCU_width);
+      if (tmp == 0) tmp = compptr->MCU_width;
+      compptr->last_col_width = tmp;
+      tmp = (int)(compptr->height_in_blocks % compptr->MCU_height);
+      if (tmp == 0) tmp = compptr->MCU_height;
+      compptr->last_row_height = tmp;
+      /* Prepare array describing MCU composition */
+      mcublks = compptr->MCU_blocks;
+      if (cinfo->blocks_in_MCU + mcublks > D_MAX_BLOCKS_IN_MCU)
+        ERREXIT(cinfo, JERR_BAD_MCU_SIZE);
+      while (mcublks-- > 0) {
+        cinfo->MCU_membership[cinfo->blocks_in_MCU++] = ci;
+      }
+    }
+
+  }
+}
+
+
+/*
+ * Save away a copy of the Q-table referenced by each component present
+ * in the current scan, unless already saved during a prior scan.
+ *
+ * In a multiple-scan JPEG file, the encoder could assign different components
+ * the same Q-table slot number, but change table definitions between scans
+ * so that each component uses a different Q-table.  (The IJG encoder is not
+ * currently capable of doing this, but other encoders might.)  Since we want
+ * to be able to dequantize all the components at the end of the file, this
+ * means that we have to save away the table actually used for each component.
+ * We do this by copying the table at the start of the first scan containing
+ * the component.
+ * Rec. ITU-T T.81 | ISO/IEC 10918-1 prohibits the encoder from changing the
+ * contents of a Q-table slot between scans of a component using that slot.  If
+ * the encoder does so anyway, this decoder will simply use the Q-table values
+ * that were current at the start of the first scan for the component.
+ *
+ * The decompressor output side looks only at the saved quant tables,
+ * not at the current Q-table slots.
+ */
+
+LOCAL(void)
+latch_quant_tables(j_decompress_ptr cinfo)
+{
+  int ci, qtblno;
+  jpeg_component_info *compptr;
+  JQUANT_TBL *qtbl;
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* No work if we already saved Q-table for this component */
+    if (compptr->quant_table != NULL)
+      continue;
+    /* Make sure specified quantization table is present */
+    qtblno = compptr->quant_tbl_no;
+    if (qtblno < 0 || qtblno >= NUM_QUANT_TBLS ||
+        cinfo->quant_tbl_ptrs[qtblno] == NULL)
+      ERREXIT1(cinfo, JERR_NO_QUANT_TABLE, qtblno);
+    /* OK, save away the quantization table */
+    qtbl = (JQUANT_TBL *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                  sizeof(JQUANT_TBL));
+    memcpy(qtbl, cinfo->quant_tbl_ptrs[qtblno], sizeof(JQUANT_TBL));
+    compptr->quant_table = qtbl;
+  }
+}
+
+
+/*
+ * Initialize the input modules to read a scan of compressed data.
+ * The first call to this is done by jdmaster.c after initializing
+ * the entire decompressor (during jpeg_start_decompress).
+ * Subsequent calls come from consume_markers, below.
+ */
+
+METHODDEF(void)
+start_input_pass(j_decompress_ptr cinfo)
+{
+  per_scan_setup(cinfo);
+  if (!cinfo->master->lossless)
+    latch_quant_tables(cinfo);
+  (*cinfo->entropy->start_pass) (cinfo);
+  (*cinfo->coef->start_input_pass) (cinfo);
+  cinfo->inputctl->consume_input = cinfo->coef->consume_data;
+}
+
+
+/*
+ * Finish up after inputting a compressed-data scan.
+ * This is called by the coefficient or difference controller after it's read
+ * all the expected data of the scan.
+ */
+
+METHODDEF(void)
+finish_input_pass(j_decompress_ptr cinfo)
+{
+  cinfo->inputctl->consume_input = consume_markers;
+}
+
+
+/*
+ * Read JPEG markers before, between, or after compressed-data scans.
+ * Change state as necessary when a new scan is reached.
+ * Return value is JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+ *
+ * The consume_input method pointer points either here or to the
+ * coefficient or difference controller's consume_data routine, depending on
+ * whether we are reading a compressed data segment or inter-segment markers.
+ */
+
+METHODDEF(int)
+consume_markers(j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr inputctl = (my_inputctl_ptr)cinfo->inputctl;
+  int val;
+
+  if (inputctl->pub.eoi_reached) /* After hitting EOI, read no further */
+    return JPEG_REACHED_EOI;
+
+  val = (*cinfo->marker->read_markers) (cinfo);
+
+  switch (val) {
+  case JPEG_REACHED_SOS:        /* Found SOS */
+    if (inputctl->inheaders) {  /* 1st SOS */
+      initial_setup(cinfo);
+      inputctl->inheaders = FALSE;
+      /* Note: start_input_pass must be called by jdmaster.c
+       * before any more input can be consumed.  jdapimin.c is
+       * responsible for enforcing this sequencing.
+       */
+    } else {                    /* 2nd or later SOS marker */
+      if (!inputctl->pub.has_multiple_scans)
+        ERREXIT(cinfo, JERR_EOI_EXPECTED); /* Oops, I wasn't expecting this! */
+      start_input_pass(cinfo);
+    }
+    break;
+  case JPEG_REACHED_EOI:        /* Found EOI */
+    inputctl->pub.eoi_reached = TRUE;
+    if (inputctl->inheaders) {  /* Tables-only datastream, apparently */
+      if (cinfo->marker->saw_SOF)
+        ERREXIT(cinfo, JERR_SOF_NO_SOS);
+    } else {
+      /* Prevent infinite loop in coef ctlr's decompress_data routine
+       * if user set output_scan_number larger than number of scans.
+       */
+      if (cinfo->output_scan_number > cinfo->input_scan_number)
+        cinfo->output_scan_number = cinfo->input_scan_number;
+    }
+    break;
+  case JPEG_SUSPENDED:
+    break;
+  }
+
+  return val;
+}
+
+
+/*
+ * Reset state to begin a fresh datastream.
+ */
+
+METHODDEF(void)
+reset_input_controller(j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr inputctl = (my_inputctl_ptr)cinfo->inputctl;
+
+  inputctl->pub.consume_input = consume_markers;
+  inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
+  inputctl->pub.eoi_reached = FALSE;
+  inputctl->inheaders = TRUE;
+  /* Reset other modules */
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr)cinfo);
+  (*cinfo->marker->reset_marker_reader) (cinfo);
+  /* Reset progression state -- would be cleaner if entropy decoder did this */
+  cinfo->coef_bits = NULL;
+}
+
+
+/*
+ * Initialize the input controller module.
+ * This is called only once, when the decompression object is created.
+ */
+
+GLOBAL(void)
+jinit_input_controller(j_decompress_ptr cinfo)
+{
+  my_inputctl_ptr inputctl;
+
+  /* Create subobject in permanent pool */
+  inputctl = (my_inputctl_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
+                                sizeof(my_input_controller));
+  cinfo->inputctl = (struct jpeg_input_controller *)inputctl;
+  /* Initialize method pointers */
+  inputctl->pub.consume_input = consume_markers;
+  inputctl->pub.reset_input_controller = reset_input_controller;
+  inputctl->pub.start_input_pass = start_input_pass;
+  inputctl->pub.finish_input_pass = finish_input_pass;
+  /* Initialize state: can't use reset_input_controller since we don't
+   * want to try to reset other modules yet.
+   */
+  inputctl->pub.has_multiple_scans = FALSE; /* "unknown" would be better */
+  inputctl->pub.eoi_reached = FALSE;
+  inputctl->inheaders = TRUE;
+}
diff --git a/thirdparty/libjpeg-turbo/src/jdmainct.c b/thirdparty/libjpeg-turbo/src/jdmainct.c
new file mode 100644
index 00000000000..fed1866ee98
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdmainct.c
@@ -0,0 +1,482 @@
+/*
+ * jdmainct.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, 2016, 2022, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains the main buffer controller for decompression.
+ * The main buffer lies between the JPEG decompressor proper and the
+ * post-processor; it holds downsampled data in the JPEG colorspace.
+ *
+ * Note that this code is bypassed in raw-data mode, since the application
+ * supplies the equivalent of the main buffer in that case.
+ */
+
+#include "jinclude.h"
+#include "jdmainct.h"
+
+
+#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
+
+/*
+ * In the current system design, the main buffer need never be a full-image
+ * buffer; any full-height buffers will be found inside the coefficient,
+ * difference, or postprocessing controllers.  Nonetheless, the main controller
+ * is not trivial.  Its responsibility is to provide context rows for
+ * upsampling/rescaling, and doing this in an efficient fashion is a bit
+ * tricky.
+ *
+ * Postprocessor input data is counted in "row groups".  A row group
+ * is defined to be (v_samp_factor * DCT_scaled_size / min_DCT_scaled_size)
+ * sample rows of each component.  (We require DCT_scaled_size values to be
+ * chosen such that these numbers are integers.  In practice DCT_scaled_size
+ * values will likely be powers of two, so we actually have the stronger
+ * condition that DCT_scaled_size / min_DCT_scaled_size is an integer.)
+ * Upsampling will typically produce max_v_samp_factor pixel rows from each
+ * row group (times any additional scale factor that the upsampler is
+ * applying).
+ *
+ * The coefficient or difference controller will deliver data to us one iMCU
+ * row at a time; each iMCU row contains v_samp_factor * DCT_scaled_size sample
+ * rows, or exactly min_DCT_scaled_size row groups.  (This amount of data
+ * corresponds to one row of MCUs when the image is fully interleaved.)  Note
+ * that the number of sample rows varies across components, but the number of
+ * row groups does not.  Some garbage sample rows may be included in the last
+ * iMCU row at the bottom of the image.
+ *
+ * Depending on the vertical scaling algorithm used, the upsampler may need
+ * access to the sample row(s) above and below its current input row group.
+ * The upsampler is required to set need_context_rows TRUE at global selection
+ * time if so.  When need_context_rows is FALSE, this controller can simply
+ * obtain one iMCU row at a time from the coefficient or difference controller
+ * and dole it out as row groups to the postprocessor.
+ *
+ * When need_context_rows is TRUE, this controller guarantees that the buffer
+ * passed to postprocessing contains at least one row group's worth of samples
+ * above and below the row group(s) being processed.  Note that the context
+ * rows "above" the first passed row group appear at negative row offsets in
+ * the passed buffer.  At the top and bottom of the image, the required
+ * context rows are manufactured by duplicating the first or last real sample
+ * row; this avoids having special cases in the upsampling inner loops.
+ *
+ * The amount of context is fixed at one row group just because that's a
+ * convenient number for this controller to work with.  The existing
+ * upsamplers really only need one sample row of context.  An upsampler
+ * supporting arbitrary output rescaling might wish for more than one row
+ * group of context when shrinking the image; tough, we don't handle that.
+ * (This is justified by the assumption that downsizing will be handled mostly
+ * by adjusting the DCT_scaled_size values, so that the actual scale factor at
+ * the upsample step needn't be much less than one.)
+ *
+ * To provide the desired context, we have to retain the last two row groups
+ * of one iMCU row while reading in the next iMCU row.  (The last row group
+ * can't be processed until we have another row group for its below-context,
+ * and so we have to save the next-to-last group too for its above-context.)
+ * We could do this most simply by copying data around in our buffer, but
+ * that'd be very slow.  We can avoid copying any data by creating a rather
+ * strange pointer structure.  Here's how it works.  We allocate a workspace
+ * consisting of M+2 row groups (where M = min_DCT_scaled_size is the number
+ * of row groups per iMCU row).  We create two sets of redundant pointers to
+ * the workspace.  Labeling the physical row groups 0 to M+1, the synthesized
+ * pointer lists look like this:
+ *                   M+1                          M-1
+ * master pointer --> 0         master pointer --> 0
+ *                    1                            1
+ *                   ...                          ...
+ *                   M-3                          M-3
+ *                   M-2                           M
+ *                   M-1                          M+1
+ *                    M                           M-2
+ *                   M+1                          M-1
+ *                    0                            0
+ * We read alternate iMCU rows using each master pointer; thus the last two
+ * row groups of the previous iMCU row remain un-overwritten in the workspace.
+ * The pointer lists are set up so that the required context rows appear to
+ * be adjacent to the proper places when we pass the pointer lists to the
+ * upsampler.
+ *
+ * The above pictures describe the normal state of the pointer lists.
+ * At top and bottom of the image, we diddle the pointer lists to duplicate
+ * the first or last sample row as necessary (this is cheaper than copying
+ * sample rows around).
+ *
+ * This scheme breaks down if M < 2, ie, min_DCT_scaled_size is 1.  In that
+ * situation each iMCU row provides only one row group so the buffering logic
+ * must be different (eg, we must read two iMCU rows before we can emit the
+ * first row group).  For now, we simply do not support providing context
+ * rows when min_DCT_scaled_size is 1.  That combination seems unlikely to
+ * be worth providing --- if someone wants a 1/8th-size preview, they probably
+ * want it quick and dirty, so a context-free upsampler is sufficient.
+ */
+
+
+/* Forward declarations */
+METHODDEF(void) process_data_simple_main(j_decompress_ptr cinfo,
+                                         _JSAMPARRAY output_buf,
+                                         JDIMENSION *out_row_ctr,
+                                         JDIMENSION out_rows_avail);
+METHODDEF(void) process_data_context_main(j_decompress_ptr cinfo,
+                                          _JSAMPARRAY output_buf,
+                                          JDIMENSION *out_row_ctr,
+                                          JDIMENSION out_rows_avail);
+#ifdef QUANT_2PASS_SUPPORTED
+METHODDEF(void) process_data_crank_post(j_decompress_ptr cinfo,
+                                        _JSAMPARRAY output_buf,
+                                        JDIMENSION *out_row_ctr,
+                                        JDIMENSION out_rows_avail);
+#endif
+
+
+LOCAL(void)
+alloc_funny_pointers(j_decompress_ptr cinfo)
+/* Allocate space for the funny pointer lists.
+ * This is done only once, not once per pass.
+ */
+{
+  my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
+  int ci, rgroup;
+  int M = cinfo->_min_DCT_scaled_size;
+  jpeg_component_info *compptr;
+  _JSAMPARRAY xbuf;
+
+  /* Get top-level space for component array pointers.
+   * We alloc both arrays with one call to save a few cycles.
+   */
+  main_ptr->xbuffer[0] = (_JSAMPIMAGE)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                cinfo->num_components * 2 *
+                                sizeof(_JSAMPARRAY));
+  main_ptr->xbuffer[1] = main_ptr->xbuffer[0] + cinfo->num_components;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
+      cinfo->_min_DCT_scaled_size; /* height of a row group of component */
+    /* Get space for pointer lists --- M+4 row groups in each list.
+     * We alloc both pointer lists with one call to save a few cycles.
+     */
+    xbuf = (_JSAMPARRAY)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                  2 * (rgroup * (M + 4)) * sizeof(_JSAMPROW));
+    xbuf += rgroup;             /* want one row group at negative offsets */
+    main_ptr->xbuffer[0][ci] = xbuf;
+    xbuf += rgroup * (M + 4);
+    main_ptr->xbuffer[1][ci] = xbuf;
+  }
+}
+
+
+LOCAL(void)
+make_funny_pointers(j_decompress_ptr cinfo)
+/* Create the funny pointer lists discussed in the comments above.
+ * The actual workspace is already allocated (in main_ptr->buffer),
+ * and the space for the pointer lists is allocated too.
+ * This routine just fills in the curiously ordered lists.
+ * This will be repeated at the beginning of each pass.
+ */
+{
+  my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
+  int ci, i, rgroup;
+  int M = cinfo->_min_DCT_scaled_size;
+  jpeg_component_info *compptr;
+  _JSAMPARRAY buf, xbuf0, xbuf1;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
+      cinfo->_min_DCT_scaled_size; /* height of a row group of component */
+    xbuf0 = main_ptr->xbuffer[0][ci];
+    xbuf1 = main_ptr->xbuffer[1][ci];
+    /* First copy the workspace pointers as-is */
+    buf = main_ptr->buffer[ci];
+    for (i = 0; i < rgroup * (M + 2); i++) {
+      xbuf0[i] = xbuf1[i] = buf[i];
+    }
+    /* In the second list, put the last four row groups in swapped order */
+    for (i = 0; i < rgroup * 2; i++) {
+      xbuf1[rgroup * (M - 2) + i] = buf[rgroup * M + i];
+      xbuf1[rgroup * M + i] = buf[rgroup * (M - 2) + i];
+    }
+    /* The wraparound pointers at top and bottom will be filled later
+     * (see set_wraparound_pointers, below).  Initially we want the "above"
+     * pointers to duplicate the first actual data line.  This only needs
+     * to happen in xbuffer[0].
+     */
+    for (i = 0; i < rgroup; i++) {
+      xbuf0[i - rgroup] = xbuf0[0];
+    }
+  }
+}
+
+
+LOCAL(void)
+set_bottom_pointers(j_decompress_ptr cinfo)
+/* Change the pointer lists to duplicate the last sample row at the bottom
+ * of the image.  whichptr indicates which xbuffer holds the final iMCU row.
+ * Also sets rowgroups_avail to indicate number of nondummy row groups in row.
+ */
+{
+  my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
+  int ci, i, rgroup, iMCUheight, rows_left;
+  jpeg_component_info *compptr;
+  _JSAMPARRAY xbuf;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Count sample rows in one iMCU row and in one row group */
+    iMCUheight = compptr->v_samp_factor * compptr->_DCT_scaled_size;
+    rgroup = iMCUheight / cinfo->_min_DCT_scaled_size;
+    /* Count nondummy sample rows remaining for this component */
+    rows_left = (int)(compptr->downsampled_height % (JDIMENSION)iMCUheight);
+    if (rows_left == 0) rows_left = iMCUheight;
+    /* Count nondummy row groups.  Should get same answer for each component,
+     * so we need only do it once.
+     */
+    if (ci == 0) {
+      main_ptr->rowgroups_avail = (JDIMENSION)((rows_left - 1) / rgroup + 1);
+    }
+    /* Duplicate the last real sample row rgroup*2 times; this pads out the
+     * last partial rowgroup and ensures at least one full rowgroup of context.
+     */
+    xbuf = main_ptr->xbuffer[main_ptr->whichptr][ci];
+    for (i = 0; i < rgroup * 2; i++) {
+      xbuf[rows_left + i] = xbuf[rows_left - 1];
+    }
+  }
+}
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_main(j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+    if (cinfo->upsample->need_context_rows) {
+      main_ptr->pub._process_data = process_data_context_main;
+      make_funny_pointers(cinfo); /* Create the xbuffer[] lists */
+      main_ptr->whichptr = 0;   /* Read first iMCU row into xbuffer[0] */
+      main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
+      main_ptr->iMCU_row_ctr = 0;
+    } else {
+      /* Simple case with no context needed */
+      main_ptr->pub._process_data = process_data_simple_main;
+    }
+    main_ptr->buffer_full = FALSE;      /* Mark buffer empty */
+    main_ptr->rowgroup_ctr = 0;
+    break;
+#ifdef QUANT_2PASS_SUPPORTED
+  case JBUF_CRANK_DEST:
+    /* For last pass of 2-pass quantization, just crank the postprocessor */
+    main_ptr->pub._process_data = process_data_crank_post;
+    break;
+#endif
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+}
+
+
+/*
+ * Process some data.
+ * This handles the simple case where no context is required.
+ */
+
+METHODDEF(void)
+process_data_simple_main(j_decompress_ptr cinfo, _JSAMPARRAY output_buf,
+                         JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
+{
+  my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
+  JDIMENSION rowgroups_avail;
+
+  /* Read input data if we haven't filled the main buffer yet */
+  if (!main_ptr->buffer_full) {
+    if (!(*cinfo->coef->_decompress_data) (cinfo, main_ptr->buffer))
+      return;                   /* suspension forced, can do nothing more */
+    main_ptr->buffer_full = TRUE;       /* OK, we have an iMCU row to work with */
+  }
+
+  /* There are always min_DCT_scaled_size row groups in an iMCU row. */
+  rowgroups_avail = (JDIMENSION)cinfo->_min_DCT_scaled_size;
+  /* Note: at the bottom of the image, we may pass extra garbage row groups
+   * to the postprocessor.  The postprocessor has to check for bottom
+   * of image anyway (at row resolution), so no point in us doing it too.
+   */
+
+  /* Feed the postprocessor */
+  (*cinfo->post->_post_process_data) (cinfo, main_ptr->buffer,
+                                      &main_ptr->rowgroup_ctr, rowgroups_avail,
+                                      output_buf, out_row_ctr, out_rows_avail);
+
+  /* Has postprocessor consumed all the data yet? If so, mark buffer empty */
+  if (main_ptr->rowgroup_ctr >= rowgroups_avail) {
+    main_ptr->buffer_full = FALSE;
+    main_ptr->rowgroup_ctr = 0;
+  }
+}
+
+
+/*
+ * Process some data.
+ * This handles the case where context rows must be provided.
+ */
+
+METHODDEF(void)
+process_data_context_main(j_decompress_ptr cinfo, _JSAMPARRAY output_buf,
+                          JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
+{
+  my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
+
+  /* Read input data if we haven't filled the main buffer yet */
+  if (!main_ptr->buffer_full) {
+    if (!(*cinfo->coef->_decompress_data) (cinfo,
+                                           main_ptr->xbuffer[main_ptr->whichptr]))
+      return;                   /* suspension forced, can do nothing more */
+    main_ptr->buffer_full = TRUE;       /* OK, we have an iMCU row to work with */
+    main_ptr->iMCU_row_ctr++;   /* count rows received */
+  }
+
+  /* Postprocessor typically will not swallow all the input data it is handed
+   * in one call (due to filling the output buffer first).  Must be prepared
+   * to exit and restart.  This switch lets us keep track of how far we got.
+   * Note that each case falls through to the next on successful completion.
+   */
+  switch (main_ptr->context_state) {
+  case CTX_POSTPONED_ROW:
+    /* Call postprocessor using previously set pointers for postponed row */
+    (*cinfo->post->_post_process_data) (cinfo,
+                                        main_ptr->xbuffer[main_ptr->whichptr],
+                                        &main_ptr->rowgroup_ctr,
+                                        main_ptr->rowgroups_avail, output_buf,
+                                        out_row_ctr, out_rows_avail);
+    if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail)
+      return;                   /* Need to suspend */
+    main_ptr->context_state = CTX_PREPARE_FOR_IMCU;
+    if (*out_row_ctr >= out_rows_avail)
+      return;                   /* Postprocessor exactly filled output buf */
+    FALLTHROUGH                 /*FALLTHROUGH*/
+  case CTX_PREPARE_FOR_IMCU:
+    /* Prepare to process first M-1 row groups of this iMCU row */
+    main_ptr->rowgroup_ctr = 0;
+    main_ptr->rowgroups_avail = (JDIMENSION)(cinfo->_min_DCT_scaled_size - 1);
+    /* Check for bottom of image: if so, tweak pointers to "duplicate"
+     * the last sample row, and adjust rowgroups_avail to ignore padding rows.
+     */
+    if (main_ptr->iMCU_row_ctr == cinfo->total_iMCU_rows)
+      set_bottom_pointers(cinfo);
+    main_ptr->context_state = CTX_PROCESS_IMCU;
+    FALLTHROUGH                 /*FALLTHROUGH*/
+  case CTX_PROCESS_IMCU:
+    /* Call postprocessor using previously set pointers */
+    (*cinfo->post->_post_process_data) (cinfo,
+                                        main_ptr->xbuffer[main_ptr->whichptr],
+                                        &main_ptr->rowgroup_ctr,
+                                        main_ptr->rowgroups_avail, output_buf,
+                                        out_row_ctr, out_rows_avail);
+    if (main_ptr->rowgroup_ctr < main_ptr->rowgroups_avail)
+      return;                   /* Need to suspend */
+    /* After the first iMCU, change wraparound pointers to normal state */
+    if (main_ptr->iMCU_row_ctr == 1)
+      set_wraparound_pointers(cinfo);
+    /* Prepare to load new iMCU row using other xbuffer list */
+    main_ptr->whichptr ^= 1;    /* 0=>1 or 1=>0 */
+    main_ptr->buffer_full = FALSE;
+    /* Still need to process last row group of this iMCU row, */
+    /* which is saved at index M+1 of the other xbuffer */
+    main_ptr->rowgroup_ctr = (JDIMENSION)(cinfo->_min_DCT_scaled_size + 1);
+    main_ptr->rowgroups_avail = (JDIMENSION)(cinfo->_min_DCT_scaled_size + 2);
+    main_ptr->context_state = CTX_POSTPONED_ROW;
+  }
+}
+
+
+/*
+ * Process some data.
+ * Final pass of two-pass quantization: just call the postprocessor.
+ * Source data will be the postprocessor controller's internal buffer.
+ */
+
+#ifdef QUANT_2PASS_SUPPORTED
+
+METHODDEF(void)
+process_data_crank_post(j_decompress_ptr cinfo, _JSAMPARRAY output_buf,
+                        JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
+{
+  (*cinfo->post->_post_process_data) (cinfo, (_JSAMPIMAGE)NULL,
+                                      (JDIMENSION *)NULL, (JDIMENSION)0,
+                                      output_buf, out_row_ctr, out_rows_avail);
+}
+
+#endif /* QUANT_2PASS_SUPPORTED */
+
+
+/*
+ * Initialize main buffer controller.
+ */
+
+GLOBAL(void)
+_jinit_d_main_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_main_ptr main_ptr;
+  int ci, rgroup, ngroups;
+  jpeg_component_info *compptr;
+
+#ifdef D_LOSSLESS_SUPPORTED
+  if (cinfo->master->lossless) {
+#if BITS_IN_JSAMPLE == 8
+    if (cinfo->data_precision > BITS_IN_JSAMPLE || cinfo->data_precision < 2)
+#else
+    if (cinfo->data_precision > BITS_IN_JSAMPLE ||
+        cinfo->data_precision < BITS_IN_JSAMPLE - 3)
+#endif
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  } else
+#endif
+  {
+    if (cinfo->data_precision != BITS_IN_JSAMPLE)
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  }
+
+  main_ptr = (my_main_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(my_main_controller));
+  cinfo->main = (struct jpeg_d_main_controller *)main_ptr;
+  main_ptr->pub.start_pass = start_pass_main;
+
+  if (need_full_buffer)         /* shouldn't happen */
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+
+  /* Allocate the workspace.
+   * ngroups is the number of row groups we need.
+   */
+  if (cinfo->upsample->need_context_rows) {
+    if (cinfo->_min_DCT_scaled_size < 2) /* unsupported, see comments above */
+      ERREXIT(cinfo, JERR_NOTIMPL);
+    alloc_funny_pointers(cinfo); /* Alloc space for xbuffer[] lists */
+    ngroups = cinfo->_min_DCT_scaled_size + 2;
+  } else {
+    ngroups = cinfo->_min_DCT_scaled_size;
+  }
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
+      cinfo->_min_DCT_scaled_size; /* height of a row group of component */
+    main_ptr->buffer[ci] = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
+                        ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                         compptr->width_in_blocks * compptr->_DCT_scaled_size,
+                         (JDIMENSION)(rgroup * ngroups));
+  }
+}
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
diff --git a/thirdparty/libjpeg-turbo/src/jdmainct.h b/thirdparty/libjpeg-turbo/src/jdmainct.h
new file mode 100644
index 00000000000..914ad11f694
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdmainct.h
@@ -0,0 +1,78 @@
+/*
+ * jdmainct.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ */
+
+#define JPEG_INTERNALS
+#include "jpeglib.h"
+#include "jpegapicomp.h"
+#include "jsamplecomp.h"
+
+
+#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_d_main_controller pub; /* public fields */
+
+  /* Pointer to allocated workspace (M or M+2 row groups). */
+  _JSAMPARRAY buffer[MAX_COMPONENTS];
+
+  boolean buffer_full;          /* Have we gotten an iMCU row from decoder? */
+  JDIMENSION rowgroup_ctr;      /* counts row groups output to postprocessor */
+
+  /* Remaining fields are only used in the context case. */
+
+  /* These are the master pointers to the funny-order pointer lists. */
+  _JSAMPIMAGE xbuffer[2];       /* pointers to weird pointer lists */
+
+  int whichptr;                 /* indicates which pointer set is now in use */
+  int context_state;            /* process_data state machine status */
+  JDIMENSION rowgroups_avail;   /* row groups available to postprocessor */
+  JDIMENSION iMCU_row_ctr;      /* counts iMCU rows to detect image top/bot */
+} my_main_controller;
+
+typedef my_main_controller *my_main_ptr;
+
+
+/* context_state values: */
+#define CTX_PREPARE_FOR_IMCU    0       /* need to prepare for MCU row */
+#define CTX_PROCESS_IMCU        1       /* feeding iMCU to postprocessor */
+#define CTX_POSTPONED_ROW       2       /* feeding postponed row group */
+
+
+LOCAL(void)
+set_wraparound_pointers(j_decompress_ptr cinfo)
+/* Set up the "wraparound" pointers at top and bottom of the pointer lists.
+ * This changes the pointer list state from top-of-image to the normal state.
+ */
+{
+  my_main_ptr main_ptr = (my_main_ptr)cinfo->main;
+  int ci, i, rgroup;
+  int M = cinfo->_min_DCT_scaled_size;
+  jpeg_component_info *compptr;
+  _JSAMPARRAY xbuf0, xbuf1;
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    rgroup = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
+      cinfo->_min_DCT_scaled_size; /* height of a row group of component */
+    xbuf0 = main_ptr->xbuffer[0][ci];
+    xbuf1 = main_ptr->xbuffer[1][ci];
+    for (i = 0; i < rgroup; i++) {
+      xbuf0[i - rgroup] = xbuf0[rgroup * (M + 1) + i];
+      xbuf1[i - rgroup] = xbuf1[rgroup * (M + 1) + i];
+      xbuf0[rgroup * (M + 2) + i] = xbuf0[i];
+      xbuf1[rgroup * (M + 2) + i] = xbuf1[i];
+    }
+  }
+}
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
diff --git a/thirdparty/libjpeg-turbo/src/jdmarker.c b/thirdparty/libjpeg-turbo/src/jdmarker.c
new file mode 100644
index 00000000000..f918ee4db52
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdmarker.c
@@ -0,0 +1,1384 @@
+/*
+ * jdmarker.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2012, 2015, 2022, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains routines to decode JPEG datastream markers.
+ * Most of the complexity arises from our desire to support input
+ * suspension: if not all of the data for a marker is available,
+ * we must exit back to the application.  On resumption, we reprocess
+ * the marker.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+
+
+typedef enum {                  /* JPEG marker codes */
+  M_SOF0  = 0xc0,
+  M_SOF1  = 0xc1,
+  M_SOF2  = 0xc2,
+  M_SOF3  = 0xc3,
+
+  M_SOF5  = 0xc5,
+  M_SOF6  = 0xc6,
+  M_SOF7  = 0xc7,
+
+  M_JPG   = 0xc8,
+  M_SOF9  = 0xc9,
+  M_SOF10 = 0xca,
+  M_SOF11 = 0xcb,
+
+  M_SOF13 = 0xcd,
+  M_SOF14 = 0xce,
+  M_SOF15 = 0xcf,
+
+  M_DHT   = 0xc4,
+
+  M_DAC   = 0xcc,
+
+  M_RST0  = 0xd0,
+  M_RST1  = 0xd1,
+  M_RST2  = 0xd2,
+  M_RST3  = 0xd3,
+  M_RST4  = 0xd4,
+  M_RST5  = 0xd5,
+  M_RST6  = 0xd6,
+  M_RST7  = 0xd7,
+
+  M_SOI   = 0xd8,
+  M_EOI   = 0xd9,
+  M_SOS   = 0xda,
+  M_DQT   = 0xdb,
+  M_DNL   = 0xdc,
+  M_DRI   = 0xdd,
+  M_DHP   = 0xde,
+  M_EXP   = 0xdf,
+
+  M_APP0  = 0xe0,
+  M_APP1  = 0xe1,
+  M_APP2  = 0xe2,
+  M_APP3  = 0xe3,
+  M_APP4  = 0xe4,
+  M_APP5  = 0xe5,
+  M_APP6  = 0xe6,
+  M_APP7  = 0xe7,
+  M_APP8  = 0xe8,
+  M_APP9  = 0xe9,
+  M_APP10 = 0xea,
+  M_APP11 = 0xeb,
+  M_APP12 = 0xec,
+  M_APP13 = 0xed,
+  M_APP14 = 0xee,
+  M_APP15 = 0xef,
+
+  M_JPG0  = 0xf0,
+  M_JPG13 = 0xfd,
+  M_COM   = 0xfe,
+
+  M_TEM   = 0x01,
+
+  M_ERROR = 0x100
+} JPEG_MARKER;
+
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_marker_reader pub; /* public fields */
+
+  /* Application-overridable marker processing methods */
+  jpeg_marker_parser_method process_COM;
+  jpeg_marker_parser_method process_APPn[16];
+
+  /* Limit on marker data length to save for each marker type */
+  unsigned int length_limit_COM;
+  unsigned int length_limit_APPn[16];
+
+  /* Status of COM/APPn marker saving */
+  jpeg_saved_marker_ptr cur_marker;     /* NULL if not processing a marker */
+  unsigned int bytes_read;              /* data bytes read so far in marker */
+  /* Note: cur_marker is not linked into marker_list until it's all read. */
+} my_marker_reader;
+
+typedef my_marker_reader *my_marker_ptr;
+
+
+/*
+ * Macros for fetching data from the data source module.
+ *
+ * At all times, cinfo->src->next_input_byte and ->bytes_in_buffer reflect
+ * the current restart point; we update them only when we have reached a
+ * suitable place to restart if a suspension occurs.
+ */
+
+/* Declare and initialize local copies of input pointer/count */
+#define INPUT_VARS(cinfo) \
+  struct jpeg_source_mgr *datasrc = (cinfo)->src; \
+  const JOCTET *next_input_byte = datasrc->next_input_byte; \
+  size_t bytes_in_buffer = datasrc->bytes_in_buffer
+
+/* Unload the local copies --- do this only at a restart boundary */
+#define INPUT_SYNC(cinfo) \
+  ( datasrc->next_input_byte = next_input_byte, \
+    datasrc->bytes_in_buffer = bytes_in_buffer )
+
+/* Reload the local copies --- used only in MAKE_BYTE_AVAIL */
+#define INPUT_RELOAD(cinfo) \
+  ( next_input_byte = datasrc->next_input_byte, \
+    bytes_in_buffer = datasrc->bytes_in_buffer )
+
+/* Internal macro for INPUT_BYTE and INPUT_2BYTES: make a byte available.
+ * Note we do *not* do INPUT_SYNC before calling fill_input_buffer,
+ * but we must reload the local copies after a successful fill.
+ */
+#define MAKE_BYTE_AVAIL(cinfo, action) \
+  if (bytes_in_buffer == 0) { \
+    if (!(*datasrc->fill_input_buffer) (cinfo)) \
+      { action; } \
+    INPUT_RELOAD(cinfo); \
+  }
+
+/* Read a byte into variable V.
+ * If must suspend, take the specified action (typically "return FALSE").
+ */
+#define INPUT_BYTE(cinfo, V, action) \
+  MAKESTMT( MAKE_BYTE_AVAIL(cinfo, action); \
+            bytes_in_buffer--; \
+            V = *next_input_byte++; )
+
+/* As above, but read two bytes interpreted as an unsigned 16-bit integer.
+ * V should be declared unsigned int or perhaps JLONG.
+ */
+#define INPUT_2BYTES(cinfo, V, action) \
+  MAKESTMT( MAKE_BYTE_AVAIL(cinfo, action); \
+            bytes_in_buffer--; \
+            V = ((unsigned int)(*next_input_byte++)) << 8; \
+            MAKE_BYTE_AVAIL(cinfo, action); \
+            bytes_in_buffer--; \
+            V += *next_input_byte++; )
+
+
+/*
+ * Routines to process JPEG markers.
+ *
+ * Entry condition: JPEG marker itself has been read and its code saved
+ *   in cinfo->unread_marker; input restart point is just after the marker.
+ *
+ * Exit: if return TRUE, have read and processed any parameters, and have
+ *   updated the restart point to point after the parameters.
+ *   If return FALSE, was forced to suspend before reaching end of
+ *   marker parameters; restart point has not been moved.  Same routine
+ *   will be called again after application supplies more input data.
+ *
+ * This approach to suspension assumes that all of a marker's parameters
+ * can fit into a single input bufferload.  This should hold for "normal"
+ * markers.  Some COM/APPn markers might have large parameter segments
+ * that might not fit.  If we are simply dropping such a marker, we use
+ * skip_input_data to get past it, and thereby put the problem on the
+ * source manager's shoulders.  If we are saving the marker's contents
+ * into memory, we use a slightly different convention: when forced to
+ * suspend, the marker processor updates the restart point to the end of
+ * what it's consumed (ie, the end of the buffer) before returning FALSE.
+ * On resumption, cinfo->unread_marker still contains the marker code,
+ * but the data source will point to the next chunk of marker data.
+ * The marker processor must retain internal state to deal with this.
+ *
+ * Note that we don't bother to avoid duplicate trace messages if a
+ * suspension occurs within marker parameters.  Other side effects
+ * require more care.
+ */
+
+
+LOCAL(boolean)
+get_soi(j_decompress_ptr cinfo)
+/* Process an SOI marker */
+{
+  int i;
+
+  TRACEMS(cinfo, 1, JTRC_SOI);
+
+  if (cinfo->marker->saw_SOI)
+    ERREXIT(cinfo, JERR_SOI_DUPLICATE);
+
+  /* Reset all parameters that are defined to be reset by SOI */
+
+  for (i = 0; i < NUM_ARITH_TBLS; i++) {
+    cinfo->arith_dc_L[i] = 0;
+    cinfo->arith_dc_U[i] = 1;
+    cinfo->arith_ac_K[i] = 5;
+  }
+  cinfo->restart_interval = 0;
+
+  /* Set initial assumptions for colorspace etc */
+
+  cinfo->jpeg_color_space = JCS_UNKNOWN;
+  cinfo->CCIR601_sampling = FALSE; /* Assume non-CCIR sampling??? */
+
+  cinfo->saw_JFIF_marker = FALSE;
+  cinfo->JFIF_major_version = 1; /* set default JFIF APP0 values */
+  cinfo->JFIF_minor_version = 1;
+  cinfo->density_unit = 0;
+  cinfo->X_density = 1;
+  cinfo->Y_density = 1;
+  cinfo->saw_Adobe_marker = FALSE;
+  cinfo->Adobe_transform = 0;
+
+  cinfo->marker->saw_SOI = TRUE;
+
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_sof(j_decompress_ptr cinfo, boolean is_prog, boolean is_lossless,
+        boolean is_arith)
+/* Process a SOFn marker */
+{
+  JLONG length;
+  int c, ci;
+  jpeg_component_info *compptr;
+  INPUT_VARS(cinfo);
+
+  if (cinfo->marker->saw_SOF)
+    ERREXIT(cinfo, JERR_SOF_DUPLICATE);
+
+  cinfo->progressive_mode = is_prog;
+  cinfo->master->lossless = is_lossless;
+  cinfo->arith_code = is_arith;
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+
+  INPUT_BYTE(cinfo, cinfo->data_precision, return FALSE);
+  INPUT_2BYTES(cinfo, cinfo->image_height, return FALSE);
+  INPUT_2BYTES(cinfo, cinfo->image_width, return FALSE);
+  INPUT_BYTE(cinfo, cinfo->num_components, return FALSE);
+
+  length -= 8;
+
+  TRACEMS4(cinfo, 1, JTRC_SOF, cinfo->unread_marker,
+           (int)cinfo->image_width, (int)cinfo->image_height,
+           cinfo->num_components);
+
+  /* We don't support files in which the image height is initially specified */
+  /* as 0 and is later redefined by DNL.  As long as we have to check that,  */
+  /* might as well have a general sanity check. */
+  if (cinfo->image_height <= 0 || cinfo->image_width <= 0 ||
+      cinfo->num_components <= 0)
+    ERREXIT(cinfo, JERR_EMPTY_IMAGE);
+
+  if (length != (cinfo->num_components * 3))
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  if (cinfo->comp_info == NULL) /* do only once, even if suspend */
+    cinfo->comp_info = (jpeg_component_info *)(*cinfo->mem->alloc_small)
+                        ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                         cinfo->num_components * sizeof(jpeg_component_info));
+
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    compptr->component_index = ci;
+    INPUT_BYTE(cinfo, compptr->component_id, return FALSE);
+    INPUT_BYTE(cinfo, c, return FALSE);
+    compptr->h_samp_factor = (c >> 4) & 15;
+    compptr->v_samp_factor = (c     ) & 15;
+    INPUT_BYTE(cinfo, compptr->quant_tbl_no, return FALSE);
+
+    TRACEMS4(cinfo, 1, JTRC_SOF_COMPONENT,
+             compptr->component_id, compptr->h_samp_factor,
+             compptr->v_samp_factor, compptr->quant_tbl_no);
+  }
+
+  cinfo->marker->saw_SOF = TRUE;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_sos(j_decompress_ptr cinfo)
+/* Process a SOS marker */
+{
+  JLONG length;
+  int i, ci, n, c, cc, pi;
+  jpeg_component_info *compptr;
+  INPUT_VARS(cinfo);
+
+  if (!cinfo->marker->saw_SOF)
+    ERREXIT(cinfo, JERR_SOS_NO_SOF);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+
+  INPUT_BYTE(cinfo, n, return FALSE); /* Number of components */
+
+  TRACEMS1(cinfo, 1, JTRC_SOS, n);
+
+  if (length != (n * 2 + 6) || n < 1 || n > MAX_COMPS_IN_SCAN)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  cinfo->comps_in_scan = n;
+
+  /* Collect the component-spec parameters */
+
+  for (i = 0; i < MAX_COMPS_IN_SCAN; i++)
+    cinfo->cur_comp_info[i] = NULL;
+
+  for (i = 0; i < n; i++) {
+    INPUT_BYTE(cinfo, cc, return FALSE);
+    INPUT_BYTE(cinfo, c, return FALSE);
+
+    for (ci = 0, compptr = cinfo->comp_info;
+         ci < cinfo->num_components && ci < MAX_COMPS_IN_SCAN;
+         ci++, compptr++) {
+      if (cc == compptr->component_id && !cinfo->cur_comp_info[ci])
+        goto id_found;
+    }
+
+    ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, cc);
+
+id_found:
+
+    cinfo->cur_comp_info[i] = compptr;
+    compptr->dc_tbl_no = (c >> 4) & 15;
+    compptr->ac_tbl_no = (c     ) & 15;
+
+    TRACEMS3(cinfo, 1, JTRC_SOS_COMPONENT, cc,
+             compptr->dc_tbl_no, compptr->ac_tbl_no);
+
+    /* This CSi (cc) should differ from the previous CSi */
+    for (pi = 0; pi < i; pi++) {
+      if (cinfo->cur_comp_info[pi] == compptr) {
+        ERREXIT1(cinfo, JERR_BAD_COMPONENT_ID, cc);
+      }
+    }
+  }
+
+  /* Collect the additional scan parameters Ss, Se, Ah/Al. */
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Ss = c;
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Se = c;
+  INPUT_BYTE(cinfo, c, return FALSE);
+  cinfo->Ah = (c >> 4) & 15;
+  cinfo->Al = (c     ) & 15;
+
+  TRACEMS4(cinfo, 1, JTRC_SOS_PARAMS, cinfo->Ss, cinfo->Se,
+           cinfo->Ah, cinfo->Al);
+
+  /* Prepare to scan data & restart markers */
+  cinfo->marker->next_restart_num = 0;
+
+  /* Count another SOS marker */
+  cinfo->input_scan_number++;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+#ifdef D_ARITH_CODING_SUPPORTED
+
+LOCAL(boolean)
+get_dac(j_decompress_ptr cinfo)
+/* Process a DAC marker */
+{
+  JLONG length;
+  int index, val;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+
+  while (length > 0) {
+    INPUT_BYTE(cinfo, index, return FALSE);
+    INPUT_BYTE(cinfo, val, return FALSE);
+
+    length -= 2;
+
+    TRACEMS2(cinfo, 1, JTRC_DAC, index, val);
+
+    if (index < 0 || index >= (2 * NUM_ARITH_TBLS))
+      ERREXIT1(cinfo, JERR_DAC_INDEX, index);
+
+    if (index >= NUM_ARITH_TBLS) { /* define AC table */
+      cinfo->arith_ac_K[index - NUM_ARITH_TBLS] = (UINT8)val;
+    } else {                    /* define DC table */
+      cinfo->arith_dc_L[index] = (UINT8)(val & 0x0F);
+      cinfo->arith_dc_U[index] = (UINT8)(val >> 4);
+      if (cinfo->arith_dc_L[index] > cinfo->arith_dc_U[index])
+        ERREXIT1(cinfo, JERR_DAC_VALUE, val);
+    }
+  }
+
+  if (length != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+#else /* !D_ARITH_CODING_SUPPORTED */
+
+#define get_dac(cinfo)  skip_variable(cinfo)
+
+#endif /* D_ARITH_CODING_SUPPORTED */
+
+
+LOCAL(boolean)
+get_dht(j_decompress_ptr cinfo)
+/* Process a DHT marker */
+{
+  JLONG length;
+  UINT8 bits[17];
+  UINT8 huffval[256];
+  int i, index, count;
+  JHUFF_TBL **htblptr;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+
+  while (length > 16) {
+    INPUT_BYTE(cinfo, index, return FALSE);
+
+    TRACEMS1(cinfo, 1, JTRC_DHT, index);
+
+    bits[0] = 0;
+    count = 0;
+    for (i = 1; i <= 16; i++) {
+      INPUT_BYTE(cinfo, bits[i], return FALSE);
+      count += bits[i];
+    }
+
+    length -= 1 + 16;
+
+    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
+             bits[1], bits[2], bits[3], bits[4],
+             bits[5], bits[6], bits[7], bits[8]);
+    TRACEMS8(cinfo, 2, JTRC_HUFFBITS,
+             bits[9], bits[10], bits[11], bits[12],
+             bits[13], bits[14], bits[15], bits[16]);
+
+    /* Here we just do minimal validation of the counts to avoid walking
+     * off the end of our table space.  jdhuff.c will check more carefully.
+     */
+    if (count > 256 || ((JLONG)count) > length)
+      ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+
+    for (i = 0; i < count; i++)
+      INPUT_BYTE(cinfo, huffval[i], return FALSE);
+
+    memset(&huffval[count], 0, (256 - count) * sizeof(UINT8));
+
+    length -= count;
+
+    if (index & 0x10) {         /* AC table definition */
+      index -= 0x10;
+      if (index < 0 || index >= NUM_HUFF_TBLS)
+        ERREXIT1(cinfo, JERR_DHT_INDEX, index);
+      htblptr = &cinfo->ac_huff_tbl_ptrs[index];
+    } else {                    /* DC table definition */
+      if (index < 0 || index >= NUM_HUFF_TBLS)
+        ERREXIT1(cinfo, JERR_DHT_INDEX, index);
+      htblptr = &cinfo->dc_huff_tbl_ptrs[index];
+    }
+
+    if (*htblptr == NULL)
+      *htblptr = jpeg_alloc_huff_table((j_common_ptr)cinfo);
+
+    memcpy((*htblptr)->bits, bits, sizeof((*htblptr)->bits));
+    memcpy((*htblptr)->huffval, huffval, sizeof((*htblptr)->huffval));
+  }
+
+  if (length != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_dqt(j_decompress_ptr cinfo)
+/* Process a DQT marker */
+{
+  JLONG length;
+  int n, i, prec;
+  unsigned int tmp;
+  JQUANT_TBL *quant_ptr;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+
+  while (length > 0) {
+    INPUT_BYTE(cinfo, n, return FALSE);
+    prec = n >> 4;
+    n &= 0x0F;
+
+    TRACEMS2(cinfo, 1, JTRC_DQT, n, prec);
+
+    if (n >= NUM_QUANT_TBLS)
+      ERREXIT1(cinfo, JERR_DQT_INDEX, n);
+
+    if (cinfo->quant_tbl_ptrs[n] == NULL)
+      cinfo->quant_tbl_ptrs[n] = jpeg_alloc_quant_table((j_common_ptr)cinfo);
+    quant_ptr = cinfo->quant_tbl_ptrs[n];
+
+    for (i = 0; i < DCTSIZE2; i++) {
+      if (prec)
+        INPUT_2BYTES(cinfo, tmp, return FALSE);
+      else
+        INPUT_BYTE(cinfo, tmp, return FALSE);
+      /* We convert the zigzag-order table to natural array order. */
+      quant_ptr->quantval[jpeg_natural_order[i]] = (UINT16)tmp;
+    }
+
+    if (cinfo->err->trace_level >= 2) {
+      for (i = 0; i < DCTSIZE2; i += 8) {
+        TRACEMS8(cinfo, 2, JTRC_QUANTVALS,
+                 quant_ptr->quantval[i],     quant_ptr->quantval[i + 1],
+                 quant_ptr->quantval[i + 2], quant_ptr->quantval[i + 3],
+                 quant_ptr->quantval[i + 4], quant_ptr->quantval[i + 5],
+                 quant_ptr->quantval[i + 6], quant_ptr->quantval[i + 7]);
+      }
+    }
+
+    length -= DCTSIZE2 + 1;
+    if (prec) length -= DCTSIZE2;
+  }
+
+  if (length != 0)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+get_dri(j_decompress_ptr cinfo)
+/* Process a DRI marker */
+{
+  JLONG length;
+  unsigned int tmp;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+
+  if (length != 4)
+    ERREXIT(cinfo, JERR_BAD_LENGTH);
+
+  INPUT_2BYTES(cinfo, tmp, return FALSE);
+
+  TRACEMS1(cinfo, 1, JTRC_DRI, tmp);
+
+  cinfo->restart_interval = tmp;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Routines for processing APPn and COM markers.
+ * These are either saved in memory or discarded, per application request.
+ * APP0 and APP14 are specially checked to see if they are
+ * JFIF and Adobe markers, respectively.
+ */
+
+#define APP0_DATA_LEN   14      /* Length of interesting data in APP0 */
+#define APP14_DATA_LEN  12      /* Length of interesting data in APP14 */
+#define APPN_DATA_LEN   14      /* Must be the largest of the above!! */
+
+
+LOCAL(void)
+examine_app0(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen,
+             JLONG remaining)
+/* Examine first few bytes from an APP0.
+ * Take appropriate action if it is a JFIF marker.
+ * datalen is # of bytes at data[], remaining is length of rest of marker data.
+ */
+{
+  JLONG totallen = (JLONG)datalen + remaining;
+
+  if (datalen >= APP0_DATA_LEN &&
+      data[0] == 0x4A &&
+      data[1] == 0x46 &&
+      data[2] == 0x49 &&
+      data[3] == 0x46 &&
+      data[4] == 0) {
+    /* Found JFIF APP0 marker: save info */
+    cinfo->saw_JFIF_marker = TRUE;
+    cinfo->JFIF_major_version = data[5];
+    cinfo->JFIF_minor_version = data[6];
+    cinfo->density_unit = data[7];
+    cinfo->X_density = (data[8] << 8) + data[9];
+    cinfo->Y_density = (data[10] << 8) + data[11];
+    /* Check version.
+     * Major version must be 1, anything else signals an incompatible change.
+     * (We used to treat this as an error, but now it's a nonfatal warning,
+     * because some bozo at Hijaak couldn't read the spec.)
+     * Minor version should be 0..2, but process anyway if newer.
+     */
+    if (cinfo->JFIF_major_version != 1)
+      WARNMS2(cinfo, JWRN_JFIF_MAJOR,
+              cinfo->JFIF_major_version, cinfo->JFIF_minor_version);
+    /* Generate trace messages */
+    TRACEMS5(cinfo, 1, JTRC_JFIF,
+             cinfo->JFIF_major_version, cinfo->JFIF_minor_version,
+             cinfo->X_density, cinfo->Y_density, cinfo->density_unit);
+    /* Validate thumbnail dimensions and issue appropriate messages */
+    if (data[12] | data[13])
+      TRACEMS2(cinfo, 1, JTRC_JFIF_THUMBNAIL, data[12], data[13]);
+    totallen -= APP0_DATA_LEN;
+    if (totallen != ((JLONG)data[12] * (JLONG)data[13] * (JLONG)3))
+      TRACEMS1(cinfo, 1, JTRC_JFIF_BADTHUMBNAILSIZE, (int)totallen);
+  } else if (datalen >= 6 &&
+             data[0] == 0x4A &&
+             data[1] == 0x46 &&
+             data[2] == 0x58 &&
+             data[3] == 0x58 &&
+             data[4] == 0) {
+    /* Found JFIF "JFXX" extension APP0 marker */
+    /* The library doesn't actually do anything with these,
+     * but we try to produce a helpful trace message.
+     */
+    switch (data[5]) {
+    case 0x10:
+      TRACEMS1(cinfo, 1, JTRC_THUMB_JPEG, (int)totallen);
+      break;
+    case 0x11:
+      TRACEMS1(cinfo, 1, JTRC_THUMB_PALETTE, (int)totallen);
+      break;
+    case 0x13:
+      TRACEMS1(cinfo, 1, JTRC_THUMB_RGB, (int)totallen);
+      break;
+    default:
+      TRACEMS2(cinfo, 1, JTRC_JFIF_EXTENSION, data[5], (int)totallen);
+      break;
+    }
+  } else {
+    /* Start of APP0 does not match "JFIF" or "JFXX", or too short */
+    TRACEMS1(cinfo, 1, JTRC_APP0, (int)totallen);
+  }
+}
+
+
+LOCAL(void)
+examine_app14(j_decompress_ptr cinfo, JOCTET *data, unsigned int datalen,
+              JLONG remaining)
+/* Examine first few bytes from an APP14.
+ * Take appropriate action if it is an Adobe marker.
+ * datalen is # of bytes at data[], remaining is length of rest of marker data.
+ */
+{
+  unsigned int version, flags0, flags1, transform;
+
+  if (datalen >= APP14_DATA_LEN &&
+      data[0] == 0x41 &&
+      data[1] == 0x64 &&
+      data[2] == 0x6F &&
+      data[3] == 0x62 &&
+      data[4] == 0x65) {
+    /* Found Adobe APP14 marker */
+    version = (data[5] << 8) + data[6];
+    flags0 = (data[7] << 8) + data[8];
+    flags1 = (data[9] << 8) + data[10];
+    transform = data[11];
+    TRACEMS4(cinfo, 1, JTRC_ADOBE, version, flags0, flags1, transform);
+    cinfo->saw_Adobe_marker = TRUE;
+    cinfo->Adobe_transform = (UINT8)transform;
+  } else {
+    /* Start of APP14 does not match "Adobe", or too short */
+    TRACEMS1(cinfo, 1, JTRC_APP14, (int)(datalen + remaining));
+  }
+}
+
+
+METHODDEF(boolean)
+get_interesting_appn(j_decompress_ptr cinfo)
+/* Process an APP0 or APP14 marker without saving it */
+{
+  JLONG length;
+  JOCTET b[APPN_DATA_LEN];
+  unsigned int i, numtoread;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+
+  /* get the interesting part of the marker data */
+  if (length >= APPN_DATA_LEN)
+    numtoread = APPN_DATA_LEN;
+  else if (length > 0)
+    numtoread = (unsigned int)length;
+  else
+    numtoread = 0;
+  for (i = 0; i < numtoread; i++)
+    INPUT_BYTE(cinfo, b[i], return FALSE);
+  length -= numtoread;
+
+  /* process it */
+  switch (cinfo->unread_marker) {
+  case M_APP0:
+    examine_app0(cinfo, (JOCTET *)b, numtoread, length);
+    break;
+  case M_APP14:
+    examine_app14(cinfo, (JOCTET *)b, numtoread, length);
+    break;
+  default:
+    /* can't get here unless jpeg_save_markers chooses wrong processor */
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
+    break;
+  }
+
+  /* skip any remaining data -- could be lots */
+  INPUT_SYNC(cinfo);
+  if (length > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long)length);
+
+  return TRUE;
+}
+
+
+#ifdef SAVE_MARKERS_SUPPORTED
+
+METHODDEF(boolean)
+save_marker(j_decompress_ptr cinfo)
+/* Save an APPn or COM marker into the marker list */
+{
+  my_marker_ptr marker = (my_marker_ptr)cinfo->marker;
+  jpeg_saved_marker_ptr cur_marker = marker->cur_marker;
+  unsigned int bytes_read, data_length;
+  JOCTET *data;
+  JLONG length = 0;
+  INPUT_VARS(cinfo);
+
+  if (cur_marker == NULL) {
+    /* begin reading a marker */
+    INPUT_2BYTES(cinfo, length, return FALSE);
+    length -= 2;
+    if (length >= 0) {          /* watch out for bogus length word */
+      /* figure out how much we want to save */
+      unsigned int limit;
+      if (cinfo->unread_marker == (int)M_COM)
+        limit = marker->length_limit_COM;
+      else
+        limit = marker->length_limit_APPn[cinfo->unread_marker - (int)M_APP0];
+      if ((unsigned int)length < limit)
+        limit = (unsigned int)length;
+      /* allocate and initialize the marker item */
+      cur_marker = (jpeg_saved_marker_ptr)
+        (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                    sizeof(struct jpeg_marker_struct) + limit);
+      cur_marker->next = NULL;
+      cur_marker->marker = (UINT8)cinfo->unread_marker;
+      cur_marker->original_length = (unsigned int)length;
+      cur_marker->data_length = limit;
+      /* data area is just beyond the jpeg_marker_struct */
+      data = cur_marker->data = (JOCTET *)(cur_marker + 1);
+      marker->cur_marker = cur_marker;
+      marker->bytes_read = 0;
+      bytes_read = 0;
+      data_length = limit;
+    } else {
+      /* deal with bogus length word */
+      bytes_read = data_length = 0;
+      data = NULL;
+    }
+  } else {
+    /* resume reading a marker */
+    bytes_read = marker->bytes_read;
+    data_length = cur_marker->data_length;
+    data = cur_marker->data + bytes_read;
+  }
+
+  while (bytes_read < data_length) {
+    INPUT_SYNC(cinfo);          /* move the restart point to here */
+    marker->bytes_read = bytes_read;
+    /* If there's not at least one byte in buffer, suspend */
+    MAKE_BYTE_AVAIL(cinfo, return FALSE);
+    /* Copy bytes with reasonable rapidity */
+    while (bytes_read < data_length && bytes_in_buffer > 0) {
+      *data++ = *next_input_byte++;
+      bytes_in_buffer--;
+      bytes_read++;
+    }
+  }
+
+  /* Done reading what we want to read */
+  if (cur_marker != NULL) {     /* will be NULL if bogus length word */
+    /* Add new marker to end of list */
+    if (cinfo->marker_list == NULL || cinfo->master->marker_list_end == NULL) {
+      cinfo->marker_list = cinfo->master->marker_list_end = cur_marker;
+    } else {
+      cinfo->master->marker_list_end->next = cur_marker;
+      cinfo->master->marker_list_end = cur_marker;
+    }
+    /* Reset pointer & calc remaining data length */
+    data = cur_marker->data;
+    length = cur_marker->original_length - data_length;
+  }
+  /* Reset to initial state for next marker */
+  marker->cur_marker = NULL;
+
+  /* Process the marker if interesting; else just make a generic trace msg */
+  switch (cinfo->unread_marker) {
+  case M_APP0:
+    examine_app0(cinfo, data, data_length, length);
+    break;
+  case M_APP14:
+    examine_app14(cinfo, data, data_length, length);
+    break;
+  default:
+    TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker,
+             (int)(data_length + length));
+    break;
+  }
+
+  /* skip any remaining data -- could be lots */
+  INPUT_SYNC(cinfo);            /* do before skip_input_data */
+  if (length > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long)length);
+
+  return TRUE;
+}
+
+#endif /* SAVE_MARKERS_SUPPORTED */
+
+
+METHODDEF(boolean)
+skip_variable(j_decompress_ptr cinfo)
+/* Skip over an unknown or uninteresting variable-length marker */
+{
+  JLONG length;
+  INPUT_VARS(cinfo);
+
+  INPUT_2BYTES(cinfo, length, return FALSE);
+  length -= 2;
+
+  TRACEMS2(cinfo, 1, JTRC_MISC_MARKER, cinfo->unread_marker, (int)length);
+
+  INPUT_SYNC(cinfo);            /* do before skip_input_data */
+  if (length > 0)
+    (*cinfo->src->skip_input_data) (cinfo, (long)length);
+
+  return TRUE;
+}
+
+
+/*
+ * Find the next JPEG marker, save it in cinfo->unread_marker.
+ * Returns FALSE if had to suspend before reaching a marker;
+ * in that case cinfo->unread_marker is unchanged.
+ *
+ * Note that the result might not be a valid marker code,
+ * but it will never be 0 or FF.
+ */
+
+LOCAL(boolean)
+next_marker(j_decompress_ptr cinfo)
+{
+  int c;
+  INPUT_VARS(cinfo);
+
+  for (;;) {
+    INPUT_BYTE(cinfo, c, return FALSE);
+    /* Skip any non-FF bytes.
+     * This may look a bit inefficient, but it will not occur in a valid file.
+     * We sync after each discarded byte so that a suspending data source
+     * can discard the byte from its buffer.
+     */
+    while (c != 0xFF) {
+      cinfo->marker->discarded_bytes++;
+      INPUT_SYNC(cinfo);
+      INPUT_BYTE(cinfo, c, return FALSE);
+    }
+    /* This loop swallows any duplicate FF bytes.  Extra FFs are legal as
+     * pad bytes, so don't count them in discarded_bytes.  We assume there
+     * will not be so many consecutive FF bytes as to overflow a suspending
+     * data source's input buffer.
+     */
+    do {
+      INPUT_BYTE(cinfo, c, return FALSE);
+    } while (c == 0xFF);
+    if (c != 0)
+      break;                    /* found a valid marker, exit loop */
+    /* Reach here if we found a stuffed-zero data sequence (FF/00).
+     * Discard it and loop back to try again.
+     */
+    cinfo->marker->discarded_bytes += 2;
+    INPUT_SYNC(cinfo);
+  }
+
+  if (cinfo->marker->discarded_bytes != 0) {
+    WARNMS2(cinfo, JWRN_EXTRANEOUS_DATA, cinfo->marker->discarded_bytes, c);
+    cinfo->marker->discarded_bytes = 0;
+  }
+
+  cinfo->unread_marker = c;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+LOCAL(boolean)
+first_marker(j_decompress_ptr cinfo)
+/* Like next_marker, but used to obtain the initial SOI marker. */
+/* For this marker, we do not allow preceding garbage or fill; otherwise,
+ * we might well scan an entire input file before realizing it ain't JPEG.
+ * If an application wants to process non-JFIF files, it must seek to the
+ * SOI before calling the JPEG library.
+ */
+{
+  int c, c2;
+  INPUT_VARS(cinfo);
+
+  INPUT_BYTE(cinfo, c, return FALSE);
+  INPUT_BYTE(cinfo, c2, return FALSE);
+  if (c != 0xFF || c2 != (int)M_SOI)
+    ERREXIT2(cinfo, JERR_NO_SOI, c, c2);
+
+  cinfo->unread_marker = c2;
+
+  INPUT_SYNC(cinfo);
+  return TRUE;
+}
+
+
+/*
+ * Read markers until SOS or EOI.
+ *
+ * Returns same codes as are defined for jpeg_consume_input:
+ * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+ */
+
+METHODDEF(int)
+read_markers(j_decompress_ptr cinfo)
+{
+  /* Outer loop repeats once for each marker. */
+  for (;;) {
+    /* Collect the marker proper, unless we already did. */
+    /* NB: first_marker() enforces the requirement that SOI appear first. */
+    if (cinfo->unread_marker == 0) {
+      if (!cinfo->marker->saw_SOI) {
+        if (!first_marker(cinfo))
+          return JPEG_SUSPENDED;
+      } else {
+        if (!next_marker(cinfo))
+          return JPEG_SUSPENDED;
+      }
+    }
+    /* At this point cinfo->unread_marker contains the marker code and the
+     * input point is just past the marker proper, but before any parameters.
+     * A suspension will cause us to return with this state still true.
+     */
+    switch (cinfo->unread_marker) {
+    case M_SOI:
+      if (!get_soi(cinfo))
+        return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF0:                /* Baseline */
+    case M_SOF1:                /* Extended sequential, Huffman */
+      if (!get_sof(cinfo, FALSE, FALSE, FALSE))
+        return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF2:                /* Progressive, Huffman */
+      if (!get_sof(cinfo, TRUE, FALSE, FALSE))
+        return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF3:                /* Lossless, Huffman */
+      if (!get_sof(cinfo, FALSE, TRUE, FALSE))
+        return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF9:                /* Extended sequential, arithmetic */
+      if (!get_sof(cinfo, FALSE, FALSE, TRUE))
+        return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF10:               /* Progressive, arithmetic */
+      if (!get_sof(cinfo, TRUE, FALSE, TRUE))
+        return JPEG_SUSPENDED;
+      break;
+
+    case M_SOF11:               /* Lossless, arithmetic */
+      if (!get_sof(cinfo, FALSE, TRUE, TRUE))
+        return JPEG_SUSPENDED;
+      break;
+
+    /* Currently unsupported SOFn types */
+    case M_SOF5:                /* Differential sequential, Huffman */
+    case M_SOF6:                /* Differential progressive, Huffman */
+    case M_SOF7:                /* Differential lossless, Huffman */
+    case M_JPG:                 /* Reserved for JPEG extensions */
+    case M_SOF13:               /* Differential sequential, arithmetic */
+    case M_SOF14:               /* Differential progressive, arithmetic */
+    case M_SOF15:               /* Differential lossless, arithmetic */
+      ERREXIT1(cinfo, JERR_SOF_UNSUPPORTED, cinfo->unread_marker);
+      break;
+
+    case M_SOS:
+      if (!get_sos(cinfo))
+        return JPEG_SUSPENDED;
+      cinfo->unread_marker = 0; /* processed the marker */
+      return JPEG_REACHED_SOS;
+
+    case M_EOI:
+      TRACEMS(cinfo, 1, JTRC_EOI);
+      cinfo->unread_marker = 0; /* processed the marker */
+      return JPEG_REACHED_EOI;
+
+    case M_DAC:
+      if (!get_dac(cinfo))
+        return JPEG_SUSPENDED;
+      break;
+
+    case M_DHT:
+      if (!get_dht(cinfo))
+        return JPEG_SUSPENDED;
+      break;
+
+    case M_DQT:
+      if (!get_dqt(cinfo))
+        return JPEG_SUSPENDED;
+      break;
+
+    case M_DRI:
+      if (!get_dri(cinfo))
+        return JPEG_SUSPENDED;
+      break;
+
+    case M_APP0:
+    case M_APP1:
+    case M_APP2:
+    case M_APP3:
+    case M_APP4:
+    case M_APP5:
+    case M_APP6:
+    case M_APP7:
+    case M_APP8:
+    case M_APP9:
+    case M_APP10:
+    case M_APP11:
+    case M_APP12:
+    case M_APP13:
+    case M_APP14:
+    case M_APP15:
+      if (!(*((my_marker_ptr)cinfo->marker)->process_APPn[
+               cinfo->unread_marker - (int)M_APP0]) (cinfo))
+        return JPEG_SUSPENDED;
+      break;
+
+    case M_COM:
+      if (!(*((my_marker_ptr)cinfo->marker)->process_COM) (cinfo))
+        return JPEG_SUSPENDED;
+      break;
+
+    case M_RST0:                /* these are all parameterless */
+    case M_RST1:
+    case M_RST2:
+    case M_RST3:
+    case M_RST4:
+    case M_RST5:
+    case M_RST6:
+    case M_RST7:
+    case M_TEM:
+      TRACEMS1(cinfo, 1, JTRC_PARMLESS_MARKER, cinfo->unread_marker);
+      break;
+
+    case M_DNL:                 /* Ignore DNL ... perhaps the wrong thing */
+      if (!skip_variable(cinfo))
+        return JPEG_SUSPENDED;
+      break;
+
+    default:                    /* must be DHP, EXP, JPGn, or RESn */
+      /* For now, we treat the reserved markers as fatal errors since they are
+       * likely to be used to signal incompatible JPEG Part 3 extensions.
+       * Once the JPEG 3 version-number marker is well defined, this code
+       * ought to change!
+       */
+      ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, cinfo->unread_marker);
+      break;
+    }
+    /* Successfully processed marker, so reset state variable */
+    cinfo->unread_marker = 0;
+  } /* end loop */
+}
+
+
+/*
+ * Read a restart marker, which is expected to appear next in the datastream;
+ * if the marker is not there, take appropriate recovery action.
+ * Returns FALSE if suspension is required.
+ *
+ * This is called by the entropy decoder after it has read an appropriate
+ * number of MCUs.  cinfo->unread_marker may be nonzero if the entropy decoder
+ * has already read a marker from the data source.  Under normal conditions
+ * cinfo->unread_marker will be reset to 0 before returning; if not reset,
+ * it holds a marker which the decoder will be unable to read past.
+ */
+
+METHODDEF(boolean)
+read_restart_marker(j_decompress_ptr cinfo)
+{
+  /* Obtain a marker unless we already did. */
+  /* Note that next_marker will complain if it skips any data. */
+  if (cinfo->unread_marker == 0) {
+    if (!next_marker(cinfo))
+      return FALSE;
+  }
+
+  if (cinfo->unread_marker ==
+      ((int)M_RST0 + cinfo->marker->next_restart_num)) {
+    /* Normal case --- swallow the marker and let entropy decoder continue */
+    TRACEMS1(cinfo, 3, JTRC_RST, cinfo->marker->next_restart_num);
+    cinfo->unread_marker = 0;
+  } else {
+    /* Uh-oh, the restart markers have been messed up. */
+    /* Let the data source manager determine how to resync. */
+    if (!(*cinfo->src->resync_to_restart) (cinfo,
+                                           cinfo->marker->next_restart_num))
+      return FALSE;
+  }
+
+  /* Update next-restart state */
+  cinfo->marker->next_restart_num = (cinfo->marker->next_restart_num + 1) & 7;
+
+  return TRUE;
+}
+
+
+/*
+ * This is the default resync_to_restart method for data source managers
+ * to use if they don't have any better approach.  Some data source managers
+ * may be able to back up, or may have additional knowledge about the data
+ * which permits a more intelligent recovery strategy; such managers would
+ * presumably supply their own resync method.
+ *
+ * read_restart_marker calls resync_to_restart if it finds a marker other than
+ * the restart marker it was expecting.  (This code is *not* used unless
+ * a nonzero restart interval has been declared.)  cinfo->unread_marker is
+ * the marker code actually found (might be anything, except 0 or FF).
+ * The desired restart marker number (0..7) is passed as a parameter.
+ * This routine is supposed to apply whatever error recovery strategy seems
+ * appropriate in order to position the input stream to the next data segment.
+ * Note that cinfo->unread_marker is treated as a marker appearing before
+ * the current data-source input point; usually it should be reset to zero
+ * before returning.
+ * Returns FALSE if suspension is required.
+ *
+ * This implementation is substantially constrained by wanting to treat the
+ * input as a data stream; this means we can't back up.  Therefore, we have
+ * only the following actions to work with:
+ *   1. Simply discard the marker and let the entropy decoder resume at next
+ *      byte of file.
+ *   2. Read forward until we find another marker, discarding intervening
+ *      data.  (In theory we could look ahead within the current bufferload,
+ *      without having to discard data if we don't find the desired marker.
+ *      This idea is not implemented here, in part because it makes behavior
+ *      dependent on buffer size and chance buffer-boundary positions.)
+ *   3. Leave the marker unread (by failing to zero cinfo->unread_marker).
+ *      This will cause the entropy decoder to process an empty data segment,
+ *      inserting dummy zeroes, and then we will reprocess the marker.
+ *
+ * #2 is appropriate if we think the desired marker lies ahead, while #3 is
+ * appropriate if the found marker is a future restart marker (indicating
+ * that we have missed the desired restart marker, probably because it got
+ * corrupted).
+ * We apply #2 or #3 if the found marker is a restart marker no more than
+ * two counts behind or ahead of the expected one.  We also apply #2 if the
+ * found marker is not a legal JPEG marker code (it's certainly bogus data).
+ * If the found marker is a restart marker more than 2 counts away, we do #1
+ * (too much risk that the marker is erroneous; with luck we will be able to
+ * resync at some future point).
+ * For any valid non-restart JPEG marker, we apply #3.  This keeps us from
+ * overrunning the end of a scan.  An implementation limited to single-scan
+ * files might find it better to apply #2 for markers other than EOI, since
+ * any other marker would have to be bogus data in that case.
+ */
+
+GLOBAL(boolean)
+jpeg_resync_to_restart(j_decompress_ptr cinfo, int desired)
+{
+  int marker = cinfo->unread_marker;
+  int action = 1;
+
+  /* Always put up a warning. */
+  WARNMS2(cinfo, JWRN_MUST_RESYNC, marker, desired);
+
+  /* Outer loop handles repeated decision after scanning forward. */
+  for (;;) {
+    if (marker < (int)M_SOF0)
+      action = 2;               /* invalid marker */
+    else if (marker < (int)M_RST0 || marker > (int)M_RST7)
+      action = 3;               /* valid non-restart marker */
+    else {
+      if (marker == ((int)M_RST0 + ((desired + 1) & 7)) ||
+          marker == ((int)M_RST0 + ((desired + 2) & 7)))
+        action = 3;             /* one of the next two expected restarts */
+      else if (marker == ((int)M_RST0 + ((desired - 1) & 7)) ||
+               marker == ((int)M_RST0 + ((desired - 2) & 7)))
+        action = 2;             /* a prior restart, so advance */
+      else
+        action = 1;             /* desired restart or too far away */
+    }
+    TRACEMS2(cinfo, 4, JTRC_RECOVERY_ACTION, marker, action);
+    switch (action) {
+    case 1:
+      /* Discard marker and let entropy decoder resume processing. */
+      cinfo->unread_marker = 0;
+      return TRUE;
+    case 2:
+      /* Scan to the next marker, and repeat the decision loop. */
+      if (!next_marker(cinfo))
+        return FALSE;
+      marker = cinfo->unread_marker;
+      break;
+    case 3:
+      /* Return without advancing past this marker. */
+      /* Entropy decoder will be forced to process an empty segment. */
+      return TRUE;
+    }
+  } /* end loop */
+}
+
+
+/*
+ * Reset marker processing state to begin a fresh datastream.
+ */
+
+METHODDEF(void)
+reset_marker_reader(j_decompress_ptr cinfo)
+{
+  my_marker_ptr marker = (my_marker_ptr)cinfo->marker;
+
+  cinfo->comp_info = NULL;              /* until allocated by get_sof */
+  cinfo->input_scan_number = 0;         /* no SOS seen yet */
+  cinfo->unread_marker = 0;             /* no pending marker */
+  marker->pub.saw_SOI = FALSE;          /* set internal state too */
+  marker->pub.saw_SOF = FALSE;
+  marker->pub.discarded_bytes = 0;
+  marker->cur_marker = NULL;
+}
+
+
+/*
+ * Initialize the marker reader module.
+ * This is called only once, when the decompression object is created.
+ */
+
+GLOBAL(void)
+jinit_marker_reader(j_decompress_ptr cinfo)
+{
+  my_marker_ptr marker;
+  int i;
+
+  /* Create subobject in permanent pool */
+  marker = (my_marker_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_PERMANENT,
+                                sizeof(my_marker_reader));
+  cinfo->marker = (struct jpeg_marker_reader *)marker;
+  /* Initialize public method pointers */
+  marker->pub.reset_marker_reader = reset_marker_reader;
+  marker->pub.read_markers = read_markers;
+  marker->pub.read_restart_marker = read_restart_marker;
+  /* Initialize COM/APPn processing.
+   * By default, we examine and then discard APP0 and APP14,
+   * but simply discard COM and all other APPn.
+   */
+  marker->process_COM = skip_variable;
+  marker->length_limit_COM = 0;
+  for (i = 0; i < 16; i++) {
+    marker->process_APPn[i] = skip_variable;
+    marker->length_limit_APPn[i] = 0;
+  }
+  marker->process_APPn[0] = get_interesting_appn;
+  marker->process_APPn[14] = get_interesting_appn;
+  /* Reset marker processing state */
+  reset_marker_reader(cinfo);
+}
+
+
+/*
+ * Control saving of COM and APPn markers into marker_list.
+ */
+
+#ifdef SAVE_MARKERS_SUPPORTED
+
+GLOBAL(void)
+jpeg_save_markers(j_decompress_ptr cinfo, int marker_code,
+                  unsigned int length_limit)
+{
+  my_marker_ptr marker = (my_marker_ptr)cinfo->marker;
+  long maxlength;
+  jpeg_marker_parser_method processor;
+
+  /* Length limit mustn't be larger than what we can allocate
+   * (should only be a concern in a 16-bit environment).
+   */
+  maxlength = cinfo->mem->max_alloc_chunk - sizeof(struct jpeg_marker_struct);
+  if (((long)length_limit) > maxlength)
+    length_limit = (unsigned int)maxlength;
+
+  /* Choose processor routine to use.
+   * APP0/APP14 have special requirements.
+   */
+  if (length_limit) {
+    processor = save_marker;
+    /* If saving APP0/APP14, save at least enough for our internal use. */
+    if (marker_code == (int)M_APP0 && length_limit < APP0_DATA_LEN)
+      length_limit = APP0_DATA_LEN;
+    else if (marker_code == (int)M_APP14 && length_limit < APP14_DATA_LEN)
+      length_limit = APP14_DATA_LEN;
+  } else {
+    processor = skip_variable;
+    /* If discarding APP0/APP14, use our regular on-the-fly processor. */
+    if (marker_code == (int)M_APP0 || marker_code == (int)M_APP14)
+      processor = get_interesting_appn;
+  }
+
+  if (marker_code == (int)M_COM) {
+    marker->process_COM = processor;
+    marker->length_limit_COM = length_limit;
+  } else if (marker_code >= (int)M_APP0 && marker_code <= (int)M_APP15) {
+    marker->process_APPn[marker_code - (int)M_APP0] = processor;
+    marker->length_limit_APPn[marker_code - (int)M_APP0] = length_limit;
+  } else
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
+}
+
+#endif /* SAVE_MARKERS_SUPPORTED */
+
+
+/*
+ * Install a special processing method for COM or APPn markers.
+ */
+
+GLOBAL(void)
+jpeg_set_marker_processor(j_decompress_ptr cinfo, int marker_code,
+                          jpeg_marker_parser_method routine)
+{
+  my_marker_ptr marker = (my_marker_ptr)cinfo->marker;
+
+  if (marker_code == (int)M_COM)
+    marker->process_COM = routine;
+  else if (marker_code >= (int)M_APP0 && marker_code <= (int)M_APP15)
+    marker->process_APPn[marker_code - (int)M_APP0] = routine;
+  else
+    ERREXIT1(cinfo, JERR_UNKNOWN_MARKER, marker_code);
+}
diff --git a/thirdparty/libjpeg-turbo/src/jdmaster.c b/thirdparty/libjpeg-turbo/src/jdmaster.c
new file mode 100644
index 00000000000..4085c229186
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdmaster.c
@@ -0,0 +1,893 @@
+/*
+ * jdmaster.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 2002-2009 by Guido Vollbeding.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009-2011, 2016, 2019, 2022-2024, D. R. Commander.
+ * Copyright (C) 2013, Linaro Limited.
+ * Copyright (C) 2015, Google, Inc.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains master control logic for the JPEG decompressor.
+ * These routines are concerned with selecting the modules to be executed
+ * and with determining the number of passes and the work to be done in each
+ * pass.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jpegapicomp.h"
+#include "jdmaster.h"
+
+
+/*
+ * Determine whether merged upsample/color conversion should be used.
+ * CRUCIAL: this must match the actual capabilities of jdmerge.c!
+ */
+
+LOCAL(boolean)
+use_merged_upsample(j_decompress_ptr cinfo)
+{
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+  /* Colorspace conversion is not supported with lossless JPEG images */
+  if (cinfo->master->lossless)
+    return FALSE;
+  /* Merging is the equivalent of plain box-filter upsampling */
+  if (cinfo->do_fancy_upsampling || cinfo->CCIR601_sampling)
+    return FALSE;
+  /* jdmerge.c only supports YCC=>RGB and YCC=>RGB565 color conversion */
+  if (cinfo->jpeg_color_space != JCS_YCbCr || cinfo->num_components != 3 ||
+      (cinfo->out_color_space != JCS_RGB &&
+       cinfo->out_color_space != JCS_RGB565 &&
+       cinfo->out_color_space != JCS_EXT_RGB &&
+       cinfo->out_color_space != JCS_EXT_RGBX &&
+       cinfo->out_color_space != JCS_EXT_BGR &&
+       cinfo->out_color_space != JCS_EXT_BGRX &&
+       cinfo->out_color_space != JCS_EXT_XBGR &&
+       cinfo->out_color_space != JCS_EXT_XRGB &&
+       cinfo->out_color_space != JCS_EXT_RGBA &&
+       cinfo->out_color_space != JCS_EXT_BGRA &&
+       cinfo->out_color_space != JCS_EXT_ABGR &&
+       cinfo->out_color_space != JCS_EXT_ARGB))
+    return FALSE;
+  if ((cinfo->out_color_space == JCS_RGB565 &&
+       cinfo->out_color_components != 3) ||
+      (cinfo->out_color_space != JCS_RGB565 &&
+       cinfo->out_color_components != rgb_pixelsize[cinfo->out_color_space]))
+    return FALSE;
+  /* and it only handles 2h1v or 2h2v sampling ratios */
+  if (cinfo->comp_info[0].h_samp_factor != 2 ||
+      cinfo->comp_info[1].h_samp_factor != 1 ||
+      cinfo->comp_info[2].h_samp_factor != 1 ||
+      cinfo->comp_info[0].v_samp_factor >  2 ||
+      cinfo->comp_info[1].v_samp_factor != 1 ||
+      cinfo->comp_info[2].v_samp_factor != 1)
+    return FALSE;
+  /* furthermore, it doesn't work if we've scaled the IDCTs differently */
+  if (cinfo->comp_info[0]._DCT_scaled_size != cinfo->_min_DCT_scaled_size ||
+      cinfo->comp_info[1]._DCT_scaled_size != cinfo->_min_DCT_scaled_size ||
+      cinfo->comp_info[2]._DCT_scaled_size != cinfo->_min_DCT_scaled_size)
+    return FALSE;
+  /* ??? also need to test for upsample-time rescaling, when & if supported */
+  return TRUE;                  /* by golly, it'll work... */
+#else
+  return FALSE;
+#endif
+}
+
+
+/*
+ * Compute output image dimensions and related values.
+ * NOTE: this is exported for possible use by application.
+ * Hence it mustn't do anything that can't be done twice.
+ */
+
+#if JPEG_LIB_VERSION >= 80
+GLOBAL(void)
+#else
+LOCAL(void)
+#endif
+jpeg_core_output_dimensions(j_decompress_ptr cinfo)
+/* Do computations that are needed before master selection phase.
+ * This function is used for transcoding and full decompression.
+ */
+{
+#ifdef IDCT_SCALING_SUPPORTED
+  int ci;
+  jpeg_component_info *compptr;
+
+  if (!cinfo->master->lossless) {
+    /* Compute actual output image dimensions and DCT scaling choices. */
+    if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom) {
+      /* Provide 1/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 1;
+      cinfo->_min_DCT_v_scaled_size = 1;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 2) {
+      /* Provide 2/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 2L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 2L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 2;
+      cinfo->_min_DCT_v_scaled_size = 2;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 3) {
+      /* Provide 3/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 3L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 3L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 3;
+      cinfo->_min_DCT_v_scaled_size = 3;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 4) {
+      /* Provide 4/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 4L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 4L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 4;
+      cinfo->_min_DCT_v_scaled_size = 4;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 5) {
+      /* Provide 5/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 5L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 5L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 5;
+      cinfo->_min_DCT_v_scaled_size = 5;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 6) {
+      /* Provide 6/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 6L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 6L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 6;
+      cinfo->_min_DCT_v_scaled_size = 6;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 7) {
+      /* Provide 7/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 7L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 7L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 7;
+      cinfo->_min_DCT_v_scaled_size = 7;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 8) {
+      /* Provide 8/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 8L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 8L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 8;
+      cinfo->_min_DCT_v_scaled_size = 8;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 9) {
+      /* Provide 9/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 9L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 9L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 9;
+      cinfo->_min_DCT_v_scaled_size = 9;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 10) {
+      /* Provide 10/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 10L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 10L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 10;
+      cinfo->_min_DCT_v_scaled_size = 10;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 11) {
+      /* Provide 11/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 11L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 11L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 11;
+      cinfo->_min_DCT_v_scaled_size = 11;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 12) {
+      /* Provide 12/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 12L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 12L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 12;
+      cinfo->_min_DCT_v_scaled_size = 12;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 13) {
+      /* Provide 13/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 13L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 13L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 13;
+      cinfo->_min_DCT_v_scaled_size = 13;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 14) {
+      /* Provide 14/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 14L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 14L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 14;
+      cinfo->_min_DCT_v_scaled_size = 14;
+    } else if (cinfo->scale_num * DCTSIZE <= cinfo->scale_denom * 15) {
+      /* Provide 15/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 15L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 15L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 15;
+      cinfo->_min_DCT_v_scaled_size = 15;
+    } else {
+      /* Provide 16/block_size scaling */
+      cinfo->output_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width * 16L, (long)DCTSIZE);
+      cinfo->output_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height * 16L, (long)DCTSIZE);
+      cinfo->_min_DCT_h_scaled_size = 16;
+      cinfo->_min_DCT_v_scaled_size = 16;
+    }
+
+    /* Recompute dimensions of components */
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+         ci++, compptr++) {
+      compptr->_DCT_h_scaled_size = cinfo->_min_DCT_h_scaled_size;
+      compptr->_DCT_v_scaled_size = cinfo->_min_DCT_v_scaled_size;
+    }
+  } else
+#endif /* !IDCT_SCALING_SUPPORTED */
+  {
+    /* Hardwire it to "no scaling" */
+    cinfo->output_width = cinfo->image_width;
+    cinfo->output_height = cinfo->image_height;
+    /* jdinput.c has already initialized DCT_scaled_size,
+     * and has computed unscaled downsampled_width and downsampled_height.
+     */
+  }
+}
+
+
+/*
+ * Compute output image dimensions and related values.
+ * NOTE: this is exported for possible use by application.
+ * Hence it mustn't do anything that can't be done twice.
+ * Also note that it may be called before the master module is initialized!
+ */
+
+GLOBAL(void)
+jpeg_calc_output_dimensions(j_decompress_ptr cinfo)
+/* Do computations that are needed before master selection phase */
+{
+#ifdef IDCT_SCALING_SUPPORTED
+  int ci;
+  jpeg_component_info *compptr;
+#endif
+
+  /* Prevent application from calling me at wrong times */
+  if (cinfo->global_state != DSTATE_READY)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  /* Compute core output image dimensions and DCT scaling choices. */
+  jpeg_core_output_dimensions(cinfo);
+
+#ifdef IDCT_SCALING_SUPPORTED
+
+  if (!cinfo->master->lossless) {
+    /* In selecting the actual DCT scaling for each component, we try to
+     * scale up the chroma components via IDCT scaling rather than upsampling.
+     * This saves time if the upsampler gets to use 1:1 scaling.
+     * Note this code adapts subsampling ratios which are powers of 2.
+     */
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+         ci++, compptr++) {
+      int ssize = cinfo->_min_DCT_scaled_size;
+      while (ssize < DCTSIZE &&
+             ((cinfo->max_h_samp_factor * cinfo->_min_DCT_scaled_size) %
+              (compptr->h_samp_factor * ssize * 2) == 0) &&
+             ((cinfo->max_v_samp_factor * cinfo->_min_DCT_scaled_size) %
+              (compptr->v_samp_factor * ssize * 2) == 0)) {
+        ssize = ssize * 2;
+      }
+#if JPEG_LIB_VERSION >= 70
+      compptr->DCT_h_scaled_size = compptr->DCT_v_scaled_size = ssize;
+#else
+      compptr->DCT_scaled_size = ssize;
+#endif
+    }
+
+    /* Recompute downsampled dimensions of components;
+     * application needs to know these if using raw downsampled data.
+     */
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+         ci++, compptr++) {
+      /* Size in samples, after IDCT scaling */
+      compptr->downsampled_width = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_width *
+                      (long)(compptr->h_samp_factor *
+                             compptr->_DCT_scaled_size),
+                      (long)(cinfo->max_h_samp_factor * DCTSIZE));
+      compptr->downsampled_height = (JDIMENSION)
+        jdiv_round_up((long)cinfo->image_height *
+                      (long)(compptr->v_samp_factor *
+                             compptr->_DCT_scaled_size),
+                      (long)(cinfo->max_v_samp_factor * DCTSIZE));
+    }
+  } else
+#endif /* IDCT_SCALING_SUPPORTED */
+  {
+    /* Hardwire it to "no scaling" */
+    cinfo->output_width = cinfo->image_width;
+    cinfo->output_height = cinfo->image_height;
+    /* jdinput.c has already initialized DCT_scaled_size to DCTSIZE,
+     * and has computed unscaled downsampled_width and downsampled_height.
+     */
+  }
+
+  /* Report number of components in selected colorspace. */
+  /* Probably this should be in the color conversion module... */
+  switch (cinfo->out_color_space) {
+  case JCS_GRAYSCALE:
+    cinfo->out_color_components = 1;
+    break;
+  case JCS_RGB:
+  case JCS_EXT_RGB:
+  case JCS_EXT_RGBX:
+  case JCS_EXT_BGR:
+  case JCS_EXT_BGRX:
+  case JCS_EXT_XBGR:
+  case JCS_EXT_XRGB:
+  case JCS_EXT_RGBA:
+  case JCS_EXT_BGRA:
+  case JCS_EXT_ABGR:
+  case JCS_EXT_ARGB:
+    cinfo->out_color_components = rgb_pixelsize[cinfo->out_color_space];
+    break;
+  case JCS_YCbCr:
+  case JCS_RGB565:
+    cinfo->out_color_components = 3;
+    break;
+  case JCS_CMYK:
+  case JCS_YCCK:
+    cinfo->out_color_components = 4;
+    break;
+  default:                      /* else must be same colorspace as in file */
+    cinfo->out_color_components = cinfo->num_components;
+    break;
+  }
+  cinfo->output_components = (cinfo->quantize_colors ? 1 :
+                              cinfo->out_color_components);
+
+  /* See if upsampler will want to emit more than one row at a time */
+  if (use_merged_upsample(cinfo))
+    cinfo->rec_outbuf_height = cinfo->max_v_samp_factor;
+  else
+    cinfo->rec_outbuf_height = 1;
+}
+
+
+/*
+ * Several decompression processes need to range-limit values to the range
+ * 0..MAXJSAMPLE; the input value may fall somewhat outside this range
+ * due to noise introduced by quantization, roundoff error, etc.  These
+ * processes are inner loops and need to be as fast as possible.  On most
+ * machines, particularly CPUs with pipelines or instruction prefetch,
+ * a (subscript-check-less) C table lookup
+ *              x = sample_range_limit[x];
+ * is faster than explicit tests
+ *              if (x < 0)  x = 0;
+ *              else if (x > MAXJSAMPLE)  x = MAXJSAMPLE;
+ * These processes all use a common table prepared by the routine below.
+ *
+ * For most steps we can mathematically guarantee that the initial value
+ * of x is within MAXJSAMPLE+1 of the legal range, so a table running from
+ * -(MAXJSAMPLE+1) to 2*MAXJSAMPLE+1 is sufficient.  But for the initial
+ * limiting step (just after the IDCT), a wildly out-of-range value is
+ * possible if the input data is corrupt.  To avoid any chance of indexing
+ * off the end of memory and getting a bad-pointer trap, we perform the
+ * post-IDCT limiting thus:
+ *              x = range_limit[x & MASK];
+ * where MASK is 2 bits wider than legal sample data, ie 10 bits for 8-bit
+ * samples.  Under normal circumstances this is more than enough range and
+ * a correct output will be generated; with bogus input data the mask will
+ * cause wraparound, and we will safely generate a bogus-but-in-range output.
+ * For the post-IDCT step, we want to convert the data from signed to unsigned
+ * representation by adding CENTERJSAMPLE at the same time that we limit it.
+ * So the post-IDCT limiting table ends up looking like this:
+ *   CENTERJSAMPLE,CENTERJSAMPLE+1,...,MAXJSAMPLE,
+ *   MAXJSAMPLE (repeat 2*(MAXJSAMPLE+1)-CENTERJSAMPLE times),
+ *   0          (repeat 2*(MAXJSAMPLE+1)-CENTERJSAMPLE times),
+ *   0,1,...,CENTERJSAMPLE-1
+ * Negative inputs select values from the upper half of the table after
+ * masking.
+ *
+ * We can save some space by overlapping the start of the post-IDCT table
+ * with the simpler range limiting table.  The post-IDCT table begins at
+ * sample_range_limit + CENTERJSAMPLE.
+ */
+
+LOCAL(void)
+prepare_range_limit_table(j_decompress_ptr cinfo)
+/* Allocate and fill in the sample_range_limit table */
+{
+  JSAMPLE *table;
+  J12SAMPLE *table12;
+#ifdef D_LOSSLESS_SUPPORTED
+  J16SAMPLE *table16;
+#endif
+  int i;
+
+  if (cinfo->data_precision <= 8) {
+    table = (JSAMPLE *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                  (5 * (MAXJSAMPLE + 1) + CENTERJSAMPLE) * sizeof(JSAMPLE));
+    table += (MAXJSAMPLE + 1);  /* allow negative subscripts of simple table */
+    cinfo->sample_range_limit = table;
+    /* First segment of "simple" table: limit[x] = 0 for x < 0 */
+    memset(table - (MAXJSAMPLE + 1), 0, (MAXJSAMPLE + 1) * sizeof(JSAMPLE));
+    /* Main part of "simple" table: limit[x] = x */
+    for (i = 0; i <= MAXJSAMPLE; i++)
+      table[i] = (JSAMPLE)i;
+    table += CENTERJSAMPLE;     /* Point to where post-IDCT table starts */
+    /* End of simple table, rest of first half of post-IDCT table */
+    for (i = CENTERJSAMPLE; i < 2 * (MAXJSAMPLE + 1); i++)
+      table[i] = MAXJSAMPLE;
+    /* Second half of post-IDCT table */
+    memset(table + (2 * (MAXJSAMPLE + 1)), 0,
+           (2 * (MAXJSAMPLE + 1) - CENTERJSAMPLE) * sizeof(JSAMPLE));
+    memcpy(table + (4 * (MAXJSAMPLE + 1) - CENTERJSAMPLE),
+           cinfo->sample_range_limit, CENTERJSAMPLE * sizeof(JSAMPLE));
+  } else if (cinfo->data_precision <= 12) {
+    table12 = (J12SAMPLE *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                  (5 * (MAXJ12SAMPLE + 1) + CENTERJ12SAMPLE) *
+                  sizeof(J12SAMPLE));
+    table12 += (MAXJ12SAMPLE + 1);  /* allow negative subscripts of simple
+                                       table */
+    cinfo->sample_range_limit = (JSAMPLE *)table12;
+    /* First segment of "simple" table: limit[x] = 0 for x < 0 */
+    memset(table12 - (MAXJ12SAMPLE + 1), 0,
+           (MAXJ12SAMPLE + 1) * sizeof(J12SAMPLE));
+    /* Main part of "simple" table: limit[x] = x */
+    for (i = 0; i <= MAXJ12SAMPLE; i++)
+      table12[i] = (J12SAMPLE)i;
+    table12 += CENTERJ12SAMPLE; /* Point to where post-IDCT table starts */
+    /* End of simple table, rest of first half of post-IDCT table */
+    for (i = CENTERJ12SAMPLE; i < 2 * (MAXJ12SAMPLE + 1); i++)
+      table12[i] = MAXJ12SAMPLE;
+    /* Second half of post-IDCT table */
+    memset(table12 + (2 * (MAXJ12SAMPLE + 1)), 0,
+           (2 * (MAXJ12SAMPLE + 1) - CENTERJ12SAMPLE) * sizeof(J12SAMPLE));
+    memcpy(table12 + (4 * (MAXJ12SAMPLE + 1) - CENTERJ12SAMPLE),
+           cinfo->sample_range_limit, CENTERJ12SAMPLE * sizeof(J12SAMPLE));
+  } else {
+#ifdef D_LOSSLESS_SUPPORTED
+    table16 = (J16SAMPLE *)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                  (5 * (MAXJ16SAMPLE + 1) + CENTERJ16SAMPLE) *
+                  sizeof(J16SAMPLE));
+    table16 += (MAXJ16SAMPLE + 1);  /* allow negative subscripts of simple
+                                       table */
+    cinfo->sample_range_limit = (JSAMPLE *)table16;
+    /* First segment of "simple" table: limit[x] = 0 for x < 0 */
+    memset(table16 - (MAXJ16SAMPLE + 1), 0,
+           (MAXJ16SAMPLE + 1) * sizeof(J16SAMPLE));
+    /* Main part of "simple" table: limit[x] = x */
+    for (i = 0; i <= MAXJ16SAMPLE; i++)
+      table16[i] = (J16SAMPLE)i;
+    table16 += CENTERJ16SAMPLE; /* Point to where post-IDCT table starts */
+    /* End of simple table, rest of first half of post-IDCT table */
+    for (i = CENTERJ16SAMPLE; i < 2 * (MAXJ16SAMPLE + 1); i++)
+      table16[i] = MAXJ16SAMPLE;
+    /* Second half of post-IDCT table */
+    memset(table16 + (2 * (MAXJ16SAMPLE + 1)), 0,
+           (2 * (MAXJ16SAMPLE + 1) - CENTERJ16SAMPLE) * sizeof(J16SAMPLE));
+    memcpy(table16 + (4 * (MAXJ16SAMPLE + 1) - CENTERJ16SAMPLE),
+           cinfo->sample_range_limit, CENTERJ16SAMPLE * sizeof(J16SAMPLE));
+#else
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+#endif
+  }
+}
+
+
+/*
+ * Master selection of decompression modules.
+ * This is done once at jpeg_start_decompress time.  We determine
+ * which modules will be used and give them appropriate initialization calls.
+ * We also initialize the decompressor input side to begin consuming data.
+ *
+ * Since jpeg_read_header has finished, we know what is in the SOF
+ * and (first) SOS markers.  We also have all the application parameter
+ * settings.
+ */
+
+LOCAL(void)
+master_selection(j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr)cinfo->master;
+  boolean use_c_buffer;
+  long samplesperrow;
+  JDIMENSION jd_samplesperrow;
+
+  /* Disable IDCT scaling and raw (downsampled) data output in lossless mode.
+   * IDCT scaling is not useful in lossless mode, and it must be disabled in
+   * order to properly calculate the output dimensions.  Raw data output isn't
+   * particularly useful without subsampling and has not been tested in
+   * lossless mode.
+   */
+#ifdef D_LOSSLESS_SUPPORTED
+  if (cinfo->master->lossless) {
+    cinfo->raw_data_out = FALSE;
+    cinfo->scale_num = cinfo->scale_denom = 1;
+  }
+#endif
+
+  /* Initialize dimensions and other stuff */
+  jpeg_calc_output_dimensions(cinfo);
+  prepare_range_limit_table(cinfo);
+
+  /* Width of an output scanline must be representable as JDIMENSION. */
+  samplesperrow = (long)cinfo->output_width *
+                  (long)cinfo->out_color_components;
+  jd_samplesperrow = (JDIMENSION)samplesperrow;
+  if ((long)jd_samplesperrow != samplesperrow)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+
+  /* Initialize my private state */
+  master->pass_number = 0;
+  master->using_merged_upsample = use_merged_upsample(cinfo);
+
+  /* Color quantizer selection */
+  master->quantizer_1pass = NULL;
+  master->quantizer_2pass = NULL;
+  /* No mode changes if not using buffered-image mode. */
+  if (!cinfo->quantize_colors || !cinfo->buffered_image) {
+    cinfo->enable_1pass_quant = FALSE;
+    cinfo->enable_external_quant = FALSE;
+    cinfo->enable_2pass_quant = FALSE;
+  }
+  if (cinfo->quantize_colors) {
+    if (cinfo->raw_data_out)
+      ERREXIT(cinfo, JERR_NOTIMPL);
+    /* 2-pass quantizer only works in 3-component color space. */
+    if (cinfo->out_color_components != 3 ||
+        cinfo->out_color_space == JCS_RGB565) {
+      cinfo->enable_1pass_quant = TRUE;
+      cinfo->enable_external_quant = FALSE;
+      cinfo->enable_2pass_quant = FALSE;
+      cinfo->colormap = NULL;
+    } else if (cinfo->colormap != NULL) {
+      cinfo->enable_external_quant = TRUE;
+    } else if (cinfo->two_pass_quantize) {
+      cinfo->enable_2pass_quant = TRUE;
+    } else {
+      cinfo->enable_1pass_quant = TRUE;
+    }
+
+    if (cinfo->enable_1pass_quant) {
+#ifdef QUANT_1PASS_SUPPORTED
+      if (cinfo->data_precision == 8)
+        jinit_1pass_quantizer(cinfo);
+      else if (cinfo->data_precision == 12)
+        j12init_1pass_quantizer(cinfo);
+      else
+        ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+      master->quantizer_1pass = cinfo->cquantize;
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    }
+
+    /* We use the 2-pass code to map to external colormaps. */
+    if (cinfo->enable_2pass_quant || cinfo->enable_external_quant) {
+#ifdef QUANT_2PASS_SUPPORTED
+      if (cinfo->data_precision == 8)
+        jinit_2pass_quantizer(cinfo);
+      else if (cinfo->data_precision == 12)
+        j12init_2pass_quantizer(cinfo);
+      else
+        ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+      master->quantizer_2pass = cinfo->cquantize;
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    }
+    /* If both quantizers are initialized, the 2-pass one is left active;
+     * this is necessary for starting with quantization to an external map.
+     */
+  }
+
+  /* Post-processing: in particular, color conversion first */
+  if (!cinfo->raw_data_out) {
+    if (master->using_merged_upsample) {
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+      if (cinfo->data_precision == 8)
+        jinit_merged_upsampler(cinfo); /* does color conversion too */
+      else if (cinfo->data_precision == 12)
+        j12init_merged_upsampler(cinfo); /* does color conversion too */
+      else
+        ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    } else {
+      if (cinfo->data_precision <= 8) {
+        jinit_color_deconverter(cinfo);
+        jinit_upsampler(cinfo);
+      } else if (cinfo->data_precision <= 12) {
+        j12init_color_deconverter(cinfo);
+        j12init_upsampler(cinfo);
+      } else {
+#ifdef D_LOSSLESS_SUPPORTED
+        j16init_color_deconverter(cinfo);
+        j16init_upsampler(cinfo);
+#else
+        ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+#endif
+      }
+    }
+    if (cinfo->data_precision <= 8)
+      jinit_d_post_controller(cinfo, cinfo->enable_2pass_quant);
+    else if (cinfo->data_precision <= 12)
+      j12init_d_post_controller(cinfo, cinfo->enable_2pass_quant);
+    else
+#ifdef D_LOSSLESS_SUPPORTED
+      j16init_d_post_controller(cinfo, cinfo->enable_2pass_quant);
+#else
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+#endif
+  }
+
+  if (cinfo->master->lossless) {
+#ifdef D_LOSSLESS_SUPPORTED
+    /* Prediction, sample undifferencing, point transform, and sample size
+     * scaling
+     */
+    if (cinfo->data_precision <= 8)
+      jinit_lossless_decompressor(cinfo);
+    else if (cinfo->data_precision <= 12)
+      j12init_lossless_decompressor(cinfo);
+    else
+      j16init_lossless_decompressor(cinfo);
+    /* Entropy decoding: either Huffman or arithmetic coding. */
+    if (cinfo->arith_code) {
+      ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+    } else {
+      jinit_lhuff_decoder(cinfo);
+    }
+
+    /* Initialize principal buffer controllers. */
+    use_c_buffer = cinfo->inputctl->has_multiple_scans ||
+                   cinfo->buffered_image;
+    if (cinfo->data_precision <= 8)
+      jinit_d_diff_controller(cinfo, use_c_buffer);
+    else if (cinfo->data_precision <= 12)
+      j12init_d_diff_controller(cinfo, use_c_buffer);
+    else
+      j16init_d_diff_controller(cinfo, use_c_buffer);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+  } else {
+    /* Inverse DCT */
+    if (cinfo->data_precision == 8)
+      jinit_inverse_dct(cinfo);
+    else if (cinfo->data_precision == 12)
+      j12init_inverse_dct(cinfo);
+    else
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+    /* Entropy decoding: either Huffman or arithmetic coding. */
+    if (cinfo->arith_code) {
+#ifdef D_ARITH_CODING_SUPPORTED
+      jinit_arith_decoder(cinfo);
+#else
+      ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+#endif
+    } else {
+      if (cinfo->progressive_mode) {
+#ifdef D_PROGRESSIVE_SUPPORTED
+        jinit_phuff_decoder(cinfo);
+#else
+        ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+      } else
+        jinit_huff_decoder(cinfo);
+    }
+
+    /* Initialize principal buffer controllers. */
+    use_c_buffer = cinfo->inputctl->has_multiple_scans ||
+                   cinfo->buffered_image;
+    if (cinfo->data_precision == 12)
+      j12init_d_coef_controller(cinfo, use_c_buffer);
+    else
+      jinit_d_coef_controller(cinfo, use_c_buffer);
+  }
+
+  if (!cinfo->raw_data_out) {
+    if (cinfo->data_precision <= 8)
+      jinit_d_main_controller(cinfo, FALSE /* never need full buffer here */);
+    else if (cinfo->data_precision <= 12)
+      j12init_d_main_controller(cinfo,
+                                FALSE /* never need full buffer here */);
+    else
+#ifdef D_LOSSLESS_SUPPORTED
+      j16init_d_main_controller(cinfo,
+                                FALSE /* never need full buffer here */);
+#else
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+#endif
+  }
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr)cinfo);
+
+  /* Initialize input side of decompressor to consume first scan. */
+  (*cinfo->inputctl->start_input_pass) (cinfo);
+
+  /* Set the first and last iMCU columns to decompress from single-scan images.
+   * By default, decompress all of the iMCU columns.
+   */
+  cinfo->master->first_iMCU_col = 0;
+  cinfo->master->last_iMCU_col = cinfo->MCUs_per_row - 1;
+  cinfo->master->last_good_iMCU_row = 0;
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+  /* If jpeg_start_decompress will read the whole file, initialize
+   * progress monitoring appropriately.  The input step is counted
+   * as one pass.
+   */
+  if (cinfo->progress != NULL && !cinfo->buffered_image &&
+      cinfo->inputctl->has_multiple_scans) {
+    int nscans;
+    /* Estimate number of scans to set pass_limit. */
+    if (cinfo->progressive_mode) {
+      /* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
+      nscans = 2 + 3 * cinfo->num_components;
+    } else {
+      /* For a nonprogressive multiscan file, estimate 1 scan per component. */
+      nscans = cinfo->num_components;
+    }
+    cinfo->progress->pass_counter = 0L;
+    cinfo->progress->pass_limit = (long)cinfo->total_iMCU_rows * nscans;
+    cinfo->progress->completed_passes = 0;
+    cinfo->progress->total_passes = (cinfo->enable_2pass_quant ? 3 : 2);
+    /* Count the input pass as done */
+    master->pass_number++;
+  }
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+}
+
+
+/*
+ * Per-pass setup.
+ * This is called at the beginning of each output pass.  We determine which
+ * modules will be active during this pass and give them appropriate
+ * start_pass calls.  We also set is_dummy_pass to indicate whether this
+ * is a "real" output pass or a dummy pass for color quantization.
+ * (In the latter case, jdapistd.c will crank the pass to completion.)
+ */
+
+METHODDEF(void)
+prepare_for_output_pass(j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr)cinfo->master;
+
+  if (master->pub.is_dummy_pass) {
+#ifdef QUANT_2PASS_SUPPORTED
+    /* Final pass of 2-pass quantization */
+    master->pub.is_dummy_pass = FALSE;
+    (*cinfo->cquantize->start_pass) (cinfo, FALSE);
+    (*cinfo->post->start_pass) (cinfo, JBUF_CRANK_DEST);
+    (*cinfo->main->start_pass) (cinfo, JBUF_CRANK_DEST);
+#else
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif /* QUANT_2PASS_SUPPORTED */
+  } else {
+    if (cinfo->quantize_colors && cinfo->colormap == NULL) {
+      /* Select new quantization method */
+      if (cinfo->two_pass_quantize && cinfo->enable_2pass_quant) {
+        cinfo->cquantize = master->quantizer_2pass;
+        master->pub.is_dummy_pass = TRUE;
+      } else if (cinfo->enable_1pass_quant) {
+        cinfo->cquantize = master->quantizer_1pass;
+      } else {
+        ERREXIT(cinfo, JERR_MODE_CHANGE);
+      }
+    }
+    (*cinfo->idct->start_pass) (cinfo);
+    (*cinfo->coef->start_output_pass) (cinfo);
+    if (!cinfo->raw_data_out) {
+      if (!master->using_merged_upsample)
+        (*cinfo->cconvert->start_pass) (cinfo);
+      (*cinfo->upsample->start_pass) (cinfo);
+      if (cinfo->quantize_colors)
+        (*cinfo->cquantize->start_pass) (cinfo, master->pub.is_dummy_pass);
+      (*cinfo->post->start_pass) (cinfo,
+            (master->pub.is_dummy_pass ? JBUF_SAVE_AND_PASS : JBUF_PASS_THRU));
+      (*cinfo->main->start_pass) (cinfo, JBUF_PASS_THRU);
+    }
+  }
+
+  /* Set up progress monitor's pass info if present */
+  if (cinfo->progress != NULL) {
+    cinfo->progress->completed_passes = master->pass_number;
+    cinfo->progress->total_passes = master->pass_number +
+                                    (master->pub.is_dummy_pass ? 2 : 1);
+    /* In buffered-image mode, we assume one more output pass if EOI not
+     * yet reached, but no more passes if EOI has been reached.
+     */
+    if (cinfo->buffered_image && !cinfo->inputctl->eoi_reached) {
+      cinfo->progress->total_passes += (cinfo->enable_2pass_quant ? 2 : 1);
+    }
+  }
+}
+
+
+/*
+ * Finish up at end of an output pass.
+ */
+
+METHODDEF(void)
+finish_output_pass(j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr)cinfo->master;
+
+  if (cinfo->quantize_colors)
+    (*cinfo->cquantize->finish_pass) (cinfo);
+  master->pass_number++;
+}
+
+
+#ifdef D_MULTISCAN_FILES_SUPPORTED
+
+/*
+ * Switch to a new external colormap between output passes.
+ */
+
+GLOBAL(void)
+jpeg_new_colormap(j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr)cinfo->master;
+
+  /* Prevent application from calling me at wrong times */
+  if (cinfo->global_state != DSTATE_BUFIMAGE)
+    ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+
+  if (cinfo->quantize_colors && cinfo->enable_external_quant &&
+      cinfo->colormap != NULL) {
+    /* Select 2-pass quantizer for external colormap use */
+    cinfo->cquantize = master->quantizer_2pass;
+    /* Notify quantizer of colormap change */
+    (*cinfo->cquantize->new_color_map) (cinfo);
+    master->pub.is_dummy_pass = FALSE; /* just in case */
+  } else
+    ERREXIT(cinfo, JERR_MODE_CHANGE);
+}
+
+#endif /* D_MULTISCAN_FILES_SUPPORTED */
+
+
+/*
+ * Initialize master decompression control and select active modules.
+ * This is performed at the start of jpeg_start_decompress.
+ */
+
+GLOBAL(void)
+jinit_master_decompress(j_decompress_ptr cinfo)
+{
+  my_master_ptr master = (my_master_ptr)cinfo->master;
+
+  master->pub.prepare_for_output_pass = prepare_for_output_pass;
+  master->pub.finish_output_pass = finish_output_pass;
+
+  master->pub.is_dummy_pass = FALSE;
+  master->pub.jinit_upsampler_no_alloc = FALSE;
+
+  master_selection(cinfo);
+}
diff --git a/thirdparty/libjpeg-turbo/src/jdmaster.h b/thirdparty/libjpeg-turbo/src/jdmaster.h
new file mode 100644
index 00000000000..76897e2820f
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdmaster.h
@@ -0,0 +1,28 @@
+/*
+ * jdmaster.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1995, Thomas G. Lane.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains the master control structure for the JPEG decompressor.
+ */
+
+/* Private state */
+
+typedef struct {
+  struct jpeg_decomp_master pub; /* public fields */
+
+  int pass_number;              /* # of passes completed */
+
+  boolean using_merged_upsample; /* TRUE if using merged upsample/cconvert */
+
+  /* Saved references to initialized quantizer modules,
+   * in case we need to switch modes.
+   */
+  struct jpeg_color_quantizer *quantizer_1pass;
+  struct jpeg_color_quantizer *quantizer_2pass;
+} my_decomp_master;
+
+typedef my_decomp_master *my_master_ptr;
diff --git a/thirdparty/libjpeg-turbo/src/jdmerge.c b/thirdparty/libjpeg-turbo/src/jdmerge.c
new file mode 100644
index 00000000000..49f2006fc02
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdmerge.c
@@ -0,0 +1,594 @@
+/*
+ * jdmerge.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2009, 2011, 2014-2015, 2020, 2022, D. R. Commander.
+ * Copyright (C) 2013, Linaro Limited.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains code for merged upsampling/color conversion.
+ *
+ * This file combines functions from jdsample.c and jdcolor.c;
+ * read those files first to understand what's going on.
+ *
+ * When the chroma components are to be upsampled by simple replication
+ * (ie, box filtering), we can save some work in color conversion by
+ * calculating all the output pixels corresponding to a pair of chroma
+ * samples at one time.  In the conversion equations
+ *      R = Y           + K1 * Cr
+ *      G = Y + K2 * Cb + K3 * Cr
+ *      B = Y + K4 * Cb
+ * only the Y term varies among the group of pixels corresponding to a pair
+ * of chroma samples, so the rest of the terms can be calculated just once.
+ * At typical sampling ratios, this eliminates half or three-quarters of the
+ * multiplications needed for color conversion.
+ *
+ * This file currently provides implementations for the following cases:
+ *      YCbCr => RGB color conversion only.
+ *      Sampling ratios of 2h1v or 2h2v.
+ *      No scaling needed at upsample time.
+ *      Corner-aligned (non-CCIR601) sampling alignment.
+ * Other special cases could be added, but in most applications these are
+ * the only common cases.  (For uncommon cases we fall back on the more
+ * general code in jdsample.c and jdcolor.c.)
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdmerge.h"
+#include "jsimd.h"
+
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+
+
+#define SCALEBITS       16      /* speediest right-shift on some machines */
+#define ONE_HALF        ((JLONG)1 << (SCALEBITS - 1))
+#define FIX(x)          ((JLONG)((x) * (1L << SCALEBITS) + 0.5))
+
+
+/* Include inline routines for colorspace extensions */
+
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+
+#define RGB_RED  EXT_RGB_RED
+#define RGB_GREEN  EXT_RGB_GREEN
+#define RGB_BLUE  EXT_RGB_BLUE
+#define RGB_PIXELSIZE  EXT_RGB_PIXELSIZE
+#define h2v1_merged_upsample_internal  extrgb_h2v1_merged_upsample_internal
+#define h2v2_merged_upsample_internal  extrgb_h2v2_merged_upsample_internal
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef h2v1_merged_upsample_internal
+#undef h2v2_merged_upsample_internal
+
+#define RGB_RED  EXT_RGBX_RED
+#define RGB_GREEN  EXT_RGBX_GREEN
+#define RGB_BLUE  EXT_RGBX_BLUE
+#define RGB_ALPHA  3
+#define RGB_PIXELSIZE  EXT_RGBX_PIXELSIZE
+#define h2v1_merged_upsample_internal  extrgbx_h2v1_merged_upsample_internal
+#define h2v2_merged_upsample_internal  extrgbx_h2v2_merged_upsample_internal
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef h2v1_merged_upsample_internal
+#undef h2v2_merged_upsample_internal
+
+#define RGB_RED  EXT_BGR_RED
+#define RGB_GREEN  EXT_BGR_GREEN
+#define RGB_BLUE  EXT_BGR_BLUE
+#define RGB_PIXELSIZE  EXT_BGR_PIXELSIZE
+#define h2v1_merged_upsample_internal  extbgr_h2v1_merged_upsample_internal
+#define h2v2_merged_upsample_internal  extbgr_h2v2_merged_upsample_internal
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_PIXELSIZE
+#undef h2v1_merged_upsample_internal
+#undef h2v2_merged_upsample_internal
+
+#define RGB_RED  EXT_BGRX_RED
+#define RGB_GREEN  EXT_BGRX_GREEN
+#define RGB_BLUE  EXT_BGRX_BLUE
+#define RGB_ALPHA  3
+#define RGB_PIXELSIZE  EXT_BGRX_PIXELSIZE
+#define h2v1_merged_upsample_internal  extbgrx_h2v1_merged_upsample_internal
+#define h2v2_merged_upsample_internal  extbgrx_h2v2_merged_upsample_internal
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef h2v1_merged_upsample_internal
+#undef h2v2_merged_upsample_internal
+
+#define RGB_RED  EXT_XBGR_RED
+#define RGB_GREEN  EXT_XBGR_GREEN
+#define RGB_BLUE  EXT_XBGR_BLUE
+#define RGB_ALPHA  0
+#define RGB_PIXELSIZE  EXT_XBGR_PIXELSIZE
+#define h2v1_merged_upsample_internal  extxbgr_h2v1_merged_upsample_internal
+#define h2v2_merged_upsample_internal  extxbgr_h2v2_merged_upsample_internal
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef h2v1_merged_upsample_internal
+#undef h2v2_merged_upsample_internal
+
+#define RGB_RED  EXT_XRGB_RED
+#define RGB_GREEN  EXT_XRGB_GREEN
+#define RGB_BLUE  EXT_XRGB_BLUE
+#define RGB_ALPHA  0
+#define RGB_PIXELSIZE  EXT_XRGB_PIXELSIZE
+#define h2v1_merged_upsample_internal  extxrgb_h2v1_merged_upsample_internal
+#define h2v2_merged_upsample_internal  extxrgb_h2v2_merged_upsample_internal
+#include "jdmrgext.c"
+#undef RGB_RED
+#undef RGB_GREEN
+#undef RGB_BLUE
+#undef RGB_ALPHA
+#undef RGB_PIXELSIZE
+#undef h2v1_merged_upsample_internal
+#undef h2v2_merged_upsample_internal
+
+
+/*
+ * Initialize tables for YCC->RGB colorspace conversion.
+ * This is taken directly from jdcolor.c; see that file for more info.
+ */
+
+LOCAL(void)
+build_ycc_rgb_table(j_decompress_ptr cinfo)
+{
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
+  int i;
+  JLONG x;
+  SHIFT_TEMPS
+
+  upsample->Cr_r_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                (_MAXJSAMPLE + 1) * sizeof(int));
+  upsample->Cb_b_tab = (int *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                (_MAXJSAMPLE + 1) * sizeof(int));
+  upsample->Cr_g_tab = (JLONG *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                (_MAXJSAMPLE + 1) * sizeof(JLONG));
+  upsample->Cb_g_tab = (JLONG *)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                (_MAXJSAMPLE + 1) * sizeof(JLONG));
+
+  for (i = 0, x = -_CENTERJSAMPLE; i <= _MAXJSAMPLE; i++, x++) {
+    /* i is the actual input pixel value, in the range 0.._MAXJSAMPLE */
+    /* The Cb or Cr value we are thinking of is x = i - _CENTERJSAMPLE */
+    /* Cr=>R value is nearest int to 1.40200 * x */
+    upsample->Cr_r_tab[i] = (int)
+                    RIGHT_SHIFT(FIX(1.40200) * x + ONE_HALF, SCALEBITS);
+    /* Cb=>B value is nearest int to 1.77200 * x */
+    upsample->Cb_b_tab[i] = (int)
+                    RIGHT_SHIFT(FIX(1.77200) * x + ONE_HALF, SCALEBITS);
+    /* Cr=>G value is scaled-up -0.71414 * x */
+    upsample->Cr_g_tab[i] = (-FIX(0.71414)) * x;
+    /* Cb=>G value is scaled-up -0.34414 * x */
+    /* We also add in ONE_HALF so that need not do it in inner loop */
+    upsample->Cb_g_tab[i] = (-FIX(0.34414)) * x + ONE_HALF;
+  }
+}
+
+
+/*
+ * Initialize for an upsampling pass.
+ */
+
+METHODDEF(void)
+start_pass_merged_upsample(j_decompress_ptr cinfo)
+{
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
+
+  /* Mark the spare buffer empty */
+  upsample->spare_full = FALSE;
+  /* Initialize total-height counter for detecting bottom of image */
+  upsample->rows_to_go = cinfo->output_height;
+}
+
+
+/*
+ * Control routine to do upsampling (and color conversion).
+ *
+ * The control routine just handles the row buffering considerations.
+ */
+
+METHODDEF(void)
+merged_2v_upsample(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                   JDIMENSION *in_row_group_ctr,
+                   JDIMENSION in_row_groups_avail, _JSAMPARRAY output_buf,
+                   JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
+/* 2:1 vertical sampling case: may need a spare row. */
+{
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
+  _JSAMPROW work_ptrs[2];
+  JDIMENSION num_rows;          /* number of rows returned to caller */
+
+  if (upsample->spare_full) {
+    /* If we have a spare row saved from a previous cycle, just return it. */
+    JDIMENSION size = upsample->out_row_width;
+    if (cinfo->out_color_space == JCS_RGB565)
+      size = cinfo->output_width * 2;
+    _jcopy_sample_rows(&upsample->spare_row, 0, output_buf + *out_row_ctr, 0,
+                       1, size);
+    num_rows = 1;
+    upsample->spare_full = FALSE;
+  } else {
+    /* Figure number of rows to return to caller. */
+    num_rows = 2;
+    /* Not more than the distance to the end of the image. */
+    if (num_rows > upsample->rows_to_go)
+      num_rows = upsample->rows_to_go;
+    /* And not more than what the client can accept: */
+    out_rows_avail -= *out_row_ctr;
+    if (num_rows > out_rows_avail)
+      num_rows = out_rows_avail;
+    /* Create output pointer array for upsampler. */
+    work_ptrs[0] = output_buf[*out_row_ctr];
+    if (num_rows > 1) {
+      work_ptrs[1] = output_buf[*out_row_ctr + 1];
+    } else {
+      work_ptrs[1] = upsample->spare_row;
+      upsample->spare_full = TRUE;
+    }
+    /* Now do the upsampling. */
+    (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr, work_ptrs);
+  }
+
+  /* Adjust counts */
+  *out_row_ctr += num_rows;
+  upsample->rows_to_go -= num_rows;
+  /* When the buffer is emptied, declare this input row group consumed */
+  if (!upsample->spare_full)
+    (*in_row_group_ctr)++;
+}
+
+
+METHODDEF(void)
+merged_1v_upsample(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                   JDIMENSION *in_row_group_ctr,
+                   JDIMENSION in_row_groups_avail, _JSAMPARRAY output_buf,
+                   JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
+/* 1:1 vertical sampling case: much easier, never need a spare row. */
+{
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
+
+  /* Just do the upsampling. */
+  (*upsample->upmethod) (cinfo, input_buf, *in_row_group_ctr,
+                         output_buf + *out_row_ctr);
+  /* Adjust counts */
+  (*out_row_ctr)++;
+  (*in_row_group_ctr)++;
+}
+
+
+/*
+ * These are the routines invoked by the control routines to do
+ * the actual upsampling/conversion.  One row group is processed per call.
+ *
+ * Note: since we may be writing directly into application-supplied buffers,
+ * we have to be honest about the output width; we can't assume the buffer
+ * has been rounded up to an even width.
+ */
+
+
+/*
+ * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
+ */
+
+METHODDEF(void)
+h2v1_merged_upsample(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                     JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf)
+{
+  switch (cinfo->out_color_space) {
+  case JCS_EXT_RGB:
+    extrgb_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                         output_buf);
+    break;
+  case JCS_EXT_RGBX:
+  case JCS_EXT_RGBA:
+    extrgbx_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                          output_buf);
+    break;
+  case JCS_EXT_BGR:
+    extbgr_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                         output_buf);
+    break;
+  case JCS_EXT_BGRX:
+  case JCS_EXT_BGRA:
+    extbgrx_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                          output_buf);
+    break;
+  case JCS_EXT_XBGR:
+  case JCS_EXT_ABGR:
+    extxbgr_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                          output_buf);
+    break;
+  case JCS_EXT_XRGB:
+  case JCS_EXT_ARGB:
+    extxrgb_h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                          output_buf);
+    break;
+  default:
+    h2v1_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                  output_buf);
+    break;
+  }
+}
+
+
+/*
+ * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+ */
+
+METHODDEF(void)
+h2v2_merged_upsample(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                     JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf)
+{
+  switch (cinfo->out_color_space) {
+  case JCS_EXT_RGB:
+    extrgb_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                         output_buf);
+    break;
+  case JCS_EXT_RGBX:
+  case JCS_EXT_RGBA:
+    extrgbx_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                          output_buf);
+    break;
+  case JCS_EXT_BGR:
+    extbgr_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                         output_buf);
+    break;
+  case JCS_EXT_BGRX:
+  case JCS_EXT_BGRA:
+    extbgrx_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                          output_buf);
+    break;
+  case JCS_EXT_XBGR:
+  case JCS_EXT_ABGR:
+    extxbgr_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                          output_buf);
+    break;
+  case JCS_EXT_XRGB:
+  case JCS_EXT_ARGB:
+    extxrgb_h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                          output_buf);
+    break;
+  default:
+    h2v2_merged_upsample_internal(cinfo, input_buf, in_row_group_ctr,
+                                  output_buf);
+    break;
+  }
+}
+
+
+/*
+ * RGB565 conversion
+ */
+
+#define PACK_SHORT_565_LE(r, g, b) \
+  ((((r) << 8) & 0xF800) | (((g) << 3) & 0x7E0) | ((b) >> 3))
+#define PACK_SHORT_565_BE(r, g, b) \
+  (((r) & 0xF8) | ((g) >> 5) | (((g) << 11) & 0xE000) | (((b) << 5) & 0x1F00))
+
+#define PACK_TWO_PIXELS_LE(l, r)    ((r << 16) | l)
+#define PACK_TWO_PIXELS_BE(l, r)    ((l << 16) | r)
+
+#define WRITE_TWO_PIXELS_LE(addr, pixels) { \
+  ((INT16 *)(addr))[0] = (INT16)(pixels); \
+  ((INT16 *)(addr))[1] = (INT16)((pixels) >> 16); \
+}
+#define WRITE_TWO_PIXELS_BE(addr, pixels) { \
+  ((INT16 *)(addr))[1] = (INT16)(pixels); \
+  ((INT16 *)(addr))[0] = (INT16)((pixels) >> 16); \
+}
+
+#define DITHER_565_R(r, dither)  ((r) + ((dither) & 0xFF))
+#define DITHER_565_G(g, dither)  ((g) + (((dither) & 0xFF) >> 1))
+#define DITHER_565_B(b, dither)  ((b) + ((dither) & 0xFF))
+
+
+/* Declarations for ordered dithering
+ *
+ * We use a 4x4 ordered dither array packed into 32 bits.  This array is
+ * sufficient for dithering RGB888 to RGB565.
+ */
+
+#define DITHER_MASK       0x3
+#define DITHER_ROTATE(x)  ((((x) & 0xFF) << 24) | (((x) >> 8) & 0x00FFFFFF))
+static const JLONG dither_matrix[4] = {
+  0x0008020A,
+  0x0C040E06,
+  0x030B0109,
+  0x0F070D05
+};
+
+
+/* Include inline routines for RGB565 conversion */
+
+#define PACK_SHORT_565  PACK_SHORT_565_LE
+#define PACK_TWO_PIXELS  PACK_TWO_PIXELS_LE
+#define WRITE_TWO_PIXELS  WRITE_TWO_PIXELS_LE
+#define h2v1_merged_upsample_565_internal  h2v1_merged_upsample_565_le
+#define h2v1_merged_upsample_565D_internal  h2v1_merged_upsample_565D_le
+#define h2v2_merged_upsample_565_internal  h2v2_merged_upsample_565_le
+#define h2v2_merged_upsample_565D_internal  h2v2_merged_upsample_565D_le
+#include "jdmrg565.c"
+#undef PACK_SHORT_565
+#undef PACK_TWO_PIXELS
+#undef WRITE_TWO_PIXELS
+#undef h2v1_merged_upsample_565_internal
+#undef h2v1_merged_upsample_565D_internal
+#undef h2v2_merged_upsample_565_internal
+#undef h2v2_merged_upsample_565D_internal
+
+#define PACK_SHORT_565  PACK_SHORT_565_BE
+#define PACK_TWO_PIXELS  PACK_TWO_PIXELS_BE
+#define WRITE_TWO_PIXELS  WRITE_TWO_PIXELS_BE
+#define h2v1_merged_upsample_565_internal  h2v1_merged_upsample_565_be
+#define h2v1_merged_upsample_565D_internal  h2v1_merged_upsample_565D_be
+#define h2v2_merged_upsample_565_internal  h2v2_merged_upsample_565_be
+#define h2v2_merged_upsample_565D_internal  h2v2_merged_upsample_565D_be
+#include "jdmrg565.c"
+#undef PACK_SHORT_565
+#undef PACK_TWO_PIXELS
+#undef WRITE_TWO_PIXELS
+#undef h2v1_merged_upsample_565_internal
+#undef h2v1_merged_upsample_565D_internal
+#undef h2v2_merged_upsample_565_internal
+#undef h2v2_merged_upsample_565D_internal
+
+
+static INLINE boolean is_big_endian(void)
+{
+  int test_value = 1;
+  if (*(char *)&test_value != 1)
+    return TRUE;
+  return FALSE;
+}
+
+
+METHODDEF(void)
+h2v1_merged_upsample_565(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                         JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf)
+{
+  if (is_big_endian())
+    h2v1_merged_upsample_565_be(cinfo, input_buf, in_row_group_ctr,
+                                output_buf);
+  else
+    h2v1_merged_upsample_565_le(cinfo, input_buf, in_row_group_ctr,
+                                output_buf);
+}
+
+
+METHODDEF(void)
+h2v1_merged_upsample_565D(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                          JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf)
+{
+  if (is_big_endian())
+    h2v1_merged_upsample_565D_be(cinfo, input_buf, in_row_group_ctr,
+                                 output_buf);
+  else
+    h2v1_merged_upsample_565D_le(cinfo, input_buf, in_row_group_ctr,
+                                 output_buf);
+}
+
+
+METHODDEF(void)
+h2v2_merged_upsample_565(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                         JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf)
+{
+  if (is_big_endian())
+    h2v2_merged_upsample_565_be(cinfo, input_buf, in_row_group_ctr,
+                                output_buf);
+  else
+    h2v2_merged_upsample_565_le(cinfo, input_buf, in_row_group_ctr,
+                                output_buf);
+}
+
+
+METHODDEF(void)
+h2v2_merged_upsample_565D(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                          JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf)
+{
+  if (is_big_endian())
+    h2v2_merged_upsample_565D_be(cinfo, input_buf, in_row_group_ctr,
+                                 output_buf);
+  else
+    h2v2_merged_upsample_565D_le(cinfo, input_buf, in_row_group_ctr,
+                                 output_buf);
+}
+
+
+/*
+ * Module initialization routine for merged upsampling/color conversion.
+ *
+ * NB: this is called under the conditions determined by use_merged_upsample()
+ * in jdmaster.c.  That routine MUST correspond to the actual capabilities
+ * of this module; no safety checks are made here.
+ */
+
+GLOBAL(void)
+_jinit_merged_upsampler(j_decompress_ptr cinfo)
+{
+  my_merged_upsample_ptr upsample;
+
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  upsample = (my_merged_upsample_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(my_merged_upsampler));
+  cinfo->upsample = (struct jpeg_upsampler *)upsample;
+  upsample->pub.start_pass = start_pass_merged_upsample;
+  upsample->pub.need_context_rows = FALSE;
+
+  upsample->out_row_width = cinfo->output_width * cinfo->out_color_components;
+
+  if (cinfo->max_v_samp_factor == 2) {
+    upsample->pub._upsample = merged_2v_upsample;
+#ifdef WITH_SIMD
+    if (jsimd_can_h2v2_merged_upsample())
+      upsample->upmethod = jsimd_h2v2_merged_upsample;
+    else
+#endif
+      upsample->upmethod = h2v2_merged_upsample;
+    if (cinfo->out_color_space == JCS_RGB565) {
+      if (cinfo->dither_mode != JDITHER_NONE) {
+        upsample->upmethod = h2v2_merged_upsample_565D;
+      } else {
+        upsample->upmethod = h2v2_merged_upsample_565;
+      }
+    }
+    /* Allocate a spare row buffer */
+    upsample->spare_row = (_JSAMPROW)
+      (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                (size_t)(upsample->out_row_width * sizeof(_JSAMPLE)));
+  } else {
+    upsample->pub._upsample = merged_1v_upsample;
+#ifdef WITH_SIMD
+    if (jsimd_can_h2v1_merged_upsample())
+      upsample->upmethod = jsimd_h2v1_merged_upsample;
+    else
+#endif
+      upsample->upmethod = h2v1_merged_upsample;
+    if (cinfo->out_color_space == JCS_RGB565) {
+      if (cinfo->dither_mode != JDITHER_NONE) {
+        upsample->upmethod = h2v1_merged_upsample_565D;
+      } else {
+        upsample->upmethod = h2v1_merged_upsample_565;
+      }
+    }
+    /* No spare row needed */
+    upsample->spare_row = NULL;
+  }
+
+  build_ycc_rgb_table(cinfo);
+}
+
+#endif /* UPSAMPLE_MERGING_SUPPORTED */
diff --git a/thirdparty/libjpeg-turbo/src/jdmerge.h b/thirdparty/libjpeg-turbo/src/jdmerge.h
new file mode 100644
index 00000000000..73cbd605495
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdmerge.h
@@ -0,0 +1,48 @@
+/*
+ * jdmerge.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2020, 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ */
+
+#define JPEG_INTERNALS
+#include "jpeglib.h"
+#include "jsamplecomp.h"
+
+#ifdef UPSAMPLE_MERGING_SUPPORTED
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_upsampler pub;    /* public fields */
+
+  /* Pointer to routine to do actual upsampling/conversion of one row group */
+  void (*upmethod) (j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                    JDIMENSION in_row_group_ctr, _JSAMPARRAY output_buf);
+
+  /* Private state for YCC->RGB conversion */
+  int *Cr_r_tab;                /* => table for Cr to R conversion */
+  int *Cb_b_tab;                /* => table for Cb to B conversion */
+  JLONG *Cr_g_tab;              /* => table for Cr to G conversion */
+  JLONG *Cb_g_tab;              /* => table for Cb to G conversion */
+
+  /* For 2:1 vertical sampling, we produce two output rows at a time.
+   * We need a "spare" row buffer to hold the second output row if the
+   * application provides just a one-row buffer; we also use the spare
+   * to discard the dummy last row if the image height is odd.
+   */
+  _JSAMPROW spare_row;
+  boolean spare_full;           /* T if spare buffer is occupied */
+
+  JDIMENSION out_row_width;     /* samples per output row */
+  JDIMENSION rows_to_go;        /* counts rows remaining in image */
+} my_merged_upsampler;
+
+typedef my_merged_upsampler *my_merged_upsample_ptr;
+
+#endif /* UPSAMPLE_MERGING_SUPPORTED */
diff --git a/thirdparty/libjpeg-turbo/src/jdmrg565.c b/thirdparty/libjpeg-turbo/src/jdmrg565.c
new file mode 100644
index 00000000000..0c719b912ce
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdmrg565.c
@@ -0,0 +1,355 @@
+/*
+ * jdmrg565.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2013, Linaro Limited.
+ * Copyright (C) 2014-2015, 2018, 2020, 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains code for merged upsampling/color conversion.
+ */
+
+
+INLINE
+LOCAL(void)
+h2v1_merged_upsample_565_internal(j_decompress_ptr cinfo,
+                                  _JSAMPIMAGE input_buf,
+                                  JDIMENSION in_row_group_ctr,
+                                  _JSAMPARRAY output_buf)
+{
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register _JSAMPROW outptr;
+  _JSAMPROW inptr0, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
+  int *Crrtab = upsample->Cr_r_tab;
+  int *Cbbtab = upsample->Cb_b_tab;
+  JLONG *Crgtab = upsample->Cr_g_tab;
+  JLONG *Cbgtab = upsample->Cb_g_tab;
+  unsigned int r, g, b;
+  JLONG rgb;
+  SHIFT_TEMPS
+
+  inptr0 = input_buf[0][in_row_group_ctr];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr = output_buf[0];
+
+  /* Loop for each pair of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = *inptr1++;
+    cr = *inptr2++;
+    cred = Crrtab[cr];
+    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+
+    /* Fetch 2 Y values and emit 2 pixels */
+    y  = *inptr0++;
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_SHORT_565(r, g, b);
+
+    y  = *inptr0++;
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+    WRITE_TWO_PIXELS(outptr, rgb);
+    outptr += 4;
+  }
+
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = *inptr1;
+    cr = *inptr2;
+    cred = Crrtab[cr];
+    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = *inptr0;
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_SHORT_565(r, g, b);
+    *(INT16 *)outptr = (INT16)rgb;
+  }
+}
+
+
+INLINE
+LOCAL(void)
+h2v1_merged_upsample_565D_internal(j_decompress_ptr cinfo,
+                                   _JSAMPIMAGE input_buf,
+                                   JDIMENSION in_row_group_ctr,
+                                   _JSAMPARRAY output_buf)
+{
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register _JSAMPROW outptr;
+  _JSAMPROW inptr0, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
+  int *Crrtab = upsample->Cr_r_tab;
+  int *Cbbtab = upsample->Cb_b_tab;
+  JLONG *Crgtab = upsample->Cr_g_tab;
+  JLONG *Cbgtab = upsample->Cb_g_tab;
+  JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
+  unsigned int r, g, b;
+  JLONG rgb;
+  SHIFT_TEMPS
+
+  inptr0 = input_buf[0][in_row_group_ctr];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr = output_buf[0];
+
+  /* Loop for each pair of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = *inptr1++;
+    cr = *inptr2++;
+    cred = Crrtab[cr];
+    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+
+    /* Fetch 2 Y values and emit 2 pixels */
+    y  = *inptr0++;
+    r = range_limit[DITHER_565_R(y + cred, d0)];
+    g = range_limit[DITHER_565_G(y + cgreen, d0)];
+    b = range_limit[DITHER_565_B(y + cblue, d0)];
+    d0 = DITHER_ROTATE(d0);
+    rgb = PACK_SHORT_565(r, g, b);
+
+    y  = *inptr0++;
+    r = range_limit[DITHER_565_R(y + cred, d0)];
+    g = range_limit[DITHER_565_G(y + cgreen, d0)];
+    b = range_limit[DITHER_565_B(y + cblue, d0)];
+    d0 = DITHER_ROTATE(d0);
+    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+    WRITE_TWO_PIXELS(outptr, rgb);
+    outptr += 4;
+  }
+
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = *inptr1;
+    cr = *inptr2;
+    cred = Crrtab[cr];
+    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = *inptr0;
+    r = range_limit[DITHER_565_R(y + cred, d0)];
+    g = range_limit[DITHER_565_G(y + cgreen, d0)];
+    b = range_limit[DITHER_565_B(y + cblue, d0)];
+    rgb = PACK_SHORT_565(r, g, b);
+    *(INT16 *)outptr = (INT16)rgb;
+  }
+}
+
+
+INLINE
+LOCAL(void)
+h2v2_merged_upsample_565_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                                  JDIMENSION in_row_group_ctr,
+                                  _JSAMPARRAY output_buf)
+{
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register _JSAMPROW outptr0, outptr1;
+  _JSAMPROW inptr00, inptr01, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
+  int *Crrtab = upsample->Cr_r_tab;
+  int *Cbbtab = upsample->Cb_b_tab;
+  JLONG *Crgtab = upsample->Cr_g_tab;
+  JLONG *Cbgtab = upsample->Cb_g_tab;
+  unsigned int r, g, b;
+  JLONG rgb;
+  SHIFT_TEMPS
+
+  inptr00 = input_buf[0][in_row_group_ctr * 2];
+  inptr01 = input_buf[0][in_row_group_ctr * 2 + 1];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr0 = output_buf[0];
+  outptr1 = output_buf[1];
+
+  /* Loop for each group of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = *inptr1++;
+    cr = *inptr2++;
+    cred = Crrtab[cr];
+    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+
+    /* Fetch 4 Y values and emit 4 pixels */
+    y  = *inptr00++;
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_SHORT_565(r, g, b);
+
+    y  = *inptr00++;
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+    WRITE_TWO_PIXELS(outptr0, rgb);
+    outptr0 += 4;
+
+    y  = *inptr01++;
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_SHORT_565(r, g, b);
+
+    y  = *inptr01++;
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+    WRITE_TWO_PIXELS(outptr1, rgb);
+    outptr1 += 4;
+  }
+
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = *inptr1;
+    cr = *inptr2;
+    cred = Crrtab[cr];
+    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+
+    y  = *inptr00;
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_SHORT_565(r, g, b);
+    *(INT16 *)outptr0 = (INT16)rgb;
+
+    y  = *inptr01;
+    r = range_limit[y + cred];
+    g = range_limit[y + cgreen];
+    b = range_limit[y + cblue];
+    rgb = PACK_SHORT_565(r, g, b);
+    *(INT16 *)outptr1 = (INT16)rgb;
+  }
+}
+
+
+INLINE
+LOCAL(void)
+h2v2_merged_upsample_565D_internal(j_decompress_ptr cinfo,
+                                   _JSAMPIMAGE input_buf,
+                                   JDIMENSION in_row_group_ctr,
+                                   _JSAMPARRAY output_buf)
+{
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register _JSAMPROW outptr0, outptr1;
+  _JSAMPROW inptr00, inptr01, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
+  int *Crrtab = upsample->Cr_r_tab;
+  int *Cbbtab = upsample->Cb_b_tab;
+  JLONG *Crgtab = upsample->Cr_g_tab;
+  JLONG *Cbgtab = upsample->Cb_g_tab;
+  JLONG d0 = dither_matrix[cinfo->output_scanline & DITHER_MASK];
+  JLONG d1 = dither_matrix[(cinfo->output_scanline + 1) & DITHER_MASK];
+  unsigned int r, g, b;
+  JLONG rgb;
+  SHIFT_TEMPS
+
+  inptr00 = input_buf[0][in_row_group_ctr * 2];
+  inptr01 = input_buf[0][in_row_group_ctr * 2 + 1];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr0 = output_buf[0];
+  outptr1 = output_buf[1];
+
+  /* Loop for each group of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = *inptr1++;
+    cr = *inptr2++;
+    cred = Crrtab[cr];
+    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+
+    /* Fetch 4 Y values and emit 4 pixels */
+    y  = *inptr00++;
+    r = range_limit[DITHER_565_R(y + cred, d0)];
+    g = range_limit[DITHER_565_G(y + cgreen, d0)];
+    b = range_limit[DITHER_565_B(y + cblue, d0)];
+    d0 = DITHER_ROTATE(d0);
+    rgb = PACK_SHORT_565(r, g, b);
+
+    y  = *inptr00++;
+    r = range_limit[DITHER_565_R(y + cred, d0)];
+    g = range_limit[DITHER_565_G(y + cgreen, d0)];
+    b = range_limit[DITHER_565_B(y + cblue, d0)];
+    d0 = DITHER_ROTATE(d0);
+    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+    WRITE_TWO_PIXELS(outptr0, rgb);
+    outptr0 += 4;
+
+    y  = *inptr01++;
+    r = range_limit[DITHER_565_R(y + cred, d1)];
+    g = range_limit[DITHER_565_G(y + cgreen, d1)];
+    b = range_limit[DITHER_565_B(y + cblue, d1)];
+    d1 = DITHER_ROTATE(d1);
+    rgb = PACK_SHORT_565(r, g, b);
+
+    y  = *inptr01++;
+    r = range_limit[DITHER_565_R(y + cred, d1)];
+    g = range_limit[DITHER_565_G(y + cgreen, d1)];
+    b = range_limit[DITHER_565_B(y + cblue, d1)];
+    d1 = DITHER_ROTATE(d1);
+    rgb = PACK_TWO_PIXELS(rgb, PACK_SHORT_565(r, g, b));
+
+    WRITE_TWO_PIXELS(outptr1, rgb);
+    outptr1 += 4;
+  }
+
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = *inptr1;
+    cr = *inptr2;
+    cred = Crrtab[cr];
+    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+
+    y  = *inptr00;
+    r = range_limit[DITHER_565_R(y + cred, d0)];
+    g = range_limit[DITHER_565_G(y + cgreen, d0)];
+    b = range_limit[DITHER_565_B(y + cblue, d0)];
+    rgb = PACK_SHORT_565(r, g, b);
+    *(INT16 *)outptr0 = (INT16)rgb;
+
+    y  = *inptr01;
+    r = range_limit[DITHER_565_R(y + cred, d1)];
+    g = range_limit[DITHER_565_G(y + cgreen, d1)];
+    b = range_limit[DITHER_565_B(y + cblue, d1)];
+    rgb = PACK_SHORT_565(r, g, b);
+    *(INT16 *)outptr1 = (INT16)rgb;
+  }
+}
diff --git a/thirdparty/libjpeg-turbo/src/jdmrgext.c b/thirdparty/libjpeg-turbo/src/jdmrgext.c
new file mode 100644
index 00000000000..8139e0a3ed6
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdmrgext.c
@@ -0,0 +1,184 @@
+/*
+ * jdmrgext.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2011, 2015, 2020, 2022-2023, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains code for merged upsampling/color conversion.
+ */
+
+
+/* This file is included by jdmerge.c */
+
+
+/*
+ * Upsample and color convert for the case of 2:1 horizontal and 1:1 vertical.
+ */
+
+INLINE
+LOCAL(void)
+h2v1_merged_upsample_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                              JDIMENSION in_row_group_ctr,
+                              _JSAMPARRAY output_buf)
+{
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register _JSAMPROW outptr;
+  _JSAMPROW inptr0, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
+  int *Crrtab = upsample->Cr_r_tab;
+  int *Cbbtab = upsample->Cb_b_tab;
+  JLONG *Crgtab = upsample->Cr_g_tab;
+  JLONG *Cbgtab = upsample->Cb_g_tab;
+  SHIFT_TEMPS
+
+  inptr0 = input_buf[0][in_row_group_ctr];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr = output_buf[0];
+  /* Loop for each pair of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = *inptr1++;
+    cr = *inptr2++;
+    cred = Crrtab[cr];
+    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    /* Fetch 2 Y values and emit 2 pixels */
+    y  = *inptr0++;
+    outptr[RGB_RED] =   range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] =  range_limit[y + cblue];
+#ifdef RGB_ALPHA
+    outptr[RGB_ALPHA] = _MAXJSAMPLE;
+#endif
+    outptr += RGB_PIXELSIZE;
+    y  = *inptr0++;
+    outptr[RGB_RED] =   range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] =  range_limit[y + cblue];
+#ifdef RGB_ALPHA
+    outptr[RGB_ALPHA] = _MAXJSAMPLE;
+#endif
+    outptr += RGB_PIXELSIZE;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = *inptr1;
+    cr = *inptr2;
+    cred = Crrtab[cr];
+    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = *inptr0;
+    outptr[RGB_RED] =   range_limit[y + cred];
+    outptr[RGB_GREEN] = range_limit[y + cgreen];
+    outptr[RGB_BLUE] =  range_limit[y + cblue];
+#ifdef RGB_ALPHA
+    outptr[RGB_ALPHA] = _MAXJSAMPLE;
+#endif
+  }
+}
+
+
+/*
+ * Upsample and color convert for the case of 2:1 horizontal and 2:1 vertical.
+ */
+
+INLINE
+LOCAL(void)
+h2v2_merged_upsample_internal(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                              JDIMENSION in_row_group_ctr,
+                              _JSAMPARRAY output_buf)
+{
+  my_merged_upsample_ptr upsample = (my_merged_upsample_ptr)cinfo->upsample;
+  register int y, cred, cgreen, cblue;
+  int cb, cr;
+  register _JSAMPROW outptr0, outptr1;
+  _JSAMPROW inptr00, inptr01, inptr1, inptr2;
+  JDIMENSION col;
+  /* copy these pointers into registers if possible */
+  register _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
+  int *Crrtab = upsample->Cr_r_tab;
+  int *Cbbtab = upsample->Cb_b_tab;
+  JLONG *Crgtab = upsample->Cr_g_tab;
+  JLONG *Cbgtab = upsample->Cb_g_tab;
+  SHIFT_TEMPS
+
+  inptr00 = input_buf[0][in_row_group_ctr * 2];
+  inptr01 = input_buf[0][in_row_group_ctr * 2 + 1];
+  inptr1 = input_buf[1][in_row_group_ctr];
+  inptr2 = input_buf[2][in_row_group_ctr];
+  outptr0 = output_buf[0];
+  outptr1 = output_buf[1];
+  /* Loop for each group of output pixels */
+  for (col = cinfo->output_width >> 1; col > 0; col--) {
+    /* Do the chroma part of the calculation */
+    cb = *inptr1++;
+    cr = *inptr2++;
+    cred = Crrtab[cr];
+    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    /* Fetch 4 Y values and emit 4 pixels */
+    y  = *inptr00++;
+    outptr0[RGB_RED] =   range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+#ifdef RGB_ALPHA
+    outptr0[RGB_ALPHA] = _MAXJSAMPLE;
+#endif
+    outptr0 += RGB_PIXELSIZE;
+    y  = *inptr00++;
+    outptr0[RGB_RED] =   range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+#ifdef RGB_ALPHA
+    outptr0[RGB_ALPHA] = _MAXJSAMPLE;
+#endif
+    outptr0 += RGB_PIXELSIZE;
+    y  = *inptr01++;
+    outptr1[RGB_RED] =   range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+#ifdef RGB_ALPHA
+    outptr1[RGB_ALPHA] = _MAXJSAMPLE;
+#endif
+    outptr1 += RGB_PIXELSIZE;
+    y  = *inptr01++;
+    outptr1[RGB_RED] =   range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+#ifdef RGB_ALPHA
+    outptr1[RGB_ALPHA] = _MAXJSAMPLE;
+#endif
+    outptr1 += RGB_PIXELSIZE;
+  }
+  /* If image width is odd, do the last output column separately */
+  if (cinfo->output_width & 1) {
+    cb = *inptr1;
+    cr = *inptr2;
+    cred = Crrtab[cr];
+    cgreen = (int)RIGHT_SHIFT(Cbgtab[cb] + Crgtab[cr], SCALEBITS);
+    cblue = Cbbtab[cb];
+    y  = *inptr00;
+    outptr0[RGB_RED] =   range_limit[y + cred];
+    outptr0[RGB_GREEN] = range_limit[y + cgreen];
+    outptr0[RGB_BLUE] =  range_limit[y + cblue];
+#ifdef RGB_ALPHA
+    outptr0[RGB_ALPHA] = _MAXJSAMPLE;
+#endif
+    y  = *inptr01;
+    outptr1[RGB_RED] =   range_limit[y + cred];
+    outptr1[RGB_GREEN] = range_limit[y + cgreen];
+    outptr1[RGB_BLUE] =  range_limit[y + cblue];
+#ifdef RGB_ALPHA
+    outptr1[RGB_ALPHA] = _MAXJSAMPLE;
+#endif
+  }
+}
diff --git a/thirdparty/libjpeg-turbo/src/jdphuff.c b/thirdparty/libjpeg-turbo/src/jdphuff.c
new file mode 100644
index 00000000000..bf97333a34c
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdphuff.c
@@ -0,0 +1,681 @@
+/*
+ * jdphuff.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1995-1997, Thomas G. Lane.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015-2016, 2018-2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains Huffman entropy decoding routines for progressive JPEG.
+ *
+ * Much of the complexity here has to do with supporting input suspension.
+ * If the data source module demands suspension, we want to be able to back
+ * up to the start of the current MCU.  To do this, we copy state variables
+ * into local working storage, and update them back to the permanent
+ * storage only upon successful completion of an MCU.
+ *
+ * NOTE: All referenced figures are from
+ * Recommendation ITU-T T.81 (1992) | ISO/IEC 10918-1:1994.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdhuff.h"             /* Declarations shared with jd*huff.c */
+#include <limits.h>
+
+
+#ifdef D_PROGRESSIVE_SUPPORTED
+
+/*
+ * Expanded entropy decoder object for progressive Huffman decoding.
+ *
+ * The savable_state subrecord contains fields that change within an MCU,
+ * but must not be updated permanently until we complete the MCU.
+ */
+
+typedef struct {
+  unsigned int EOBRUN;                  /* remaining EOBs in EOBRUN */
+  int last_dc_val[MAX_COMPS_IN_SCAN];   /* last DC coef for each component */
+} savable_state;
+
+typedef struct {
+  struct jpeg_entropy_decoder pub; /* public fields */
+
+  /* These fields are loaded into local variables at start of each MCU.
+   * In case of suspension, we exit WITHOUT updating them.
+   */
+  bitread_perm_state bitstate;  /* Bit buffer at start of MCU */
+  savable_state saved;          /* Other state at start of MCU */
+
+  /* These fields are NOT loaded into local working state. */
+  unsigned int restarts_to_go;  /* MCUs left in this restart interval */
+
+  /* Pointers to derived tables (these workspaces have image lifespan) */
+  d_derived_tbl *derived_tbls[NUM_HUFF_TBLS];
+
+  d_derived_tbl *ac_derived_tbl; /* active table during an AC scan */
+} phuff_entropy_decoder;
+
+typedef phuff_entropy_decoder *phuff_entropy_ptr;
+
+/* Forward declarations */
+METHODDEF(boolean) decode_mcu_DC_first(j_decompress_ptr cinfo,
+                                       JBLOCKROW *MCU_data);
+METHODDEF(boolean) decode_mcu_AC_first(j_decompress_ptr cinfo,
+                                       JBLOCKROW *MCU_data);
+METHODDEF(boolean) decode_mcu_DC_refine(j_decompress_ptr cinfo,
+                                        JBLOCKROW *MCU_data);
+METHODDEF(boolean) decode_mcu_AC_refine(j_decompress_ptr cinfo,
+                                        JBLOCKROW *MCU_data);
+
+
+/*
+ * Initialize for a Huffman-compressed scan.
+ */
+
+METHODDEF(void)
+start_pass_phuff_decoder(j_decompress_ptr cinfo)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
+  boolean is_DC_band, bad;
+  int ci, coefi, tbl;
+  d_derived_tbl **pdtbl;
+  int *coef_bit_ptr, *prev_coef_bit_ptr;
+  jpeg_component_info *compptr;
+
+  is_DC_band = (cinfo->Ss == 0);
+
+  /* Validate scan parameters */
+  bad = FALSE;
+  if (is_DC_band) {
+    if (cinfo->Se != 0)
+      bad = TRUE;
+  } else {
+    /* need not check Ss/Se < 0 since they came from unsigned bytes */
+    if (cinfo->Ss > cinfo->Se || cinfo->Se >= DCTSIZE2)
+      bad = TRUE;
+    /* AC scans may have only one component */
+    if (cinfo->comps_in_scan != 1)
+      bad = TRUE;
+  }
+  if (cinfo->Ah != 0) {
+    /* Successive approximation refinement scan: must have Al = Ah-1. */
+    if (cinfo->Al != cinfo->Ah - 1)
+      bad = TRUE;
+  }
+  if (cinfo->Al > 13)           /* need not check for < 0 */
+    bad = TRUE;
+  /* Arguably the maximum Al value should be less than 13 for 8-bit precision,
+   * but the spec doesn't say so, and we try to be liberal about what we
+   * accept.  Note: large Al values could result in out-of-range DC
+   * coefficients during early scans, leading to bizarre displays due to
+   * overflows in the IDCT math.  But we won't crash.
+   */
+  if (bad)
+    ERREXIT4(cinfo, JERR_BAD_PROGRESSION,
+             cinfo->Ss, cinfo->Se, cinfo->Ah, cinfo->Al);
+  /* Update progression status, and verify that scan order is legal.
+   * Note that inter-scan inconsistencies are treated as warnings
+   * not fatal errors ... not clear if this is right way to behave.
+   */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    int cindex = cinfo->cur_comp_info[ci]->component_index;
+    coef_bit_ptr = &cinfo->coef_bits[cindex][0];
+    prev_coef_bit_ptr = &cinfo->coef_bits[cindex + cinfo->num_components][0];
+    if (!is_DC_band && coef_bit_ptr[0] < 0) /* AC without prior DC scan */
+      WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, 0);
+    for (coefi = MIN(cinfo->Ss, 1); coefi <= MAX(cinfo->Se, 9); coefi++) {
+      if (cinfo->input_scan_number > 1)
+        prev_coef_bit_ptr[coefi] = coef_bit_ptr[coefi];
+      else
+        prev_coef_bit_ptr[coefi] = 0;
+    }
+    for (coefi = cinfo->Ss; coefi <= cinfo->Se; coefi++) {
+      int expected = (coef_bit_ptr[coefi] < 0) ? 0 : coef_bit_ptr[coefi];
+      if (cinfo->Ah != expected)
+        WARNMS2(cinfo, JWRN_BOGUS_PROGRESSION, cindex, coefi);
+      coef_bit_ptr[coefi] = cinfo->Al;
+    }
+  }
+
+  /* Select MCU decoding routine */
+  if (cinfo->Ah == 0) {
+    if (is_DC_band)
+      entropy->pub.decode_mcu = decode_mcu_DC_first;
+    else
+      entropy->pub.decode_mcu = decode_mcu_AC_first;
+  } else {
+    if (is_DC_band)
+      entropy->pub.decode_mcu = decode_mcu_DC_refine;
+    else
+      entropy->pub.decode_mcu = decode_mcu_AC_refine;
+  }
+
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++) {
+    compptr = cinfo->cur_comp_info[ci];
+    /* Make sure requested tables are present, and compute derived tables.
+     * We may build same derived table more than once, but it's not expensive.
+     */
+    if (is_DC_band) {
+      if (cinfo->Ah == 0) {     /* DC refinement needs no table */
+        tbl = compptr->dc_tbl_no;
+        pdtbl = (d_derived_tbl **)(entropy->derived_tbls) + tbl;
+        jpeg_make_d_derived_tbl(cinfo, TRUE, tbl, pdtbl);
+      }
+    } else {
+      tbl = compptr->ac_tbl_no;
+      pdtbl = (d_derived_tbl **)(entropy->derived_tbls) + tbl;
+      jpeg_make_d_derived_tbl(cinfo, FALSE, tbl, pdtbl);
+      /* remember the single active table */
+      entropy->ac_derived_tbl = entropy->derived_tbls[tbl];
+    }
+    /* Initialize DC predictions to 0 */
+    entropy->saved.last_dc_val[ci] = 0;
+  }
+
+  /* Initialize bitread state variables */
+  entropy->bitstate.bits_left = 0;
+  entropy->bitstate.get_buffer = 0; /* unnecessary, but keeps Purify quiet */
+  entropy->pub.insufficient_data = FALSE;
+
+  /* Initialize private state variables */
+  entropy->saved.EOBRUN = 0;
+
+  /* Initialize restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+}
+
+
+/*
+ * Figure F.12: extend sign bit.
+ * On some machines, a shift and add will be faster than a table lookup.
+ */
+
+#define AVOID_TABLES
+#ifdef AVOID_TABLES
+
+#define NEG_1  ((unsigned)-1)
+#define HUFF_EXTEND(x, s) \
+  ((x) < (1 << ((s) - 1)) ? (x) + (((NEG_1) << (s)) + 1) : (x))
+
+#else
+
+#define HUFF_EXTEND(x, s) \
+  ((x) < extend_test[s] ? (x) + extend_offset[s] : (x))
+
+static const int extend_test[16] = {   /* entry n is 2**(n-1) */
+  0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080,
+  0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000
+};
+
+static const int extend_offset[16] = { /* entry n is (-1 << n) + 1 */
+  0, ((-1) << 1) + 1, ((-1) << 2) + 1, ((-1) << 3) + 1, ((-1) << 4) + 1,
+  ((-1) << 5) + 1, ((-1) << 6) + 1, ((-1) << 7) + 1, ((-1) << 8) + 1,
+  ((-1) << 9) + 1, ((-1) << 10) + 1, ((-1) << 11) + 1, ((-1) << 12) + 1,
+  ((-1) << 13) + 1, ((-1) << 14) + 1, ((-1) << 15) + 1
+};
+
+#endif /* AVOID_TABLES */
+
+
+/*
+ * Check for a restart marker & resynchronize decoder.
+ * Returns FALSE if must suspend.
+ */
+
+LOCAL(boolean)
+process_restart(j_decompress_ptr cinfo)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
+  int ci;
+
+  /* Throw away any unused bits remaining in bit buffer; */
+  /* include any full bytes in next_marker's count of discarded bytes */
+  cinfo->marker->discarded_bytes += entropy->bitstate.bits_left / 8;
+  entropy->bitstate.bits_left = 0;
+
+  /* Advance past the RSTn marker */
+  if (!(*cinfo->marker->read_restart_marker) (cinfo))
+    return FALSE;
+
+  /* Re-initialize DC predictions to 0 */
+  for (ci = 0; ci < cinfo->comps_in_scan; ci++)
+    entropy->saved.last_dc_val[ci] = 0;
+  /* Re-init EOB run count, too */
+  entropy->saved.EOBRUN = 0;
+
+  /* Reset restart counter */
+  entropy->restarts_to_go = cinfo->restart_interval;
+
+  /* Reset out-of-data flag, unless read_restart_marker left us smack up
+   * against a marker.  In that case we will end up treating the next data
+   * segment as empty, and we can avoid producing bogus output pixels by
+   * leaving the flag set.
+   */
+  if (cinfo->unread_marker == 0)
+    entropy->pub.insufficient_data = FALSE;
+
+  return TRUE;
+}
+
+
+/*
+ * Huffman MCU decoding.
+ * Each of these routines decodes and returns one MCU's worth of
+ * Huffman-compressed coefficients.
+ * The coefficients are reordered from zigzag order into natural array order,
+ * but are not dequantized.
+ *
+ * The i'th block of the MCU is stored into the block pointed to by
+ * MCU_data[i].  WE ASSUME THIS AREA IS INITIALLY ZEROED BY THE CALLER.
+ *
+ * We return FALSE if data source requested suspension.  In that case no
+ * changes have been made to permanent state.  (Exception: some output
+ * coefficients may already have been assigned.  This is harmless for
+ * spectral selection, since we'll just re-assign them on the next call.
+ * Successive approximation AC refinement has to be more careful, however.)
+ */
+
+/*
+ * MCU decoding for DC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
+  int Al = cinfo->Al;
+  register int s, r;
+  int blkn, ci;
+  JBLOCKROW block;
+  BITREAD_STATE_VARS;
+  savable_state state;
+  d_derived_tbl *tbl;
+  jpeg_component_info *compptr;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (!process_restart(cinfo))
+        return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (!entropy->pub.insufficient_data) {
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+    state = entropy->saved;
+
+    /* Outer loop handles each block in the MCU */
+
+    for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+      block = MCU_data[blkn];
+      ci = cinfo->MCU_membership[blkn];
+      compptr = cinfo->cur_comp_info[ci];
+      tbl = entropy->derived_tbls[compptr->dc_tbl_no];
+
+      /* Decode a single block's worth of coefficients */
+
+      /* Section F.2.2.1: decode the DC coefficient difference */
+      HUFF_DECODE(s, br_state, tbl, return FALSE, label1);
+      if (s) {
+        CHECK_BIT_BUFFER(br_state, s, return FALSE);
+        r = GET_BITS(s);
+        s = HUFF_EXTEND(r, s);
+      }
+
+      /* Convert DC difference to actual value, update last_dc_val */
+      if ((state.last_dc_val[ci] >= 0 &&
+           s > INT_MAX - state.last_dc_val[ci]) ||
+          (state.last_dc_val[ci] < 0 && s < INT_MIN - state.last_dc_val[ci]))
+        ERREXIT(cinfo, JERR_BAD_DCT_COEF);
+      s += state.last_dc_val[ci];
+      state.last_dc_val[ci] = s;
+      /* Scale and output the coefficient (assumes jpeg_natural_order[0]=0) */
+      (*block)[0] = (JCOEF)LEFT_SHIFT(s, Al);
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+    entropy->saved = state;
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  if (cinfo->restart_interval)
+    entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC initial scan (either spectral selection,
+ * or first pass of successive approximation).
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_first(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
+  int Se = cinfo->Se;
+  int Al = cinfo->Al;
+  register int s, k, r;
+  unsigned int EOBRUN;
+  JBLOCKROW block;
+  BITREAD_STATE_VARS;
+  d_derived_tbl *tbl;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (!process_restart(cinfo))
+        return FALSE;
+  }
+
+  /* If we've run out of data, just leave the MCU set to zeroes.
+   * This way, we return uniform gray for the remainder of the segment.
+   */
+  if (!entropy->pub.insufficient_data) {
+
+    /* Load up working state.
+     * We can avoid loading/saving bitread state if in an EOB run.
+     */
+    EOBRUN = entropy->saved.EOBRUN;     /* only part of saved state we need */
+
+    /* There is always only one block per MCU */
+
+    if (EOBRUN > 0)             /* if it's a band of zeroes... */
+      EOBRUN--;                 /* ...process it now (we do nothing) */
+    else {
+      BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+      block = MCU_data[0];
+      tbl = entropy->ac_derived_tbl;
+
+      for (k = cinfo->Ss; k <= Se; k++) {
+        HUFF_DECODE(s, br_state, tbl, return FALSE, label2);
+        r = s >> 4;
+        s &= 15;
+        if (s) {
+          k += r;
+          CHECK_BIT_BUFFER(br_state, s, return FALSE);
+          r = GET_BITS(s);
+          s = HUFF_EXTEND(r, s);
+          /* Scale and output coefficient in natural (dezigzagged) order */
+          (*block)[jpeg_natural_order[k]] = (JCOEF)LEFT_SHIFT(s, Al);
+        } else {
+          if (r == 15) {        /* ZRL */
+            k += 15;            /* skip 15 zeroes in band */
+          } else {              /* EOBr, run length is 2^r + appended bits */
+            EOBRUN = 1 << r;
+            if (r) {            /* EOBr, r > 0 */
+              CHECK_BIT_BUFFER(br_state, r, return FALSE);
+              r = GET_BITS(r);
+              EOBRUN += r;
+            }
+            EOBRUN--;           /* this band is processed at this moment */
+            break;              /* force end-of-band */
+          }
+        }
+      }
+
+      BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+    }
+
+    /* Completed MCU, so update state */
+    entropy->saved.EOBRUN = EOBRUN;     /* only part of saved state we need */
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  if (cinfo->restart_interval)
+    entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for DC successive approximation refinement scan.
+ * Note: we assume such scans can be multi-component, although the spec
+ * is not very clear on the point.
+ */
+
+METHODDEF(boolean)
+decode_mcu_DC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
+  int p1 = 1 << cinfo->Al;      /* 1 in the bit position being coded */
+  int blkn;
+  JBLOCKROW block;
+  BITREAD_STATE_VARS;
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (!process_restart(cinfo))
+        return FALSE;
+  }
+
+  /* Not worth the cycles to check insufficient_data here,
+   * since we will not change the data anyway if we read zeroes.
+   */
+
+  /* Load up working state */
+  BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+
+  /* Outer loop handles each block in the MCU */
+
+  for (blkn = 0; blkn < cinfo->blocks_in_MCU; blkn++) {
+    block = MCU_data[blkn];
+
+    /* Encoded data is simply the next bit of the two's-complement DC value */
+    CHECK_BIT_BUFFER(br_state, 1, return FALSE);
+    if (GET_BITS(1))
+      (*block)[0] |= p1;
+    /* Note: since we use |=, repeating the assignment later is safe */
+  }
+
+  /* Completed MCU, so update state */
+  BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+
+  /* Account for restart interval (no-op if not using restarts) */
+  if (cinfo->restart_interval)
+    entropy->restarts_to_go--;
+
+  return TRUE;
+}
+
+
+/*
+ * MCU decoding for AC successive approximation refinement scan.
+ */
+
+METHODDEF(boolean)
+decode_mcu_AC_refine(j_decompress_ptr cinfo, JBLOCKROW *MCU_data)
+{
+  phuff_entropy_ptr entropy = (phuff_entropy_ptr)cinfo->entropy;
+  int Se = cinfo->Se;
+  int p1 = 1 << cinfo->Al;        /* 1 in the bit position being coded */
+  int m1 = (NEG_1) << cinfo->Al;  /* -1 in the bit position being coded */
+  register int s, k, r;
+  unsigned int EOBRUN;
+  JBLOCKROW block;
+  JCOEFPTR thiscoef;
+  BITREAD_STATE_VARS;
+  d_derived_tbl *tbl;
+  int num_newnz;
+  int newnz_pos[DCTSIZE2];
+
+  /* Process restart marker if needed; may have to suspend */
+  if (cinfo->restart_interval) {
+    if (entropy->restarts_to_go == 0)
+      if (!process_restart(cinfo))
+        return FALSE;
+  }
+
+  /* If we've run out of data, don't modify the MCU.
+   */
+  if (!entropy->pub.insufficient_data) {
+
+    /* Load up working state */
+    BITREAD_LOAD_STATE(cinfo, entropy->bitstate);
+    EOBRUN = entropy->saved.EOBRUN; /* only part of saved state we need */
+
+    /* There is always only one block per MCU */
+    block = MCU_data[0];
+    tbl = entropy->ac_derived_tbl;
+
+    /* If we are forced to suspend, we must undo the assignments to any newly
+     * nonzero coefficients in the block, because otherwise we'd get confused
+     * next time about which coefficients were already nonzero.
+     * But we need not undo addition of bits to already-nonzero coefficients;
+     * instead, we can test the current bit to see if we already did it.
+     */
+    num_newnz = 0;
+
+    /* initialize coefficient loop counter to start of band */
+    k = cinfo->Ss;
+
+    if (EOBRUN == 0) {
+      for (; k <= Se; k++) {
+        HUFF_DECODE(s, br_state, tbl, goto undoit, label3);
+        r = s >> 4;
+        s &= 15;
+        if (s) {
+          if (s != 1)           /* size of new coef should always be 1 */
+            WARNMS(cinfo, JWRN_HUFF_BAD_CODE);
+          CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+          if (GET_BITS(1))
+            s = p1;             /* newly nonzero coef is positive */
+          else
+            s = m1;             /* newly nonzero coef is negative */
+        } else {
+          if (r != 15) {
+            EOBRUN = 1 << r;    /* EOBr, run length is 2^r + appended bits */
+            if (r) {
+              CHECK_BIT_BUFFER(br_state, r, goto undoit);
+              r = GET_BITS(r);
+              EOBRUN += r;
+            }
+            break;              /* rest of block is handled by EOB logic */
+          }
+          /* note s = 0 for processing ZRL */
+        }
+        /* Advance over already-nonzero coefs and r still-zero coefs,
+         * appending correction bits to the nonzeroes.  A correction bit is 1
+         * if the absolute value of the coefficient must be increased.
+         */
+        do {
+          thiscoef = *block + jpeg_natural_order[k];
+          if (*thiscoef != 0) {
+            CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+            if (GET_BITS(1)) {
+              if ((*thiscoef & p1) == 0) { /* do nothing if already set it */
+                if (*thiscoef >= 0)
+                  *thiscoef += (JCOEF)p1;
+                else
+                  *thiscoef += (JCOEF)m1;
+              }
+            }
+          } else {
+            if (--r < 0)
+              break;            /* reached target zero coefficient */
+          }
+          k++;
+        } while (k <= Se);
+        if (s) {
+          int pos = jpeg_natural_order[k];
+          /* Output newly nonzero coefficient */
+          (*block)[pos] = (JCOEF)s;
+          /* Remember its position in case we have to suspend */
+          newnz_pos[num_newnz++] = pos;
+        }
+      }
+    }
+
+    if (EOBRUN > 0) {
+      /* Scan any remaining coefficient positions after the end-of-band
+       * (the last newly nonzero coefficient, if any).  Append a correction
+       * bit to each already-nonzero coefficient.  A correction bit is 1
+       * if the absolute value of the coefficient must be increased.
+       */
+      for (; k <= Se; k++) {
+        thiscoef = *block + jpeg_natural_order[k];
+        if (*thiscoef != 0) {
+          CHECK_BIT_BUFFER(br_state, 1, goto undoit);
+          if (GET_BITS(1)) {
+            if ((*thiscoef & p1) == 0) { /* do nothing if already changed it */
+              if (*thiscoef >= 0)
+                *thiscoef += (JCOEF)p1;
+              else
+                *thiscoef += (JCOEF)m1;
+            }
+          }
+        }
+      }
+      /* Count one block completed in EOB run */
+      EOBRUN--;
+    }
+
+    /* Completed MCU, so update state */
+    BITREAD_SAVE_STATE(cinfo, entropy->bitstate);
+    entropy->saved.EOBRUN = EOBRUN; /* only part of saved state we need */
+  }
+
+  /* Account for restart interval (no-op if not using restarts) */
+  if (cinfo->restart_interval)
+    entropy->restarts_to_go--;
+
+  return TRUE;
+
+undoit:
+  /* Re-zero any output coefficients that we made newly nonzero */
+  while (num_newnz > 0)
+    (*block)[newnz_pos[--num_newnz]] = 0;
+
+  return FALSE;
+}
+
+
+/*
+ * Module initialization routine for progressive Huffman entropy decoding.
+ */
+
+GLOBAL(void)
+jinit_phuff_decoder(j_decompress_ptr cinfo)
+{
+  phuff_entropy_ptr entropy;
+  int *coef_bit_ptr;
+  int ci, i;
+
+  entropy = (phuff_entropy_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(phuff_entropy_decoder));
+  cinfo->entropy = (struct jpeg_entropy_decoder *)entropy;
+  entropy->pub.start_pass = start_pass_phuff_decoder;
+
+  /* Mark derived tables unallocated */
+  for (i = 0; i < NUM_HUFF_TBLS; i++) {
+    entropy->derived_tbls[i] = NULL;
+  }
+
+  /* Create progression status table */
+  cinfo->coef_bits = (int (*)[DCTSIZE2])
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                cinfo->num_components * 2 * DCTSIZE2 *
+                                sizeof(int));
+  coef_bit_ptr = &cinfo->coef_bits[0][0];
+  for (ci = 0; ci < cinfo->num_components; ci++)
+    for (i = 0; i < DCTSIZE2; i++)
+      *coef_bit_ptr++ = -1;
+}
+
+#endif /* D_PROGRESSIVE_SUPPORTED */
diff --git a/thirdparty/libjpeg-turbo/src/jdpostct.c b/thirdparty/libjpeg-turbo/src/jdpostct.c
new file mode 100644
index 00000000000..9bc6210d17e
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdpostct.c
@@ -0,0 +1,328 @@
+/*
+ * jdpostct.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022-2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains the decompression postprocessing controller.
+ * This controller manages the upsampling, color conversion, and color
+ * quantization/reduction steps; specifically, it controls the buffering
+ * between upsample/color conversion and color quantization/reduction.
+ *
+ * If no color quantization/reduction is required, then this module has no
+ * work to do, and it just hands off to the upsample/color conversion code.
+ * An integrated upsample/convert/quantize process would replace this module
+ * entirely.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jsamplecomp.h"
+
+
+#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
+
+/* Private buffer controller object */
+
+typedef struct {
+  struct jpeg_d_post_controller pub; /* public fields */
+
+  /* Color quantization source buffer: this holds output data from
+   * the upsample/color conversion step to be passed to the quantizer.
+   * For two-pass color quantization, we need a full-image buffer;
+   * for one-pass operation, a strip buffer is sufficient.
+   */
+  jvirt_sarray_ptr whole_image; /* virtual array, or NULL if one-pass */
+  _JSAMPARRAY buffer;           /* strip buffer, or current strip of virtual */
+  JDIMENSION strip_height;      /* buffer size in rows */
+  /* for two-pass mode only: */
+  JDIMENSION starting_row;      /* row # of first row in current strip */
+  JDIMENSION next_row;          /* index of next row to fill/empty in strip */
+} my_post_controller;
+
+typedef my_post_controller *my_post_ptr;
+
+
+/* Forward declarations */
+#if BITS_IN_JSAMPLE != 16
+METHODDEF(void) post_process_1pass(j_decompress_ptr cinfo,
+                                   _JSAMPIMAGE input_buf,
+                                   JDIMENSION *in_row_group_ctr,
+                                   JDIMENSION in_row_groups_avail,
+                                   _JSAMPARRAY output_buf,
+                                   JDIMENSION *out_row_ctr,
+                                   JDIMENSION out_rows_avail);
+#endif
+#if defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16
+METHODDEF(void) post_process_prepass(j_decompress_ptr cinfo,
+                                     _JSAMPIMAGE input_buf,
+                                     JDIMENSION *in_row_group_ctr,
+                                     JDIMENSION in_row_groups_avail,
+                                     _JSAMPARRAY output_buf,
+                                     JDIMENSION *out_row_ctr,
+                                     JDIMENSION out_rows_avail);
+METHODDEF(void) post_process_2pass(j_decompress_ptr cinfo,
+                                   _JSAMPIMAGE input_buf,
+                                   JDIMENSION *in_row_group_ctr,
+                                   JDIMENSION in_row_groups_avail,
+                                   _JSAMPARRAY output_buf,
+                                   JDIMENSION *out_row_ctr,
+                                   JDIMENSION out_rows_avail);
+#endif
+
+
+/*
+ * Initialize for a processing pass.
+ */
+
+METHODDEF(void)
+start_pass_dpost(j_decompress_ptr cinfo, J_BUF_MODE pass_mode)
+{
+  my_post_ptr post = (my_post_ptr)cinfo->post;
+
+  switch (pass_mode) {
+  case JBUF_PASS_THRU:
+#if BITS_IN_JSAMPLE != 16
+    if (cinfo->quantize_colors) {
+      /* Single-pass processing with color quantization. */
+      post->pub._post_process_data = post_process_1pass;
+      /* We could be doing buffered-image output before starting a 2-pass
+       * color quantization; in that case, jinit_d_post_controller did not
+       * allocate a strip buffer.  Use the virtual-array buffer as workspace.
+       */
+      if (post->buffer == NULL) {
+        post->buffer = (_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
+          ((j_common_ptr)cinfo, post->whole_image,
+           (JDIMENSION)0, post->strip_height, TRUE);
+      }
+    } else
+#endif
+    {
+      /* For single-pass processing without color quantization,
+       * I have no work to do; just call the upsampler directly.
+       */
+      post->pub._post_process_data = cinfo->upsample->_upsample;
+    }
+    break;
+#if defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16
+  case JBUF_SAVE_AND_PASS:
+    /* First pass of 2-pass quantization */
+    if (post->whole_image == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    post->pub._post_process_data = post_process_prepass;
+    break;
+  case JBUF_CRANK_DEST:
+    /* Second pass of 2-pass quantization */
+    if (post->whole_image == NULL)
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    post->pub._post_process_data = post_process_2pass;
+    break;
+#endif /* defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16 */
+  default:
+    ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+    break;
+  }
+  post->starting_row = post->next_row = 0;
+}
+
+
+/*
+ * Process some data in the one-pass (strip buffer) case.
+ * This is used for color precision reduction as well as one-pass quantization.
+ */
+
+#if BITS_IN_JSAMPLE != 16
+
+METHODDEF(void)
+post_process_1pass(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                   JDIMENSION *in_row_group_ctr,
+                   JDIMENSION in_row_groups_avail, _JSAMPARRAY output_buf,
+                   JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
+{
+  my_post_ptr post = (my_post_ptr)cinfo->post;
+  JDIMENSION num_rows, max_rows;
+
+  /* Fill the buffer, but not more than what we can dump out in one go. */
+  /* Note we rely on the upsampler to detect bottom of image. */
+  max_rows = out_rows_avail - *out_row_ctr;
+  if (max_rows > post->strip_height)
+    max_rows = post->strip_height;
+  num_rows = 0;
+  (*cinfo->upsample->_upsample) (cinfo, input_buf, in_row_group_ctr,
+                                 in_row_groups_avail, post->buffer, &num_rows,
+                                 max_rows);
+  /* Quantize and emit data. */
+  (*cinfo->cquantize->_color_quantize) (cinfo, post->buffer,
+                                        output_buf + *out_row_ctr,
+                                        (int)num_rows);
+  *out_row_ctr += num_rows;
+}
+
+#endif
+
+
+#if defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16
+
+/*
+ * Process some data in the first pass of 2-pass quantization.
+ */
+
+METHODDEF(void)
+post_process_prepass(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                     JDIMENSION *in_row_group_ctr,
+                     JDIMENSION in_row_groups_avail, _JSAMPARRAY output_buf,
+                     JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
+{
+  my_post_ptr post = (my_post_ptr)cinfo->post;
+  JDIMENSION old_next_row, num_rows;
+
+  /* Reposition virtual buffer if at start of strip. */
+  if (post->next_row == 0) {
+    post->buffer = (_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
+        ((j_common_ptr)cinfo, post->whole_image,
+         post->starting_row, post->strip_height, TRUE);
+  }
+
+  /* Upsample some data (up to a strip height's worth). */
+  old_next_row = post->next_row;
+  (*cinfo->upsample->_upsample) (cinfo, input_buf, in_row_group_ctr,
+                                 in_row_groups_avail, post->buffer,
+                                 &post->next_row, post->strip_height);
+
+  /* Allow quantizer to scan new data.  No data is emitted, */
+  /* but we advance out_row_ctr so outer loop can tell when we're done. */
+  if (post->next_row > old_next_row) {
+    num_rows = post->next_row - old_next_row;
+    (*cinfo->cquantize->_color_quantize) (cinfo, post->buffer + old_next_row,
+                                          (_JSAMPARRAY)NULL, (int)num_rows);
+    *out_row_ctr += num_rows;
+  }
+
+  /* Advance if we filled the strip. */
+  if (post->next_row >= post->strip_height) {
+    post->starting_row += post->strip_height;
+    post->next_row = 0;
+  }
+}
+
+
+/*
+ * Process some data in the second pass of 2-pass quantization.
+ */
+
+METHODDEF(void)
+post_process_2pass(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+                   JDIMENSION *in_row_group_ctr,
+                   JDIMENSION in_row_groups_avail, _JSAMPARRAY output_buf,
+                   JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail)
+{
+  my_post_ptr post = (my_post_ptr)cinfo->post;
+  JDIMENSION num_rows, max_rows;
+
+  /* Reposition virtual buffer if at start of strip. */
+  if (post->next_row == 0) {
+    post->buffer = (_JSAMPARRAY)(*cinfo->mem->access_virt_sarray)
+        ((j_common_ptr)cinfo, post->whole_image,
+         post->starting_row, post->strip_height, FALSE);
+  }
+
+  /* Determine number of rows to emit. */
+  num_rows = post->strip_height - post->next_row; /* available in strip */
+  max_rows = out_rows_avail - *out_row_ctr; /* available in output area */
+  if (num_rows > max_rows)
+    num_rows = max_rows;
+  /* We have to check bottom of image here, can't depend on upsampler. */
+  max_rows = cinfo->output_height - post->starting_row;
+  if (num_rows > max_rows)
+    num_rows = max_rows;
+
+  /* Quantize and emit data. */
+  (*cinfo->cquantize->_color_quantize) (cinfo, post->buffer + post->next_row,
+                                        output_buf + *out_row_ctr,
+                                        (int)num_rows);
+  *out_row_ctr += num_rows;
+
+  /* Advance if we filled the strip. */
+  post->next_row += num_rows;
+  if (post->next_row >= post->strip_height) {
+    post->starting_row += post->strip_height;
+    post->next_row = 0;
+  }
+}
+
+#endif /* defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16 */
+
+
+/*
+ * Initialize postprocessing controller.
+ */
+
+GLOBAL(void)
+_jinit_d_post_controller(j_decompress_ptr cinfo, boolean need_full_buffer)
+{
+  my_post_ptr post;
+
+#ifdef D_LOSSLESS_SUPPORTED
+  if (cinfo->master->lossless) {
+#if BITS_IN_JSAMPLE == 8
+    if (cinfo->data_precision > BITS_IN_JSAMPLE || cinfo->data_precision < 2)
+#else
+    if (cinfo->data_precision > BITS_IN_JSAMPLE ||
+        cinfo->data_precision < BITS_IN_JSAMPLE - 3)
+#endif
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  } else
+#endif
+  {
+    if (cinfo->data_precision != BITS_IN_JSAMPLE)
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  }
+
+  post = (my_post_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(my_post_controller));
+  cinfo->post = (struct jpeg_d_post_controller *)post;
+  post->pub.start_pass = start_pass_dpost;
+  post->whole_image = NULL;     /* flag for no virtual arrays */
+  post->buffer = NULL;          /* flag for no strip buffer */
+
+  /* Create the quantization buffer, if needed */
+  if (cinfo->quantize_colors) {
+#if BITS_IN_JSAMPLE != 16
+    /* The buffer strip height is max_v_samp_factor, which is typically
+     * an efficient number of rows for upsampling to return.
+     * (In the presence of output rescaling, we might want to be smarter?)
+     */
+    post->strip_height = (JDIMENSION)cinfo->max_v_samp_factor;
+    if (need_full_buffer) {
+      /* Two-pass color quantization: need full-image storage. */
+      /* We round up the number of rows to a multiple of the strip height. */
+#ifdef QUANT_2PASS_SUPPORTED
+      post->whole_image = (*cinfo->mem->request_virt_sarray)
+        ((j_common_ptr)cinfo, JPOOL_IMAGE, FALSE,
+         cinfo->output_width * cinfo->out_color_components,
+         (JDIMENSION)jround_up((long)cinfo->output_height,
+                               (long)post->strip_height),
+         post->strip_height);
+#else
+      ERREXIT(cinfo, JERR_BAD_BUFFER_MODE);
+#endif /* QUANT_2PASS_SUPPORTED */
+    } else {
+      /* One-pass color quantization: just make a strip buffer. */
+      post->buffer = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
+        ((j_common_ptr)cinfo, JPOOL_IMAGE,
+         cinfo->output_width * cinfo->out_color_components,
+         post->strip_height);
+    }
+#else
+    ERREXIT(cinfo, JERR_NOTIMPL);
+#endif
+  }
+}
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
diff --git a/thirdparty/libjpeg-turbo/src/jdsample.c b/thirdparty/libjpeg-turbo/src/jdsample.c
new file mode 100644
index 00000000000..e5a127de42b
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdsample.c
@@ -0,0 +1,553 @@
+/*
+ * jdsample.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2010, 2015-2016, 2022, 2024, D. R. Commander.
+ * Copyright (C) 2014, MIPS Technologies, Inc., California.
+ * Copyright (C) 2015, Google, Inc.
+ * Copyright (C) 2019-2020, Arm Limited.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains upsampling routines.
+ *
+ * Upsampling input data is counted in "row groups".  A row group
+ * is defined to be (v_samp_factor * DCT_scaled_size / min_DCT_scaled_size)
+ * sample rows of each component.  Upsampling will normally produce
+ * max_v_samp_factor pixel rows from each row group (but this could vary
+ * if the upsampler is applying a scale factor of its own).
+ *
+ * An excellent reference for image resampling is
+ *   Digital Image Warping, George Wolberg, 1990.
+ *   Pub. by IEEE Computer Society Press, Los Alamitos, CA. ISBN 0-8186-8944-7.
+ */
+
+#include "jinclude.h"
+#include "jdsample.h"
+#include "jsimd.h"
+#include "jpegapicomp.h"
+
+
+
+#if BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED)
+
+/*
+ * Initialize for an upsampling pass.
+ */
+
+METHODDEF(void)
+start_pass_upsample(j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+
+  /* Mark the conversion buffer empty */
+  upsample->next_row_out = cinfo->max_v_samp_factor;
+  /* Initialize total-height counter for detecting bottom of image */
+  upsample->rows_to_go = cinfo->output_height;
+}
+
+
+/*
+ * Control routine to do upsampling (and color conversion).
+ *
+ * In this version we upsample each component independently.
+ * We upsample one row group into the conversion buffer, then apply
+ * color conversion a row at a time.
+ */
+
+METHODDEF(void)
+sep_upsample(j_decompress_ptr cinfo, _JSAMPIMAGE input_buf,
+             JDIMENSION *in_row_group_ctr, JDIMENSION in_row_groups_avail,
+             _JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+             JDIMENSION out_rows_avail)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  int ci;
+  jpeg_component_info *compptr;
+  JDIMENSION num_rows;
+
+  /* Fill the conversion buffer, if it's empty */
+  if (upsample->next_row_out >= cinfo->max_v_samp_factor) {
+    for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+         ci++, compptr++) {
+      /* Invoke per-component upsample method.  Notice we pass a POINTER
+       * to color_buf[ci], so that fullsize_upsample can change it.
+       */
+      (*upsample->methods[ci]) (cinfo, compptr,
+        input_buf[ci] + (*in_row_group_ctr * upsample->rowgroup_height[ci]),
+        upsample->color_buf + ci);
+    }
+    upsample->next_row_out = 0;
+  }
+
+  /* Color-convert and emit rows */
+
+  /* How many we have in the buffer: */
+  num_rows = (JDIMENSION)(cinfo->max_v_samp_factor - upsample->next_row_out);
+  /* Not more than the distance to the end of the image.  Need this test
+   * in case the image height is not a multiple of max_v_samp_factor:
+   */
+  if (num_rows > upsample->rows_to_go)
+    num_rows = upsample->rows_to_go;
+  /* And not more than what the client can accept: */
+  out_rows_avail -= *out_row_ctr;
+  if (num_rows > out_rows_avail)
+    num_rows = out_rows_avail;
+
+  (*cinfo->cconvert->_color_convert) (cinfo, upsample->color_buf,
+                                      (JDIMENSION)upsample->next_row_out,
+                                      output_buf + *out_row_ctr,
+                                      (int)num_rows);
+
+  /* Adjust counts */
+  *out_row_ctr += num_rows;
+  upsample->rows_to_go -= num_rows;
+  upsample->next_row_out += num_rows;
+  /* When the buffer is emptied, declare this input row group consumed */
+  if (upsample->next_row_out >= cinfo->max_v_samp_factor)
+    (*in_row_group_ctr)++;
+}
+
+
+/*
+ * These are the routines invoked by sep_upsample to upsample pixel values
+ * of a single component.  One row group is processed per call.
+ */
+
+
+/*
+ * For full-size components, we just make color_buf[ci] point at the
+ * input buffer, and thus avoid copying any data.  Note that this is
+ * safe only because sep_upsample doesn't declare the input row group
+ * "consumed" until we are done color converting and emitting it.
+ */
+
+METHODDEF(void)
+fullsize_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                  _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
+{
+  *output_data_ptr = input_data;
+}
+
+
+/*
+ * This is a no-op version used for "uninteresting" components.
+ * These components will not be referenced by color conversion.
+ */
+
+METHODDEF(void)
+noop_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+              _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
+{
+  *output_data_ptr = NULL;      /* safety check */
+}
+
+
+/*
+ * This version handles any integral sampling ratios.
+ * This is not used for typical JPEG files, so it need not be fast.
+ * Nor, for that matter, is it particularly accurate: the algorithm is
+ * simple replication of the input pixel onto the corresponding output
+ * pixels.  The hi-falutin sampling literature refers to this as a
+ * "box filter".  A box filter tends to introduce visible artifacts,
+ * so if you are actually going to use 3:1 or 4:1 sampling ratios
+ * you would be well advised to improve this code.
+ */
+
+METHODDEF(void)
+int_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+             _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
+{
+  my_upsample_ptr upsample = (my_upsample_ptr)cinfo->upsample;
+  _JSAMPARRAY output_data = *output_data_ptr;
+  register _JSAMPROW inptr, outptr;
+  register _JSAMPLE invalue;
+  register int h;
+  _JSAMPROW outend;
+  int h_expand, v_expand;
+  int inrow, outrow;
+
+  h_expand = upsample->h_expand[compptr->component_index];
+  v_expand = upsample->v_expand[compptr->component_index];
+
+  inrow = outrow = 0;
+  while (outrow < cinfo->max_v_samp_factor) {
+    /* Generate one output row with proper horizontal expansion */
+    inptr = input_data[inrow];
+    outptr = output_data[outrow];
+    outend = outptr + cinfo->output_width;
+    while (outptr < outend) {
+      invalue = *inptr++;
+      for (h = h_expand; h > 0; h--) {
+        *outptr++ = invalue;
+      }
+    }
+    /* Generate any additional output rows by duplicating the first one */
+    if (v_expand > 1) {
+      _jcopy_sample_rows(output_data, outrow, output_data, outrow + 1,
+                         v_expand - 1, cinfo->output_width);
+    }
+    inrow++;
+    outrow += v_expand;
+  }
+}
+
+
+/*
+ * Fast processing for the common case of 2:1 horizontal and 1:1 vertical.
+ * It's still a box filter.
+ */
+
+METHODDEF(void)
+h2v1_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+              _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
+{
+  _JSAMPARRAY output_data = *output_data_ptr;
+  register _JSAMPROW inptr, outptr;
+  register _JSAMPLE invalue;
+  _JSAMPROW outend;
+  int inrow;
+
+  for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
+    inptr = input_data[inrow];
+    outptr = output_data[inrow];
+    outend = outptr + cinfo->output_width;
+    while (outptr < outend) {
+      invalue = *inptr++;
+      *outptr++ = invalue;
+      *outptr++ = invalue;
+    }
+  }
+}
+
+
+/*
+ * Fast processing for the common case of 2:1 horizontal and 2:1 vertical.
+ * It's still a box filter.
+ */
+
+METHODDEF(void)
+h2v2_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+              _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
+{
+  _JSAMPARRAY output_data = *output_data_ptr;
+  register _JSAMPROW inptr, outptr;
+  register _JSAMPLE invalue;
+  _JSAMPROW outend;
+  int inrow, outrow;
+
+  inrow = outrow = 0;
+  while (outrow < cinfo->max_v_samp_factor) {
+    inptr = input_data[inrow];
+    outptr = output_data[outrow];
+    outend = outptr + cinfo->output_width;
+    while (outptr < outend) {
+      invalue = *inptr++;
+      *outptr++ = invalue;
+      *outptr++ = invalue;
+    }
+    _jcopy_sample_rows(output_data, outrow, output_data, outrow + 1, 1,
+                       cinfo->output_width);
+    inrow++;
+    outrow += 2;
+  }
+}
+
+
+/*
+ * Fancy processing for the common case of 2:1 horizontal and 1:1 vertical.
+ *
+ * The upsampling algorithm is linear interpolation between pixel centers,
+ * also known as a "triangle filter".  This is a good compromise between
+ * speed and visual quality.  The centers of the output pixels are 1/4 and 3/4
+ * of the way between input pixel centers.
+ *
+ * A note about the "bias" calculations: when rounding fractional values to
+ * integer, we do not want to always round 0.5 up to the next integer.
+ * If we did that, we'd introduce a noticeable bias towards larger values.
+ * Instead, this code is arranged so that 0.5 will be rounded up or down at
+ * alternate pixel locations (a simple ordered dither pattern).
+ */
+
+METHODDEF(void)
+h2v1_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                    _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
+{
+  _JSAMPARRAY output_data = *output_data_ptr;
+  register _JSAMPROW inptr, outptr;
+  register int invalue;
+  register JDIMENSION colctr;
+  int inrow;
+
+  for (inrow = 0; inrow < cinfo->max_v_samp_factor; inrow++) {
+    inptr = input_data[inrow];
+    outptr = output_data[inrow];
+    /* Special case for first column */
+    invalue = *inptr++;
+    *outptr++ = (_JSAMPLE)invalue;
+    *outptr++ = (_JSAMPLE)((invalue * 3 + inptr[0] + 2) >> 2);
+
+    for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
+      /* General case: 3/4 * nearer pixel + 1/4 * further pixel */
+      invalue = (*inptr++) * 3;
+      *outptr++ = (_JSAMPLE)((invalue + inptr[-2] + 1) >> 2);
+      *outptr++ = (_JSAMPLE)((invalue + inptr[0] + 2) >> 2);
+    }
+
+    /* Special case for last column */
+    invalue = *inptr;
+    *outptr++ = (_JSAMPLE)((invalue * 3 + inptr[-1] + 1) >> 2);
+    *outptr++ = (_JSAMPLE)invalue;
+  }
+}
+
+
+/*
+ * Fancy processing for 1:1 horizontal and 2:1 vertical (4:4:0 subsampling).
+ *
+ * This is a less common case, but it can be encountered when losslessly
+ * rotating/transposing a JPEG file that uses 4:2:2 chroma subsampling.
+ */
+
+METHODDEF(void)
+h1v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                    _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
+{
+  _JSAMPARRAY output_data = *output_data_ptr;
+  _JSAMPROW inptr0, inptr1, outptr;
+#if BITS_IN_JSAMPLE == 8
+  int thiscolsum, bias;
+#else
+  JLONG thiscolsum, bias;
+#endif
+  JDIMENSION colctr;
+  int inrow, outrow, v;
+
+  inrow = outrow = 0;
+  while (outrow < cinfo->max_v_samp_factor) {
+    for (v = 0; v < 2; v++) {
+      /* inptr0 points to nearest input row, inptr1 points to next nearest */
+      inptr0 = input_data[inrow];
+      if (v == 0) {             /* next nearest is row above */
+        inptr1 = input_data[inrow - 1];
+        bias = 1;
+      } else {                  /* next nearest is row below */
+        inptr1 = input_data[inrow + 1];
+        bias = 2;
+      }
+      outptr = output_data[outrow++];
+
+      for (colctr = 0; colctr < compptr->downsampled_width; colctr++) {
+        thiscolsum = (*inptr0++) * 3 + (*inptr1++);
+        *outptr++ = (_JSAMPLE)((thiscolsum + bias) >> 2);
+      }
+    }
+    inrow++;
+  }
+}
+
+
+/*
+ * Fancy processing for the common case of 2:1 horizontal and 2:1 vertical.
+ * Again a triangle filter; see comments for h2v1 case, above.
+ *
+ * It is OK for us to reference the adjacent input rows because we demanded
+ * context from the main buffer controller (see initialization code).
+ */
+
+METHODDEF(void)
+h2v2_fancy_upsample(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                    _JSAMPARRAY input_data, _JSAMPARRAY *output_data_ptr)
+{
+  _JSAMPARRAY output_data = *output_data_ptr;
+  register _JSAMPROW inptr0, inptr1, outptr;
+#if BITS_IN_JSAMPLE == 8
+  register int thiscolsum, lastcolsum, nextcolsum;
+#else
+  register JLONG thiscolsum, lastcolsum, nextcolsum;
+#endif
+  register JDIMENSION colctr;
+  int inrow, outrow, v;
+
+  inrow = outrow = 0;
+  while (outrow < cinfo->max_v_samp_factor) {
+    for (v = 0; v < 2; v++) {
+      /* inptr0 points to nearest input row, inptr1 points to next nearest */
+      inptr0 = input_data[inrow];
+      if (v == 0)               /* next nearest is row above */
+        inptr1 = input_data[inrow - 1];
+      else                      /* next nearest is row below */
+        inptr1 = input_data[inrow + 1];
+      outptr = output_data[outrow++];
+
+      /* Special case for first column */
+      thiscolsum = (*inptr0++) * 3 + (*inptr1++);
+      nextcolsum = (*inptr0++) * 3 + (*inptr1++);
+      *outptr++ = (_JSAMPLE)((thiscolsum * 4 + 8) >> 4);
+      *outptr++ = (_JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4);
+      lastcolsum = thiscolsum;  thiscolsum = nextcolsum;
+
+      for (colctr = compptr->downsampled_width - 2; colctr > 0; colctr--) {
+        /* General case: 3/4 * nearer pixel + 1/4 * further pixel in each */
+        /* dimension, thus 9/16, 3/16, 3/16, 1/16 overall */
+        nextcolsum = (*inptr0++) * 3 + (*inptr1++);
+        *outptr++ = (_JSAMPLE)((thiscolsum * 3 + lastcolsum + 8) >> 4);
+        *outptr++ = (_JSAMPLE)((thiscolsum * 3 + nextcolsum + 7) >> 4);
+        lastcolsum = thiscolsum;  thiscolsum = nextcolsum;
+      }
+
+      /* Special case for last column */
+      *outptr++ = (_JSAMPLE)((thiscolsum * 3 + lastcolsum + 8) >> 4);
+      *outptr++ = (_JSAMPLE)((thiscolsum * 4 + 7) >> 4);
+    }
+    inrow++;
+  }
+}
+
+
+/*
+ * Module initialization routine for upsampling.
+ */
+
+GLOBAL(void)
+_jinit_upsampler(j_decompress_ptr cinfo)
+{
+  my_upsample_ptr upsample;
+  int ci;
+  jpeg_component_info *compptr;
+  boolean need_buffer, do_fancy;
+  int h_in_group, v_in_group, h_out_group, v_out_group;
+
+#ifdef D_LOSSLESS_SUPPORTED
+  if (cinfo->master->lossless) {
+#if BITS_IN_JSAMPLE == 8
+    if (cinfo->data_precision > BITS_IN_JSAMPLE || cinfo->data_precision < 2)
+#else
+    if (cinfo->data_precision > BITS_IN_JSAMPLE ||
+        cinfo->data_precision < BITS_IN_JSAMPLE - 3)
+#endif
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  } else
+#endif
+  {
+    if (cinfo->data_precision != BITS_IN_JSAMPLE)
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+  }
+
+  if (!cinfo->master->jinit_upsampler_no_alloc) {
+    upsample = (my_upsample_ptr)
+      (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                  sizeof(my_upsampler));
+    cinfo->upsample = (struct jpeg_upsampler *)upsample;
+    upsample->pub.start_pass = start_pass_upsample;
+    upsample->pub._upsample = sep_upsample;
+    upsample->pub.need_context_rows = FALSE; /* until we find out differently */
+  } else
+    upsample = (my_upsample_ptr)cinfo->upsample;
+
+  if (cinfo->CCIR601_sampling)  /* this isn't supported */
+    ERREXIT(cinfo, JERR_CCIR601_NOTIMPL);
+
+  /* jdmainct.c doesn't support context rows when min_DCT_scaled_size = 1,
+   * so don't ask for it.
+   */
+  do_fancy = cinfo->do_fancy_upsampling && cinfo->_min_DCT_scaled_size > 1;
+
+  /* Verify we can handle the sampling factors, select per-component methods,
+   * and create storage as needed.
+   */
+  for (ci = 0, compptr = cinfo->comp_info; ci < cinfo->num_components;
+       ci++, compptr++) {
+    /* Compute size of an "input group" after IDCT scaling.  This many samples
+     * are to be converted to max_h_samp_factor * max_v_samp_factor pixels.
+     */
+    h_in_group = (compptr->h_samp_factor * compptr->_DCT_scaled_size) /
+                 cinfo->_min_DCT_scaled_size;
+    v_in_group = (compptr->v_samp_factor * compptr->_DCT_scaled_size) /
+                 cinfo->_min_DCT_scaled_size;
+    h_out_group = cinfo->max_h_samp_factor;
+    v_out_group = cinfo->max_v_samp_factor;
+    upsample->rowgroup_height[ci] = v_in_group; /* save for use later */
+    need_buffer = TRUE;
+    if (!compptr->component_needed) {
+      /* Don't bother to upsample an uninteresting component. */
+      upsample->methods[ci] = noop_upsample;
+      need_buffer = FALSE;
+    } else if (h_in_group == h_out_group && v_in_group == v_out_group) {
+      /* Fullsize components can be processed without any work. */
+      upsample->methods[ci] = fullsize_upsample;
+      need_buffer = FALSE;
+    } else if (h_in_group * 2 == h_out_group && v_in_group == v_out_group) {
+      /* Special cases for 2h1v upsampling */
+      if (do_fancy && compptr->downsampled_width > 2) {
+#ifdef WITH_SIMD
+        if (jsimd_can_h2v1_fancy_upsample())
+          upsample->methods[ci] = jsimd_h2v1_fancy_upsample;
+        else
+#endif
+          upsample->methods[ci] = h2v1_fancy_upsample;
+      } else {
+#ifdef WITH_SIMD
+        if (jsimd_can_h2v1_upsample())
+          upsample->methods[ci] = jsimd_h2v1_upsample;
+        else
+#endif
+          upsample->methods[ci] = h2v1_upsample;
+      }
+    } else if (h_in_group == h_out_group &&
+               v_in_group * 2 == v_out_group && do_fancy) {
+      /* Non-fancy upsampling is handled by the generic method */
+#if defined(WITH_SIMD) && (defined(__arm__) || defined(__aarch64__) || \
+                           defined(_M_ARM) || defined(_M_ARM64))
+      if (jsimd_can_h1v2_fancy_upsample())
+        upsample->methods[ci] = jsimd_h1v2_fancy_upsample;
+      else
+#endif
+        upsample->methods[ci] = h1v2_fancy_upsample;
+      upsample->pub.need_context_rows = TRUE;
+    } else if (h_in_group * 2 == h_out_group &&
+               v_in_group * 2 == v_out_group) {
+      /* Special cases for 2h2v upsampling */
+      if (do_fancy && compptr->downsampled_width > 2) {
+#ifdef WITH_SIMD
+        if (jsimd_can_h2v2_fancy_upsample())
+          upsample->methods[ci] = jsimd_h2v2_fancy_upsample;
+        else
+#endif
+          upsample->methods[ci] = h2v2_fancy_upsample;
+        upsample->pub.need_context_rows = TRUE;
+      } else {
+#ifdef WITH_SIMD
+        if (jsimd_can_h2v2_upsample())
+          upsample->methods[ci] = jsimd_h2v2_upsample;
+        else
+#endif
+          upsample->methods[ci] = h2v2_upsample;
+      }
+    } else if ((h_out_group % h_in_group) == 0 &&
+               (v_out_group % v_in_group) == 0) {
+      /* Generic integral-factors upsampling method */
+#if defined(WITH_SIMD) && defined(__mips__)
+      if (jsimd_can_int_upsample())
+        upsample->methods[ci] = jsimd_int_upsample;
+      else
+#endif
+        upsample->methods[ci] = int_upsample;
+      upsample->h_expand[ci] = (UINT8)(h_out_group / h_in_group);
+      upsample->v_expand[ci] = (UINT8)(v_out_group / v_in_group);
+    } else
+      ERREXIT(cinfo, JERR_FRACT_SAMPLE_NOTIMPL);
+    if (need_buffer && !cinfo->master->jinit_upsampler_no_alloc) {
+      upsample->color_buf[ci] = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
+        ((j_common_ptr)cinfo, JPOOL_IMAGE,
+         (JDIMENSION)jround_up((long)cinfo->output_width,
+                               (long)cinfo->max_h_samp_factor),
+         (JDIMENSION)cinfo->max_v_samp_factor);
+    }
+  }
+}
+
+#endif /* BITS_IN_JSAMPLE != 16 || defined(D_LOSSLESS_SUPPORTED) */
diff --git a/thirdparty/libjpeg-turbo/src/jdsample.h b/thirdparty/libjpeg-turbo/src/jdsample.h
new file mode 100644
index 00000000000..a8a92980940
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdsample.h
@@ -0,0 +1,53 @@
+/*
+ * jdsample.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ */
+
+#define JPEG_INTERNALS
+#include "jpeglib.h"
+#include "jsamplecomp.h"
+
+
+/* Pointer to routine to upsample a single component */
+typedef void (*upsample1_ptr) (j_decompress_ptr cinfo,
+                               jpeg_component_info *compptr,
+                               _JSAMPARRAY input_data,
+                               _JSAMPARRAY *output_data_ptr);
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_upsampler pub;    /* public fields */
+
+  /* Color conversion buffer.  When using separate upsampling and color
+   * conversion steps, this buffer holds one upsampled row group until it
+   * has been color converted and output.
+   * Note: we do not allocate any storage for component(s) which are full-size,
+   * ie do not need rescaling.  The corresponding entry of color_buf[] is
+   * simply set to point to the input data array, thereby avoiding copying.
+   */
+  _JSAMPARRAY color_buf[MAX_COMPONENTS];
+
+  /* Per-component upsampling method pointers */
+  upsample1_ptr methods[MAX_COMPONENTS];
+
+  int next_row_out;             /* counts rows emitted from color_buf */
+  JDIMENSION rows_to_go;        /* counts rows remaining in image */
+
+  /* Height of an input row group for each component. */
+  int rowgroup_height[MAX_COMPONENTS];
+
+  /* These arrays save pixel expansion factors so that int_expand need not
+   * recompute them each time.  They are unused for other upsampling methods.
+   */
+  UINT8 h_expand[MAX_COMPONENTS];
+  UINT8 v_expand[MAX_COMPONENTS];
+} my_upsampler;
+
+typedef my_upsampler *my_upsample_ptr;
diff --git a/thirdparty/libjpeg-turbo/src/jdtrans.c b/thirdparty/libjpeg-turbo/src/jdtrans.c
new file mode 100644
index 00000000000..719813f6767
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jdtrans.c
@@ -0,0 +1,162 @@
+/*
+ * jdtrans.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1995-1997, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2020, 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains library routines for transcoding decompression,
+ * that is, reading raw DCT coefficient arrays from an input JPEG file.
+ * The routines in jdapimin.c will also be needed by a transcoder.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jpegapicomp.h"
+
+
+/* Forward declarations */
+LOCAL(void) transdecode_master_selection(j_decompress_ptr cinfo);
+
+
+/*
+ * Read the coefficient arrays from a JPEG file.
+ * jpeg_read_header must be completed before calling this.
+ *
+ * The entire image is read into a set of virtual coefficient-block arrays,
+ * one per component.  The return value is a pointer to the array of
+ * virtual-array descriptors.  These can be manipulated directly via the
+ * JPEG memory manager, or handed off to jpeg_write_coefficients().
+ * To release the memory occupied by the virtual arrays, call
+ * jpeg_finish_decompress() when done with the data.
+ *
+ * An alternative usage is to simply obtain access to the coefficient arrays
+ * during a buffered-image-mode decompression operation.  This is allowed
+ * after any jpeg_finish_output() call.  The arrays can be accessed until
+ * jpeg_finish_decompress() is called.  (Note that any call to the library
+ * may reposition the arrays, so don't rely on access_virt_barray() results
+ * to stay valid across library calls.)
+ *
+ * Returns NULL if suspended.  This case need be checked only if
+ * a suspending data source is used.
+ */
+
+GLOBAL(jvirt_barray_ptr *)
+jpeg_read_coefficients(j_decompress_ptr cinfo)
+{
+  if (cinfo->master->lossless)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
+  if (cinfo->global_state == DSTATE_READY) {
+    /* First call: initialize active modules */
+    transdecode_master_selection(cinfo);
+    cinfo->global_state = DSTATE_RDCOEFS;
+  }
+  if (cinfo->global_state == DSTATE_RDCOEFS) {
+    /* Absorb whole file into the coef buffer */
+    for (;;) {
+      int retcode;
+      /* Call progress monitor hook if present */
+      if (cinfo->progress != NULL)
+        (*cinfo->progress->progress_monitor) ((j_common_ptr)cinfo);
+      /* Absorb some more input */
+      retcode = (*cinfo->inputctl->consume_input) (cinfo);
+      if (retcode == JPEG_SUSPENDED)
+        return NULL;
+      if (retcode == JPEG_REACHED_EOI)
+        break;
+      /* Advance progress counter if appropriate */
+      if (cinfo->progress != NULL &&
+          (retcode == JPEG_ROW_COMPLETED || retcode == JPEG_REACHED_SOS)) {
+        if (++cinfo->progress->pass_counter >= cinfo->progress->pass_limit) {
+          /* startup underestimated number of scans; ratchet up one scan */
+          cinfo->progress->pass_limit += (long)cinfo->total_iMCU_rows;
+        }
+      }
+    }
+    /* Set state so that jpeg_finish_decompress does the right thing */
+    cinfo->global_state = DSTATE_STOPPING;
+  }
+  /* At this point we should be in state DSTATE_STOPPING if being used
+   * standalone, or in state DSTATE_BUFIMAGE if being invoked to get access
+   * to the coefficients during a full buffered-image-mode decompression.
+   */
+  if ((cinfo->global_state == DSTATE_STOPPING ||
+       cinfo->global_state == DSTATE_BUFIMAGE) && cinfo->buffered_image) {
+    return cinfo->coef->coef_arrays;
+  }
+  /* Oops, improper usage */
+  ERREXIT1(cinfo, JERR_BAD_STATE, cinfo->global_state);
+  return NULL;                  /* keep compiler happy */
+}
+
+
+/*
+ * Master selection of decompression modules for transcoding.
+ * This substitutes for jdmaster.c's initialization of the full decompressor.
+ */
+
+LOCAL(void)
+transdecode_master_selection(j_decompress_ptr cinfo)
+{
+  /* This is effectively a buffered-image operation. */
+  cinfo->buffered_image = TRUE;
+
+#if JPEG_LIB_VERSION >= 80
+  /* Compute output image dimensions and related values. */
+  jpeg_core_output_dimensions(cinfo);
+#endif
+
+  /* Entropy decoding: either Huffman or arithmetic coding. */
+  if (cinfo->arith_code) {
+#ifdef D_ARITH_CODING_SUPPORTED
+    jinit_arith_decoder(cinfo);
+#else
+    ERREXIT(cinfo, JERR_ARITH_NOTIMPL);
+#endif
+  } else {
+    if (cinfo->progressive_mode) {
+#ifdef D_PROGRESSIVE_SUPPORTED
+      jinit_phuff_decoder(cinfo);
+#else
+      ERREXIT(cinfo, JERR_NOT_COMPILED);
+#endif
+    } else
+      jinit_huff_decoder(cinfo);
+  }
+
+  /* Always get a full-image coefficient buffer. */
+  if (cinfo->data_precision == 12)
+    j12init_d_coef_controller(cinfo, TRUE);
+  else
+    jinit_d_coef_controller(cinfo, TRUE);
+
+  /* We can now tell the memory manager to allocate virtual arrays. */
+  (*cinfo->mem->realize_virt_arrays) ((j_common_ptr)cinfo);
+
+  /* Initialize input side of decompressor to consume first scan. */
+  (*cinfo->inputctl->start_input_pass) (cinfo);
+
+  /* Initialize progress monitoring. */
+  if (cinfo->progress != NULL) {
+    int nscans;
+    /* Estimate number of scans to set pass_limit. */
+    if (cinfo->progressive_mode) {
+      /* Arbitrarily estimate 2 interleaved DC scans + 3 AC scans/component. */
+      nscans = 2 + 3 * cinfo->num_components;
+    } else if (cinfo->inputctl->has_multiple_scans) {
+      /* For a nonprogressive multiscan file, estimate 1 scan per component. */
+      nscans = cinfo->num_components;
+    } else {
+      nscans = 1;
+    }
+    cinfo->progress->pass_counter = 0L;
+    cinfo->progress->pass_limit = (long)cinfo->total_iMCU_rows * nscans;
+    cinfo->progress->completed_passes = 0;
+    cinfo->progress->total_passes = 1;
+  }
+}
diff --git a/thirdparty/libjpeg-turbo/src/jerror.c b/thirdparty/libjpeg-turbo/src/jerror.c
new file mode 100644
index 00000000000..2133244f8ae
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jerror.c
@@ -0,0 +1,243 @@
+/*
+ * jerror.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains simple error-reporting and trace-message routines.
+ * These are suitable for Unix-like systems and others where writing to
+ * stderr is the right thing to do.  Many applications will want to replace
+ * some or all of these routines.
+ *
+ * If you define USE_WINDOWS_MESSAGEBOX in jconfig.h or in the makefile,
+ * you get a Windows-specific hack to display error messages in a dialog box.
+ * It ain't much, but it beats dropping error messages into the bit bucket,
+ * which is what happens to output to stderr under most Windows C compilers.
+ *
+ * These routines are used by both the compression and decompression code.
+ */
+
+/* this is not a core library module, so it doesn't define JPEG_INTERNALS */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jversion.h"
+#include "jerror.h"
+
+#ifdef USE_WINDOWS_MESSAGEBOX
+#include <windows.h>
+#endif
+
+#ifndef EXIT_FAILURE            /* define exit() codes if not provided */
+#define EXIT_FAILURE  1
+#endif
+
+
+/*
+ * Create the message string table.
+ * We do this from the master message list in jerror.h by re-reading
+ * jerror.h with a suitable definition for macro JMESSAGE.
+ */
+
+#define JMESSAGE(code, string)  string,
+
+static const char * const jpeg_std_message_table[] = {
+#include "jerror.h"
+  NULL
+};
+
+
+/*
+ * Error exit handler: must not return to caller.
+ *
+ * Applications may override this if they want to get control back after
+ * an error.  Typically one would longjmp somewhere instead of exiting.
+ * The setjmp buffer can be made a private field within an expanded error
+ * handler object.  Note that the info needed to generate an error message
+ * is stored in the error object, so you can generate the message now or
+ * later, at your convenience.
+ * You should make sure that the JPEG object is cleaned up (with jpeg_abort
+ * or jpeg_destroy) at some point.
+ */
+
+METHODDEF(void)
+error_exit(j_common_ptr cinfo)
+{
+  /* Always display the message */
+  (*cinfo->err->output_message) (cinfo);
+
+  /* Let the memory manager delete any temp files before we die */
+  jpeg_destroy(cinfo);
+
+  exit(EXIT_FAILURE);
+}
+
+
+/*
+ * Actual output of an error or trace message.
+ * Applications may override this method to send JPEG messages somewhere
+ * other than stderr.
+ *
+ * On Windows, printing to stderr is generally completely useless,
+ * so we provide optional code to produce an error-dialog popup.
+ * Most Windows applications will still prefer to override this routine,
+ * but if they don't, it'll do something at least marginally useful.
+ *
+ * NOTE: to use the library in an environment that doesn't support the
+ * C stdio library, you may have to delete the call to fprintf() entirely,
+ * not just not use this routine.
+ */
+
+METHODDEF(void)
+output_message(j_common_ptr cinfo)
+{
+  char buffer[JMSG_LENGTH_MAX];
+
+  /* Create the message */
+  (*cinfo->err->format_message) (cinfo, buffer);
+
+#ifdef USE_WINDOWS_MESSAGEBOX
+  /* Display it in a message dialog box */
+  MessageBox(GetActiveWindow(), buffer, "JPEG Library Error",
+             MB_OK | MB_ICONERROR);
+#else
+  /* Send it to stderr, adding a newline */
+  fprintf(stderr, "%s\n", buffer);
+#endif
+}
+
+
+/*
+ * Decide whether to emit a trace or warning message.
+ * msg_level is one of:
+ *   -1: recoverable corrupt-data warning, may want to abort.
+ *    0: important advisory messages (always display to user).
+ *    1: first level of tracing detail.
+ *    2,3,...: successively more detailed tracing messages.
+ * An application might override this method if it wanted to abort on warnings
+ * or change the policy about which messages to display.
+ */
+
+METHODDEF(void)
+emit_message(j_common_ptr cinfo, int msg_level)
+{
+  struct jpeg_error_mgr *err = cinfo->err;
+
+  if (msg_level < 0) {
+    /* It's a warning message.  Since corrupt files may generate many warnings,
+     * the policy implemented here is to show only the first warning,
+     * unless trace_level >= 3.
+     */
+    if (err->num_warnings == 0 || err->trace_level >= 3)
+      (*err->output_message) (cinfo);
+    /* Always count warnings in num_warnings. */
+    err->num_warnings++;
+  } else {
+    /* It's a trace message.  Show it if trace_level >= msg_level. */
+    if (err->trace_level >= msg_level)
+      (*err->output_message) (cinfo);
+  }
+}
+
+
+/*
+ * Format a message string for the most recent JPEG error or message.
+ * The message is stored into buffer, which should be at least JMSG_LENGTH_MAX
+ * characters.  Note that no '\n' character is added to the string.
+ * Few applications should need to override this method.
+ */
+
+METHODDEF(void)
+format_message(j_common_ptr cinfo, char *buffer)
+{
+  struct jpeg_error_mgr *err = cinfo->err;
+  int msg_code = err->msg_code;
+  const char *msgtext = NULL;
+  const char *msgptr;
+  char ch;
+  boolean isstring;
+
+  /* Look up message string in proper table */
+  if (msg_code > 0 && msg_code <= err->last_jpeg_message) {
+    msgtext = err->jpeg_message_table[msg_code];
+  } else if (err->addon_message_table != NULL &&
+             msg_code >= err->first_addon_message &&
+             msg_code <= err->last_addon_message) {
+    msgtext = err->addon_message_table[msg_code - err->first_addon_message];
+  }
+
+  /* Defend against bogus message number */
+  if (msgtext == NULL) {
+    err->msg_parm.i[0] = msg_code;
+    msgtext = err->jpeg_message_table[0];
+  }
+
+  /* Check for string parameter, as indicated by %s in the message text */
+  isstring = FALSE;
+  msgptr = msgtext;
+  while ((ch = *msgptr++) != '\0') {
+    if (ch == '%') {
+      if (*msgptr == 's') isstring = TRUE;
+      break;
+    }
+  }
+
+  /* Format the message into the passed buffer */
+  if (isstring)
+    SNPRINTF(buffer, JMSG_LENGTH_MAX, msgtext, err->msg_parm.s);
+  else
+    SNPRINTF(buffer, JMSG_LENGTH_MAX, msgtext,
+             err->msg_parm.i[0], err->msg_parm.i[1],
+             err->msg_parm.i[2], err->msg_parm.i[3],
+             err->msg_parm.i[4], err->msg_parm.i[5],
+             err->msg_parm.i[6], err->msg_parm.i[7]);
+}
+
+
+/*
+ * Reset error state variables at start of a new image.
+ * This is called during compression startup to reset trace/error
+ * processing to default state, without losing any application-specific
+ * method pointers.  An application might possibly want to override
+ * this method if it has additional error processing state.
+ */
+
+METHODDEF(void)
+reset_error_mgr(j_common_ptr cinfo)
+{
+  cinfo->err->num_warnings = 0;
+  /* trace_level is not reset since it is an application-supplied parameter */
+  cinfo->err->msg_code = 0;     /* may be useful as a flag for "no error" */
+}
+
+
+/*
+ * Fill in the standard error-handling methods in a jpeg_error_mgr object.
+ * Typical call is:
+ *      struct jpeg_compress_struct cinfo;
+ *      struct jpeg_error_mgr err;
+ *
+ *      cinfo.err = jpeg_std_error(&err);
+ * after which the application may override some of the methods.
+ */
+
+GLOBAL(struct jpeg_error_mgr *)
+jpeg_std_error(struct jpeg_error_mgr *err)
+{
+  memset(err, 0, sizeof(struct jpeg_error_mgr));
+
+  err->error_exit = error_exit;
+  err->emit_message = emit_message;
+  err->output_message = output_message;
+  err->format_message = format_message;
+  err->reset_error_mgr = reset_error_mgr;
+
+  /* Initialize message table pointers */
+  err->jpeg_message_table = jpeg_std_message_table;
+  err->last_jpeg_message = (int)JMSG_LASTMSGCODE - 1;
+
+  return err;
+}
diff --git a/thirdparty/libjpeg-turbo/src/jerror.h b/thirdparty/libjpeg-turbo/src/jerror.h
new file mode 100644
index 00000000000..71ba03e2a3e
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jerror.h
@@ -0,0 +1,336 @@
+/*
+ * jerror.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1997, Thomas G. Lane.
+ * Modified 1997-2009 by Guido Vollbeding.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2014, 2017, 2021-2023, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file defines the error and message codes for the JPEG library.
+ * Edit this file to add new codes, or to translate the message strings to
+ * some other language.
+ * A set of error-reporting macros are defined too.  Some applications using
+ * the JPEG library may wish to include this file to get the error codes
+ * and/or the macros.
+ */
+
+/*
+ * To define the enum list of message codes, include this file without
+ * defining macro JMESSAGE.  To create a message string table, include it
+ * again with a suitable JMESSAGE definition (see jerror.c for an example).
+ */
+#ifndef JMESSAGE
+#ifndef JERROR_H
+/* First time through, define the enum list */
+#define JMAKE_ENUM_LIST
+#else
+/* Repeated inclusions of this file are no-ops unless JMESSAGE is defined */
+#define JMESSAGE(code, string)
+#endif /* JERROR_H */
+#endif /* JMESSAGE */
+
+#ifdef JMAKE_ENUM_LIST
+
+typedef enum {
+
+#define JMESSAGE(code, string)  code,
+
+#endif /* JMAKE_ENUM_LIST */
+
+JMESSAGE(JMSG_NOMESSAGE, "Bogus message code %d") /* Must be first entry! */
+
+/* For maintenance convenience, list is alphabetical by message code name */
+#if JPEG_LIB_VERSION < 70
+JMESSAGE(JERR_ARITH_NOTIMPL, "Sorry, arithmetic coding is not implemented")
+#endif
+JMESSAGE(JERR_BAD_ALIGN_TYPE, "ALIGN_TYPE is wrong, please fix")
+JMESSAGE(JERR_BAD_ALLOC_CHUNK, "MAX_ALLOC_CHUNK is wrong, please fix")
+JMESSAGE(JERR_BAD_BUFFER_MODE, "Bogus buffer control mode")
+JMESSAGE(JERR_BAD_COMPONENT_ID, "Invalid component ID %d in SOS")
+#if JPEG_LIB_VERSION >= 70
+JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request")
+#endif
+JMESSAGE(JERR_BAD_DCT_COEF,
+         "DCT coefficient (lossy) or spatial difference (lossless) out of range")
+JMESSAGE(JERR_BAD_DCTSIZE, "IDCT output block size %d not supported")
+#if JPEG_LIB_VERSION >= 70
+JMESSAGE(JERR_BAD_DROP_SAMPLING,
+         "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
+#endif
+JMESSAGE(JERR_BAD_HUFF_TABLE, "Bogus Huffman table definition")
+JMESSAGE(JERR_BAD_IN_COLORSPACE, "Bogus input colorspace")
+JMESSAGE(JERR_BAD_J_COLORSPACE, "Bogus JPEG colorspace")
+JMESSAGE(JERR_BAD_LENGTH, "Bogus marker length")
+JMESSAGE(JERR_BAD_LIB_VERSION,
+         "Wrong JPEG library version: library is %d, caller expects %d")
+JMESSAGE(JERR_BAD_MCU_SIZE, "Sampling factors too large for interleaved scan")
+JMESSAGE(JERR_BAD_POOL_ID, "Invalid memory pool code %d")
+JMESSAGE(JERR_BAD_PRECISION, "Unsupported JPEG data precision %d")
+JMESSAGE(JERR_BAD_PROGRESSION,
+         "Invalid progressive/lossless parameters Ss=%d Se=%d Ah=%d Al=%d")
+JMESSAGE(JERR_BAD_PROG_SCRIPT,
+         "Invalid progressive/lossless parameters at scan script entry %d")
+JMESSAGE(JERR_BAD_SAMPLING, "Bogus sampling factors")
+JMESSAGE(JERR_BAD_SCAN_SCRIPT, "Invalid scan script at entry %d")
+JMESSAGE(JERR_BAD_STATE, "Improper call to JPEG library in state %d")
+JMESSAGE(JERR_BAD_STRUCT_SIZE,
+         "JPEG parameter struct mismatch: library thinks size is %u, caller expects %u")
+JMESSAGE(JERR_BAD_VIRTUAL_ACCESS, "Bogus virtual array access")
+JMESSAGE(JERR_BUFFER_SIZE, "Buffer passed to JPEG library is too small")
+JMESSAGE(JERR_CANT_SUSPEND, "Suspension not allowed here")
+JMESSAGE(JERR_CCIR601_NOTIMPL, "CCIR601 sampling not implemented yet")
+JMESSAGE(JERR_COMPONENT_COUNT, "Too many color components: %d, max %d")
+JMESSAGE(JERR_CONVERSION_NOTIMPL, "Unsupported color conversion request")
+JMESSAGE(JERR_DAC_INDEX, "Bogus DAC index %d")
+JMESSAGE(JERR_DAC_VALUE, "Bogus DAC value 0x%x")
+JMESSAGE(JERR_DHT_INDEX, "Bogus DHT index %d")
+JMESSAGE(JERR_DQT_INDEX, "Bogus DQT index %d")
+JMESSAGE(JERR_EMPTY_IMAGE, "Empty JPEG image (DNL not supported)")
+JMESSAGE(JERR_EMS_READ, "Read from EMS failed")
+JMESSAGE(JERR_EMS_WRITE, "Write to EMS failed")
+JMESSAGE(JERR_EOI_EXPECTED, "Didn't expect more than one scan")
+JMESSAGE(JERR_FILE_READ, "Input file read error")
+JMESSAGE(JERR_FILE_WRITE, "Output file write error --- out of disk space?")
+JMESSAGE(JERR_FRACT_SAMPLE_NOTIMPL, "Fractional sampling not implemented yet")
+JMESSAGE(JERR_HUFF_CLEN_OVERFLOW, "Huffman code size table overflow")
+JMESSAGE(JERR_HUFF_MISSING_CODE, "Missing Huffman code table entry")
+JMESSAGE(JERR_IMAGE_TOO_BIG, "Maximum supported image dimension is %u pixels")
+JMESSAGE(JERR_INPUT_EMPTY, "Empty input file")
+JMESSAGE(JERR_INPUT_EOF, "Premature end of input file")
+JMESSAGE(JERR_MISMATCHED_QUANT_TABLE,
+         "Cannot transcode due to multiple use of quantization table %d")
+JMESSAGE(JERR_MISSING_DATA, "Scan script does not transmit all data")
+JMESSAGE(JERR_MODE_CHANGE, "Invalid color quantization mode change")
+JMESSAGE(JERR_NOTIMPL, "Requested features are incompatible")
+JMESSAGE(JERR_NOT_COMPILED, "Requested feature was omitted at compile time")
+#if JPEG_LIB_VERSION >= 70
+JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
+#endif
+JMESSAGE(JERR_NO_BACKING_STORE, "Memory limit exceeded")
+JMESSAGE(JERR_NO_HUFF_TABLE, "Huffman table 0x%02x was not defined")
+JMESSAGE(JERR_NO_IMAGE, "JPEG datastream contains no image")
+JMESSAGE(JERR_NO_QUANT_TABLE, "Quantization table 0x%02x was not defined")
+JMESSAGE(JERR_NO_SOI, "Not a JPEG file: starts with 0x%02x 0x%02x")
+JMESSAGE(JERR_OUT_OF_MEMORY, "Insufficient memory (case %d)")
+JMESSAGE(JERR_QUANT_COMPONENTS,
+         "Cannot quantize more than %d color components")
+JMESSAGE(JERR_QUANT_FEW_COLORS, "Cannot quantize to fewer than %d colors")
+JMESSAGE(JERR_QUANT_MANY_COLORS, "Cannot quantize to more than %d colors")
+JMESSAGE(JERR_SOF_DUPLICATE, "Invalid JPEG file structure: two SOF markers")
+JMESSAGE(JERR_SOF_NO_SOS, "Invalid JPEG file structure: missing SOS marker")
+JMESSAGE(JERR_SOF_UNSUPPORTED, "Unsupported JPEG process: SOF type 0x%02x")
+JMESSAGE(JERR_SOI_DUPLICATE, "Invalid JPEG file structure: two SOI markers")
+JMESSAGE(JERR_SOS_NO_SOF, "Invalid JPEG file structure: SOS before SOF")
+JMESSAGE(JERR_TFILE_CREATE, "Failed to create temporary file %s")
+JMESSAGE(JERR_TFILE_READ, "Read failed on temporary file")
+JMESSAGE(JERR_TFILE_SEEK, "Seek failed on temporary file")
+JMESSAGE(JERR_TFILE_WRITE,
+         "Write failed on temporary file --- out of disk space?")
+JMESSAGE(JERR_TOO_LITTLE_DATA, "Application transferred too few scanlines")
+JMESSAGE(JERR_UNKNOWN_MARKER, "Unsupported marker type 0x%02x")
+JMESSAGE(JERR_VIRTUAL_BUG, "Virtual array controller messed up")
+JMESSAGE(JERR_WIDTH_OVERFLOW, "Image too wide for this implementation")
+JMESSAGE(JERR_XMS_READ, "Read from XMS failed")
+JMESSAGE(JERR_XMS_WRITE, "Write to XMS failed")
+JMESSAGE(JMSG_COPYRIGHT, JCOPYRIGHT_SHORT)
+JMESSAGE(JMSG_VERSION, JVERSION)
+JMESSAGE(JTRC_16BIT_TABLES,
+         "Caution: quantization tables are too coarse for baseline JPEG")
+JMESSAGE(JTRC_ADOBE,
+         "Adobe APP14 marker: version %d, flags 0x%04x 0x%04x, transform %d")
+JMESSAGE(JTRC_APP0, "Unknown APP0 marker (not JFIF), length %u")
+JMESSAGE(JTRC_APP14, "Unknown APP14 marker (not Adobe), length %u")
+JMESSAGE(JTRC_DAC, "Define Arithmetic Table 0x%02x: 0x%02x")
+JMESSAGE(JTRC_DHT, "Define Huffman Table 0x%02x")
+JMESSAGE(JTRC_DQT, "Define Quantization Table %d  precision %d")
+JMESSAGE(JTRC_DRI, "Define Restart Interval %u")
+JMESSAGE(JTRC_EMS_CLOSE, "Freed EMS handle %u")
+JMESSAGE(JTRC_EMS_OPEN, "Obtained EMS handle %u")
+JMESSAGE(JTRC_EOI, "End Of Image")
+JMESSAGE(JTRC_HUFFBITS, "        %3d %3d %3d %3d %3d %3d %3d %3d")
+JMESSAGE(JTRC_JFIF, "JFIF APP0 marker: version %d.%02d, density %dx%d  %d")
+JMESSAGE(JTRC_JFIF_BADTHUMBNAILSIZE,
+         "Warning: thumbnail image size does not match data length %u")
+JMESSAGE(JTRC_JFIF_EXTENSION, "JFIF extension marker: type 0x%02x, length %u")
+JMESSAGE(JTRC_JFIF_THUMBNAIL, "    with %d x %d thumbnail image")
+JMESSAGE(JTRC_MISC_MARKER, "Miscellaneous marker 0x%02x, length %u")
+JMESSAGE(JTRC_PARMLESS_MARKER, "Unexpected marker 0x%02x")
+JMESSAGE(JTRC_QUANTVALS, "        %4u %4u %4u %4u %4u %4u %4u %4u")
+JMESSAGE(JTRC_QUANT_3_NCOLORS, "Quantizing to %d = %d*%d*%d colors")
+JMESSAGE(JTRC_QUANT_NCOLORS, "Quantizing to %d colors")
+JMESSAGE(JTRC_QUANT_SELECTED, "Selected %d colors for quantization")
+JMESSAGE(JTRC_RECOVERY_ACTION, "At marker 0x%02x, recovery action %d")
+JMESSAGE(JTRC_RST, "RST%d")
+JMESSAGE(JTRC_SMOOTH_NOTIMPL,
+         "Smoothing not supported with nonstandard sampling ratios")
+JMESSAGE(JTRC_SOF, "Start Of Frame 0x%02x: width=%u, height=%u, components=%d")
+JMESSAGE(JTRC_SOF_COMPONENT, "    Component %d: %dhx%dv q=%d")
+JMESSAGE(JTRC_SOI, "Start of Image")
+JMESSAGE(JTRC_SOS, "Start Of Scan: %d components")
+JMESSAGE(JTRC_SOS_COMPONENT, "    Component %d: dc=%d ac=%d")
+JMESSAGE(JTRC_SOS_PARAMS, "  Ss=%d, Se=%d, Ah=%d, Al=%d")
+JMESSAGE(JTRC_TFILE_CLOSE, "Closed temporary file %s")
+JMESSAGE(JTRC_TFILE_OPEN, "Opened temporary file %s")
+JMESSAGE(JTRC_THUMB_JPEG,
+         "JFIF extension marker: JPEG-compressed thumbnail image, length %u")
+JMESSAGE(JTRC_THUMB_PALETTE,
+         "JFIF extension marker: palette thumbnail image, length %u")
+JMESSAGE(JTRC_THUMB_RGB,
+         "JFIF extension marker: RGB thumbnail image, length %u")
+JMESSAGE(JTRC_UNKNOWN_IDS,
+         "Unrecognized component IDs %d %d %d, assuming YCbCr (lossy) or RGB (lossless)")
+JMESSAGE(JTRC_XMS_CLOSE, "Freed XMS handle %u")
+JMESSAGE(JTRC_XMS_OPEN, "Obtained XMS handle %u")
+JMESSAGE(JWRN_ADOBE_XFORM, "Unknown Adobe color transform code %d")
+#if JPEG_LIB_VERSION >= 70
+JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
+#endif
+JMESSAGE(JWRN_BOGUS_PROGRESSION,
+         "Inconsistent progression sequence for component %d coefficient %d")
+JMESSAGE(JWRN_EXTRANEOUS_DATA,
+         "Corrupt JPEG data: %u extraneous bytes before marker 0x%02x")
+JMESSAGE(JWRN_HIT_MARKER, "Corrupt JPEG data: premature end of data segment")
+JMESSAGE(JWRN_HUFF_BAD_CODE, "Corrupt JPEG data: bad Huffman code")
+JMESSAGE(JWRN_JFIF_MAJOR, "Warning: unknown JFIF revision number %d.%02d")
+JMESSAGE(JWRN_JPEG_EOF, "Premature end of JPEG file")
+JMESSAGE(JWRN_MUST_RESYNC,
+         "Corrupt JPEG data: found marker 0x%02x instead of RST%d")
+JMESSAGE(JWRN_NOT_SEQUENTIAL, "Invalid SOS parameters for sequential JPEG")
+JMESSAGE(JWRN_TOO_MUCH_DATA, "Application transferred too many scanlines")
+#if JPEG_LIB_VERSION < 70
+JMESSAGE(JERR_BAD_CROP_SPEC, "Invalid crop request")
+#if defined(C_ARITH_CODING_SUPPORTED) || defined(D_ARITH_CODING_SUPPORTED)
+JMESSAGE(JERR_NO_ARITH_TABLE, "Arithmetic table 0x%02x was not defined")
+JMESSAGE(JWRN_ARITH_BAD_CODE, "Corrupt JPEG data: bad arithmetic code")
+#endif
+#endif
+JMESSAGE(JWRN_BOGUS_ICC, "Corrupt JPEG data: bad ICC marker")
+#if JPEG_LIB_VERSION < 70
+JMESSAGE(JERR_BAD_DROP_SAMPLING,
+         "Component index %d: mismatching sampling ratio %d:%d, %d:%d, %c")
+#endif
+JMESSAGE(JERR_BAD_RESTART,
+         "Invalid restart interval %d; must be an integer multiple of the number of MCUs in an MCU row (%d)")
+
+#ifdef JMAKE_ENUM_LIST
+
+  JMSG_LASTMSGCODE
+} J_MESSAGE_CODE;
+
+#undef JMAKE_ENUM_LIST
+#endif /* JMAKE_ENUM_LIST */
+
+/* Zap JMESSAGE macro so that future re-inclusions do nothing by default */
+#undef JMESSAGE
+
+
+#ifndef JERROR_H
+#define JERROR_H
+
+/* Macros to simplify using the error and trace message stuff */
+/* The first parameter is either type of cinfo pointer */
+
+/* Fatal errors (print message and exit) */
+#define ERREXIT(cinfo, code) \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo)))
+#define ERREXIT1(cinfo, code, p1) \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo)))
+#define ERREXIT2(cinfo, code, p1, p2) \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo)))
+#define ERREXIT3(cinfo, code, p1, p2, p3) \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo)))
+#define ERREXIT4(cinfo, code, p1, p2, p3, p4) \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (cinfo)->err->msg_parm.i[3] = (p4), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo)))
+#define ERREXIT6(cinfo, code, p1, p2, p3, p4, p5, p6) \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (cinfo)->err->msg_parm.i[2] = (p3), \
+   (cinfo)->err->msg_parm.i[3] = (p4), \
+   (cinfo)->err->msg_parm.i[4] = (p5), \
+   (cinfo)->err->msg_parm.i[5] = (p6), \
+   (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo)))
+#define ERREXITS(cinfo, code, str) \
+  ((cinfo)->err->msg_code = (code), \
+   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
+   (cinfo)->err->msg_parm.s[JMSG_STR_PARM_MAX - 1] = '\0', \
+   (*(cinfo)->err->error_exit) ((j_common_ptr)(cinfo)))
+
+#define MAKESTMT(stuff)         do { stuff } while (0)
+
+/* Nonfatal errors (we can keep going, but the data is probably corrupt) */
+#define WARNMS(cinfo, code) \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), -1))
+#define WARNMS1(cinfo, code, p1) \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), -1))
+#define WARNMS2(cinfo, code, p1, p2) \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), -1))
+
+/* Informational/debugging messages */
+#define TRACEMS(cinfo, lvl, code) \
+  ((cinfo)->err->msg_code = (code), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), (lvl)))
+#define TRACEMS1(cinfo, lvl, code, p1) \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), (lvl)))
+#define TRACEMS2(cinfo, lvl, code, p1, p2) \
+  ((cinfo)->err->msg_code = (code), \
+   (cinfo)->err->msg_parm.i[0] = (p1), \
+   (cinfo)->err->msg_parm.i[1] = (p2), \
+   (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), (lvl)))
+#define TRACEMS3(cinfo, lvl, code, p1, p2, p3) \
+  MAKESTMT(int *_mp = (cinfo)->err->msg_parm.i; \
+           _mp[0] = (p1);  _mp[1] = (p2);  _mp[2] = (p3); \
+           (cinfo)->err->msg_code = (code); \
+           (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), (lvl)); )
+#define TRACEMS4(cinfo, lvl, code, p1, p2, p3, p4) \
+  MAKESTMT(int *_mp = (cinfo)->err->msg_parm.i; \
+           _mp[0] = (p1);  _mp[1] = (p2);  _mp[2] = (p3);  _mp[3] = (p4); \
+           (cinfo)->err->msg_code = (code); \
+           (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), (lvl)); )
+#define TRACEMS5(cinfo, lvl, code, p1, p2, p3, p4, p5) \
+  MAKESTMT(int *_mp = (cinfo)->err->msg_parm.i; \
+           _mp[0] = (p1);  _mp[1] = (p2);  _mp[2] = (p3);  _mp[3] = (p4); \
+           _mp[4] = (p5); \
+           (cinfo)->err->msg_code = (code); \
+           (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), (lvl)); )
+#define TRACEMS8(cinfo, lvl, code, p1, p2, p3, p4, p5, p6, p7, p8) \
+  MAKESTMT(int *_mp = (cinfo)->err->msg_parm.i; \
+           _mp[0] = (p1);  _mp[1] = (p2);  _mp[2] = (p3);  _mp[3] = (p4); \
+           _mp[4] = (p5);  _mp[5] = (p6);  _mp[6] = (p7);  _mp[7] = (p8); \
+           (cinfo)->err->msg_code = (code); \
+           (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), (lvl)); )
+#define TRACEMSS(cinfo, lvl, code, str) \
+  ((cinfo)->err->msg_code = (code), \
+   strncpy((cinfo)->err->msg_parm.s, (str), JMSG_STR_PARM_MAX), \
+   (cinfo)->err->msg_parm.s[JMSG_STR_PARM_MAX - 1] = '\0', \
+   (*(cinfo)->err->emit_message) ((j_common_ptr)(cinfo), (lvl)))
+
+#endif /* JERROR_H */
diff --git a/thirdparty/libjpeg-turbo/src/jfdctflt.c b/thirdparty/libjpeg-turbo/src/jfdctflt.c
new file mode 100644
index 00000000000..ab6f6d08253
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jfdctflt.c
@@ -0,0 +1,169 @@
+/*
+ * jfdctflt.c
+ *
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * This file is part of the Independent JPEG Group's software.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains a floating-point implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * This implementation should be more accurate than either of the integer
+ * DCT implementations.  However, it may not give the same results on all
+ * machines because of differences in roundoff behavior.  Speed will depend
+ * on the hardware's floating point capacity.
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README.ijg).  The following
+ * code is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with a fixed-point
+ * implementation, accuracy is lost due to imprecise representation of the
+ * scaled quantization values.  However, that problem does not arise if
+ * we use floating point arithmetic.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"               /* Private declarations for DCT subsystem */
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/*
+ * Perform the forward DCT on one block of samples.
+ */
+
+GLOBAL(void)
+jpeg_fdct_float(FAST_FLOAT *data)
+{
+  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
+  FAST_FLOAT z1, z2, z3, z4, z5, z11, z13;
+  FAST_FLOAT *dataptr;
+  int ctr;
+
+  /* Pass 1: process rows. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[0] + dataptr[7];
+    tmp7 = dataptr[0] - dataptr[7];
+    tmp1 = dataptr[1] + dataptr[6];
+    tmp6 = dataptr[1] - dataptr[6];
+    tmp2 = dataptr[2] + dataptr[5];
+    tmp5 = dataptr[2] - dataptr[5];
+    tmp3 = dataptr[3] + dataptr[4];
+    tmp4 = dataptr[3] - dataptr[4];
+
+    /* Even part */
+
+    tmp10 = tmp0 + tmp3;        /* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[4] = tmp10 - tmp11;
+
+    z1 = (tmp12 + tmp13) * ((FAST_FLOAT)0.707106781); /* c4 */
+    dataptr[2] = tmp13 + z1;    /* phase 5 */
+    dataptr[6] = tmp13 - z1;
+
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;        /* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = (tmp10 - tmp12) * ((FAST_FLOAT)0.382683433); /* c6 */
+    z2 = ((FAST_FLOAT)0.541196100) * tmp10 + z5; /* c2-c6 */
+    z4 = ((FAST_FLOAT)1.306562965) * tmp12 + z5; /* c2+c6 */
+    z3 = tmp11 * ((FAST_FLOAT)0.707106781); /* c4 */
+
+    z11 = tmp7 + z3;            /* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[5] = z13 + z2;      /* phase 6 */
+    dataptr[3] = z13 - z2;
+    dataptr[1] = z11 + z4;
+    dataptr[7] = z11 - z4;
+
+    dataptr += DCTSIZE;         /* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
+    tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
+    tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
+    tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
+    tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
+    tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
+    tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
+    tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
+
+    /* Even part */
+
+    tmp10 = tmp0 + tmp3;        /* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[DCTSIZE * 0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[DCTSIZE * 4] = tmp10 - tmp11;
+
+    z1 = (tmp12 + tmp13) * ((FAST_FLOAT)0.707106781); /* c4 */
+    dataptr[DCTSIZE * 2] = tmp13 + z1; /* phase 5 */
+    dataptr[DCTSIZE * 6] = tmp13 - z1;
+
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;        /* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = (tmp10 - tmp12) * ((FAST_FLOAT)0.382683433); /* c6 */
+    z2 = ((FAST_FLOAT)0.541196100) * tmp10 + z5; /* c2-c6 */
+    z4 = ((FAST_FLOAT)1.306562965) * tmp12 + z5; /* c2+c6 */
+    z3 = tmp11 * ((FAST_FLOAT)0.707106781); /* c4 */
+
+    z11 = tmp7 + z3;            /* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[DCTSIZE * 5] = z13 + z2; /* phase 6 */
+    dataptr[DCTSIZE * 3] = z13 - z2;
+    dataptr[DCTSIZE * 1] = z11 + z4;
+    dataptr[DCTSIZE * 7] = z11 - z4;
+
+    dataptr++;                  /* advance pointer to next column */
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
diff --git a/thirdparty/libjpeg-turbo/src/jfdctfst.c b/thirdparty/libjpeg-turbo/src/jfdctfst.c
new file mode 100644
index 00000000000..26070d19a62
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jfdctfst.c
@@ -0,0 +1,227 @@
+/*
+ * jfdctfst.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains a fast, not so accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README.ijg).  The following
+ * code is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with fixed-point math,
+ * accuracy is lost due to imprecise representation of the scaled
+ * quantization values.  The smaller the quantization table entry, the less
+ * precise the scaled value, so this implementation does worse with high-
+ * quality-setting files than with low-quality ones.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"               /* Private declarations for DCT subsystem */
+
+#ifdef DCT_IFAST_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Scaling decisions are generally the same as in the LL&M algorithm;
+ * see jfdctint.c for more details.  However, we choose to descale
+ * (right shift) multiplication products as soon as they are formed,
+ * rather than carrying additional fractional bits into subsequent additions.
+ * This compromises accuracy slightly, but it lets us save a few shifts.
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
+ * everywhere except in the multiplications proper; this saves a good deal
+ * of work on 16-bit-int machines.
+ *
+ * Again to save a few shifts, the intermediate results between pass 1 and
+ * pass 2 are not upscaled, but are represented only to integral precision.
+ *
+ * A final compromise is to represent the multiplicative constants to only
+ * 8 fractional bits, rather than 13.  This saves some shifting work on some
+ * machines, and may also reduce the cost of multiplication (since there
+ * are fewer one-bits in the constants).
+ */
+
+#define CONST_BITS  8
+
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 8
+#define FIX_0_382683433  ((JLONG)98)            /* FIX(0.382683433) */
+#define FIX_0_541196100  ((JLONG)139)           /* FIX(0.541196100) */
+#define FIX_0_707106781  ((JLONG)181)           /* FIX(0.707106781) */
+#define FIX_1_306562965  ((JLONG)334)           /* FIX(1.306562965) */
+#else
+#define FIX_0_382683433  FIX(0.382683433)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_707106781  FIX(0.707106781)
+#define FIX_1_306562965  FIX(1.306562965)
+#endif
+
+
+/* We can gain a little more speed, with a further compromise in accuracy,
+ * by omitting the addition in a descaling shift.  This yields an incorrectly
+ * rounded result half the time...
+ */
+
+#ifndef USE_ACCURATE_ROUNDING
+#undef DESCALE
+#define DESCALE(x, n)  RIGHT_SHIFT(x, n)
+#endif
+
+
+/* Multiply a DCTELEM variable by an JLONG constant, and immediately
+ * descale to yield a DCTELEM result.
+ */
+
+#define MULTIPLY(var, const)  ((DCTELEM)DESCALE((var) * (const), CONST_BITS))
+
+
+/*
+ * Perform the forward DCT on one block of samples.
+ */
+
+GLOBAL(void)
+_jpeg_fdct_ifast(DCTELEM *data)
+{
+  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  DCTELEM tmp10, tmp11, tmp12, tmp13;
+  DCTELEM z1, z2, z3, z4, z5, z11, z13;
+  DCTELEM *dataptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[0] + dataptr[7];
+    tmp7 = dataptr[0] - dataptr[7];
+    tmp1 = dataptr[1] + dataptr[6];
+    tmp6 = dataptr[1] - dataptr[6];
+    tmp2 = dataptr[2] + dataptr[5];
+    tmp5 = dataptr[2] - dataptr[5];
+    tmp3 = dataptr[3] + dataptr[4];
+    tmp4 = dataptr[3] - dataptr[4];
+
+    /* Even part */
+
+    tmp10 = tmp0 + tmp3;        /* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[4] = tmp10 - tmp11;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    dataptr[2] = tmp13 + z1;    /* phase 5 */
+    dataptr[6] = tmp13 - z1;
+
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;        /* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+
+    z11 = tmp7 + z3;            /* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[5] = z13 + z2;      /* phase 6 */
+    dataptr[3] = z13 - z2;
+    dataptr[1] = z11 + z4;
+    dataptr[7] = z11 - z4;
+
+    dataptr += DCTSIZE;         /* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
+    tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
+    tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
+    tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
+    tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
+    tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
+    tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
+    tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
+
+    /* Even part */
+
+    tmp10 = tmp0 + tmp3;        /* phase 2 */
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[DCTSIZE * 0] = tmp10 + tmp11; /* phase 3 */
+    dataptr[DCTSIZE * 4] = tmp10 - tmp11;
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_707106781); /* c4 */
+    dataptr[DCTSIZE * 2] = tmp13 + z1; /* phase 5 */
+    dataptr[DCTSIZE * 6] = tmp13 - z1;
+
+    /* Odd part */
+
+    tmp10 = tmp4 + tmp5;        /* phase 2 */
+    tmp11 = tmp5 + tmp6;
+    tmp12 = tmp6 + tmp7;
+
+    /* The rotator is modified from fig 4-8 to avoid extra negations. */
+    z5 = MULTIPLY(tmp10 - tmp12, FIX_0_382683433); /* c6 */
+    z2 = MULTIPLY(tmp10, FIX_0_541196100) + z5; /* c2-c6 */
+    z4 = MULTIPLY(tmp12, FIX_1_306562965) + z5; /* c2+c6 */
+    z3 = MULTIPLY(tmp11, FIX_0_707106781); /* c4 */
+
+    z11 = tmp7 + z3;            /* phase 5 */
+    z13 = tmp7 - z3;
+
+    dataptr[DCTSIZE * 5] = z13 + z2; /* phase 6 */
+    dataptr[DCTSIZE * 3] = z13 - z2;
+    dataptr[DCTSIZE * 1] = z11 + z4;
+    dataptr[DCTSIZE * 7] = z11 - z4;
+
+    dataptr++;                  /* advance pointer to next column */
+  }
+}
+
+#endif /* DCT_IFAST_SUPPORTED */
diff --git a/thirdparty/libjpeg-turbo/src/jfdctint.c b/thirdparty/libjpeg-turbo/src/jfdctint.c
new file mode 100644
index 00000000000..974013fa409
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jfdctint.c
@@ -0,0 +1,288 @@
+/*
+ * jfdctint.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015, 2020, 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains a slower but more accurate integer implementation of the
+ * forward DCT (Discrete Cosine Transform).
+ *
+ * A 2-D DCT can be done by 1-D DCT on each row followed by 1-D DCT
+ * on each column.  Direct algorithms are also available, but they are
+ * much more complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"               /* Private declarations for DCT subsystem */
+
+#ifdef DCT_ISLOW_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/*
+ * The poop on this scaling stuff is as follows:
+ *
+ * Each 1-D DCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true DCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D DCT,
+ * because the y0 and y4 outputs need not be divided by sqrt(N).
+ * In the IJG code, this factor of 8 is removed by the quantization step
+ * (in jcdctmgr.c), NOT in this module.
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (For 12-bit sample data, the intermediate
+ * array is JLONG anyway.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13
+#define PASS1_BITS  2
+#else
+#define CONST_BITS  13
+#define PASS1_BITS  1           /* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13
+#define FIX_0_298631336  ((JLONG)2446)          /* FIX(0.298631336) */
+#define FIX_0_390180644  ((JLONG)3196)          /* FIX(0.390180644) */
+#define FIX_0_541196100  ((JLONG)4433)          /* FIX(0.541196100) */
+#define FIX_0_765366865  ((JLONG)6270)          /* FIX(0.765366865) */
+#define FIX_0_899976223  ((JLONG)7373)          /* FIX(0.899976223) */
+#define FIX_1_175875602  ((JLONG)9633)          /* FIX(1.175875602) */
+#define FIX_1_501321110  ((JLONG)12299)         /* FIX(1.501321110) */
+#define FIX_1_847759065  ((JLONG)15137)         /* FIX(1.847759065) */
+#define FIX_1_961570560  ((JLONG)16069)         /* FIX(1.961570560) */
+#define FIX_2_053119869  ((JLONG)16819)         /* FIX(2.053119869) */
+#define FIX_2_562915447  ((JLONG)20995)         /* FIX(2.562915447) */
+#define FIX_3_072711026  ((JLONG)25172)         /* FIX(3.072711026) */
+#else
+#define FIX_0_298631336  FIX(0.298631336)
+#define FIX_0_390180644  FIX(0.390180644)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_175875602  FIX(1.175875602)
+#define FIX_1_501321110  FIX(1.501321110)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_1_961570560  FIX(1.961570560)
+#define FIX_2_053119869  FIX(2.053119869)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_072711026  FIX(3.072711026)
+#endif
+
+
+/* Multiply an JLONG variable by an JLONG constant to yield an JLONG result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define MULTIPLY(var, const)  MULTIPLY16C16(var, const)
+#else
+#define MULTIPLY(var, const)  ((var) * (const))
+#endif
+
+
+/*
+ * Perform the forward DCT on one block of samples.
+ */
+
+GLOBAL(void)
+_jpeg_fdct_islow(DCTELEM *data)
+{
+  JLONG tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  JLONG tmp10, tmp11, tmp12, tmp13;
+  JLONG z1, z2, z3, z4, z5;
+  DCTELEM *dataptr;
+  int ctr;
+  SHIFT_TEMPS
+
+  /* Pass 1: process rows. */
+  /* Note results are scaled up by sqrt(8) compared to a true DCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  dataptr = data;
+  for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[0] + dataptr[7];
+    tmp7 = dataptr[0] - dataptr[7];
+    tmp1 = dataptr[1] + dataptr[6];
+    tmp6 = dataptr[1] - dataptr[6];
+    tmp2 = dataptr[2] + dataptr[5];
+    tmp5 = dataptr[2] - dataptr[5];
+    tmp3 = dataptr[3] + dataptr[4];
+    tmp4 = dataptr[3] - dataptr[4];
+
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[0] = (DCTELEM)LEFT_SHIFT(tmp10 + tmp11, PASS1_BITS);
+    dataptr[4] = (DCTELEM)LEFT_SHIFT(tmp10 - tmp11, PASS1_BITS);
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    dataptr[2] = (DCTELEM)DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+                                  CONST_BITS - PASS1_BITS);
+    dataptr[6] = (DCTELEM)DESCALE(z1 + MULTIPLY(tmp12, -FIX_1_847759065),
+                                  CONST_BITS - PASS1_BITS);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * cK represents cos(K*pi/16).
+     * i0..i3 in the paper are tmp4..tmp7 here.
+     */
+
+    z1 = tmp4 + tmp7;
+    z2 = tmp5 + tmp6;
+    z3 = tmp4 + tmp6;
+    z4 = tmp5 + tmp7;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+    tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, -FIX_0_899976223); /* sqrt(2) * ( c7-c3) */
+    z2 = MULTIPLY(z2, -FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, -FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, -FIX_0_390180644); /* sqrt(2) * ( c5-c3) */
+
+    z3 += z5;
+    z4 += z5;
+
+    dataptr[7] = (DCTELEM)DESCALE(tmp4 + z1 + z3, CONST_BITS - PASS1_BITS);
+    dataptr[5] = (DCTELEM)DESCALE(tmp5 + z2 + z4, CONST_BITS - PASS1_BITS);
+    dataptr[3] = (DCTELEM)DESCALE(tmp6 + z2 + z3, CONST_BITS - PASS1_BITS);
+    dataptr[1] = (DCTELEM)DESCALE(tmp7 + z1 + z4, CONST_BITS - PASS1_BITS);
+
+    dataptr += DCTSIZE;         /* advance pointer to next row */
+  }
+
+  /* Pass 2: process columns.
+   * We remove the PASS1_BITS scaling, but leave the results scaled up
+   * by an overall factor of 8.
+   */
+
+  dataptr = data;
+  for (ctr = DCTSIZE - 1; ctr >= 0; ctr--) {
+    tmp0 = dataptr[DCTSIZE * 0] + dataptr[DCTSIZE * 7];
+    tmp7 = dataptr[DCTSIZE * 0] - dataptr[DCTSIZE * 7];
+    tmp1 = dataptr[DCTSIZE * 1] + dataptr[DCTSIZE * 6];
+    tmp6 = dataptr[DCTSIZE * 1] - dataptr[DCTSIZE * 6];
+    tmp2 = dataptr[DCTSIZE * 2] + dataptr[DCTSIZE * 5];
+    tmp5 = dataptr[DCTSIZE * 2] - dataptr[DCTSIZE * 5];
+    tmp3 = dataptr[DCTSIZE * 3] + dataptr[DCTSIZE * 4];
+    tmp4 = dataptr[DCTSIZE * 3] - dataptr[DCTSIZE * 4];
+
+    /* Even part per LL&M figure 1 --- note that published figure is faulty;
+     * rotator "sqrt(2)*c1" should be "sqrt(2)*c6".
+     */
+
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    dataptr[DCTSIZE * 0] = (DCTELEM)DESCALE(tmp10 + tmp11, PASS1_BITS);
+    dataptr[DCTSIZE * 4] = (DCTELEM)DESCALE(tmp10 - tmp11, PASS1_BITS);
+
+    z1 = MULTIPLY(tmp12 + tmp13, FIX_0_541196100);
+    dataptr[DCTSIZE * 2] =
+      (DCTELEM)DESCALE(z1 + MULTIPLY(tmp13, FIX_0_765366865),
+                       CONST_BITS + PASS1_BITS);
+    dataptr[DCTSIZE * 6] =
+      (DCTELEM)DESCALE(z1 + MULTIPLY(tmp12, -FIX_1_847759065),
+                       CONST_BITS + PASS1_BITS);
+
+    /* Odd part per figure 8 --- note paper omits factor of sqrt(2).
+     * cK represents cos(K*pi/16).
+     * i0..i3 in the paper are tmp4..tmp7 here.
+     */
+
+    z1 = tmp4 + tmp7;
+    z2 = tmp5 + tmp6;
+    z3 = tmp4 + tmp6;
+    z4 = tmp5 + tmp7;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+    tmp4 = MULTIPLY(tmp4, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp5 = MULTIPLY(tmp5, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp6 = MULTIPLY(tmp6, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp7 = MULTIPLY(tmp7, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, -FIX_0_899976223); /* sqrt(2) * ( c7-c3) */
+    z2 = MULTIPLY(z2, -FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, -FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, -FIX_0_390180644); /* sqrt(2) * ( c5-c3) */
+
+    z3 += z5;
+    z4 += z5;
+
+    dataptr[DCTSIZE * 7] = (DCTELEM)DESCALE(tmp4 + z1 + z3,
+                                            CONST_BITS + PASS1_BITS);
+    dataptr[DCTSIZE * 5] = (DCTELEM)DESCALE(tmp5 + z2 + z4,
+                                            CONST_BITS + PASS1_BITS);
+    dataptr[DCTSIZE * 3] = (DCTELEM)DESCALE(tmp6 + z2 + z3,
+                                            CONST_BITS + PASS1_BITS);
+    dataptr[DCTSIZE * 1] = (DCTELEM)DESCALE(tmp7 + z1 + z4,
+                                            CONST_BITS + PASS1_BITS);
+
+    dataptr++;                  /* advance pointer to next column */
+  }
+}
+
+#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/thirdparty/libjpeg-turbo/src/jidctflt.c b/thirdparty/libjpeg-turbo/src/jidctflt.c
new file mode 100644
index 00000000000..ee3a31a6168
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jidctflt.c
@@ -0,0 +1,240 @@
+/*
+ * jidctflt.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * Modified 2010 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2014, 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains a floating-point implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * This implementation should be more accurate than either of the integer
+ * IDCT implementations.  However, it may not give the same results on all
+ * machines because of differences in roundoff behavior.  Speed will depend
+ * on the hardware's floating point capacity.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README.ijg).  The following
+ * code is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with a fixed-point
+ * implementation, accuracy is lost due to imprecise representation of the
+ * scaled quantization values.  However, that problem does not arise if
+ * we use floating point arithmetic.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"               /* Private declarations for DCT subsystem */
+
+#ifdef DCT_FLOAT_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce a float result.
+ */
+
+#define DEQUANTIZE(coef, quantval)  (((FAST_FLOAT)(coef)) * (quantval))
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients.
+ */
+
+GLOBAL(void)
+_jpeg_idct_float(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
+{
+  FAST_FLOAT tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  FAST_FLOAT tmp10, tmp11, tmp12, tmp13;
+  FAST_FLOAT z5, z10, z11, z12, z13;
+  JCOEFPTR inptr;
+  FLOAT_MULT_TYPE *quantptr;
+  FAST_FLOAT *wsptr;
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
+  int ctr;
+  FAST_FLOAT workspace[DCTSIZE2]; /* buffers data between passes */
+#define _0_125  ((FLOAT_MULT_TYPE)0.125)
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (FLOAT_MULT_TYPE *)compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+
+    if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 2] == 0 &&
+        inptr[DCTSIZE * 3] == 0 && inptr[DCTSIZE * 4] == 0 &&
+        inptr[DCTSIZE * 5] == 0 && inptr[DCTSIZE * 6] == 0 &&
+        inptr[DCTSIZE * 7] == 0) {
+      /* AC terms all zero */
+      FAST_FLOAT dcval = DEQUANTIZE(inptr[DCTSIZE * 0],
+                                    quantptr[DCTSIZE * 0] * _0_125);
+
+      wsptr[DCTSIZE * 0] = dcval;
+      wsptr[DCTSIZE * 1] = dcval;
+      wsptr[DCTSIZE * 2] = dcval;
+      wsptr[DCTSIZE * 3] = dcval;
+      wsptr[DCTSIZE * 4] = dcval;
+      wsptr[DCTSIZE * 5] = dcval;
+      wsptr[DCTSIZE * 6] = dcval;
+      wsptr[DCTSIZE * 7] = dcval;
+
+      inptr++;                  /* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0] * _0_125);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2] * _0_125);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4] * _0_125);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6] * _0_125);
+
+    tmp10 = tmp0 + tmp2;        /* phase 3 */
+    tmp11 = tmp0 - tmp2;
+
+    tmp13 = tmp1 + tmp3;        /* phases 5-3 */
+    tmp12 = (tmp1 - tmp3) * ((FAST_FLOAT)1.414213562) - tmp13; /* 2*c4 */
+
+    tmp0 = tmp10 + tmp13;       /* phase 2 */
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    tmp4 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1] * _0_125);
+    tmp5 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3] * _0_125);
+    tmp6 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5] * _0_125);
+    tmp7 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7] * _0_125);
+
+    z13 = tmp6 + tmp5;          /* phase 6 */
+    z10 = tmp6 - tmp5;
+    z11 = tmp4 + tmp7;
+    z12 = tmp4 - tmp7;
+
+    tmp7 = z11 + z13;           /* phase 5 */
+    tmp11 = (z11 - z13) * ((FAST_FLOAT)1.414213562); /* 2*c4 */
+
+    z5 = (z10 + z12) * ((FAST_FLOAT)1.847759065); /* 2*c2 */
+    tmp10 = z5 - z12 * ((FAST_FLOAT)1.082392200); /* 2*(c2-c6) */
+    tmp12 = z5 - z10 * ((FAST_FLOAT)2.613125930); /* 2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;        /* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 - tmp5;
+
+    wsptr[DCTSIZE * 0] = tmp0 + tmp7;
+    wsptr[DCTSIZE * 7] = tmp0 - tmp7;
+    wsptr[DCTSIZE * 1] = tmp1 + tmp6;
+    wsptr[DCTSIZE * 6] = tmp1 - tmp6;
+    wsptr[DCTSIZE * 2] = tmp2 + tmp5;
+    wsptr[DCTSIZE * 5] = tmp2 - tmp5;
+    wsptr[DCTSIZE * 3] = tmp3 + tmp4;
+    wsptr[DCTSIZE * 4] = tmp3 - tmp4;
+
+    inptr++;                    /* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+
+  /* Pass 2: process rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * And testing floats for zero is relatively expensive, so we don't bother.
+     */
+
+    /* Even part */
+
+    /* Apply signed->unsigned and prepare float->int conversion */
+    z5 = wsptr[0] + ((FAST_FLOAT)_CENTERJSAMPLE + (FAST_FLOAT)0.5);
+    tmp10 = z5 + wsptr[4];
+    tmp11 = z5 - wsptr[4];
+
+    tmp13 = wsptr[2] + wsptr[6];
+    tmp12 = (wsptr[2] - wsptr[6]) * ((FAST_FLOAT)1.414213562) - tmp13;
+
+    tmp0 = tmp10 + tmp13;
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z13 = wsptr[5] + wsptr[3];
+    z10 = wsptr[5] - wsptr[3];
+    z11 = wsptr[1] + wsptr[7];
+    z12 = wsptr[1] - wsptr[7];
+
+    tmp7 = z11 + z13;
+    tmp11 = (z11 - z13) * ((FAST_FLOAT)1.414213562);
+
+    z5 = (z10 + z12) * ((FAST_FLOAT)1.847759065); /* 2*c2 */
+    tmp10 = z5 - z12 * ((FAST_FLOAT)1.082392200); /* 2*(c2-c6) */
+    tmp12 = z5 - z10 * ((FAST_FLOAT)2.613125930); /* 2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 - tmp5;
+
+    /* Final output stage: float->int conversion and range-limit */
+
+    outptr[0] = range_limit[((int)(tmp0 + tmp7)) & RANGE_MASK];
+    outptr[7] = range_limit[((int)(tmp0 - tmp7)) & RANGE_MASK];
+    outptr[1] = range_limit[((int)(tmp1 + tmp6)) & RANGE_MASK];
+    outptr[6] = range_limit[((int)(tmp1 - tmp6)) & RANGE_MASK];
+    outptr[2] = range_limit[((int)(tmp2 + tmp5)) & RANGE_MASK];
+    outptr[5] = range_limit[((int)(tmp2 - tmp5)) & RANGE_MASK];
+    outptr[3] = range_limit[((int)(tmp3 + tmp4)) & RANGE_MASK];
+    outptr[4] = range_limit[((int)(tmp3 - tmp4)) & RANGE_MASK];
+
+    wsptr += DCTSIZE;           /* advance pointer to next row */
+  }
+}
+
+#endif /* DCT_FLOAT_SUPPORTED */
diff --git a/thirdparty/libjpeg-turbo/src/jidctfst.c b/thirdparty/libjpeg-turbo/src/jidctfst.c
new file mode 100644
index 00000000000..68119b9942b
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jidctfst.c
@@ -0,0 +1,371 @@
+/*
+ * jidctfst.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015, 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains a fast, not so accurate integer implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on Arai, Agui, and Nakajima's algorithm for
+ * scaled DCT.  Their original paper (Trans. IEICE E-71(11):1095) is in
+ * Japanese, but the algorithm is described in the Pennebaker & Mitchell
+ * JPEG textbook (see REFERENCES section in file README.ijg).  The following
+ * code is based directly on figure 4-8 in P&M.
+ * While an 8-point DCT cannot be done in less than 11 multiplies, it is
+ * possible to arrange the computation so that many of the multiplies are
+ * simple scalings of the final outputs.  These multiplies can then be
+ * folded into the multiplications or divisions by the JPEG quantization
+ * table entries.  The AA&N method leaves only 5 multiplies and 29 adds
+ * to be done in the DCT itself.
+ * The primary disadvantage of this method is that with fixed-point math,
+ * accuracy is lost due to imprecise representation of the scaled
+ * quantization values.  The smaller the quantization table entry, the less
+ * precise the scaled value, so this implementation does worse with high-
+ * quality-setting files than with low-quality ones.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"               /* Private declarations for DCT subsystem */
+
+#ifdef DCT_IFAST_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Scaling decisions are generally the same as in the LL&M algorithm;
+ * see jidctint.c for more details.  However, we choose to descale
+ * (right shift) multiplication products as soon as they are formed,
+ * rather than carrying additional fractional bits into subsequent additions.
+ * This compromises accuracy slightly, but it lets us save a few shifts.
+ * More importantly, 16-bit arithmetic is then adequate (for 8-bit samples)
+ * everywhere except in the multiplications proper; this saves a good deal
+ * of work on 16-bit-int machines.
+ *
+ * The dequantized coefficients are not integers because the AA&N scaling
+ * factors have been incorporated.  We represent them scaled up by PASS1_BITS,
+ * so that the first and second IDCT rounds have the same input scaling.
+ * For 8-bit samples, we choose IFAST_SCALE_BITS = PASS1_BITS so as to
+ * avoid a descaling shift; this compromises accuracy rather drastically
+ * for small quantization table entries, but it saves a lot of shifts.
+ * For 12-bit samples, there's no hope of using 16x16 multiplies anyway,
+ * so we use a much larger scaling factor to preserve accuracy.
+ *
+ * A final compromise is to represent the multiplicative constants to only
+ * 8 fractional bits, rather than 13.  This saves some shifting work on some
+ * machines, and may also reduce the cost of multiplication (since there
+ * are fewer one-bits in the constants).
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  8
+#define PASS1_BITS  2
+#else
+#define CONST_BITS  8
+#define PASS1_BITS  1           /* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 8
+#define FIX_1_082392200  ((JLONG)277)           /* FIX(1.082392200) */
+#define FIX_1_414213562  ((JLONG)362)           /* FIX(1.414213562) */
+#define FIX_1_847759065  ((JLONG)473)           /* FIX(1.847759065) */
+#define FIX_2_613125930  ((JLONG)669)           /* FIX(2.613125930) */
+#else
+#define FIX_1_082392200  FIX(1.082392200)
+#define FIX_1_414213562  FIX(1.414213562)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_2_613125930  FIX(2.613125930)
+#endif
+
+
+/* We can gain a little more speed, with a further compromise in accuracy,
+ * by omitting the addition in a descaling shift.  This yields an incorrectly
+ * rounded result half the time...
+ */
+
+#ifndef USE_ACCURATE_ROUNDING
+#undef DESCALE
+#define DESCALE(x, n)  RIGHT_SHIFT(x, n)
+#endif
+
+
+/* Multiply a DCTELEM variable by an JLONG constant, and immediately
+ * descale to yield a DCTELEM result.
+ */
+
+#define MULTIPLY(var, const)  ((DCTELEM)DESCALE((var) * (const), CONST_BITS))
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce a DCTELEM result.  For 8-bit data a 16x16->16
+ * multiplication will do.  For 12-bit data, the multiplier table is
+ * declared JLONG, so a 32-bit multiply will be used.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define DEQUANTIZE(coef, quantval)  (((IFAST_MULT_TYPE)(coef)) * (quantval))
+#else
+#define DEQUANTIZE(coef, quantval) \
+  DESCALE((coef) * (quantval), IFAST_SCALE_BITS - PASS1_BITS)
+#endif
+
+
+/* Like DESCALE, but applies to a DCTELEM and produces an int.
+ * We assume that int right shift is unsigned if JLONG right shift is.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define ISHIFT_TEMPS    DCTELEM ishift_temp;
+#if BITS_IN_JSAMPLE == 8
+#define DCTELEMBITS  16         /* DCTELEM may be 16 or 32 bits */
+#else
+#define DCTELEMBITS  32         /* DCTELEM must be 32 bits */
+#endif
+#define IRIGHT_SHIFT(x, shft) \
+  ((ishift_temp = (x)) < 0 ? \
+   (ishift_temp >> (shft)) | ((~((DCTELEM)0)) << (DCTELEMBITS - (shft))) : \
+   (ishift_temp >> (shft)))
+#else
+#define ISHIFT_TEMPS
+#define IRIGHT_SHIFT(x, shft)   ((x) >> (shft))
+#endif
+
+#ifdef USE_ACCURATE_ROUNDING
+#define IDESCALE(x, n)  ((int)IRIGHT_SHIFT((x) + (1 << ((n) - 1)), n))
+#else
+#define IDESCALE(x, n)  ((int)IRIGHT_SHIFT(x, n))
+#endif
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients.
+ */
+
+GLOBAL(void)
+_jpeg_idct_ifast(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
+{
+  DCTELEM tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+  DCTELEM tmp10, tmp11, tmp12, tmp13;
+  DCTELEM z5, z10, z11, z12, z13;
+  JCOEFPTR inptr;
+  IFAST_MULT_TYPE *quantptr;
+  int *wsptr;
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE2];      /* buffers data between passes */
+  SHIFT_TEMPS                   /* for DESCALE */
+  ISHIFT_TEMPS                  /* for IDESCALE */
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (IFAST_MULT_TYPE *)compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+
+    if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 2] == 0 &&
+        inptr[DCTSIZE * 3] == 0 && inptr[DCTSIZE * 4] == 0 &&
+        inptr[DCTSIZE * 5] == 0 && inptr[DCTSIZE * 6] == 0 &&
+        inptr[DCTSIZE * 7] == 0) {
+      /* AC terms all zero */
+      int dcval = (int)DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
+
+      wsptr[DCTSIZE * 0] = dcval;
+      wsptr[DCTSIZE * 1] = dcval;
+      wsptr[DCTSIZE * 2] = dcval;
+      wsptr[DCTSIZE * 3] = dcval;
+      wsptr[DCTSIZE * 4] = dcval;
+      wsptr[DCTSIZE * 5] = dcval;
+      wsptr[DCTSIZE * 6] = dcval;
+      wsptr[DCTSIZE * 7] = dcval;
+
+      inptr++;                  /* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
+
+    tmp10 = tmp0 + tmp2;        /* phase 3 */
+    tmp11 = tmp0 - tmp2;
+
+    tmp13 = tmp1 + tmp3;        /* phases 5-3 */
+    tmp12 = MULTIPLY(tmp1 - tmp3, FIX_1_414213562) - tmp13; /* 2*c4 */
+
+    tmp0 = tmp10 + tmp13;       /* phase 2 */
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    tmp4 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
+    tmp5 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
+    tmp6 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
+    tmp7 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
+
+    z13 = tmp6 + tmp5;          /* phase 6 */
+    z10 = tmp6 - tmp5;
+    z11 = tmp4 + tmp7;
+    z12 = tmp4 - tmp7;
+
+    tmp7 = z11 + z13;           /* phase 5 */
+    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
+
+    z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
+    tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
+    tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5; /* -2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;        /* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+
+    wsptr[DCTSIZE * 0] = (int)(tmp0 + tmp7);
+    wsptr[DCTSIZE * 7] = (int)(tmp0 - tmp7);
+    wsptr[DCTSIZE * 1] = (int)(tmp1 + tmp6);
+    wsptr[DCTSIZE * 6] = (int)(tmp1 - tmp6);
+    wsptr[DCTSIZE * 2] = (int)(tmp2 + tmp5);
+    wsptr[DCTSIZE * 5] = (int)(tmp2 - tmp5);
+    wsptr[DCTSIZE * 4] = (int)(tmp3 + tmp4);
+    wsptr[DCTSIZE * 3] = (int)(tmp3 - tmp4);
+
+    inptr++;                    /* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+
+  /* Pass 2: process rows from work array, store into output array. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth.  In that case this section
+     * may be commented out.
+     */
+
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
+        wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero */
+      _JSAMPLE dcval =
+        range_limit[IDESCALE(wsptr[0], PASS1_BITS + 3) & RANGE_MASK];
+
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      outptr[2] = dcval;
+      outptr[3] = dcval;
+      outptr[4] = dcval;
+      outptr[5] = dcval;
+      outptr[6] = dcval;
+      outptr[7] = dcval;
+
+      wsptr += DCTSIZE;         /* advance pointer to next row */
+      continue;
+    }
+#endif
+
+    /* Even part */
+
+    tmp10 = ((DCTELEM)wsptr[0] + (DCTELEM)wsptr[4]);
+    tmp11 = ((DCTELEM)wsptr[0] - (DCTELEM)wsptr[4]);
+
+    tmp13 = ((DCTELEM)wsptr[2] + (DCTELEM)wsptr[6]);
+    tmp12 =
+      MULTIPLY((DCTELEM)wsptr[2] - (DCTELEM)wsptr[6], FIX_1_414213562) - tmp13;
+
+    tmp0 = tmp10 + tmp13;
+    tmp3 = tmp10 - tmp13;
+    tmp1 = tmp11 + tmp12;
+    tmp2 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z13 = (DCTELEM)wsptr[5] + (DCTELEM)wsptr[3];
+    z10 = (DCTELEM)wsptr[5] - (DCTELEM)wsptr[3];
+    z11 = (DCTELEM)wsptr[1] + (DCTELEM)wsptr[7];
+    z12 = (DCTELEM)wsptr[1] - (DCTELEM)wsptr[7];
+
+    tmp7 = z11 + z13;           /* phase 5 */
+    tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562); /* 2*c4 */
+
+    z5 = MULTIPLY(z10 + z12, FIX_1_847759065); /* 2*c2 */
+    tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5; /* 2*(c2-c6) */
+    tmp12 = MULTIPLY(z10, -FIX_2_613125930) + z5; /* -2*(c2+c6) */
+
+    tmp6 = tmp12 - tmp7;        /* phase 2 */
+    tmp5 = tmp11 - tmp6;
+    tmp4 = tmp10 + tmp5;
+
+    /* Final output stage: scale down by a factor of 8 and range-limit */
+
+    outptr[0] =
+      range_limit[IDESCALE(tmp0 + tmp7, PASS1_BITS + 3) & RANGE_MASK];
+    outptr[7] =
+      range_limit[IDESCALE(tmp0 - tmp7, PASS1_BITS + 3) & RANGE_MASK];
+    outptr[1] =
+      range_limit[IDESCALE(tmp1 + tmp6, PASS1_BITS + 3) & RANGE_MASK];
+    outptr[6] =
+      range_limit[IDESCALE(tmp1 - tmp6, PASS1_BITS + 3) & RANGE_MASK];
+    outptr[2] =
+      range_limit[IDESCALE(tmp2 + tmp5, PASS1_BITS + 3) & RANGE_MASK];
+    outptr[5] =
+      range_limit[IDESCALE(tmp2 - tmp5, PASS1_BITS + 3) & RANGE_MASK];
+    outptr[4] =
+      range_limit[IDESCALE(tmp3 + tmp4, PASS1_BITS + 3) & RANGE_MASK];
+    outptr[3] =
+      range_limit[IDESCALE(tmp3 - tmp4, PASS1_BITS + 3) & RANGE_MASK];
+
+    wsptr += DCTSIZE;           /* advance pointer to next row */
+  }
+}
+
+#endif /* DCT_IFAST_SUPPORTED */
diff --git a/thirdparty/libjpeg-turbo/src/jidctint.c b/thirdparty/libjpeg-turbo/src/jidctint.c
new file mode 100644
index 00000000000..c58592d626d
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jidctint.c
@@ -0,0 +1,2627 @@
+/*
+ * jidctint.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modification developed 2002-2018 by Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015, 2020, 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains a slower but more accurate integer implementation of the
+ * inverse DCT (Discrete Cosine Transform).  In the IJG code, this routine
+ * must also perform dequantization of the input coefficients.
+ *
+ * A 2-D IDCT can be done by 1-D IDCT on each column followed by 1-D IDCT
+ * on each row (or vice versa, but it's more convenient to emit a row at
+ * a time).  Direct algorithms are also available, but they are much more
+ * complex and seem not to be any faster when reduced to code.
+ *
+ * This implementation is based on an algorithm described in
+ *   C. Loeffler, A. Ligtenberg and G. Moschytz, "Practical Fast 1-D DCT
+ *   Algorithms with 11 Multiplications", Proc. Int'l. Conf. on Acoustics,
+ *   Speech, and Signal Processing 1989 (ICASSP '89), pp. 988-991.
+ * The primary algorithm described there uses 11 multiplies and 29 adds.
+ * We use their alternate method with 12 multiplies and 32 adds.
+ * The advantage of this method is that no data path contains more than one
+ * multiplication; this allows a very simple and accurate implementation in
+ * scaled fixed-point arithmetic, with a minimal number of shifts.
+ *
+ * We also provide IDCT routines with various output sample block sizes for
+ * direct resolution reduction or enlargement without additional resampling:
+ * NxN (N=1...16) pixels for one 8x8 input DCT block.
+ *
+ * For N<8 we simply take the corresponding low-frequency coefficients of
+ * the 8x8 input DCT block and apply an NxN point IDCT on the sub-block
+ * to yield the downscaled outputs.
+ * This can be seen as direct low-pass downsampling from the DCT domain
+ * point of view rather than the usual spatial domain point of view,
+ * yielding significant computational savings and results at least
+ * as good as common bilinear (averaging) spatial downsampling.
+ *
+ * For N>8 we apply a partial NxN IDCT on the 8 input coefficients as
+ * lower frequencies and higher frequencies assumed to be zero.
+ * It turns out that the computational effort is similar to the 8x8 IDCT
+ * regarding the output size.
+ * Furthermore, the scaling and descaling is the same for all IDCT sizes.
+ *
+ * CAUTION: We rely on the FIX() macro except for the N=1,2,4,8 cases
+ * since there would be too many additional constants to pre-calculate.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"               /* Private declarations for DCT subsystem */
+
+#ifdef DCT_ISLOW_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCT blocks. /* deliberate syntax err */
+#endif
+
+
+/*
+ * The poop on this scaling stuff is as follows:
+ *
+ * Each 1-D IDCT step produces outputs which are a factor of sqrt(N)
+ * larger than the true IDCT outputs.  The final outputs are therefore
+ * a factor of N larger than desired; since N=8 this can be cured by
+ * a simple right shift at the end of the algorithm.  The advantage of
+ * this arrangement is that we save two multiplications per 1-D IDCT,
+ * because the y0 and y4 inputs need not be divided by sqrt(N).
+ *
+ * We have to do addition and subtraction of the integer inputs, which
+ * is no problem, and multiplication by fractional constants, which is
+ * a problem to do in integer arithmetic.  We multiply all the constants
+ * by CONST_SCALE and convert them to integer constants (thus retaining
+ * CONST_BITS bits of precision in the constants).  After doing a
+ * multiplication we have to divide the product by CONST_SCALE, with proper
+ * rounding, to produce the correct output.  This division can be done
+ * cheaply as a right shift of CONST_BITS bits.  We postpone shifting
+ * as long as possible so that partial sums can be added together with
+ * full fractional precision.
+ *
+ * The outputs of the first pass are scaled up by PASS1_BITS bits so that
+ * they are represented to better-than-integral precision.  These outputs
+ * require BITS_IN_JSAMPLE + PASS1_BITS + 3 bits; this fits in a 16-bit word
+ * with the recommended scaling.  (To scale up 12-bit sample data further, an
+ * intermediate JLONG array would be needed.)
+ *
+ * To avoid overflow of the 32-bit intermediate results in pass 2, we must
+ * have BITS_IN_JSAMPLE + CONST_BITS + PASS1_BITS <= 26.  Error analysis
+ * shows that the values given below are the most effective.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13
+#define PASS1_BITS  2
+#else
+#define CONST_BITS  13
+#define PASS1_BITS  1           /* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13
+#define FIX_0_298631336  ((JLONG)2446)          /* FIX(0.298631336) */
+#define FIX_0_390180644  ((JLONG)3196)          /* FIX(0.390180644) */
+#define FIX_0_541196100  ((JLONG)4433)          /* FIX(0.541196100) */
+#define FIX_0_765366865  ((JLONG)6270)          /* FIX(0.765366865) */
+#define FIX_0_899976223  ((JLONG)7373)          /* FIX(0.899976223) */
+#define FIX_1_175875602  ((JLONG)9633)          /* FIX(1.175875602) */
+#define FIX_1_501321110  ((JLONG)12299)         /* FIX(1.501321110) */
+#define FIX_1_847759065  ((JLONG)15137)         /* FIX(1.847759065) */
+#define FIX_1_961570560  ((JLONG)16069)         /* FIX(1.961570560) */
+#define FIX_2_053119869  ((JLONG)16819)         /* FIX(2.053119869) */
+#define FIX_2_562915447  ((JLONG)20995)         /* FIX(2.562915447) */
+#define FIX_3_072711026  ((JLONG)25172)         /* FIX(3.072711026) */
+#else
+#define FIX_0_298631336  FIX(0.298631336)
+#define FIX_0_390180644  FIX(0.390180644)
+#define FIX_0_541196100  FIX(0.541196100)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_175875602  FIX(1.175875602)
+#define FIX_1_501321110  FIX(1.501321110)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_1_961570560  FIX(1.961570560)
+#define FIX_2_053119869  FIX(2.053119869)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_072711026  FIX(3.072711026)
+#endif
+
+
+/* Multiply an JLONG variable by an JLONG constant to yield an JLONG result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define MULTIPLY(var, const)  MULTIPLY16C16(var, const)
+#else
+#define MULTIPLY(var, const)  ((var) * (const))
+#endif
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce an int result.  In this module, both inputs and result
+ * are 16 bits or less, so either int or short multiply will work.
+ */
+
+#define DEQUANTIZE(coef, quantval)  (((ISLOW_MULT_TYPE)(coef)) * (quantval))
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients.
+ */
+
+GLOBAL(void)
+_jpeg_idct_islow(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
+{
+  JLONG tmp0, tmp1, tmp2, tmp3;
+  JLONG tmp10, tmp11, tmp12, tmp13;
+  JLONG z1, z2, z3, z4, z5;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE *quantptr;
+  int *wsptr;
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE2];      /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+  /* Note results are scaled up by sqrt(8) compared to a true IDCT; */
+  /* furthermore, we scale the results by 2**PASS1_BITS. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; ctr--) {
+    /* Due to quantization, we will usually find that many of the input
+     * coefficients are zero, especially the AC terms.  We can exploit this
+     * by short-circuiting the IDCT calculation for any column in which all
+     * the AC terms are zero.  In that case each output is equal to the
+     * DC coefficient (with scale factor as needed).
+     * With typical images and quantization tables, half or more of the
+     * column DCT calculations can be simplified this way.
+     */
+
+    if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 2] == 0 &&
+        inptr[DCTSIZE * 3] == 0 && inptr[DCTSIZE * 4] == 0 &&
+        inptr[DCTSIZE * 5] == 0 && inptr[DCTSIZE * 6] == 0 &&
+        inptr[DCTSIZE * 7] == 0) {
+      /* AC terms all zero */
+      int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE * 0],
+                             quantptr[DCTSIZE * 0]), PASS1_BITS);
+
+      wsptr[DCTSIZE * 0] = dcval;
+      wsptr[DCTSIZE * 1] = dcval;
+      wsptr[DCTSIZE * 2] = dcval;
+      wsptr[DCTSIZE * 3] = dcval;
+      wsptr[DCTSIZE * 4] = dcval;
+      wsptr[DCTSIZE * 5] = dcval;
+      wsptr[DCTSIZE * 6] = dcval;
+      wsptr[DCTSIZE * 7] = dcval;
+
+      inptr++;                  /* advance pointers to next column */
+      quantptr++;
+      wsptr++;
+      continue;
+    }
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+    tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065);
+    tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
+
+    tmp0 = LEFT_SHIFT(z2 + z3, CONST_BITS);
+    tmp1 = LEFT_SHIFT(z2 - z3, CONST_BITS);
+
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
+    tmp3 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
+
+    z1 = tmp0 + tmp3;
+    z2 = tmp1 + tmp2;
+    z3 = tmp0 + tmp2;
+    z4 = tmp1 + tmp3;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, -FIX_0_899976223); /* sqrt(2) * ( c7-c3) */
+    z2 = MULTIPLY(z2, -FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, -FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, -FIX_0_390180644); /* sqrt(2) * ( c5-c3) */
+
+    z3 += z5;
+    z4 += z5;
+
+    tmp0 += z1 + z3;
+    tmp1 += z2 + z4;
+    tmp2 += z2 + z3;
+    tmp3 += z1 + z4;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    wsptr[DCTSIZE * 0] = (int)DESCALE(tmp10 + tmp3, CONST_BITS - PASS1_BITS);
+    wsptr[DCTSIZE * 7] = (int)DESCALE(tmp10 - tmp3, CONST_BITS - PASS1_BITS);
+    wsptr[DCTSIZE * 1] = (int)DESCALE(tmp11 + tmp2, CONST_BITS - PASS1_BITS);
+    wsptr[DCTSIZE * 6] = (int)DESCALE(tmp11 - tmp2, CONST_BITS - PASS1_BITS);
+    wsptr[DCTSIZE * 2] = (int)DESCALE(tmp12 + tmp1, CONST_BITS - PASS1_BITS);
+    wsptr[DCTSIZE * 5] = (int)DESCALE(tmp12 - tmp1, CONST_BITS - PASS1_BITS);
+    wsptr[DCTSIZE * 3] = (int)DESCALE(tmp13 + tmp0, CONST_BITS - PASS1_BITS);
+    wsptr[DCTSIZE * 4] = (int)DESCALE(tmp13 - tmp0, CONST_BITS - PASS1_BITS);
+
+    inptr++;                    /* advance pointers to next column */
+    quantptr++;
+    wsptr++;
+  }
+
+  /* Pass 2: process rows from work array, store into output array. */
+  /* Note that we must descale the results by a factor of 8 == 2**3, */
+  /* and also undo the PASS1_BITS scaling. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < DCTSIZE; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* Rows of zeroes can be exploited in the same way as we did with columns.
+     * However, the column calculation has created many nonzero AC terms, so
+     * the simplification applies less often (typically 5% to 10% of the time).
+     * On machines with very fast multiplication, it's possible that the
+     * test takes more time than it's worth.  In that case this section
+     * may be commented out.
+     */
+
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 && wsptr[4] == 0 &&
+        wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero */
+      _JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0],
+                                                PASS1_BITS + 3) & RANGE_MASK];
+
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      outptr[2] = dcval;
+      outptr[3] = dcval;
+      outptr[4] = dcval;
+      outptr[5] = dcval;
+      outptr[6] = dcval;
+      outptr[7] = dcval;
+
+      wsptr += DCTSIZE;         /* advance pointer to next row */
+      continue;
+    }
+#endif
+
+    /* Even part: reverse the even part of the forward DCT. */
+    /* The rotator is sqrt(2)*c(-6). */
+
+    z2 = (JLONG)wsptr[2];
+    z3 = (JLONG)wsptr[6];
+
+    z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
+    tmp2 = z1 + MULTIPLY(z3, -FIX_1_847759065);
+    tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
+
+    tmp0 = LEFT_SHIFT((JLONG)wsptr[0] + (JLONG)wsptr[4], CONST_BITS);
+    tmp1 = LEFT_SHIFT((JLONG)wsptr[0] - (JLONG)wsptr[4], CONST_BITS);
+
+    tmp10 = tmp0 + tmp3;
+    tmp13 = tmp0 - tmp3;
+    tmp11 = tmp1 + tmp2;
+    tmp12 = tmp1 - tmp2;
+
+    /* Odd part per figure 8; the matrix is unitary and hence its
+     * transpose is its inverse.  i0..i3 are y7,y5,y3,y1 respectively.
+     */
+
+    tmp0 = (JLONG)wsptr[7];
+    tmp1 = (JLONG)wsptr[5];
+    tmp2 = (JLONG)wsptr[3];
+    tmp3 = (JLONG)wsptr[1];
+
+    z1 = tmp0 + tmp3;
+    z2 = tmp1 + tmp2;
+    z3 = tmp0 + tmp2;
+    z4 = tmp1 + tmp3;
+    z5 = MULTIPLY(z3 + z4, FIX_1_175875602); /* sqrt(2) * c3 */
+
+    tmp0 = MULTIPLY(tmp0, FIX_0_298631336); /* sqrt(2) * (-c1+c3+c5-c7) */
+    tmp1 = MULTIPLY(tmp1, FIX_2_053119869); /* sqrt(2) * ( c1+c3-c5+c7) */
+    tmp2 = MULTIPLY(tmp2, FIX_3_072711026); /* sqrt(2) * ( c1+c3+c5-c7) */
+    tmp3 = MULTIPLY(tmp3, FIX_1_501321110); /* sqrt(2) * ( c1+c3-c5-c7) */
+    z1 = MULTIPLY(z1, -FIX_0_899976223); /* sqrt(2) * ( c7-c3) */
+    z2 = MULTIPLY(z2, -FIX_2_562915447); /* sqrt(2) * (-c1-c3) */
+    z3 = MULTIPLY(z3, -FIX_1_961570560); /* sqrt(2) * (-c3-c5) */
+    z4 = MULTIPLY(z4, -FIX_0_390180644); /* sqrt(2) * ( c5-c3) */
+
+    z3 += z5;
+    z4 += z5;
+
+    tmp0 += z1 + z3;
+    tmp1 += z2 + z4;
+    tmp2 += z2 + z3;
+    tmp3 += z1 + z4;
+
+    /* Final output stage: inputs are tmp10..tmp13, tmp0..tmp3 */
+
+    outptr[0] = range_limit[(int)DESCALE(tmp10 + tmp3,
+                                         CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[7] = range_limit[(int)DESCALE(tmp10 - tmp3,
+                                         CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[1] = range_limit[(int)DESCALE(tmp11 + tmp2,
+                                         CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[6] = range_limit[(int)DESCALE(tmp11 - tmp2,
+                                         CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[2] = range_limit[(int)DESCALE(tmp12 + tmp1,
+                                         CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[5] = range_limit[(int)DESCALE(tmp12 - tmp1,
+                                         CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[3] = range_limit[(int)DESCALE(tmp13 + tmp0,
+                                         CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[4] = range_limit[(int)DESCALE(tmp13 - tmp0,
+                                         CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+
+    wsptr += DCTSIZE;           /* advance pointer to next row */
+  }
+}
+
+#ifdef IDCT_SCALING_SUPPORTED
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 7x7 output block.
+ *
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/14).
+ */
+
+GLOBAL(void)
+_jpeg_idct_7x7(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+               JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+               JDIMENSION output_col)
+{
+  JLONG tmp0, tmp1, tmp2, tmp10, tmp11, tmp12, tmp13;
+  JLONG z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE *quantptr;
+  int *wsptr;
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[7 * 7];         /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp13 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
+    tmp13 = LEFT_SHIFT(tmp13, CONST_BITS);
+    /* Add fudge factor here for final descale. */
+    tmp13 += ONE << (CONST_BITS - PASS1_BITS - 1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
+
+    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
+    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp0 = z1 + z3;
+    z2 -= tmp0;
+    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
+    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
+    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
+    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
+
+    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(z2 + z3, -FIX(1.378756276));     /* -c1 */
+    tmp1 += tmp2;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
+    tmp0 += z2;
+    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
+
+    /* Final output stage */
+
+    wsptr[7 * 0] = (int)RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS - PASS1_BITS);
+    wsptr[7 * 6] = (int)RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS - PASS1_BITS);
+    wsptr[7 * 1] = (int)RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS - PASS1_BITS);
+    wsptr[7 * 5] = (int)RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS - PASS1_BITS);
+    wsptr[7 * 2] = (int)RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS - PASS1_BITS);
+    wsptr[7 * 4] = (int)RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS - PASS1_BITS);
+    wsptr[7 * 3] = (int)RIGHT_SHIFT(tmp13, CONST_BITS - PASS1_BITS);
+  }
+
+  /* Pass 2: process 7 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 7; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp13 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
+    tmp13 = LEFT_SHIFT(tmp13, CONST_BITS);
+
+    z1 = (JLONG)wsptr[2];
+    z2 = (JLONG)wsptr[4];
+    z3 = (JLONG)wsptr[6];
+
+    tmp10 = MULTIPLY(z2 - z3, FIX(0.881747734));     /* c4 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.314692123));     /* c6 */
+    tmp11 = tmp10 + tmp12 + tmp13 - MULTIPLY(z2, FIX(1.841218003)); /* c2+c4-c6 */
+    tmp0 = z1 + z3;
+    z2 -= tmp0;
+    tmp0 = MULTIPLY(tmp0, FIX(1.274162392)) + tmp13; /* c2 */
+    tmp10 += tmp0 - MULTIPLY(z3, FIX(0.077722536));  /* c2-c4-c6 */
+    tmp12 += tmp0 - MULTIPLY(z1, FIX(2.470602249));  /* c2+c4+c6 */
+    tmp13 += MULTIPLY(z2, FIX(1.414213562));         /* c0 */
+
+    /* Odd part */
+
+    z1 = (JLONG)wsptr[1];
+    z2 = (JLONG)wsptr[3];
+    z3 = (JLONG)wsptr[5];
+
+    tmp1 = MULTIPLY(z1 + z2, FIX(0.935414347));      /* (c3+c1-c5)/2 */
+    tmp2 = MULTIPLY(z1 - z2, FIX(0.170262339));      /* (c3+c5-c1)/2 */
+    tmp0 = tmp1 - tmp2;
+    tmp1 += tmp2;
+    tmp2 = MULTIPLY(z2 + z3, -FIX(1.378756276));     /* -c1 */
+    tmp1 += tmp2;
+    z2 = MULTIPLY(z1 + z3, FIX(0.613604268));        /* c5 */
+    tmp0 += z2;
+    tmp2 += z2 + MULTIPLY(z3, FIX(1.870828693));     /* c3+c1-c5 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp10 + tmp0,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp10 - tmp0,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp11 + tmp1,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp11 - tmp1,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp12 + tmp2,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp12 - tmp2,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp13,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+
+    wsptr += 7;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 6x6 output block.
+ *
+ * Optimized algorithm with 3 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/12).
+ */
+
+GLOBAL(void)
+_jpeg_idct_6x6(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+               JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+               JDIMENSION output_col)
+{
+  JLONG tmp0, tmp1, tmp2, tmp10, tmp11, tmp12;
+  JLONG z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE *quantptr;
+  int *wsptr;
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[6 * 6];         /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
+    tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS - PASS1_BITS - 1);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
+    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
+    tmp1 = tmp0 + tmp10;
+    tmp11 = RIGHT_SHIFT(tmp0 - tmp10 - tmp10, CONST_BITS - PASS1_BITS);
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
+    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
+    tmp10 = tmp1 + tmp0;
+    tmp12 = tmp1 - tmp0;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
+    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp0 = tmp1 + LEFT_SHIFT(z1 + z2, CONST_BITS);
+    tmp2 = tmp1 + LEFT_SHIFT(z3 - z2, CONST_BITS);
+    tmp1 = LEFT_SHIFT(z1 - z2 - z3, PASS1_BITS);
+
+    /* Final output stage */
+
+    wsptr[6 * 0] = (int)RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS - PASS1_BITS);
+    wsptr[6 * 5] = (int)RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS - PASS1_BITS);
+    wsptr[6 * 1] = (int)(tmp11 + tmp1);
+    wsptr[6 * 4] = (int)(tmp11 - tmp1);
+    wsptr[6 * 2] = (int)RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS - PASS1_BITS);
+    wsptr[6 * 3] = (int)RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS - PASS1_BITS);
+  }
+
+  /* Pass 2: process 6 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 6; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
+    tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
+    tmp2 = (JLONG)wsptr[4];
+    tmp10 = MULTIPLY(tmp2, FIX(0.707106781));   /* c4 */
+    tmp1 = tmp0 + tmp10;
+    tmp11 = tmp0 - tmp10 - tmp10;
+    tmp10 = (JLONG)wsptr[2];
+    tmp0 = MULTIPLY(tmp10, FIX(1.224744871));   /* c2 */
+    tmp10 = tmp1 + tmp0;
+    tmp12 = tmp1 - tmp0;
+
+    /* Odd part */
+
+    z1 = (JLONG)wsptr[1];
+    z2 = (JLONG)wsptr[3];
+    z3 = (JLONG)wsptr[5];
+    tmp1 = MULTIPLY(z1 + z3, FIX(0.366025404)); /* c5 */
+    tmp0 = tmp1 + LEFT_SHIFT(z1 + z2, CONST_BITS);
+    tmp2 = tmp1 + LEFT_SHIFT(z3 - z2, CONST_BITS);
+    tmp1 = LEFT_SHIFT(z1 - z2 - z3, CONST_BITS);
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp10 + tmp0,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp10 - tmp0,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp11 + tmp1,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp11 - tmp1,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp12 + tmp2,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp12 - tmp2,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+
+    wsptr += 6;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 5x5 output block.
+ *
+ * Optimized algorithm with 5 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/10).
+ */
+
+GLOBAL(void)
+_jpeg_idct_5x5(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+               JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+               JDIMENSION output_col)
+{
+  JLONG tmp0, tmp1, tmp10, tmp11, tmp12;
+  JLONG z1, z2, z3;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE *quantptr;
+  int *wsptr;
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[5 * 5];         /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
+    tmp12 = LEFT_SHIFT(tmp12, CONST_BITS);
+    /* Add fudge factor here for final descale. */
+    tmp12 += ONE << (CONST_BITS - PASS1_BITS - 1);
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
+    tmp1 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
+    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
+    z3 = tmp12 + z2;
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z1;
+    tmp12 -= LEFT_SHIFT(z2, 2);
+
+    /* Odd part */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
+    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
+    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
+
+    /* Final output stage */
+
+    wsptr[5 * 0] = (int)RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS - PASS1_BITS);
+    wsptr[5 * 4] = (int)RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS - PASS1_BITS);
+    wsptr[5 * 1] = (int)RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS - PASS1_BITS);
+    wsptr[5 * 3] = (int)RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS - PASS1_BITS);
+    wsptr[5 * 2] = (int)RIGHT_SHIFT(tmp12, CONST_BITS - PASS1_BITS);
+  }
+
+  /* Pass 2: process 5 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 5; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp12 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
+    tmp12 = LEFT_SHIFT(tmp12, CONST_BITS);
+    tmp0 = (JLONG)wsptr[2];
+    tmp1 = (JLONG)wsptr[4];
+    z1 = MULTIPLY(tmp0 + tmp1, FIX(0.790569415)); /* (c2+c4)/2 */
+    z2 = MULTIPLY(tmp0 - tmp1, FIX(0.353553391)); /* (c2-c4)/2 */
+    z3 = tmp12 + z2;
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z1;
+    tmp12 -= LEFT_SHIFT(z2, 2);
+
+    /* Odd part */
+
+    z2 = (JLONG)wsptr[1];
+    z3 = (JLONG)wsptr[3];
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));     /* c3 */
+    tmp0 = z1 + MULTIPLY(z2, FIX(0.513743148));   /* c1-c3 */
+    tmp1 = z1 - MULTIPLY(z3, FIX(2.176250899));   /* c1+c3 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp10 + tmp0,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp10 - tmp0,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp11 + tmp1,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp11 - tmp1,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp12,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+
+    wsptr += 5;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 3x3 output block.
+ *
+ * Optimized algorithm with 2 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/6).
+ */
+
+GLOBAL(void)
+_jpeg_idct_3x3(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+               JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+               JDIMENSION output_col)
+{
+  JLONG tmp0, tmp2, tmp10, tmp12;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE *quantptr;
+  int *wsptr;
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[3 * 3];         /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 3; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
+    tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS - PASS1_BITS - 1);
+    tmp2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
+    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+    tmp10 = tmp0 + tmp12;
+    tmp2 = tmp0 - tmp12 - tmp12;
+
+    /* Odd part */
+
+    tmp12 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
+    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+    /* Final output stage */
+
+    wsptr[3 * 0] = (int)RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS - PASS1_BITS);
+    wsptr[3 * 2] = (int)RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS - PASS1_BITS);
+    wsptr[3 * 1] = (int)RIGHT_SHIFT(tmp2, CONST_BITS - PASS1_BITS);
+  }
+
+  /* Pass 2: process 3 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 3; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
+    tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
+    tmp2 = (JLONG)wsptr[2];
+    tmp12 = MULTIPLY(tmp2, FIX(0.707106781)); /* c2 */
+    tmp10 = tmp0 + tmp12;
+    tmp2 = tmp0 - tmp12 - tmp12;
+
+    /* Odd part */
+
+    tmp12 = (JLONG)wsptr[1];
+    tmp0 = MULTIPLY(tmp12, FIX(1.224744871)); /* c1 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp10 + tmp0,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp10 - tmp0,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp2,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+
+    wsptr += 3;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 9x9 output block.
+ *
+ * Optimized algorithm with 10 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/18).
+ */
+
+GLOBAL(void)
+_jpeg_idct_9x9(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+               JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+               JDIMENSION output_col)
+{
+  JLONG tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13, tmp14;
+  JLONG z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE *quantptr;
+  int *wsptr;
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8 * 9];         /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
+    tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS - PASS1_BITS - 1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
+
+    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
+    tmp1 = tmp0 + tmp3;
+    tmp2 = tmp0 - tmp3 - tmp3;
+
+    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
+    tmp11 = tmp2 + tmp0;
+    tmp14 = tmp2 - tmp0 - tmp0;
+
+    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
+    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
+    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
+
+    tmp10 = tmp1 + tmp0 - tmp3;
+    tmp12 = tmp1 - tmp0 + tmp2;
+    tmp13 = tmp1 - tmp2 + tmp3;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
+
+    z2 = MULTIPLY(z2, -FIX(1.224744871));            /* -c3 */
+
+    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
+    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
+    tmp0 = tmp2 + tmp3 - z2;
+    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
+    tmp2 += z2 - tmp1;
+    tmp3 += z2 + tmp1;
+    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
+
+    /* Final output stage */
+
+    wsptr[8 * 0] = (int)RIGHT_SHIFT(tmp10 + tmp0, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 8] = (int)RIGHT_SHIFT(tmp10 - tmp0, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 1] = (int)RIGHT_SHIFT(tmp11 + tmp1, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 7] = (int)RIGHT_SHIFT(tmp11 - tmp1, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 2] = (int)RIGHT_SHIFT(tmp12 + tmp2, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 6] = (int)RIGHT_SHIFT(tmp12 - tmp2, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 3] = (int)RIGHT_SHIFT(tmp13 + tmp3, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 5] = (int)RIGHT_SHIFT(tmp13 - tmp3, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 4] = (int)RIGHT_SHIFT(tmp14, CONST_BITS - PASS1_BITS);
+  }
+
+  /* Pass 2: process 9 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 9; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
+    tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
+
+    z1 = (JLONG)wsptr[2];
+    z2 = (JLONG)wsptr[4];
+    z3 = (JLONG)wsptr[6];
+
+    tmp3 = MULTIPLY(z3, FIX(0.707106781));      /* c6 */
+    tmp1 = tmp0 + tmp3;
+    tmp2 = tmp0 - tmp3 - tmp3;
+
+    tmp0 = MULTIPLY(z1 - z2, FIX(0.707106781)); /* c6 */
+    tmp11 = tmp2 + tmp0;
+    tmp14 = tmp2 - tmp0 - tmp0;
+
+    tmp0 = MULTIPLY(z1 + z2, FIX(1.328926049)); /* c2 */
+    tmp2 = MULTIPLY(z1, FIX(1.083350441));      /* c4 */
+    tmp3 = MULTIPLY(z2, FIX(0.245575608));      /* c8 */
+
+    tmp10 = tmp1 + tmp0 - tmp3;
+    tmp12 = tmp1 - tmp0 + tmp2;
+    tmp13 = tmp1 - tmp2 + tmp3;
+
+    /* Odd part */
+
+    z1 = (JLONG)wsptr[1];
+    z2 = (JLONG)wsptr[3];
+    z3 = (JLONG)wsptr[5];
+    z4 = (JLONG)wsptr[7];
+
+    z2 = MULTIPLY(z2, -FIX(1.224744871));            /* -c3 */
+
+    tmp2 = MULTIPLY(z1 + z3, FIX(0.909038955));      /* c5 */
+    tmp3 = MULTIPLY(z1 + z4, FIX(0.483689525));      /* c7 */
+    tmp0 = tmp2 + tmp3 - z2;
+    tmp1 = MULTIPLY(z3 - z4, FIX(1.392728481));      /* c1 */
+    tmp2 += z2 - tmp1;
+    tmp3 += z2 + tmp1;
+    tmp1 = MULTIPLY(z1 - z3 - z4, FIX(1.224744871)); /* c3 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp10 + tmp0,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[8] = range_limit[(int)RIGHT_SHIFT(tmp10 - tmp0,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp11 + tmp1,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[7] = range_limit[(int)RIGHT_SHIFT(tmp11 - tmp1,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp12 + tmp2,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp12 - tmp2,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp13 + tmp3,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp13 - tmp3,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp14,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 10x10 output block.
+ *
+ * Optimized algorithm with 12 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/20).
+ */
+
+GLOBAL(void)
+_jpeg_idct_10x10(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
+{
+  JLONG tmp10, tmp11, tmp12, tmp13, tmp14;
+  JLONG tmp20, tmp21, tmp22, tmp23, tmp24;
+  JLONG z1, z2, z3, z4, z5;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE *quantptr;
+  int *wsptr;
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8 * 10];        /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
+    z3 = LEFT_SHIFT(z3, CONST_BITS);
+    /* Add fudge factor here for final descale. */
+    z3 += ONE << (CONST_BITS - PASS1_BITS - 1);
+    z4 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
+    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
+    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z2;
+
+    tmp22 = RIGHT_SHIFT(z3 - LEFT_SHIFT(z1 - z2, 1),
+                        CONST_BITS - PASS1_BITS); /* c0 = (c4-c8)*2 */
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
+    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+    tmp20 = tmp10 + tmp12;
+    tmp24 = tmp10 - tmp12;
+    tmp21 = tmp11 + tmp13;
+    tmp23 = tmp11 - tmp13;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
+
+    tmp11 = z2 + z4;
+    tmp13 = z2 - z4;
+
+    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
+    z5 = LEFT_SHIFT(z3, CONST_BITS);
+
+    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
+    z4 = z5 + tmp12;
+
+    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
+    z4 = z5 - tmp12 - LEFT_SHIFT(tmp13, CONST_BITS - 1);
+
+    tmp12 = LEFT_SHIFT(z1 - tmp13 - z3, PASS1_BITS);
+
+    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+    /* Final output stage */
+
+    wsptr[8 * 0] = (int)RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 9] = (int)RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 1] = (int)RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 8] = (int)RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 2] = (int)(tmp22 + tmp12);
+    wsptr[8 * 7] = (int)(tmp22 - tmp12);
+    wsptr[8 * 3] = (int)RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 6] = (int)RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 4] = (int)RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 5] = (int)RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS - PASS1_BITS);
+  }
+
+  /* Pass 2: process 10 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 10; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    z3 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
+    z3 = LEFT_SHIFT(z3, CONST_BITS);
+    z4 = (JLONG)wsptr[4];
+    z1 = MULTIPLY(z4, FIX(1.144122806));         /* c4 */
+    z2 = MULTIPLY(z4, FIX(0.437016024));         /* c8 */
+    tmp10 = z3 + z1;
+    tmp11 = z3 - z2;
+
+    tmp22 = z3 - LEFT_SHIFT(z1 - z2, 1);         /* c0 = (c4-c8)*2 */
+
+    z2 = (JLONG)wsptr[2];
+    z3 = (JLONG)wsptr[6];
+
+    z1 = MULTIPLY(z2 + z3, FIX(0.831253876));    /* c6 */
+    tmp12 = z1 + MULTIPLY(z2, FIX(0.513743148)); /* c2-c6 */
+    tmp13 = z1 - MULTIPLY(z3, FIX(2.176250899)); /* c2+c6 */
+
+    tmp20 = tmp10 + tmp12;
+    tmp24 = tmp10 - tmp12;
+    tmp21 = tmp11 + tmp13;
+    tmp23 = tmp11 - tmp13;
+
+    /* Odd part */
+
+    z1 = (JLONG)wsptr[1];
+    z2 = (JLONG)wsptr[3];
+    z3 = (JLONG)wsptr[5];
+    z3 = LEFT_SHIFT(z3, CONST_BITS);
+    z4 = (JLONG)wsptr[7];
+
+    tmp11 = z2 + z4;
+    tmp13 = z2 - z4;
+
+    tmp12 = MULTIPLY(tmp13, FIX(0.309016994));        /* (c3-c7)/2 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.951056516));           /* (c3+c7)/2 */
+    z4 = z3 + tmp12;
+
+    tmp10 = MULTIPLY(z1, FIX(1.396802247)) + z2 + z4; /* c1 */
+    tmp14 = MULTIPLY(z1, FIX(0.221231742)) - z2 + z4; /* c9 */
+
+    z2 = MULTIPLY(tmp11, FIX(0.587785252));           /* (c1-c9)/2 */
+    z4 = z3 - tmp12 - LEFT_SHIFT(tmp13, CONST_BITS - 1);
+
+    tmp12 = LEFT_SHIFT(z1 - tmp13, CONST_BITS) - z3;
+
+    tmp11 = MULTIPLY(z1, FIX(1.260073511)) - z2 - z4; /* c3 */
+    tmp13 = MULTIPLY(z1, FIX(0.642039522)) - z2 + z4; /* c7 */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp10,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[9] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp10,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[1] = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp11,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[8] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp11,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[2] = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp12,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[7] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp12,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[3] = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp13,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[6] = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp13,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[4] = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp14,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+    outptr[5] = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp14,
+                                             CONST_BITS + PASS1_BITS + 3) &
+                            RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing an 11x11 output block.
+ *
+ * Optimized algorithm with 24 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/22).
+ */
+
+GLOBAL(void)
+_jpeg_idct_11x11(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
+{
+  JLONG tmp10, tmp11, tmp12, tmp13, tmp14;
+  JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+  JLONG z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE *quantptr;
+  int *wsptr;
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8 * 11];        /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp10 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
+    tmp10 = LEFT_SHIFT(tmp10, CONST_BITS);
+    /* Add fudge factor here for final descale. */
+    tmp10 += ONE << (CONST_BITS - PASS1_BITS - 1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
+    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
+    z4 = z1 + z3;
+    tmp24 = MULTIPLY(z4, -FIX(1.155664402));         /* -(c2-c10) */
+    z4 -= z2;
+    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
+    tmp21 = tmp20 + tmp23 + tmp25 -
+            MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
+    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
+    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
+    tmp24 += tmp25;
+    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
+    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
+             MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
+    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
+
+    tmp11 = z1 + z2;
+    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
+    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
+    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+            MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
+    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
+    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
+    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
+    z1    = MULTIPLY(z2 + z4, -FIX(1.798248910));        /* -(c1+c9) */
+    tmp11 += z1;
+    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
+    tmp14 += MULTIPLY(z2, -FIX(1.467221301)) +           /* -(c5+c9) */
+             MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
+             MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
+
+    /* Final output stage */
+
+    wsptr[8 * 0]  = (int)RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 10] = (int)RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 1]  = (int)RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 9]  = (int)RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 2]  = (int)RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 8]  = (int)RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 3]  = (int)RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 7]  = (int)RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 4]  = (int)RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 6]  = (int)RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 5]  = (int)RIGHT_SHIFT(tmp25, CONST_BITS - PASS1_BITS);
+  }
+
+  /* Pass 2: process 11 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 11; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp10 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
+    tmp10 = LEFT_SHIFT(tmp10, CONST_BITS);
+
+    z1 = (JLONG)wsptr[2];
+    z2 = (JLONG)wsptr[4];
+    z3 = (JLONG)wsptr[6];
+
+    tmp20 = MULTIPLY(z2 - z3, FIX(2.546640132));     /* c2+c4 */
+    tmp23 = MULTIPLY(z2 - z1, FIX(0.430815045));     /* c2-c6 */
+    z4 = z1 + z3;
+    tmp24 = MULTIPLY(z4, -FIX(1.155664402));         /* -(c2-c10) */
+    z4 -= z2;
+    tmp25 = tmp10 + MULTIPLY(z4, FIX(1.356927976));  /* c2 */
+    tmp21 = tmp20 + tmp23 + tmp25 -
+            MULTIPLY(z2, FIX(1.821790775));          /* c2+c4+c10-c6 */
+    tmp20 += tmp25 + MULTIPLY(z3, FIX(2.115825087)); /* c4+c6 */
+    tmp23 += tmp25 - MULTIPLY(z1, FIX(1.513598477)); /* c6+c8 */
+    tmp24 += tmp25;
+    tmp22 = tmp24 - MULTIPLY(z3, FIX(0.788749120));  /* c8+c10 */
+    tmp24 += MULTIPLY(z2, FIX(1.944413522)) -        /* c2+c8 */
+             MULTIPLY(z1, FIX(1.390975730));         /* c4+c10 */
+    tmp25 = tmp10 - MULTIPLY(z4, FIX(1.414213562));  /* c0 */
+
+    /* Odd part */
+
+    z1 = (JLONG)wsptr[1];
+    z2 = (JLONG)wsptr[3];
+    z3 = (JLONG)wsptr[5];
+    z4 = (JLONG)wsptr[7];
+
+    tmp11 = z1 + z2;
+    tmp14 = MULTIPLY(tmp11 + z3 + z4, FIX(0.398430003)); /* c9 */
+    tmp11 = MULTIPLY(tmp11, FIX(0.887983902));           /* c3-c9 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(0.670361295));         /* c5-c9 */
+    tmp13 = tmp14 + MULTIPLY(z1 + z4, FIX(0.366151574)); /* c7-c9 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+            MULTIPLY(z1, FIX(0.923107866));              /* c7+c5+c3-c1-2*c9 */
+    z1    = tmp14 - MULTIPLY(z2 + z3, FIX(1.163011579)); /* c7+c9 */
+    tmp11 += z1 + MULTIPLY(z2, FIX(2.073276588));        /* c1+c7+3*c9-c3 */
+    tmp12 += z1 - MULTIPLY(z3, FIX(1.192193623));        /* c3+c5-c7-c9 */
+    z1    = MULTIPLY(z2 + z4, -FIX(1.798248910));        /* -(c1+c9) */
+    tmp11 += z1;
+    tmp13 += z1 + MULTIPLY(z4, FIX(2.102458632));        /* c1+c5+c9-c7 */
+    tmp14 += MULTIPLY(z2, -FIX(1.467221301)) +           /* -(c5+c9) */
+             MULTIPLY(z3, FIX(1.001388905)) -            /* c1-c9 */
+             MULTIPLY(z4, FIX(1.684843907));             /* c3+c9 */
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp10,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[10] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp10,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[1]  = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp11,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[9]  = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp11,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[2]  = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp12,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[8]  = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp12,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[3]  = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp13,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[7]  = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp13,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[4]  = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp14,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[6]  = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp14,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[5]  = range_limit[(int)RIGHT_SHIFT(tmp25,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 12x12 output block.
+ *
+ * Optimized algorithm with 15 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/24).
+ */
+
+GLOBAL(void)
+_jpeg_idct_12x12(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
+{
+  JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25;
+  JLONG z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE *quantptr;
+  int *wsptr;
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8 * 12];        /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
+    z3 = LEFT_SHIFT(z3, CONST_BITS);
+    /* Add fudge factor here for final descale. */
+    z3 += ONE << (CONST_BITS - PASS1_BITS - 1);
+
+    z4 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
+    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
+    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+    z1 = LEFT_SHIFT(z1, CONST_BITS);
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
+    z2 = LEFT_SHIFT(z2, CONST_BITS);
+
+    tmp12 = z1 - z2;
+
+    tmp21 = z3 + tmp12;
+    tmp24 = z3 - tmp12;
+
+    tmp12 = z4 + z2;
+
+    tmp20 = tmp10 + tmp12;
+    tmp25 = tmp10 - tmp12;
+
+    tmp12 = z4 - z1 - z2;
+
+    tmp22 = tmp11 + tmp12;
+    tmp23 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
+
+    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
+    tmp14 = MULTIPLY(z2, -FIX_0_541196100);                  /* -c9 */
+
+    tmp10 = z1 + z3;
+    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
+    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
+    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
+    tmp13 = MULTIPLY(z3 + z4, -FIX(1.045510580));            /* -(c7+c11) */
+    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
+             MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
+
+    z1 -= z4;
+    z2 -= z3;
+    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
+    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
+    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
+
+    /* Final output stage */
+
+    wsptr[8 * 0]  = (int)RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 11] = (int)RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 1]  = (int)RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 10] = (int)RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 2]  = (int)RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 9]  = (int)RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 3]  = (int)RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 8]  = (int)RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 4]  = (int)RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 7]  = (int)RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 5]  = (int)RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 6]  = (int)RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS - PASS1_BITS);
+  }
+
+  /* Pass 2: process 12 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 12; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    z3 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
+    z3 = LEFT_SHIFT(z3, CONST_BITS);
+
+    z4 = (JLONG)wsptr[4];
+    z4 = MULTIPLY(z4, FIX(1.224744871)); /* c4 */
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    z1 = (JLONG)wsptr[2];
+    z4 = MULTIPLY(z1, FIX(1.366025404)); /* c2 */
+    z1 = LEFT_SHIFT(z1, CONST_BITS);
+    z2 = (JLONG)wsptr[6];
+    z2 = LEFT_SHIFT(z2, CONST_BITS);
+
+    tmp12 = z1 - z2;
+
+    tmp21 = z3 + tmp12;
+    tmp24 = z3 - tmp12;
+
+    tmp12 = z4 + z2;
+
+    tmp20 = tmp10 + tmp12;
+    tmp25 = tmp10 - tmp12;
+
+    tmp12 = z4 - z1 - z2;
+
+    tmp22 = tmp11 + tmp12;
+    tmp23 = tmp11 - tmp12;
+
+    /* Odd part */
+
+    z1 = (JLONG)wsptr[1];
+    z2 = (JLONG)wsptr[3];
+    z3 = (JLONG)wsptr[5];
+    z4 = (JLONG)wsptr[7];
+
+    tmp11 = MULTIPLY(z2, FIX(1.306562965));                  /* c3 */
+    tmp14 = MULTIPLY(z2, -FIX_0_541196100);                  /* -c9 */
+
+    tmp10 = z1 + z3;
+    tmp15 = MULTIPLY(tmp10 + z4, FIX(0.860918669));          /* c7 */
+    tmp12 = tmp15 + MULTIPLY(tmp10, FIX(0.261052384));       /* c5-c7 */
+    tmp10 = tmp12 + tmp11 + MULTIPLY(z1, FIX(0.280143716));  /* c1-c5 */
+    tmp13 = MULTIPLY(z3 + z4, -FIX(1.045510580));            /* -(c7+c11) */
+    tmp12 += tmp13 + tmp14 - MULTIPLY(z3, FIX(1.478575242)); /* c1+c5-c7-c11 */
+    tmp13 += tmp15 - tmp11 + MULTIPLY(z4, FIX(1.586706681)); /* c1+c11 */
+    tmp15 += tmp14 - MULTIPLY(z1, FIX(0.676326758)) -        /* c7-c11 */
+             MULTIPLY(z4, FIX(1.982889723));                 /* c5+c7 */
+
+    z1 -= z4;
+    z2 -= z3;
+    z3 = MULTIPLY(z1 + z2, FIX_0_541196100);                 /* c9 */
+    tmp11 = z3 + MULTIPLY(z1, FIX_0_765366865);              /* c3-c9 */
+    tmp14 = z3 - MULTIPLY(z2, FIX_1_847759065);              /* c3+c9 */
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp10,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[11] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp10,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[1]  = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp11,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[10] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp11,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[2]  = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp12,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[9]  = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp12,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[3]  = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp13,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[8]  = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp13,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[4]  = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp14,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[7]  = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp14,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[5]  = range_limit[(int)RIGHT_SHIFT(tmp25 + tmp15,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[6]  = range_limit[(int)RIGHT_SHIFT(tmp25 - tmp15,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 13x13 output block.
+ *
+ * Optimized algorithm with 29 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/26).
+ */
+
+GLOBAL(void)
+_jpeg_idct_13x13(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
+{
+  JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15;
+  JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+  JLONG z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE *quantptr;
+  int *wsptr;
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8 * 13];        /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
+    z1 = LEFT_SHIFT(z1, CONST_BITS);
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS - PASS1_BITS - 1);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
+
+    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
+    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
+
+    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
+    tmp25 = MULTIPLY(z2, -FIX(1.252223920)) + tmp12 + tmp13;  /* c4 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
+
+    tmp23 = MULTIPLY(z2, -FIX(0.170464608)) - tmp12 - tmp13;  /* c12 */
+    tmp24 = MULTIPLY(z2, -FIX(0.803364869)) + tmp12 - tmp13;  /* c8 */
+
+    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
+    tmp15 = z1 + z4;
+    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+            MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
+    tmp14 = MULTIPLY(z2 + z3, -FIX(0.338443458));    /* -c11 */
+    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
+    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
+    tmp14 = MULTIPLY(z2 + z4, -FIX(1.163874945));    /* -c5 */
+    tmp11 += tmp14;
+    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
+    tmp14 = MULTIPLY(z3 + z4, -FIX(0.657217813));    /* -c9 */
+    tmp12 += tmp14;
+    tmp13 += tmp14;
+    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
+    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
+            MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
+    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
+    tmp14 += z1;
+    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
+             MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
+
+    /* Final output stage */
+
+    wsptr[8 * 0]  = (int)RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 12] = (int)RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 1]  = (int)RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 11] = (int)RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 2]  = (int)RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 10] = (int)RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 3]  = (int)RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 9]  = (int)RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 4]  = (int)RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 8]  = (int)RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 5]  = (int)RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 7]  = (int)RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 6]  = (int)RIGHT_SHIFT(tmp26, CONST_BITS - PASS1_BITS);
+  }
+
+  /* Pass 2: process 13 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 13; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    z1 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
+    z1 = LEFT_SHIFT(z1, CONST_BITS);
+
+    z2 = (JLONG)wsptr[2];
+    z3 = (JLONG)wsptr[4];
+    z4 = (JLONG)wsptr[6];
+
+    tmp10 = z3 + z4;
+    tmp11 = z3 - z4;
+
+    tmp12 = MULTIPLY(tmp10, FIX(1.155388986));                /* (c4+c6)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.096834934)) + z1;           /* (c4-c6)/2 */
+
+    tmp20 = MULTIPLY(z2, FIX(1.373119086)) + tmp12 + tmp13;   /* c2 */
+    tmp22 = MULTIPLY(z2, FIX(0.501487041)) - tmp12 + tmp13;   /* c10 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.316450131));                /* (c8-c12)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.486914739)) + z1;           /* (c8+c12)/2 */
+
+    tmp21 = MULTIPLY(z2, FIX(1.058554052)) - tmp12 + tmp13;   /* c6 */
+    tmp25 = MULTIPLY(z2, -FIX(1.252223920)) + tmp12 + tmp13;  /* c4 */
+
+    tmp12 = MULTIPLY(tmp10, FIX(0.435816023));                /* (c2-c10)/2 */
+    tmp13 = MULTIPLY(tmp11, FIX(0.937303064)) - z1;           /* (c2+c10)/2 */
+
+    tmp23 = MULTIPLY(z2, -FIX(0.170464608)) - tmp12 - tmp13;  /* c12 */
+    tmp24 = MULTIPLY(z2, -FIX(0.803364869)) + tmp12 - tmp13;  /* c8 */
+
+    tmp26 = MULTIPLY(tmp11 - z2, FIX(1.414213562)) + z1;      /* c0 */
+
+    /* Odd part */
+
+    z1 = (JLONG)wsptr[1];
+    z2 = (JLONG)wsptr[3];
+    z3 = (JLONG)wsptr[5];
+    z4 = (JLONG)wsptr[7];
+
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.322312651));     /* c3 */
+    tmp12 = MULTIPLY(z1 + z3, FIX(1.163874945));     /* c5 */
+    tmp15 = z1 + z4;
+    tmp13 = MULTIPLY(tmp15, FIX(0.937797057));       /* c7 */
+    tmp10 = tmp11 + tmp12 + tmp13 -
+            MULTIPLY(z1, FIX(2.020082300));          /* c7+c5+c3-c1 */
+    tmp14 = MULTIPLY(z2 + z3, -FIX(0.338443458));    /* -c11 */
+    tmp11 += tmp14 + MULTIPLY(z2, FIX(0.837223564)); /* c5+c9+c11-c3 */
+    tmp12 += tmp14 - MULTIPLY(z3, FIX(1.572116027)); /* c1+c5-c9-c11 */
+    tmp14 = MULTIPLY(z2 + z4, -FIX(1.163874945));    /* -c5 */
+    tmp11 += tmp14;
+    tmp13 += tmp14 + MULTIPLY(z4, FIX(2.205608352)); /* c3+c5+c9-c7 */
+    tmp14 = MULTIPLY(z3 + z4, -FIX(0.657217813));    /* -c9 */
+    tmp12 += tmp14;
+    tmp13 += tmp14;
+    tmp15 = MULTIPLY(tmp15, FIX(0.338443458));       /* c11 */
+    tmp14 = tmp15 + MULTIPLY(z1, FIX(0.318774355)) - /* c9-c11 */
+            MULTIPLY(z2, FIX(0.466105296));          /* c1-c7 */
+    z1    = MULTIPLY(z3 - z2, FIX(0.937797057));     /* c7 */
+    tmp14 += z1;
+    tmp15 += z1 + MULTIPLY(z3, FIX(0.384515595)) -   /* c3-c7 */
+             MULTIPLY(z4, FIX(1.742345811));         /* c1+c11 */
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp10,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[12] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp10,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[1]  = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp11,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[11] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp11,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[2]  = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp12,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[10] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp12,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[3]  = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp13,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[9]  = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp13,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[4]  = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp14,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[8]  = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp14,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[5]  = range_limit[(int)RIGHT_SHIFT(tmp25 + tmp15,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[7]  = range_limit[(int)RIGHT_SHIFT(tmp25 - tmp15,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[6]  = range_limit[(int)RIGHT_SHIFT(tmp26,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 14x14 output block.
+ *
+ * Optimized algorithm with 20 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/28).
+ */
+
+GLOBAL(void)
+_jpeg_idct_14x14(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
+{
+  JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26;
+  JLONG z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE *quantptr;
+  int *wsptr;
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8 * 14];        /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
+    z1 = LEFT_SHIFT(z1, CONST_BITS);
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS - PASS1_BITS - 1);
+    z4 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+
+    tmp23 = RIGHT_SHIFT(z1 - LEFT_SHIFT(z2 + z3 - z4, 1),
+                        CONST_BITS - PASS1_BITS); /* c0 = (c4+c12-c8)*2 */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+            MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
+    tmp13 = LEFT_SHIFT(z4, CONST_BITS);
+
+    tmp14 = z1 + z3;
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + tmp13 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1    -= z2;
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - tmp13;        /* c11 */
+    tmp16 += tmp15;
+    z1    += z4;
+    z4    = MULTIPLY(z2 + z3, -FIX(0.158341681)) - tmp13;  /* -c13 */
+    tmp11 += z4 - MULTIPLY(z2, FIX(0.424103948));          /* c3-c9-c13 */
+    tmp12 += z4 - MULTIPLY(z3, FIX(2.373959773));          /* c3+c5-c13 */
+    z4    = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
+    tmp14 += z4 + tmp13 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += z4 + MULTIPLY(z2, FIX(0.674957567));          /* c1+c11-c5 */
+
+    tmp13 = LEFT_SHIFT(z1 - z3, PASS1_BITS);
+
+    /* Final output stage */
+
+    wsptr[8 * 0]  = (int)RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 13] = (int)RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 1]  = (int)RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 12] = (int)RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 2]  = (int)RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 11] = (int)RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 3]  = (int)(tmp23 + tmp13);
+    wsptr[8 * 10] = (int)(tmp23 - tmp13);
+    wsptr[8 * 4]  = (int)RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 9]  = (int)RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 5]  = (int)RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 8]  = (int)RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 6]  = (int)RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 7]  = (int)RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS - PASS1_BITS);
+  }
+
+  /* Pass 2: process 14 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 14; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    z1 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
+    z1 = LEFT_SHIFT(z1, CONST_BITS);
+    z4 = (JLONG)wsptr[4];
+    z2 = MULTIPLY(z4, FIX(1.274162392));         /* c4 */
+    z3 = MULTIPLY(z4, FIX(0.314692123));         /* c12 */
+    z4 = MULTIPLY(z4, FIX(0.881747734));         /* c8 */
+
+    tmp10 = z1 + z2;
+    tmp11 = z1 + z3;
+    tmp12 = z1 - z4;
+
+    tmp23 = z1 - LEFT_SHIFT(z2 + z3 - z4, 1);    /* c0 = (c4+c12-c8)*2 */
+
+    z1 = (JLONG)wsptr[2];
+    z2 = (JLONG)wsptr[6];
+
+    z3 = MULTIPLY(z1 + z2, FIX(1.105676686));    /* c6 */
+
+    tmp13 = z3 + MULTIPLY(z1, FIX(0.273079590)); /* c2-c6 */
+    tmp14 = z3 - MULTIPLY(z2, FIX(1.719280954)); /* c6+c10 */
+    tmp15 = MULTIPLY(z1, FIX(0.613604268)) -     /* c10 */
+            MULTIPLY(z2, FIX(1.378756276));      /* c2 */
+
+    tmp20 = tmp10 + tmp13;
+    tmp26 = tmp10 - tmp13;
+    tmp21 = tmp11 + tmp14;
+    tmp25 = tmp11 - tmp14;
+    tmp22 = tmp12 + tmp15;
+    tmp24 = tmp12 - tmp15;
+
+    /* Odd part */
+
+    z1 = (JLONG)wsptr[1];
+    z2 = (JLONG)wsptr[3];
+    z3 = (JLONG)wsptr[5];
+    z4 = (JLONG)wsptr[7];
+    z4 = LEFT_SHIFT(z4, CONST_BITS);
+
+    tmp14 = z1 + z3;
+    tmp11 = MULTIPLY(z1 + z2, FIX(1.334852607));           /* c3 */
+    tmp12 = MULTIPLY(tmp14, FIX(1.197448846));             /* c5 */
+    tmp10 = tmp11 + tmp12 + z4 - MULTIPLY(z1, FIX(1.126980169)); /* c3+c5-c1 */
+    tmp14 = MULTIPLY(tmp14, FIX(0.752406978));             /* c9 */
+    tmp16 = tmp14 - MULTIPLY(z1, FIX(1.061150426));        /* c9+c11-c13 */
+    z1    -= z2;
+    tmp15 = MULTIPLY(z1, FIX(0.467085129)) - z4;           /* c11 */
+    tmp16 += tmp15;
+    tmp13 = MULTIPLY(z2 + z3, -FIX(0.158341681)) - z4;     /* -c13 */
+    tmp11 += tmp13 - MULTIPLY(z2, FIX(0.424103948));       /* c3-c9-c13 */
+    tmp12 += tmp13 - MULTIPLY(z3, FIX(2.373959773));       /* c3+c5-c13 */
+    tmp13 = MULTIPLY(z3 - z2, FIX(1.405321284));           /* c1 */
+    tmp14 += tmp13 + z4 - MULTIPLY(z3, FIX(1.6906431334)); /* c1+c9-c11 */
+    tmp15 += tmp13 + MULTIPLY(z2, FIX(0.674957567));       /* c1+c11-c5 */
+
+    tmp13 = LEFT_SHIFT(z1 - z3, CONST_BITS) + z4;
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp10,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[13] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp10,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[1]  = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp11,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[12] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp11,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[2]  = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp12,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[11] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp12,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[3]  = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp13,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[10] = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp13,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[4]  = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp14,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[9]  = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp14,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[5]  = range_limit[(int)RIGHT_SHIFT(tmp25 + tmp15,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[8]  = range_limit[(int)RIGHT_SHIFT(tmp25 - tmp15,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[6]  = range_limit[(int)RIGHT_SHIFT(tmp26 + tmp16,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[7]  = range_limit[(int)RIGHT_SHIFT(tmp26 - tmp16,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 15x15 output block.
+ *
+ * Optimized algorithm with 22 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/30).
+ */
+
+GLOBAL(void)
+_jpeg_idct_15x15(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
+{
+  JLONG tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16;
+  JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+  JLONG z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE *quantptr;
+  int *wsptr;
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8 * 15];        /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
+    z1 = LEFT_SHIFT(z1, CONST_BITS);
+    /* Add fudge factor here for final descale. */
+    z1 += ONE << (CONST_BITS - PASS1_BITS - 1);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
+
+    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
+    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
+
+    tmp12 = z1 - tmp10;
+    tmp13 = z1 + tmp11;
+    z1 -= LEFT_SHIFT(tmp11 - tmp10, 1);     /* c0 = (c6-c12)*2 */
+
+    z4 = z2 - z3;
+    z3 += z2;
+    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
+    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
+
+    tmp20 = tmp13 + tmp10 + tmp11;
+    tmp23 = tmp12 - tmp10 + tmp11 + z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
+
+    tmp25 = tmp13 - tmp10 - tmp11;
+    tmp26 = tmp12 + tmp10 - tmp11 - z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
+
+    tmp21 = tmp12 + tmp10 + tmp11;
+    tmp24 = tmp13 - tmp10 + tmp11;
+    tmp11 += tmp11;
+    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
+    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
+    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
+    z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
+
+    tmp13 = z2 - z4;
+    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
+    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
+    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
+
+    tmp13 = MULTIPLY(z2, -FIX(0.831253876));                /* -c9 */
+    tmp15 = MULTIPLY(z2, -FIX(1.344997024));                /* -c3 */
+    z2 = z1 - z4;
+    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
+
+    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
+    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
+    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
+    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
+    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
+    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
+
+    /* Final output stage */
+
+    wsptr[8 * 0]  = (int)RIGHT_SHIFT(tmp20 + tmp10, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 14] = (int)RIGHT_SHIFT(tmp20 - tmp10, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 1]  = (int)RIGHT_SHIFT(tmp21 + tmp11, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 13] = (int)RIGHT_SHIFT(tmp21 - tmp11, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 2]  = (int)RIGHT_SHIFT(tmp22 + tmp12, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 12] = (int)RIGHT_SHIFT(tmp22 - tmp12, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 3]  = (int)RIGHT_SHIFT(tmp23 + tmp13, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 11] = (int)RIGHT_SHIFT(tmp23 - tmp13, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 4]  = (int)RIGHT_SHIFT(tmp24 + tmp14, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 10] = (int)RIGHT_SHIFT(tmp24 - tmp14, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 5]  = (int)RIGHT_SHIFT(tmp25 + tmp15, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 9]  = (int)RIGHT_SHIFT(tmp25 - tmp15, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 6]  = (int)RIGHT_SHIFT(tmp26 + tmp16, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 8]  = (int)RIGHT_SHIFT(tmp26 - tmp16, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 7]  = (int)RIGHT_SHIFT(tmp27, CONST_BITS - PASS1_BITS);
+  }
+
+  /* Pass 2: process 15 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 15; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    z1 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
+    z1 = LEFT_SHIFT(z1, CONST_BITS);
+
+    z2 = (JLONG)wsptr[2];
+    z3 = (JLONG)wsptr[4];
+    z4 = (JLONG)wsptr[6];
+
+    tmp10 = MULTIPLY(z4, FIX(0.437016024)); /* c12 */
+    tmp11 = MULTIPLY(z4, FIX(1.144122806)); /* c6 */
+
+    tmp12 = z1 - tmp10;
+    tmp13 = z1 + tmp11;
+    z1 -= LEFT_SHIFT(tmp11 - tmp10, 1);     /* c0 = (c6-c12)*2 */
+
+    z4 = z2 - z3;
+    z3 += z2;
+    tmp10 = MULTIPLY(z3, FIX(1.337628990)); /* (c2+c4)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.045680613)); /* (c2-c4)/2 */
+    z2 = MULTIPLY(z2, FIX(1.439773946));    /* c4+c14 */
+
+    tmp20 = tmp13 + tmp10 + tmp11;
+    tmp23 = tmp12 - tmp10 + tmp11 + z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.547059574)); /* (c8+c14)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.399234004)); /* (c8-c14)/2 */
+
+    tmp25 = tmp13 - tmp10 - tmp11;
+    tmp26 = tmp12 + tmp10 - tmp11 - z2;
+
+    tmp10 = MULTIPLY(z3, FIX(0.790569415)); /* (c6+c12)/2 */
+    tmp11 = MULTIPLY(z4, FIX(0.353553391)); /* (c6-c12)/2 */
+
+    tmp21 = tmp12 + tmp10 + tmp11;
+    tmp24 = tmp13 - tmp10 + tmp11;
+    tmp11 += tmp11;
+    tmp22 = z1 + tmp11;                     /* c10 = c6-c12 */
+    tmp27 = z1 - tmp11 - tmp11;             /* c0 = (c6-c12)*2 */
+
+    /* Odd part */
+
+    z1 = (JLONG)wsptr[1];
+    z2 = (JLONG)wsptr[3];
+    z4 = (JLONG)wsptr[5];
+    z3 = MULTIPLY(z4, FIX(1.224744871));                    /* c5 */
+    z4 = (JLONG)wsptr[7];
+
+    tmp13 = z2 - z4;
+    tmp15 = MULTIPLY(z1 + tmp13, FIX(0.831253876));         /* c9 */
+    tmp11 = tmp15 + MULTIPLY(z1, FIX(0.513743148));         /* c3-c9 */
+    tmp14 = tmp15 - MULTIPLY(tmp13, FIX(2.176250899));      /* c3+c9 */
+
+    tmp13 = MULTIPLY(z2, -FIX(0.831253876));                /* -c9 */
+    tmp15 = MULTIPLY(z2, -FIX(1.344997024));                /* -c3 */
+    z2 = z1 - z4;
+    tmp12 = z3 + MULTIPLY(z2, FIX(1.406466353));            /* c1 */
+
+    tmp10 = tmp12 + MULTIPLY(z4, FIX(2.457431844)) - tmp15; /* c1+c7 */
+    tmp16 = tmp12 - MULTIPLY(z1, FIX(1.112434820)) + tmp13; /* c1-c13 */
+    tmp12 = MULTIPLY(z2, FIX(1.224744871)) - z3;            /* c5 */
+    z2 = MULTIPLY(z1 + z4, FIX(0.575212477));               /* c11 */
+    tmp13 += z2 + MULTIPLY(z1, FIX(0.475753014)) - z3;      /* c7-c11 */
+    tmp15 += z2 - MULTIPLY(z4, FIX(0.869244010)) + z3;      /* c11+c13 */
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp10,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[14] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp10,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[1]  = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp11,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[13] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp11,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[2]  = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp12,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[12] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp12,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[3]  = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp13,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[11] = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp13,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[4]  = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp14,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[10] = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp14,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[5]  = range_limit[(int)RIGHT_SHIFT(tmp25 + tmp15,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[9]  = range_limit[(int)RIGHT_SHIFT(tmp25 - tmp15,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[6]  = range_limit[(int)RIGHT_SHIFT(tmp26 + tmp16,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[8]  = range_limit[(int)RIGHT_SHIFT(tmp26 - tmp16,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[7]  = range_limit[(int)RIGHT_SHIFT(tmp27,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a 16x16 output block.
+ *
+ * Optimized algorithm with 28 multiplications in the 1-D kernel.
+ * cK represents sqrt(2) * cos(K*pi/32).
+ */
+
+GLOBAL(void)
+_jpeg_idct_16x16(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+                 JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+                 JDIMENSION output_col)
+{
+  JLONG tmp0, tmp1, tmp2, tmp3, tmp10, tmp11, tmp12, tmp13;
+  JLONG tmp20, tmp21, tmp22, tmp23, tmp24, tmp25, tmp26, tmp27;
+  JLONG z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE *quantptr;
+  int *wsptr;
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[8 * 16];        /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = 0; ctr < 8; ctr++, inptr++, quantptr++, wsptr++) {
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
+    tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
+    /* Add fudge factor here for final descale. */
+    tmp0 += ONE << (CONST_BITS - PASS1_BITS - 1);
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 4], quantptr[DCTSIZE * 4]);
+    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
+    z3 = z1 - z2;
+    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
+    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
+    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+    tmp20 = tmp10 + tmp0;
+    tmp27 = tmp10 - tmp0;
+    tmp21 = tmp12 + tmp1;
+    tmp26 = tmp12 - tmp1;
+    tmp22 = tmp13 + tmp2;
+    tmp25 = tmp13 - tmp2;
+    tmp23 = tmp11 + tmp3;
+    tmp24 = tmp11 - tmp3;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
+
+    tmp11 = z1 + z3;
+
+    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+    tmp0  = tmp1 + tmp2 + tmp3 -
+            MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+    tmp13 = tmp10 + tmp11 + tmp12 -
+            MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+    z2    += z4;
+    z1    = MULTIPLY(z2, -FIX(0.666655658));       /* -c11 */
+    tmp1  += z1;
+    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+    z2    = MULTIPLY(z2, -FIX(1.247225013));       /* -c5 */
+    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+    tmp12 += z2;
+    z2    = MULTIPLY(z3 + z4, -FIX(1.353318001));  /* -c3 */
+    tmp2  += z2;
+    tmp3  += z2;
+    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+    tmp10 += z2;
+    tmp11 += z2;
+
+    /* Final output stage */
+
+    wsptr[8 * 0]  = (int)RIGHT_SHIFT(tmp20 + tmp0,  CONST_BITS - PASS1_BITS);
+    wsptr[8 * 15] = (int)RIGHT_SHIFT(tmp20 - tmp0,  CONST_BITS - PASS1_BITS);
+    wsptr[8 * 1]  = (int)RIGHT_SHIFT(tmp21 + tmp1,  CONST_BITS - PASS1_BITS);
+    wsptr[8 * 14] = (int)RIGHT_SHIFT(tmp21 - tmp1,  CONST_BITS - PASS1_BITS);
+    wsptr[8 * 2]  = (int)RIGHT_SHIFT(tmp22 + tmp2,  CONST_BITS - PASS1_BITS);
+    wsptr[8 * 13] = (int)RIGHT_SHIFT(tmp22 - tmp2,  CONST_BITS - PASS1_BITS);
+    wsptr[8 * 3]  = (int)RIGHT_SHIFT(tmp23 + tmp3,  CONST_BITS - PASS1_BITS);
+    wsptr[8 * 12] = (int)RIGHT_SHIFT(tmp23 - tmp3,  CONST_BITS - PASS1_BITS);
+    wsptr[8 * 4]  = (int)RIGHT_SHIFT(tmp24 + tmp10, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 11] = (int)RIGHT_SHIFT(tmp24 - tmp10, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 5]  = (int)RIGHT_SHIFT(tmp25 + tmp11, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 10] = (int)RIGHT_SHIFT(tmp25 - tmp11, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 6]  = (int)RIGHT_SHIFT(tmp26 + tmp12, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 9]  = (int)RIGHT_SHIFT(tmp26 - tmp12, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 7]  = (int)RIGHT_SHIFT(tmp27 + tmp13, CONST_BITS - PASS1_BITS);
+    wsptr[8 * 8]  = (int)RIGHT_SHIFT(tmp27 - tmp13, CONST_BITS - PASS1_BITS);
+  }
+
+  /* Pass 2: process 16 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 16; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+
+    /* Even part */
+
+    /* Add fudge factor here for final descale. */
+    tmp0 = (JLONG)wsptr[0] + (ONE << (PASS1_BITS + 2));
+    tmp0 = LEFT_SHIFT(tmp0, CONST_BITS);
+
+    z1 = (JLONG)wsptr[4];
+    tmp1 = MULTIPLY(z1, FIX(1.306562965));      /* c4[16] = c2[8] */
+    tmp2 = MULTIPLY(z1, FIX_0_541196100);       /* c12[16] = c6[8] */
+
+    tmp10 = tmp0 + tmp1;
+    tmp11 = tmp0 - tmp1;
+    tmp12 = tmp0 + tmp2;
+    tmp13 = tmp0 - tmp2;
+
+    z1 = (JLONG)wsptr[2];
+    z2 = (JLONG)wsptr[6];
+    z3 = z1 - z2;
+    z4 = MULTIPLY(z3, FIX(0.275899379));        /* c14[16] = c7[8] */
+    z3 = MULTIPLY(z3, FIX(1.387039845));        /* c2[16] = c1[8] */
+
+    tmp0 = z3 + MULTIPLY(z2, FIX_2_562915447);  /* (c6+c2)[16] = (c3+c1)[8] */
+    tmp1 = z4 + MULTIPLY(z1, FIX_0_899976223);  /* (c6-c14)[16] = (c3-c7)[8] */
+    tmp2 = z3 - MULTIPLY(z1, FIX(0.601344887)); /* (c2-c10)[16] = (c1-c5)[8] */
+    tmp3 = z4 - MULTIPLY(z2, FIX(0.509795579)); /* (c10-c14)[16] = (c5-c7)[8] */
+
+    tmp20 = tmp10 + tmp0;
+    tmp27 = tmp10 - tmp0;
+    tmp21 = tmp12 + tmp1;
+    tmp26 = tmp12 - tmp1;
+    tmp22 = tmp13 + tmp2;
+    tmp25 = tmp13 - tmp2;
+    tmp23 = tmp11 + tmp3;
+    tmp24 = tmp11 - tmp3;
+
+    /* Odd part */
+
+    z1 = (JLONG)wsptr[1];
+    z2 = (JLONG)wsptr[3];
+    z3 = (JLONG)wsptr[5];
+    z4 = (JLONG)wsptr[7];
+
+    tmp11 = z1 + z3;
+
+    tmp1  = MULTIPLY(z1 + z2, FIX(1.353318001));   /* c3 */
+    tmp2  = MULTIPLY(tmp11,   FIX(1.247225013));   /* c5 */
+    tmp3  = MULTIPLY(z1 + z4, FIX(1.093201867));   /* c7 */
+    tmp10 = MULTIPLY(z1 - z4, FIX(0.897167586));   /* c9 */
+    tmp11 = MULTIPLY(tmp11,   FIX(0.666655658));   /* c11 */
+    tmp12 = MULTIPLY(z1 - z2, FIX(0.410524528));   /* c13 */
+    tmp0  = tmp1 + tmp2 + tmp3 -
+            MULTIPLY(z1, FIX(2.286341144));        /* c7+c5+c3-c1 */
+    tmp13 = tmp10 + tmp11 + tmp12 -
+            MULTIPLY(z1, FIX(1.835730603));        /* c9+c11+c13-c15 */
+    z1    = MULTIPLY(z2 + z3, FIX(0.138617169));   /* c15 */
+    tmp1  += z1 + MULTIPLY(z2, FIX(0.071888074));  /* c9+c11-c3-c15 */
+    tmp2  += z1 - MULTIPLY(z3, FIX(1.125726048));  /* c5+c7+c15-c3 */
+    z1    = MULTIPLY(z3 - z2, FIX(1.407403738));   /* c1 */
+    tmp11 += z1 - MULTIPLY(z3, FIX(0.766367282));  /* c1+c11-c9-c13 */
+    tmp12 += z1 + MULTIPLY(z2, FIX(1.971951411));  /* c1+c5+c13-c7 */
+    z2    += z4;
+    z1    = MULTIPLY(z2, -FIX(0.666655658));       /* -c11 */
+    tmp1  += z1;
+    tmp3  += z1 + MULTIPLY(z4, FIX(1.065388962));  /* c3+c11+c15-c7 */
+    z2    = MULTIPLY(z2, -FIX(1.247225013));       /* -c5 */
+    tmp10 += z2 + MULTIPLY(z4, FIX(3.141271809));  /* c1+c5+c9-c13 */
+    tmp12 += z2;
+    z2    = MULTIPLY(z3 + z4, -FIX(1.353318001));  /* -c3 */
+    tmp2  += z2;
+    tmp3  += z2;
+    z2    = MULTIPLY(z4 - z3, FIX(0.410524528));   /* c13 */
+    tmp10 += z2;
+    tmp11 += z2;
+
+    /* Final output stage */
+
+    outptr[0]  = range_limit[(int)RIGHT_SHIFT(tmp20 + tmp0,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[15] = range_limit[(int)RIGHT_SHIFT(tmp20 - tmp0,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[1]  = range_limit[(int)RIGHT_SHIFT(tmp21 + tmp1,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[14] = range_limit[(int)RIGHT_SHIFT(tmp21 - tmp1,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[2]  = range_limit[(int)RIGHT_SHIFT(tmp22 + tmp2,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[13] = range_limit[(int)RIGHT_SHIFT(tmp22 - tmp2,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[3]  = range_limit[(int)RIGHT_SHIFT(tmp23 + tmp3,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[12] = range_limit[(int)RIGHT_SHIFT(tmp23 - tmp3,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[4]  = range_limit[(int)RIGHT_SHIFT(tmp24 + tmp10,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[11] = range_limit[(int)RIGHT_SHIFT(tmp24 - tmp10,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[5]  = range_limit[(int)RIGHT_SHIFT(tmp25 + tmp11,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[10] = range_limit[(int)RIGHT_SHIFT(tmp25 - tmp11,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[6]  = range_limit[(int)RIGHT_SHIFT(tmp26 + tmp12,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[9]  = range_limit[(int)RIGHT_SHIFT(tmp26 - tmp12,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[7]  = range_limit[(int)RIGHT_SHIFT(tmp27 + tmp13,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+    outptr[8]  = range_limit[(int)RIGHT_SHIFT(tmp27 - tmp13,
+                                              CONST_BITS + PASS1_BITS + 3) &
+                             RANGE_MASK];
+
+    wsptr += 8;         /* advance pointer to next row */
+  }
+}
+
+#endif /* IDCT_SCALING_SUPPORTED */
+#endif /* DCT_ISLOW_SUPPORTED */
diff --git a/thirdparty/libjpeg-turbo/src/jidctred.c b/thirdparty/libjpeg-turbo/src/jidctred.c
new file mode 100644
index 00000000000..6521e3ebbfc
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jidctred.c
@@ -0,0 +1,409 @@
+/*
+ * jidctred.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1994-1998, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015, 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains inverse-DCT routines that produce reduced-size output:
+ * either 4x4, 2x2, or 1x1 pixels from an 8x8 DCT block.
+ *
+ * The implementation is based on the Loeffler, Ligtenberg and Moschytz (LL&M)
+ * algorithm used in jidctint.c.  We simply replace each 8-to-8 1-D IDCT step
+ * with an 8-to-4 step that produces the four averages of two adjacent outputs
+ * (or an 8-to-2 step producing two averages of four outputs, for 2x2 output).
+ * These steps were derived by computing the corresponding values at the end
+ * of the normal LL&M code, then simplifying as much as possible.
+ *
+ * 1x1 is trivial: just take the DC coefficient divided by 8.
+ *
+ * See jidctint.c for additional comments.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jdct.h"               /* Private declarations for DCT subsystem */
+
+#ifdef IDCT_SCALING_SUPPORTED
+
+
+/*
+ * This module is specialized to the case DCTSIZE = 8.
+ */
+
+#if DCTSIZE != 8
+  Sorry, this code only copes with 8x8 DCTs. /* deliberate syntax err */
+#endif
+
+
+/* Scaling is the same as in jidctint.c. */
+
+#if BITS_IN_JSAMPLE == 8
+#define CONST_BITS  13
+#define PASS1_BITS  2
+#else
+#define CONST_BITS  13
+#define PASS1_BITS  1           /* lose a little precision to avoid overflow */
+#endif
+
+/* Some C compilers fail to reduce "FIX(constant)" at compile time, thus
+ * causing a lot of useless floating-point operations at run time.
+ * To get around this we use the following pre-calculated constants.
+ * If you change CONST_BITS you may want to add appropriate values.
+ * (With a reasonable C compiler, you can just rely on the FIX() macro...)
+ */
+
+#if CONST_BITS == 13
+#define FIX_0_211164243  ((JLONG)1730)          /* FIX(0.211164243) */
+#define FIX_0_509795579  ((JLONG)4176)          /* FIX(0.509795579) */
+#define FIX_0_601344887  ((JLONG)4926)          /* FIX(0.601344887) */
+#define FIX_0_720959822  ((JLONG)5906)          /* FIX(0.720959822) */
+#define FIX_0_765366865  ((JLONG)6270)          /* FIX(0.765366865) */
+#define FIX_0_850430095  ((JLONG)6967)          /* FIX(0.850430095) */
+#define FIX_0_899976223  ((JLONG)7373)          /* FIX(0.899976223) */
+#define FIX_1_061594337  ((JLONG)8697)          /* FIX(1.061594337) */
+#define FIX_1_272758580  ((JLONG)10426)         /* FIX(1.272758580) */
+#define FIX_1_451774981  ((JLONG)11893)         /* FIX(1.451774981) */
+#define FIX_1_847759065  ((JLONG)15137)         /* FIX(1.847759065) */
+#define FIX_2_172734803  ((JLONG)17799)         /* FIX(2.172734803) */
+#define FIX_2_562915447  ((JLONG)20995)         /* FIX(2.562915447) */
+#define FIX_3_624509785  ((JLONG)29692)         /* FIX(3.624509785) */
+#else
+#define FIX_0_211164243  FIX(0.211164243)
+#define FIX_0_509795579  FIX(0.509795579)
+#define FIX_0_601344887  FIX(0.601344887)
+#define FIX_0_720959822  FIX(0.720959822)
+#define FIX_0_765366865  FIX(0.765366865)
+#define FIX_0_850430095  FIX(0.850430095)
+#define FIX_0_899976223  FIX(0.899976223)
+#define FIX_1_061594337  FIX(1.061594337)
+#define FIX_1_272758580  FIX(1.272758580)
+#define FIX_1_451774981  FIX(1.451774981)
+#define FIX_1_847759065  FIX(1.847759065)
+#define FIX_2_172734803  FIX(2.172734803)
+#define FIX_2_562915447  FIX(2.562915447)
+#define FIX_3_624509785  FIX(3.624509785)
+#endif
+
+
+/* Multiply a JLONG variable by a JLONG constant to yield a JLONG result.
+ * For 8-bit samples with the recommended scaling, all the variable
+ * and constant values involved are no more than 16 bits wide, so a
+ * 16x16->32 bit multiply can be used instead of a full 32x32 multiply.
+ * For 12-bit samples, a full 32-bit multiplication will be needed.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+#define MULTIPLY(var, const)  MULTIPLY16C16(var, const)
+#else
+#define MULTIPLY(var, const)  ((var) * (const))
+#endif
+
+
+/* Dequantize a coefficient by multiplying it by the multiplier-table
+ * entry; produce an int result.  In this module, both inputs and result
+ * are 16 bits or less, so either int or short multiply will work.
+ */
+
+#define DEQUANTIZE(coef, quantval)  (((ISLOW_MULT_TYPE)(coef)) * (quantval))
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 4x4 output block.
+ */
+
+GLOBAL(void)
+_jpeg_idct_4x4(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+               JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+               JDIMENSION output_col)
+{
+  JLONG tmp0, tmp2, tmp10, tmp12;
+  JLONG z1, z2, z3, z4;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE *quantptr;
+  int *wsptr;
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE * 4];   /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) {
+    /* Don't bother to process column 4, because second pass won't use it */
+    if (ctr == DCTSIZE - 4)
+      continue;
+    if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 2] == 0 &&
+        inptr[DCTSIZE * 3] == 0 && inptr[DCTSIZE * 5] == 0 &&
+        inptr[DCTSIZE * 6] == 0 && inptr[DCTSIZE * 7] == 0) {
+      /* AC terms all zero; we need not examine term 4 for 4x4 output */
+      int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE * 0],
+                                        quantptr[DCTSIZE * 0]), PASS1_BITS);
+
+      wsptr[DCTSIZE * 0] = dcval;
+      wsptr[DCTSIZE * 1] = dcval;
+      wsptr[DCTSIZE * 2] = dcval;
+      wsptr[DCTSIZE * 3] = dcval;
+
+      continue;
+    }
+
+    /* Even part */
+
+    tmp0 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
+    tmp0 = LEFT_SHIFT(tmp0, CONST_BITS + 1);
+
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 2], quantptr[DCTSIZE * 2]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 6], quantptr[DCTSIZE * 6]);
+
+    tmp2 = MULTIPLY(z2, FIX_1_847759065) + MULTIPLY(z3, -FIX_0_765366865);
+
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
+    z2 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
+    z3 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
+    z4 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
+
+    tmp0 = MULTIPLY(z1, -FIX_0_211164243) + /* sqrt(2) * ( c3-c1) */
+           MULTIPLY(z2,  FIX_1_451774981) + /* sqrt(2) * ( c3+c7) */
+           MULTIPLY(z3, -FIX_2_172734803) + /* sqrt(2) * (-c1-c5) */
+           MULTIPLY(z4,  FIX_1_061594337);  /* sqrt(2) * ( c5+c7) */
+
+    tmp2 = MULTIPLY(z1, -FIX_0_509795579) + /* sqrt(2) * (c7-c5) */
+           MULTIPLY(z2, -FIX_0_601344887) + /* sqrt(2) * (c5-c1) */
+           MULTIPLY(z3,  FIX_0_899976223) + /* sqrt(2) * (c3-c7) */
+           MULTIPLY(z4,  FIX_2_562915447);  /* sqrt(2) * (c1+c3) */
+
+    /* Final output stage */
+
+    wsptr[DCTSIZE * 0] =
+      (int)DESCALE(tmp10 + tmp2, CONST_BITS - PASS1_BITS + 1);
+    wsptr[DCTSIZE * 3] =
+      (int)DESCALE(tmp10 - tmp2, CONST_BITS - PASS1_BITS + 1);
+    wsptr[DCTSIZE * 1] =
+      (int)DESCALE(tmp12 + tmp0, CONST_BITS - PASS1_BITS + 1);
+    wsptr[DCTSIZE * 2] =
+      (int)DESCALE(tmp12 - tmp0, CONST_BITS - PASS1_BITS + 1);
+  }
+
+  /* Pass 2: process 4 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 4; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* It's not clear whether a zero row test is worthwhile here ... */
+
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[2] == 0 && wsptr[3] == 0 &&
+        wsptr[5] == 0 && wsptr[6] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero */
+      _JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0],
+                                                PASS1_BITS + 3) & RANGE_MASK];
+
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+      outptr[2] = dcval;
+      outptr[3] = dcval;
+
+      wsptr += DCTSIZE;         /* advance pointer to next row */
+      continue;
+    }
+#endif
+
+    /* Even part */
+
+    tmp0 = LEFT_SHIFT((JLONG)wsptr[0], CONST_BITS + 1);
+
+    tmp2 = MULTIPLY((JLONG)wsptr[2],  FIX_1_847759065) +
+           MULTIPLY((JLONG)wsptr[6], -FIX_0_765366865);
+
+    tmp10 = tmp0 + tmp2;
+    tmp12 = tmp0 - tmp2;
+
+    /* Odd part */
+
+    z1 = (JLONG)wsptr[7];
+    z2 = (JLONG)wsptr[5];
+    z3 = (JLONG)wsptr[3];
+    z4 = (JLONG)wsptr[1];
+
+    tmp0 = MULTIPLY(z1, -FIX_0_211164243) + /* sqrt(2) * ( c3-c1) */
+           MULTIPLY(z2,  FIX_1_451774981) + /* sqrt(2) * ( c3+c7) */
+           MULTIPLY(z3, -FIX_2_172734803) + /* sqrt(2) * (-c1-c5) */
+           MULTIPLY(z4,  FIX_1_061594337);  /* sqrt(2) * ( c5+c7) */
+
+    tmp2 = MULTIPLY(z1, -FIX_0_509795579) + /* sqrt(2) * (c7-c5) */
+           MULTIPLY(z2, -FIX_0_601344887) + /* sqrt(2) * (c5-c1) */
+           MULTIPLY(z3, FIX_0_899976223) +  /* sqrt(2) * (c3-c7) */
+           MULTIPLY(z4, FIX_2_562915447);   /* sqrt(2) * (c1+c3) */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int)DESCALE(tmp10 + tmp2,
+                                         CONST_BITS + PASS1_BITS + 3 + 1) &
+                            RANGE_MASK];
+    outptr[3] = range_limit[(int)DESCALE(tmp10 - tmp2,
+                                         CONST_BITS + PASS1_BITS + 3 + 1) &
+                            RANGE_MASK];
+    outptr[1] = range_limit[(int)DESCALE(tmp12 + tmp0,
+                                         CONST_BITS + PASS1_BITS + 3 + 1) &
+                            RANGE_MASK];
+    outptr[2] = range_limit[(int)DESCALE(tmp12 - tmp0,
+                                         CONST_BITS + PASS1_BITS + 3 + 1) &
+                            RANGE_MASK];
+
+    wsptr += DCTSIZE;           /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 2x2 output block.
+ */
+
+GLOBAL(void)
+_jpeg_idct_2x2(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+               JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+               JDIMENSION output_col)
+{
+  JLONG tmp0, tmp10, z1;
+  JCOEFPTR inptr;
+  ISLOW_MULT_TYPE *quantptr;
+  int *wsptr;
+  _JSAMPROW outptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  int ctr;
+  int workspace[DCTSIZE * 2];   /* buffers data between passes */
+  SHIFT_TEMPS
+
+  /* Pass 1: process columns from input, store into work array. */
+
+  inptr = coef_block;
+  quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
+  wsptr = workspace;
+  for (ctr = DCTSIZE; ctr > 0; inptr++, quantptr++, wsptr++, ctr--) {
+    /* Don't bother to process columns 2,4,6 */
+    if (ctr == DCTSIZE - 2 || ctr == DCTSIZE - 4 || ctr == DCTSIZE - 6)
+      continue;
+    if (inptr[DCTSIZE * 1] == 0 && inptr[DCTSIZE * 3] == 0 &&
+        inptr[DCTSIZE * 5] == 0 && inptr[DCTSIZE * 7] == 0) {
+      /* AC terms all zero; we need not examine terms 2,4,6 for 2x2 output */
+      int dcval = LEFT_SHIFT(DEQUANTIZE(inptr[DCTSIZE * 0],
+                             quantptr[DCTSIZE * 0]), PASS1_BITS);
+
+      wsptr[DCTSIZE * 0] = dcval;
+      wsptr[DCTSIZE * 1] = dcval;
+
+      continue;
+    }
+
+    /* Even part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 0], quantptr[DCTSIZE * 0]);
+    tmp10 = LEFT_SHIFT(z1, CONST_BITS + 2);
+
+    /* Odd part */
+
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 7], quantptr[DCTSIZE * 7]);
+    tmp0 = MULTIPLY(z1, -FIX_0_720959822);  /* sqrt(2) * ( c7-c5+c3-c1) */
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 5], quantptr[DCTSIZE * 5]);
+    tmp0 += MULTIPLY(z1, FIX_0_850430095);  /* sqrt(2) * (-c1+c3+c5+c7) */
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 3], quantptr[DCTSIZE * 3]);
+    tmp0 += MULTIPLY(z1, -FIX_1_272758580); /* sqrt(2) * (-c1+c3-c5-c7) */
+    z1 = DEQUANTIZE(inptr[DCTSIZE * 1], quantptr[DCTSIZE * 1]);
+    tmp0 += MULTIPLY(z1, FIX_3_624509785);  /* sqrt(2) * ( c1+c3+c5+c7) */
+
+    /* Final output stage */
+
+    wsptr[DCTSIZE * 0] =
+      (int)DESCALE(tmp10 + tmp0, CONST_BITS - PASS1_BITS + 2);
+    wsptr[DCTSIZE * 1] =
+      (int)DESCALE(tmp10 - tmp0, CONST_BITS - PASS1_BITS + 2);
+  }
+
+  /* Pass 2: process 2 rows from work array, store into output array. */
+
+  wsptr = workspace;
+  for (ctr = 0; ctr < 2; ctr++) {
+    outptr = output_buf[ctr] + output_col;
+    /* It's not clear whether a zero row test is worthwhile here ... */
+
+#ifndef NO_ZERO_ROW_TEST
+    if (wsptr[1] == 0 && wsptr[3] == 0 && wsptr[5] == 0 && wsptr[7] == 0) {
+      /* AC terms all zero */
+      _JSAMPLE dcval = range_limit[(int)DESCALE((JLONG)wsptr[0],
+                                                PASS1_BITS + 3) & RANGE_MASK];
+
+      outptr[0] = dcval;
+      outptr[1] = dcval;
+
+      wsptr += DCTSIZE;         /* advance pointer to next row */
+      continue;
+    }
+#endif
+
+    /* Even part */
+
+    tmp10 = LEFT_SHIFT((JLONG)wsptr[0], CONST_BITS + 2);
+
+    /* Odd part */
+
+    tmp0 = MULTIPLY((JLONG)wsptr[7], -FIX_0_720959822) + /* sqrt(2) * ( c7-c5+c3-c1) */
+           MULTIPLY((JLONG)wsptr[5],  FIX_0_850430095) + /* sqrt(2) * (-c1+c3+c5+c7) */
+           MULTIPLY((JLONG)wsptr[3], -FIX_1_272758580) + /* sqrt(2) * (-c1+c3-c5-c7) */
+           MULTIPLY((JLONG)wsptr[1],  FIX_3_624509785);  /* sqrt(2) * ( c1+c3+c5+c7) */
+
+    /* Final output stage */
+
+    outptr[0] = range_limit[(int)DESCALE(tmp10 + tmp0,
+                                         CONST_BITS + PASS1_BITS + 3 + 2) &
+                            RANGE_MASK];
+    outptr[1] = range_limit[(int)DESCALE(tmp10 - tmp0,
+                                         CONST_BITS + PASS1_BITS + 3 + 2) &
+                            RANGE_MASK];
+
+    wsptr += DCTSIZE;           /* advance pointer to next row */
+  }
+}
+
+
+/*
+ * Perform dequantization and inverse DCT on one block of coefficients,
+ * producing a reduced-size 1x1 output block.
+ */
+
+GLOBAL(void)
+_jpeg_idct_1x1(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+               JCOEFPTR coef_block, _JSAMPARRAY output_buf,
+               JDIMENSION output_col)
+{
+  int dcval;
+  ISLOW_MULT_TYPE *quantptr;
+  _JSAMPLE *range_limit = IDCT_range_limit(cinfo);
+  SHIFT_TEMPS
+
+  /* We hardly need an inverse DCT routine for this: just take the
+   * average pixel value, which is one-eighth of the DC coefficient.
+   */
+  quantptr = (ISLOW_MULT_TYPE *)compptr->dct_table;
+  dcval = DEQUANTIZE(coef_block[0], quantptr[0]);
+  dcval = (int)DESCALE((JLONG)dcval, 3);
+
+  output_buf[0][output_col] = range_limit[dcval & RANGE_MASK];
+}
+
+#endif /* IDCT_SCALING_SUPPORTED */
diff --git a/thirdparty/libjpeg-turbo/src/jinclude.h b/thirdparty/libjpeg-turbo/src/jinclude.h
new file mode 100644
index 00000000000..56e7a4b296d
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jinclude.h
@@ -0,0 +1,147 @@
+/*
+ * jinclude.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1994, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022-2023, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file exists to provide a single place to fix any problems with
+ * including the wrong system include files.  (Common problems are taken
+ * care of by the standard jconfig symbols, but on really weird systems
+ * you may have to edit this file.)
+ *
+ * NOTE: this file is NOT intended to be included by applications using the
+ * JPEG library.  Most applications need only include jpeglib.h.
+ */
+
+#ifndef __JINCLUDE_H__
+#define __JINCLUDE_H__
+
+/* Include auto-config file to find out which system include files we need. */
+
+#include "jconfig.h"            /* auto configuration options */
+#include "jconfigint.h"
+#define JCONFIG_INCLUDED        /* so that jpeglib.h doesn't do it again */
+
+/*
+ * Note that the core JPEG library does not require <stdio.h>;
+ * only the default error handler and data source/destination modules do.
+ * But we must pull it in because of the references to FILE in jpeglib.h.
+ * You can remove those references if you want to compile without <stdio.h>.
+ */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+/*
+ * These macros/inline functions facilitate using Microsoft's "safe string"
+ * functions with Visual Studio builds without the need to scatter #ifdefs
+ * throughout the code base.
+ */
+
+
+#ifdef _MSC_VER
+
+#define SNPRINTF(str, n, format, ...) \
+  _snprintf_s(str, n, _TRUNCATE, format, ##__VA_ARGS__)
+
+#else
+
+#define SNPRINTF  snprintf
+
+#endif
+
+
+#ifndef NO_GETENV
+
+#ifdef _MSC_VER
+
+static INLINE int GETENV_S(char *buffer, size_t buffer_size, const char *name)
+{
+  size_t required_size;
+
+  return (int)getenv_s(&required_size, buffer, buffer_size, name);
+}
+
+#else /* _MSC_VER */
+
+#include <errno.h>
+
+/* This provides a similar interface to the Microsoft/C11 getenv_s() function,
+ * but other than parameter validation, it has no advantages over getenv().
+ */
+
+static INLINE int GETENV_S(char *buffer, size_t buffer_size, const char *name)
+{
+  char *env;
+
+  if (!buffer) {
+    if (buffer_size == 0)
+      return 0;
+    else
+      return (errno = EINVAL);
+  }
+  if (buffer_size == 0)
+    return (errno = EINVAL);
+  if (!name) {
+    *buffer = 0;
+    return 0;
+  }
+
+  env = getenv(name);
+  if (!env)
+  {
+    *buffer = 0;
+    return 0;
+  }
+
+  if (strlen(env) + 1 > buffer_size) {
+    *buffer = 0;
+    return ERANGE;
+  }
+
+  strncpy(buffer, env, buffer_size);
+
+  return 0;
+}
+
+#endif /* _MSC_VER */
+
+#endif /* NO_GETENV */
+
+
+#ifndef NO_PUTENV
+
+#ifdef _WIN32
+
+#define PUTENV_S(name, value)  _putenv_s(name, value)
+
+#else
+
+#include <errno.h>
+
+/* This provides a similar interface to the Microsoft _putenv_s() function, but
+ * other than parameter validation, it has no advantages over setenv().
+ */
+
+static INLINE int PUTENV_S(const char *name, const char *value)
+{
+  if (!name || !value)
+    return (errno = EINVAL);
+
+  setenv(name, value, 1);
+
+  return errno;
+}
+
+#endif /* _WIN32 */
+
+#endif /* NO_PUTENV */
+
+
+#endif /* JINCLUDE_H */
diff --git a/thirdparty/libjpeg-turbo/src/jmemmgr.c b/thirdparty/libjpeg-turbo/src/jmemmgr.c
new file mode 100644
index 00000000000..eb199c8b3de
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jmemmgr.c
@@ -0,0 +1,1290 @@
+/*
+ * jmemmgr.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2016, 2021-2022, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains the JPEG system-independent memory management
+ * routines.  This code is usable across a wide variety of machines; most
+ * of the system dependencies have been isolated in a separate file.
+ * The major functions provided here are:
+ *   * pool-based allocation and freeing of memory;
+ *   * policy decisions about how to divide available memory among the
+ *     virtual arrays;
+ *   * control logic for swapping virtual arrays between main memory and
+ *     backing storage.
+ * The separate system-dependent file provides the actual backing-storage
+ * access code, and it contains the policy decision about how much total
+ * main memory to use.
+ * This file is system-dependent in the sense that some of its functions
+ * are unnecessary in some systems.  For example, if there is enough virtual
+ * memory so that backing storage will never be used, much of the virtual
+ * array control logic could be removed.  (Of course, if you have that much
+ * memory then you shouldn't care about a little bit of unused code...)
+ */
+
+#define JPEG_INTERNALS
+#define AM_MEMORY_MANAGER       /* we define jvirt_Xarray_control structs */
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jmemsys.h"            /* import the system-dependent declarations */
+#if !defined(_MSC_VER) || _MSC_VER > 1600
+#include <stdint.h>
+#endif
+#include <limits.h>
+
+
+LOCAL(size_t)
+round_up_pow2(size_t a, size_t b)
+/* a rounded up to the next multiple of b, i.e. ceil(a/b)*b */
+/* Assumes a >= 0, b > 0, and b is a power of 2 */
+{
+  return ((a + b - 1) & (~(b - 1)));
+}
+
+
+/*
+ * Some important notes:
+ *   The allocation routines provided here must never return NULL.
+ *   They should exit to error_exit if unsuccessful.
+ *
+ *   It's not a good idea to try to merge the sarray and barray routines,
+ *   even though they are textually almost the same, because samples are
+ *   usually stored as bytes while coefficients are shorts or ints.  Thus,
+ *   in machines where byte pointers have a different representation from
+ *   word pointers, the resulting machine code could not be the same.
+ */
+
+
+/*
+ * Many machines require storage alignment: longs must start on 4-byte
+ * boundaries, doubles on 8-byte boundaries, etc.  On such machines, malloc()
+ * always returns pointers that are multiples of the worst-case alignment
+ * requirement, and we had better do so too.
+ * There isn't any really portable way to determine the worst-case alignment
+ * requirement.  This module assumes that the alignment requirement is
+ * multiples of ALIGN_SIZE.
+ * By default, we define ALIGN_SIZE as the maximum of sizeof(double) and
+ * sizeof(void *).  This is necessary on some workstations (where doubles
+ * really do need 8-byte alignment) and will work fine on nearly everything.
+ * We use the maximum of sizeof(double) and sizeof(void *) since sizeof(double)
+ * may be insufficient, for example, on CHERI-enabled platforms with 16-byte
+ * pointers and a 16-byte alignment requirement.  If your machine has lesser
+ * alignment needs, you can save a few bytes by making ALIGN_SIZE smaller.
+ * The only place I know of where this will NOT work is certain Macintosh
+ * 680x0 compilers that define double as a 10-byte IEEE extended float.
+ * Doing 10-byte alignment is counterproductive because longwords won't be
+ * aligned well.  Put "#define ALIGN_SIZE 4" in jconfig.h if you have
+ * such a compiler.
+ */
+
+#ifndef ALIGN_SIZE              /* so can override from jconfig.h */
+#ifndef WITH_SIMD
+#define ALIGN_SIZE  MAX(sizeof(void *), sizeof(double))
+#else
+#define ALIGN_SIZE  32 /* Most of the SIMD instructions we support require
+                          16-byte (128-bit) alignment, but AVX2 requires
+                          32-byte alignment. */
+#endif
+#endif
+
+/*
+ * We allocate objects from "pools", where each pool is gotten with a single
+ * request to jpeg_get_small() or jpeg_get_large().  There is no per-object
+ * overhead within a pool, except for alignment padding.  Each pool has a
+ * header with a link to the next pool of the same class.
+ * Small and large pool headers are identical.
+ */
+
+typedef struct small_pool_struct *small_pool_ptr;
+
+typedef struct small_pool_struct {
+  small_pool_ptr next;          /* next in list of pools */
+  size_t bytes_used;            /* how many bytes already used within pool */
+  size_t bytes_left;            /* bytes still available in this pool */
+} small_pool_hdr;
+
+typedef struct large_pool_struct *large_pool_ptr;
+
+typedef struct large_pool_struct {
+  large_pool_ptr next;          /* next in list of pools */
+  size_t bytes_used;            /* how many bytes already used within pool */
+  size_t bytes_left;            /* bytes still available in this pool */
+} large_pool_hdr;
+
+/*
+ * Here is the full definition of a memory manager object.
+ */
+
+typedef struct {
+  struct jpeg_memory_mgr pub;   /* public fields */
+
+  /* Each pool identifier (lifetime class) names a linked list of pools. */
+  small_pool_ptr small_list[JPOOL_NUMPOOLS];
+  large_pool_ptr large_list[JPOOL_NUMPOOLS];
+
+  /* Since we only have one lifetime class of virtual arrays, only one
+   * linked list is necessary (for each datatype).  Note that the virtual
+   * array control blocks being linked together are actually stored somewhere
+   * in the small-pool list.
+   */
+  jvirt_sarray_ptr virt_sarray_list;
+  jvirt_barray_ptr virt_barray_list;
+
+  /* This counts total space obtained from jpeg_get_small/large */
+  size_t total_space_allocated;
+
+  /* alloc_sarray and alloc_barray set this value for use by virtual
+   * array routines.
+   */
+  JDIMENSION last_rowsperchunk; /* from most recent alloc_sarray/barray */
+} my_memory_mgr;
+
+typedef my_memory_mgr *my_mem_ptr;
+
+
+/*
+ * The control blocks for virtual arrays.
+ * Note that these blocks are allocated in the "small" pool area.
+ * System-dependent info for the associated backing store (if any) is hidden
+ * inside the backing_store_info struct.
+ */
+
+struct jvirt_sarray_control {
+  JSAMPARRAY mem_buffer;        /* => the in-memory buffer (if
+                                   cinfo->data_precision > 8, then this is
+                                   actually a J12SAMPARRAY or a
+                                   J16SAMPARRAY) */
+  JDIMENSION rows_in_array;     /* total virtual array height */
+  JDIMENSION samplesperrow;     /* width of array (and of memory buffer) */
+  JDIMENSION maxaccess;         /* max rows accessed by access_virt_sarray */
+  JDIMENSION rows_in_mem;       /* height of memory buffer */
+  JDIMENSION rowsperchunk;      /* allocation chunk size in mem_buffer */
+  JDIMENSION cur_start_row;     /* first logical row # in the buffer */
+  JDIMENSION first_undef_row;   /* row # of first uninitialized row */
+  boolean pre_zero;             /* pre-zero mode requested? */
+  boolean dirty;                /* do current buffer contents need written? */
+  boolean b_s_open;             /* is backing-store data valid? */
+  jvirt_sarray_ptr next;        /* link to next virtual sarray control block */
+  backing_store_info b_s_info;  /* System-dependent control info */
+};
+
+struct jvirt_barray_control {
+  JBLOCKARRAY mem_buffer;       /* => the in-memory buffer */
+  JDIMENSION rows_in_array;     /* total virtual array height */
+  JDIMENSION blocksperrow;      /* width of array (and of memory buffer) */
+  JDIMENSION maxaccess;         /* max rows accessed by access_virt_barray */
+  JDIMENSION rows_in_mem;       /* height of memory buffer */
+  JDIMENSION rowsperchunk;      /* allocation chunk size in mem_buffer */
+  JDIMENSION cur_start_row;     /* first logical row # in the buffer */
+  JDIMENSION first_undef_row;   /* row # of first uninitialized row */
+  boolean pre_zero;             /* pre-zero mode requested? */
+  boolean dirty;                /* do current buffer contents need written? */
+  boolean b_s_open;             /* is backing-store data valid? */
+  jvirt_barray_ptr next;        /* link to next virtual barray control block */
+  backing_store_info b_s_info;  /* System-dependent control info */
+};
+
+
+#ifdef MEM_STATS                /* optional extra stuff for statistics */
+
+LOCAL(void)
+print_mem_stats(j_common_ptr cinfo, int pool_id)
+{
+  my_mem_ptr mem = (my_mem_ptr)cinfo->mem;
+  small_pool_ptr shdr_ptr;
+  large_pool_ptr lhdr_ptr;
+
+  /* Since this is only a debugging stub, we can cheat a little by using
+   * fprintf directly rather than going through the trace message code.
+   * This is helpful because message parm array can't handle longs.
+   */
+  fprintf(stderr, "Freeing pool %d, total space = %ld\n",
+          pool_id, mem->total_space_allocated);
+
+  for (lhdr_ptr = mem->large_list[pool_id]; lhdr_ptr != NULL;
+       lhdr_ptr = lhdr_ptr->next) {
+    fprintf(stderr, "  Large chunk used %ld\n", (long)lhdr_ptr->bytes_used);
+  }
+
+  for (shdr_ptr = mem->small_list[pool_id]; shdr_ptr != NULL;
+       shdr_ptr = shdr_ptr->next) {
+    fprintf(stderr, "  Small chunk used %ld free %ld\n",
+            (long)shdr_ptr->bytes_used, (long)shdr_ptr->bytes_left);
+  }
+}
+
+#endif /* MEM_STATS */
+
+
+LOCAL(void)
+out_of_memory(j_common_ptr cinfo, int which)
+/* Report an out-of-memory error and stop execution */
+/* If we compiled MEM_STATS support, report alloc requests before dying */
+{
+#ifdef MEM_STATS
+  cinfo->err->trace_level = 2;  /* force self_destruct to report stats */
+#endif
+  ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, which);
+}
+
+
+/*
+ * Allocation of "small" objects.
+ *
+ * For these, we use pooled storage.  When a new pool must be created,
+ * we try to get enough space for the current request plus a "slop" factor,
+ * where the slop will be the amount of leftover space in the new pool.
+ * The speed vs. space tradeoff is largely determined by the slop values.
+ * A different slop value is provided for each pool class (lifetime),
+ * and we also distinguish the first pool of a class from later ones.
+ * NOTE: the values given work fairly well on both 16- and 32-bit-int
+ * machines, but may be too small if longs are 64 bits or more.
+ *
+ * Since we do not know what alignment malloc() gives us, we have to
+ * allocate ALIGN_SIZE-1 extra space per pool to have room for alignment
+ * adjustment.
+ */
+
+static const size_t first_pool_slop[JPOOL_NUMPOOLS] = {
+  1600,                         /* first PERMANENT pool */
+  16000                         /* first IMAGE pool */
+};
+
+static const size_t extra_pool_slop[JPOOL_NUMPOOLS] = {
+  0,                            /* additional PERMANENT pools */
+  5000                          /* additional IMAGE pools */
+};
+
+#define MIN_SLOP  50            /* greater than 0 to avoid futile looping */
+
+
+METHODDEF(void *)
+alloc_small(j_common_ptr cinfo, int pool_id, size_t sizeofobject)
+/* Allocate a "small" object */
+{
+  my_mem_ptr mem = (my_mem_ptr)cinfo->mem;
+  small_pool_ptr hdr_ptr, prev_hdr_ptr;
+  char *data_ptr;
+  size_t min_request, slop;
+
+  /*
+   * Round up the requested size to a multiple of ALIGN_SIZE in order
+   * to assure alignment for the next object allocated in the same pool
+   * and so that algorithms can straddle outside the proper area up
+   * to the next alignment.
+   */
+  if (sizeofobject > MAX_ALLOC_CHUNK) {
+    /* This prevents overflow/wrap-around in round_up_pow2() if sizeofobject
+       is close to SIZE_MAX. */
+    out_of_memory(cinfo, 7);
+  }
+  sizeofobject = round_up_pow2(sizeofobject, ALIGN_SIZE);
+
+  /* Check for unsatisfiable request (do now to ensure no overflow below) */
+  if ((sizeof(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) >
+      MAX_ALLOC_CHUNK)
+    out_of_memory(cinfo, 1);    /* request exceeds malloc's ability */
+
+  /* See if space is available in any existing pool */
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */
+  prev_hdr_ptr = NULL;
+  hdr_ptr = mem->small_list[pool_id];
+  while (hdr_ptr != NULL) {
+    if (hdr_ptr->bytes_left >= sizeofobject)
+      break;                    /* found pool with enough space */
+    prev_hdr_ptr = hdr_ptr;
+    hdr_ptr = hdr_ptr->next;
+  }
+
+  /* Time to make a new pool? */
+  if (hdr_ptr == NULL) {
+    /* min_request is what we need now, slop is what will be leftover */
+    min_request = sizeof(small_pool_hdr) + sizeofobject + ALIGN_SIZE - 1;
+    if (prev_hdr_ptr == NULL)   /* first pool in class? */
+      slop = first_pool_slop[pool_id];
+    else
+      slop = extra_pool_slop[pool_id];
+    /* Don't ask for more than MAX_ALLOC_CHUNK */
+    if (slop > (size_t)(MAX_ALLOC_CHUNK - min_request))
+      slop = (size_t)(MAX_ALLOC_CHUNK - min_request);
+    /* Try to get space, if fail reduce slop and try again */
+    for (;;) {
+      hdr_ptr = (small_pool_ptr)jpeg_get_small(cinfo, min_request + slop);
+      if (hdr_ptr != NULL)
+        break;
+      slop /= 2;
+      if (slop < MIN_SLOP)      /* give up when it gets real small */
+        out_of_memory(cinfo, 2); /* jpeg_get_small failed */
+    }
+    mem->total_space_allocated += min_request + slop;
+    /* Success, initialize the new pool header and add to end of list */
+    hdr_ptr->next = NULL;
+    hdr_ptr->bytes_used = 0;
+    hdr_ptr->bytes_left = sizeofobject + slop;
+    if (prev_hdr_ptr == NULL)   /* first pool in class? */
+      mem->small_list[pool_id] = hdr_ptr;
+    else
+      prev_hdr_ptr->next = hdr_ptr;
+  }
+
+  /* OK, allocate the object from the current pool */
+  data_ptr = (char *)hdr_ptr; /* point to first data byte in pool... */
+  data_ptr += sizeof(small_pool_hdr); /* ...by skipping the header... */
+  if ((size_t)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */
+    data_ptr += ALIGN_SIZE - (size_t)data_ptr % ALIGN_SIZE;
+  data_ptr += hdr_ptr->bytes_used; /* point to place for object */
+  hdr_ptr->bytes_used += sizeofobject;
+  hdr_ptr->bytes_left -= sizeofobject;
+
+  return (void *)data_ptr;
+}
+
+
+/*
+ * Allocation of "large" objects.
+ *
+ * The external semantics of these are the same as "small" objects.  However,
+ * the pool management heuristics are quite different.  We assume that each
+ * request is large enough that it may as well be passed directly to
+ * jpeg_get_large; the pool management just links everything together
+ * so that we can free it all on demand.
+ * Note: the major use of "large" objects is in
+ * JSAMPARRAY/J12SAMPARRAY/J16SAMPARRAY and JBLOCKARRAY structures.  The
+ * routines that create these structures (see below) deliberately bunch rows
+ * together to ensure a large request size.
+ */
+
+METHODDEF(void *)
+alloc_large(j_common_ptr cinfo, int pool_id, size_t sizeofobject)
+/* Allocate a "large" object */
+{
+  my_mem_ptr mem = (my_mem_ptr)cinfo->mem;
+  large_pool_ptr hdr_ptr;
+  char *data_ptr;
+
+  /*
+   * Round up the requested size to a multiple of ALIGN_SIZE so that
+   * algorithms can straddle outside the proper area up to the next
+   * alignment.
+   */
+  if (sizeofobject > MAX_ALLOC_CHUNK) {
+    /* This prevents overflow/wrap-around in round_up_pow2() if sizeofobject
+       is close to SIZE_MAX. */
+    out_of_memory(cinfo, 8);
+  }
+  sizeofobject = round_up_pow2(sizeofobject, ALIGN_SIZE);
+
+  /* Check for unsatisfiable request (do now to ensure no overflow below) */
+  if ((sizeof(large_pool_hdr) + sizeofobject + ALIGN_SIZE - 1) >
+      MAX_ALLOC_CHUNK)
+    out_of_memory(cinfo, 3);    /* request exceeds malloc's ability */
+
+  /* Always make a new pool */
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */
+
+  hdr_ptr = (large_pool_ptr)jpeg_get_large(cinfo, sizeofobject +
+                                           sizeof(large_pool_hdr) +
+                                           ALIGN_SIZE - 1);
+  if (hdr_ptr == NULL)
+    out_of_memory(cinfo, 4);    /* jpeg_get_large failed */
+  mem->total_space_allocated += sizeofobject + sizeof(large_pool_hdr) +
+                                ALIGN_SIZE - 1;
+
+  /* Success, initialize the new pool header and add to list */
+  hdr_ptr->next = mem->large_list[pool_id];
+  /* We maintain space counts in each pool header for statistical purposes,
+   * even though they are not needed for allocation.
+   */
+  hdr_ptr->bytes_used = sizeofobject;
+  hdr_ptr->bytes_left = 0;
+  mem->large_list[pool_id] = hdr_ptr;
+
+  data_ptr = (char *)hdr_ptr; /* point to first data byte in pool... */
+  data_ptr += sizeof(small_pool_hdr); /* ...by skipping the header... */
+  if ((size_t)data_ptr % ALIGN_SIZE) /* ...and adjust for alignment */
+    data_ptr += ALIGN_SIZE - (size_t)data_ptr % ALIGN_SIZE;
+
+  return (void *)data_ptr;
+}
+
+
+/*
+ * Creation of 2-D sample arrays.
+ *
+ * To minimize allocation overhead and to allow I/O of large contiguous
+ * blocks, we allocate the sample rows in groups of as many rows as possible
+ * without exceeding MAX_ALLOC_CHUNK total bytes per allocation request.
+ * NB: the virtual array control routines, later in this file, know about
+ * this chunking of rows.  The rowsperchunk value is left in the mem manager
+ * object so that it can be saved away if this sarray is the workspace for
+ * a virtual array.
+ *
+ * Since we are often upsampling with a factor 2, we align the size (not
+ * the start) to 2 * ALIGN_SIZE so that the upsampling routines don't have
+ * to be as careful about size.
+ */
+
+METHODDEF(JSAMPARRAY)
+alloc_sarray(j_common_ptr cinfo, int pool_id, JDIMENSION samplesperrow,
+             JDIMENSION numrows)
+/* Allocate a 2-D sample array */
+{
+  my_mem_ptr mem = (my_mem_ptr)cinfo->mem;
+  JSAMPARRAY result;
+  JSAMPROW workspace;
+  JDIMENSION rowsperchunk, currow, i;
+  long ltemp;
+  J12SAMPARRAY result12;
+  J12SAMPROW workspace12;
+#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
+  J16SAMPARRAY result16;
+  J16SAMPROW workspace16;
+#endif
+  int data_precision = cinfo->is_decompressor ?
+                        ((j_decompress_ptr)cinfo)->data_precision :
+                        ((j_compress_ptr)cinfo)->data_precision;
+  size_t sample_size = data_precision > 12 ?
+                       sizeof(J16SAMPLE) : (data_precision > 8 ?
+                                            sizeof(J12SAMPLE) :
+                                            sizeof(JSAMPLE));
+
+  /* Make sure each row is properly aligned */
+  if ((ALIGN_SIZE % sample_size) != 0)
+    out_of_memory(cinfo, 5);    /* safety check */
+
+  if (samplesperrow > MAX_ALLOC_CHUNK) {
+    /* This prevents overflow/wrap-around in round_up_pow2() if sizeofobject
+       is close to SIZE_MAX. */
+    out_of_memory(cinfo, 9);
+  }
+  samplesperrow = (JDIMENSION)round_up_pow2(samplesperrow, (2 * ALIGN_SIZE) /
+                                                           sample_size);
+
+  /* Calculate max # of rows allowed in one allocation chunk */
+  ltemp = (MAX_ALLOC_CHUNK - sizeof(large_pool_hdr)) /
+          ((long)samplesperrow * (long)sample_size);
+  if (ltemp <= 0)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+  if (ltemp < (long)numrows)
+    rowsperchunk = (JDIMENSION)ltemp;
+  else
+    rowsperchunk = numrows;
+  mem->last_rowsperchunk = rowsperchunk;
+
+  if (data_precision <= 8) {
+    /* Get space for row pointers (small object) */
+    result = (JSAMPARRAY)alloc_small(cinfo, pool_id,
+                                     (size_t)(numrows * sizeof(JSAMPROW)));
+
+    /* Get the rows themselves (large objects) */
+    currow = 0;
+    while (currow < numrows) {
+      rowsperchunk = MIN(rowsperchunk, numrows - currow);
+      workspace = (JSAMPROW)alloc_large(cinfo, pool_id,
+        (size_t)((size_t)rowsperchunk * (size_t)samplesperrow * sample_size));
+      for (i = rowsperchunk; i > 0; i--) {
+        result[currow++] = workspace;
+        workspace += samplesperrow;
+      }
+    }
+
+    return result;
+  } else if (data_precision <= 12) {
+    /* Get space for row pointers (small object) */
+    result12 = (J12SAMPARRAY)alloc_small(cinfo, pool_id,
+                                         (size_t)(numrows *
+                                                  sizeof(J12SAMPROW)));
+
+    /* Get the rows themselves (large objects) */
+    currow = 0;
+    while (currow < numrows) {
+      rowsperchunk = MIN(rowsperchunk, numrows - currow);
+      workspace12 = (J12SAMPROW)alloc_large(cinfo, pool_id,
+        (size_t)((size_t)rowsperchunk * (size_t)samplesperrow * sample_size));
+      for (i = rowsperchunk; i > 0; i--) {
+        result12[currow++] = workspace12;
+        workspace12 += samplesperrow;
+      }
+    }
+
+    return (JSAMPARRAY)result12;
+  } else {
+#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
+    /* Get space for row pointers (small object) */
+    result16 = (J16SAMPARRAY)alloc_small(cinfo, pool_id,
+                                         (size_t)(numrows *
+                                                  sizeof(J16SAMPROW)));
+
+    /* Get the rows themselves (large objects) */
+    currow = 0;
+    while (currow < numrows) {
+      rowsperchunk = MIN(rowsperchunk, numrows - currow);
+      workspace16 = (J16SAMPROW)alloc_large(cinfo, pool_id,
+        (size_t)((size_t)rowsperchunk * (size_t)samplesperrow * sample_size));
+      for (i = rowsperchunk; i > 0; i--) {
+        result16[currow++] = workspace16;
+        workspace16 += samplesperrow;
+      }
+    }
+
+    return (JSAMPARRAY)result16;
+#else
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, data_precision);
+    return NULL;
+#endif
+  }
+}
+
+
+/*
+ * Creation of 2-D coefficient-block arrays.
+ * This is essentially the same as the code for sample arrays, above.
+ */
+
+METHODDEF(JBLOCKARRAY)
+alloc_barray(j_common_ptr cinfo, int pool_id, JDIMENSION blocksperrow,
+             JDIMENSION numrows)
+/* Allocate a 2-D coefficient-block array */
+{
+  my_mem_ptr mem = (my_mem_ptr)cinfo->mem;
+  JBLOCKARRAY result;
+  JBLOCKROW workspace;
+  JDIMENSION rowsperchunk, currow, i;
+  long ltemp;
+
+  /* Make sure each row is properly aligned */
+  if ((sizeof(JBLOCK) % ALIGN_SIZE) != 0)
+    out_of_memory(cinfo, 6);    /* safety check */
+
+  /* Calculate max # of rows allowed in one allocation chunk */
+  ltemp = (MAX_ALLOC_CHUNK - sizeof(large_pool_hdr)) /
+          ((long)blocksperrow * sizeof(JBLOCK));
+  if (ltemp <= 0)
+    ERREXIT(cinfo, JERR_WIDTH_OVERFLOW);
+  if (ltemp < (long)numrows)
+    rowsperchunk = (JDIMENSION)ltemp;
+  else
+    rowsperchunk = numrows;
+  mem->last_rowsperchunk = rowsperchunk;
+
+  /* Get space for row pointers (small object) */
+  result = (JBLOCKARRAY)alloc_small(cinfo, pool_id,
+                                    (size_t)(numrows * sizeof(JBLOCKROW)));
+
+  /* Get the rows themselves (large objects) */
+  currow = 0;
+  while (currow < numrows) {
+    rowsperchunk = MIN(rowsperchunk, numrows - currow);
+    workspace = (JBLOCKROW)alloc_large(cinfo, pool_id,
+        (size_t)((size_t)rowsperchunk * (size_t)blocksperrow *
+                  sizeof(JBLOCK)));
+    for (i = rowsperchunk; i > 0; i--) {
+      result[currow++] = workspace;
+      workspace += blocksperrow;
+    }
+  }
+
+  return result;
+}
+
+
+/*
+ * About virtual array management:
+ *
+ * The above "normal" array routines are only used to allocate strip buffers
+ * (as wide as the image, but just a few rows high).  Full-image-sized buffers
+ * are handled as "virtual" arrays.  The array is still accessed a strip at a
+ * time, but the memory manager must save the whole array for repeated
+ * accesses.  The intended implementation is that there is a strip buffer in
+ * memory (as high as is possible given the desired memory limit), plus a
+ * backing file that holds the rest of the array.
+ *
+ * The request_virt_array routines are told the total size of the image and
+ * the maximum number of rows that will be accessed at once.  The in-memory
+ * buffer must be at least as large as the maxaccess value.
+ *
+ * The request routines create control blocks but not the in-memory buffers.
+ * That is postponed until realize_virt_arrays is called.  At that time the
+ * total amount of space needed is known (approximately, anyway), so free
+ * memory can be divided up fairly.
+ *
+ * The access_virt_array routines are responsible for making a specific strip
+ * area accessible (after reading or writing the backing file, if necessary).
+ * Note that the access routines are told whether the caller intends to modify
+ * the accessed strip; during a read-only pass this saves having to rewrite
+ * data to disk.  The access routines are also responsible for pre-zeroing
+ * any newly accessed rows, if pre-zeroing was requested.
+ *
+ * In current usage, the access requests are usually for nonoverlapping
+ * strips; that is, successive access start_row numbers differ by exactly
+ * num_rows = maxaccess.  This means we can get good performance with simple
+ * buffer dump/reload logic, by making the in-memory buffer be a multiple
+ * of the access height; then there will never be accesses across bufferload
+ * boundaries.  The code will still work with overlapping access requests,
+ * but it doesn't handle bufferload overlaps very efficiently.
+ */
+
+
+METHODDEF(jvirt_sarray_ptr)
+request_virt_sarray(j_common_ptr cinfo, int pool_id, boolean pre_zero,
+                    JDIMENSION samplesperrow, JDIMENSION numrows,
+                    JDIMENSION maxaccess)
+/* Request a virtual 2-D sample array */
+{
+  my_mem_ptr mem = (my_mem_ptr)cinfo->mem;
+  jvirt_sarray_ptr result;
+
+  /* Only IMAGE-lifetime virtual arrays are currently supported */
+  if (pool_id != JPOOL_IMAGE)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */
+
+  /* get control block */
+  result = (jvirt_sarray_ptr)alloc_small(cinfo, pool_id,
+                                         sizeof(struct jvirt_sarray_control));
+
+  result->mem_buffer = NULL;    /* marks array not yet realized */
+  result->rows_in_array = numrows;
+  result->samplesperrow = samplesperrow;
+  result->maxaccess = maxaccess;
+  result->pre_zero = pre_zero;
+  result->b_s_open = FALSE;     /* no associated backing-store object */
+  result->next = mem->virt_sarray_list; /* add to list of virtual arrays */
+  mem->virt_sarray_list = result;
+
+  return result;
+}
+
+
+METHODDEF(jvirt_barray_ptr)
+request_virt_barray(j_common_ptr cinfo, int pool_id, boolean pre_zero,
+                    JDIMENSION blocksperrow, JDIMENSION numrows,
+                    JDIMENSION maxaccess)
+/* Request a virtual 2-D coefficient-block array */
+{
+  my_mem_ptr mem = (my_mem_ptr)cinfo->mem;
+  jvirt_barray_ptr result;
+
+  /* Only IMAGE-lifetime virtual arrays are currently supported */
+  if (pool_id != JPOOL_IMAGE)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */
+
+  /* get control block */
+  result = (jvirt_barray_ptr)alloc_small(cinfo, pool_id,
+                                         sizeof(struct jvirt_barray_control));
+
+  result->mem_buffer = NULL;    /* marks array not yet realized */
+  result->rows_in_array = numrows;
+  result->blocksperrow = blocksperrow;
+  result->maxaccess = maxaccess;
+  result->pre_zero = pre_zero;
+  result->b_s_open = FALSE;     /* no associated backing-store object */
+  result->next = mem->virt_barray_list; /* add to list of virtual arrays */
+  mem->virt_barray_list = result;
+
+  return result;
+}
+
+
+METHODDEF(void)
+realize_virt_arrays(j_common_ptr cinfo)
+/* Allocate the in-memory buffers for any unrealized virtual arrays */
+{
+  my_mem_ptr mem = (my_mem_ptr)cinfo->mem;
+  size_t space_per_minheight, maximum_space, avail_mem;
+  size_t minheights, max_minheights;
+  jvirt_sarray_ptr sptr;
+  jvirt_barray_ptr bptr;
+  int data_precision = cinfo->is_decompressor ?
+                        ((j_decompress_ptr)cinfo)->data_precision :
+                        ((j_compress_ptr)cinfo)->data_precision;
+  size_t sample_size = data_precision > 12 ?
+                       sizeof(J16SAMPLE) : (data_precision > 8 ?
+                                            sizeof(J12SAMPLE) :
+                                            sizeof(JSAMPLE));
+
+  /* Compute the minimum space needed (maxaccess rows in each buffer)
+   * and the maximum space needed (full image height in each buffer).
+   * These may be of use to the system-dependent jpeg_mem_available routine.
+   */
+  space_per_minheight = 0;
+  maximum_space = 0;
+  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+    if (sptr->mem_buffer == NULL) { /* if not realized yet */
+      size_t new_space = (long)sptr->rows_in_array *
+                         (long)sptr->samplesperrow * sample_size;
+
+      space_per_minheight += (long)sptr->maxaccess *
+                             (long)sptr->samplesperrow * sample_size;
+      if (SIZE_MAX - maximum_space < new_space)
+        out_of_memory(cinfo, 10);
+      maximum_space += new_space;
+    }
+  }
+  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+    if (bptr->mem_buffer == NULL) { /* if not realized yet */
+      size_t new_space = (long)bptr->rows_in_array *
+                         (long)bptr->blocksperrow * sizeof(JBLOCK);
+
+      space_per_minheight += (long)bptr->maxaccess *
+                             (long)bptr->blocksperrow * sizeof(JBLOCK);
+      if (SIZE_MAX - maximum_space < new_space)
+        out_of_memory(cinfo, 11);
+      maximum_space += new_space;
+    }
+  }
+
+  if (space_per_minheight <= 0)
+    return;                     /* no unrealized arrays, no work */
+
+  /* Determine amount of memory to actually use; this is system-dependent. */
+  avail_mem = jpeg_mem_available(cinfo, space_per_minheight, maximum_space,
+                                 mem->total_space_allocated);
+
+  /* If the maximum space needed is available, make all the buffers full
+   * height; otherwise parcel it out with the same number of minheights
+   * in each buffer.
+   */
+  if (avail_mem >= maximum_space)
+    max_minheights = 1000000000L;
+  else {
+    max_minheights = avail_mem / space_per_minheight;
+    /* If there doesn't seem to be enough space, try to get the minimum
+     * anyway.  This allows a "stub" implementation of jpeg_mem_available().
+     */
+    if (max_minheights <= 0)
+      max_minheights = 1;
+  }
+
+  /* Allocate the in-memory buffers and initialize backing store as needed. */
+
+  for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+    if (sptr->mem_buffer == NULL) { /* if not realized yet */
+      minheights = ((long)sptr->rows_in_array - 1L) / sptr->maxaccess + 1L;
+      if (minheights <= max_minheights) {
+        /* This buffer fits in memory */
+        sptr->rows_in_mem = sptr->rows_in_array;
+      } else {
+        /* It doesn't fit in memory, create backing store. */
+        sptr->rows_in_mem = (JDIMENSION)(max_minheights * sptr->maxaccess);
+        jpeg_open_backing_store(cinfo, &sptr->b_s_info,
+                                (long)sptr->rows_in_array *
+                                (long)sptr->samplesperrow *
+                                (long)sample_size);
+        sptr->b_s_open = TRUE;
+      }
+      sptr->mem_buffer = alloc_sarray(cinfo, JPOOL_IMAGE,
+                                      sptr->samplesperrow, sptr->rows_in_mem);
+      sptr->rowsperchunk = mem->last_rowsperchunk;
+      sptr->cur_start_row = 0;
+      sptr->first_undef_row = 0;
+      sptr->dirty = FALSE;
+    }
+  }
+
+  for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+    if (bptr->mem_buffer == NULL) { /* if not realized yet */
+      minheights = ((long)bptr->rows_in_array - 1L) / bptr->maxaccess + 1L;
+      if (minheights <= max_minheights) {
+        /* This buffer fits in memory */
+        bptr->rows_in_mem = bptr->rows_in_array;
+      } else {
+        /* It doesn't fit in memory, create backing store. */
+        bptr->rows_in_mem = (JDIMENSION)(max_minheights * bptr->maxaccess);
+        jpeg_open_backing_store(cinfo, &bptr->b_s_info,
+                                (long)bptr->rows_in_array *
+                                (long)bptr->blocksperrow *
+                                (long)sizeof(JBLOCK));
+        bptr->b_s_open = TRUE;
+      }
+      bptr->mem_buffer = alloc_barray(cinfo, JPOOL_IMAGE,
+                                      bptr->blocksperrow, bptr->rows_in_mem);
+      bptr->rowsperchunk = mem->last_rowsperchunk;
+      bptr->cur_start_row = 0;
+      bptr->first_undef_row = 0;
+      bptr->dirty = FALSE;
+    }
+  }
+}
+
+
+LOCAL(void)
+do_sarray_io(j_common_ptr cinfo, jvirt_sarray_ptr ptr, boolean writing)
+/* Do backing store read or write of a virtual sample array */
+{
+  long bytesperrow, file_offset, byte_count, rows, thisrow, i;
+  int data_precision = cinfo->is_decompressor ?
+                        ((j_decompress_ptr)cinfo)->data_precision :
+                        ((j_compress_ptr)cinfo)->data_precision;
+  size_t sample_size = data_precision > 12 ?
+                       sizeof(J16SAMPLE) : (data_precision > 8 ?
+                                            sizeof(J12SAMPLE) :
+                                            sizeof(JSAMPLE));
+
+  bytesperrow = (long)ptr->samplesperrow * (long)sample_size;
+  file_offset = ptr->cur_start_row * bytesperrow;
+  /* Loop to read or write each allocation chunk in mem_buffer */
+  for (i = 0; i < (long)ptr->rows_in_mem; i += ptr->rowsperchunk) {
+    /* One chunk, but check for short chunk at end of buffer */
+    rows = MIN((long)ptr->rowsperchunk, (long)ptr->rows_in_mem - i);
+    /* Transfer no more than is currently defined */
+    thisrow = (long)ptr->cur_start_row + i;
+    rows = MIN(rows, (long)ptr->first_undef_row - thisrow);
+    /* Transfer no more than fits in file */
+    rows = MIN(rows, (long)ptr->rows_in_array - thisrow);
+    if (rows <= 0)              /* this chunk might be past end of file! */
+      break;
+    byte_count = rows * bytesperrow;
+    if (data_precision <= 8) {
+      if (writing)
+        (*ptr->b_s_info.write_backing_store) (cinfo, &ptr->b_s_info,
+                                              (void *)ptr->mem_buffer[i],
+                                              file_offset, byte_count);
+      else
+        (*ptr->b_s_info.read_backing_store) (cinfo, &ptr->b_s_info,
+                                             (void *)ptr->mem_buffer[i],
+                                             file_offset, byte_count);
+    } else if (data_precision <= 12) {
+      J12SAMPARRAY mem_buffer12 = (J12SAMPARRAY)ptr->mem_buffer;
+
+      if (writing)
+        (*ptr->b_s_info.write_backing_store) (cinfo, &ptr->b_s_info,
+                                              (void *)mem_buffer12[i],
+                                              file_offset, byte_count);
+      else
+        (*ptr->b_s_info.read_backing_store) (cinfo, &ptr->b_s_info,
+                                             (void *)mem_buffer12[i],
+                                             file_offset, byte_count);
+    } else {
+#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
+      J16SAMPARRAY mem_buffer16 = (J16SAMPARRAY)ptr->mem_buffer;
+
+      if (writing)
+        (*ptr->b_s_info.write_backing_store) (cinfo, &ptr->b_s_info,
+                                              (void *)mem_buffer16[i],
+                                              file_offset, byte_count);
+      else
+        (*ptr->b_s_info.read_backing_store) (cinfo, &ptr->b_s_info,
+                                             (void *)mem_buffer16[i],
+                                             file_offset, byte_count);
+#else
+      ERREXIT1(cinfo, JERR_BAD_PRECISION, data_precision);
+#endif
+    }
+    file_offset += byte_count;
+  }
+}
+
+
+LOCAL(void)
+do_barray_io(j_common_ptr cinfo, jvirt_barray_ptr ptr, boolean writing)
+/* Do backing store read or write of a virtual coefficient-block array */
+{
+  long bytesperrow, file_offset, byte_count, rows, thisrow, i;
+
+  bytesperrow = (long)ptr->blocksperrow * sizeof(JBLOCK);
+  file_offset = ptr->cur_start_row * bytesperrow;
+  /* Loop to read or write each allocation chunk in mem_buffer */
+  for (i = 0; i < (long)ptr->rows_in_mem; i += ptr->rowsperchunk) {
+    /* One chunk, but check for short chunk at end of buffer */
+    rows = MIN((long)ptr->rowsperchunk, (long)ptr->rows_in_mem - i);
+    /* Transfer no more than is currently defined */
+    thisrow = (long)ptr->cur_start_row + i;
+    rows = MIN(rows, (long)ptr->first_undef_row - thisrow);
+    /* Transfer no more than fits in file */
+    rows = MIN(rows, (long)ptr->rows_in_array - thisrow);
+    if (rows <= 0)              /* this chunk might be past end of file! */
+      break;
+    byte_count = rows * bytesperrow;
+    if (writing)
+      (*ptr->b_s_info.write_backing_store) (cinfo, &ptr->b_s_info,
+                                            (void *)ptr->mem_buffer[i],
+                                            file_offset, byte_count);
+    else
+      (*ptr->b_s_info.read_backing_store) (cinfo, &ptr->b_s_info,
+                                           (void *)ptr->mem_buffer[i],
+                                           file_offset, byte_count);
+    file_offset += byte_count;
+  }
+}
+
+
+METHODDEF(JSAMPARRAY)
+access_virt_sarray(j_common_ptr cinfo, jvirt_sarray_ptr ptr,
+                   JDIMENSION start_row, JDIMENSION num_rows, boolean writable)
+/* Access the part of a virtual sample array starting at start_row */
+/* and extending for num_rows rows.  writable is true if  */
+/* caller intends to modify the accessed area. */
+{
+  JDIMENSION end_row = start_row + num_rows;
+  JDIMENSION undef_row;
+  int data_precision = cinfo->is_decompressor ?
+                        ((j_decompress_ptr)cinfo)->data_precision :
+                        ((j_compress_ptr)cinfo)->data_precision;
+  size_t sample_size = data_precision > 12 ?
+                       sizeof(J16SAMPLE) : (data_precision > 8 ?
+                                            sizeof(J12SAMPLE) :
+                                            sizeof(JSAMPLE));
+
+  /* debugging check */
+  if (end_row > ptr->rows_in_array || num_rows > ptr->maxaccess ||
+      ptr->mem_buffer == NULL)
+    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+
+  /* Make the desired part of the virtual array accessible */
+  if (start_row < ptr->cur_start_row ||
+      end_row > ptr->cur_start_row + ptr->rows_in_mem) {
+    if (!ptr->b_s_open)
+      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
+    /* Flush old buffer contents if necessary */
+    if (ptr->dirty) {
+      do_sarray_io(cinfo, ptr, TRUE);
+      ptr->dirty = FALSE;
+    }
+    /* Decide what part of virtual array to access.
+     * Algorithm: if target address > current window, assume forward scan,
+     * load starting at target address.  If target address < current window,
+     * assume backward scan, load so that target area is top of window.
+     * Note that when switching from forward write to forward read, will have
+     * start_row = 0, so the limiting case applies and we load from 0 anyway.
+     */
+    if (start_row > ptr->cur_start_row) {
+      ptr->cur_start_row = start_row;
+    } else {
+      /* use long arithmetic here to avoid overflow & unsigned problems */
+      long ltemp;
+
+      ltemp = (long)end_row - (long)ptr->rows_in_mem;
+      if (ltemp < 0)
+        ltemp = 0;              /* don't fall off front end of file */
+      ptr->cur_start_row = (JDIMENSION)ltemp;
+    }
+    /* Read in the selected part of the array.
+     * During the initial write pass, we will do no actual read
+     * because the selected part is all undefined.
+     */
+    do_sarray_io(cinfo, ptr, FALSE);
+  }
+  /* Ensure the accessed part of the array is defined; prezero if needed.
+   * To improve locality of access, we only prezero the part of the array
+   * that the caller is about to access, not the entire in-memory array.
+   */
+  if (ptr->first_undef_row < end_row) {
+    if (ptr->first_undef_row < start_row) {
+      if (writable)             /* writer skipped over a section of array */
+        ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      undef_row = start_row;    /* but reader is allowed to read ahead */
+    } else {
+      undef_row = ptr->first_undef_row;
+    }
+    if (writable)
+      ptr->first_undef_row = end_row;
+    if (ptr->pre_zero) {
+      size_t bytesperrow = (size_t)ptr->samplesperrow * sample_size;
+      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
+      end_row -= ptr->cur_start_row;
+      while (undef_row < end_row) {
+        jzero_far((void *)ptr->mem_buffer[undef_row], bytesperrow);
+        undef_row++;
+      }
+    } else {
+      if (!writable)            /* reader looking at undefined data */
+        ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+    }
+  }
+  /* Flag the buffer dirty if caller will write in it */
+  if (writable)
+    ptr->dirty = TRUE;
+  /* Return address of proper part of the buffer */
+  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
+}
+
+
+METHODDEF(JBLOCKARRAY)
+access_virt_barray(j_common_ptr cinfo, jvirt_barray_ptr ptr,
+                   JDIMENSION start_row, JDIMENSION num_rows, boolean writable)
+/* Access the part of a virtual block array starting at start_row */
+/* and extending for num_rows rows.  writable is true if  */
+/* caller intends to modify the accessed area. */
+{
+  JDIMENSION end_row = start_row + num_rows;
+  JDIMENSION undef_row;
+
+  /* debugging check */
+  if (end_row > ptr->rows_in_array || num_rows > ptr->maxaccess ||
+      ptr->mem_buffer == NULL)
+    ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+
+  /* Make the desired part of the virtual array accessible */
+  if (start_row < ptr->cur_start_row ||
+      end_row > ptr->cur_start_row + ptr->rows_in_mem) {
+    if (!ptr->b_s_open)
+      ERREXIT(cinfo, JERR_VIRTUAL_BUG);
+    /* Flush old buffer contents if necessary */
+    if (ptr->dirty) {
+      do_barray_io(cinfo, ptr, TRUE);
+      ptr->dirty = FALSE;
+    }
+    /* Decide what part of virtual array to access.
+     * Algorithm: if target address > current window, assume forward scan,
+     * load starting at target address.  If target address < current window,
+     * assume backward scan, load so that target area is top of window.
+     * Note that when switching from forward write to forward read, will have
+     * start_row = 0, so the limiting case applies and we load from 0 anyway.
+     */
+    if (start_row > ptr->cur_start_row) {
+      ptr->cur_start_row = start_row;
+    } else {
+      /* use long arithmetic here to avoid overflow & unsigned problems */
+      long ltemp;
+
+      ltemp = (long)end_row - (long)ptr->rows_in_mem;
+      if (ltemp < 0)
+        ltemp = 0;              /* don't fall off front end of file */
+      ptr->cur_start_row = (JDIMENSION)ltemp;
+    }
+    /* Read in the selected part of the array.
+     * During the initial write pass, we will do no actual read
+     * because the selected part is all undefined.
+     */
+    do_barray_io(cinfo, ptr, FALSE);
+  }
+  /* Ensure the accessed part of the array is defined; prezero if needed.
+   * To improve locality of access, we only prezero the part of the array
+   * that the caller is about to access, not the entire in-memory array.
+   */
+  if (ptr->first_undef_row < end_row) {
+    if (ptr->first_undef_row < start_row) {
+      if (writable)             /* writer skipped over a section of array */
+        ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+      undef_row = start_row;    /* but reader is allowed to read ahead */
+    } else {
+      undef_row = ptr->first_undef_row;
+    }
+    if (writable)
+      ptr->first_undef_row = end_row;
+    if (ptr->pre_zero) {
+      size_t bytesperrow = (size_t)ptr->blocksperrow * sizeof(JBLOCK);
+      undef_row -= ptr->cur_start_row; /* make indexes relative to buffer */
+      end_row -= ptr->cur_start_row;
+      while (undef_row < end_row) {
+        jzero_far((void *)ptr->mem_buffer[undef_row], bytesperrow);
+        undef_row++;
+      }
+    } else {
+      if (!writable)            /* reader looking at undefined data */
+        ERREXIT(cinfo, JERR_BAD_VIRTUAL_ACCESS);
+    }
+  }
+  /* Flag the buffer dirty if caller will write in it */
+  if (writable)
+    ptr->dirty = TRUE;
+  /* Return address of proper part of the buffer */
+  return ptr->mem_buffer + (start_row - ptr->cur_start_row);
+}
+
+
+/*
+ * Release all objects belonging to a specified pool.
+ */
+
+METHODDEF(void)
+free_pool(j_common_ptr cinfo, int pool_id)
+{
+  my_mem_ptr mem = (my_mem_ptr)cinfo->mem;
+  small_pool_ptr shdr_ptr;
+  large_pool_ptr lhdr_ptr;
+  size_t space_freed;
+
+  if (pool_id < 0 || pool_id >= JPOOL_NUMPOOLS)
+    ERREXIT1(cinfo, JERR_BAD_POOL_ID, pool_id); /* safety check */
+
+#ifdef MEM_STATS
+  if (cinfo->err->trace_level > 1)
+    print_mem_stats(cinfo, pool_id); /* print pool's memory usage statistics */
+#endif
+
+  /* If freeing IMAGE pool, close any virtual arrays first */
+  if (pool_id == JPOOL_IMAGE) {
+    jvirt_sarray_ptr sptr;
+    jvirt_barray_ptr bptr;
+
+    for (sptr = mem->virt_sarray_list; sptr != NULL; sptr = sptr->next) {
+      if (sptr->b_s_open) {     /* there may be no backing store */
+        sptr->b_s_open = FALSE; /* prevent recursive close if error */
+        (*sptr->b_s_info.close_backing_store) (cinfo, &sptr->b_s_info);
+      }
+    }
+    mem->virt_sarray_list = NULL;
+    for (bptr = mem->virt_barray_list; bptr != NULL; bptr = bptr->next) {
+      if (bptr->b_s_open) {     /* there may be no backing store */
+        bptr->b_s_open = FALSE; /* prevent recursive close if error */
+        (*bptr->b_s_info.close_backing_store) (cinfo, &bptr->b_s_info);
+      }
+    }
+    mem->virt_barray_list = NULL;
+  }
+
+  /* Release large objects */
+  lhdr_ptr = mem->large_list[pool_id];
+  mem->large_list[pool_id] = NULL;
+
+  while (lhdr_ptr != NULL) {
+    large_pool_ptr next_lhdr_ptr = lhdr_ptr->next;
+    space_freed = lhdr_ptr->bytes_used +
+                  lhdr_ptr->bytes_left +
+                  sizeof(large_pool_hdr) + ALIGN_SIZE - 1;
+    jpeg_free_large(cinfo, (void *)lhdr_ptr, space_freed);
+    mem->total_space_allocated -= space_freed;
+    lhdr_ptr = next_lhdr_ptr;
+  }
+
+  /* Release small objects */
+  shdr_ptr = mem->small_list[pool_id];
+  mem->small_list[pool_id] = NULL;
+
+  while (shdr_ptr != NULL) {
+    small_pool_ptr next_shdr_ptr = shdr_ptr->next;
+    space_freed = shdr_ptr->bytes_used + shdr_ptr->bytes_left +
+                  sizeof(small_pool_hdr) + ALIGN_SIZE - 1;
+    jpeg_free_small(cinfo, (void *)shdr_ptr, space_freed);
+    mem->total_space_allocated -= space_freed;
+    shdr_ptr = next_shdr_ptr;
+  }
+}
+
+
+/*
+ * Close up shop entirely.
+ * Note that this cannot be called unless cinfo->mem is non-NULL.
+ */
+
+METHODDEF(void)
+self_destruct(j_common_ptr cinfo)
+{
+  int pool;
+
+  /* Close all backing store, release all memory.
+   * Releasing pools in reverse order might help avoid fragmentation
+   * with some (brain-damaged) malloc libraries.
+   */
+  for (pool = JPOOL_NUMPOOLS - 1; pool >= JPOOL_PERMANENT; pool--) {
+    free_pool(cinfo, pool);
+  }
+
+  /* Release the memory manager control block too. */
+  jpeg_free_small(cinfo, (void *)cinfo->mem, sizeof(my_memory_mgr));
+  cinfo->mem = NULL;            /* ensures I will be called only once */
+
+  jpeg_mem_term(cinfo);         /* system-dependent cleanup */
+}
+
+
+/*
+ * Memory manager initialization.
+ * When this is called, only the error manager pointer is valid in cinfo!
+ */
+
+GLOBAL(void)
+jinit_memory_mgr(j_common_ptr cinfo)
+{
+  my_mem_ptr mem;
+  long max_to_use;
+  int pool;
+  size_t test_mac;
+
+  cinfo->mem = NULL;            /* for safety if init fails */
+
+  /* Check for configuration errors.
+   * sizeof(ALIGN_TYPE) should be a power of 2; otherwise, it probably
+   * doesn't reflect any real hardware alignment requirement.
+   * The test is a little tricky: for X>0, X and X-1 have no one-bits
+   * in common if and only if X is a power of 2, ie has only one one-bit.
+   * Some compilers may give an "unreachable code" warning here; ignore it.
+   */
+  if ((ALIGN_SIZE & (ALIGN_SIZE - 1)) != 0)
+    ERREXIT(cinfo, JERR_BAD_ALIGN_TYPE);
+  /* MAX_ALLOC_CHUNK must be representable as type size_t, and must be
+   * a multiple of ALIGN_SIZE.
+   * Again, an "unreachable code" warning may be ignored here.
+   * But a "constant too large" warning means you need to fix MAX_ALLOC_CHUNK.
+   */
+  test_mac = (size_t)MAX_ALLOC_CHUNK;
+  if ((long)test_mac != MAX_ALLOC_CHUNK ||
+      (MAX_ALLOC_CHUNK % ALIGN_SIZE) != 0)
+    ERREXIT(cinfo, JERR_BAD_ALLOC_CHUNK);
+
+  max_to_use = jpeg_mem_init(cinfo); /* system-dependent initialization */
+
+  /* Attempt to allocate memory manager's control block */
+  mem = (my_mem_ptr)jpeg_get_small(cinfo, sizeof(my_memory_mgr));
+
+  if (mem == NULL) {
+    jpeg_mem_term(cinfo);       /* system-dependent cleanup */
+    ERREXIT1(cinfo, JERR_OUT_OF_MEMORY, 0);
+  }
+
+  /* OK, fill in the method pointers */
+  mem->pub.alloc_small = alloc_small;
+  mem->pub.alloc_large = alloc_large;
+  mem->pub.alloc_sarray = alloc_sarray;
+  mem->pub.alloc_barray = alloc_barray;
+  mem->pub.request_virt_sarray = request_virt_sarray;
+  mem->pub.request_virt_barray = request_virt_barray;
+  mem->pub.realize_virt_arrays = realize_virt_arrays;
+  mem->pub.access_virt_sarray = access_virt_sarray;
+  mem->pub.access_virt_barray = access_virt_barray;
+  mem->pub.free_pool = free_pool;
+  mem->pub.self_destruct = self_destruct;
+
+  /* Make MAX_ALLOC_CHUNK accessible to other modules */
+  mem->pub.max_alloc_chunk = MAX_ALLOC_CHUNK;
+
+  /* Initialize working state */
+  mem->pub.max_memory_to_use = max_to_use;
+
+  for (pool = JPOOL_NUMPOOLS - 1; pool >= JPOOL_PERMANENT; pool--) {
+    mem->small_list[pool] = NULL;
+    mem->large_list[pool] = NULL;
+  }
+  mem->virt_sarray_list = NULL;
+  mem->virt_barray_list = NULL;
+
+  mem->total_space_allocated = sizeof(my_memory_mgr);
+
+  /* Declare ourselves open for business */
+  cinfo->mem = &mem->pub;
+
+  /* Check for an environment variable JPEGMEM; if found, override the
+   * default max_memory setting from jpeg_mem_init.  Note that the
+   * surrounding application may again override this value.
+   * If your system doesn't support getenv(), define NO_GETENV to disable
+   * this feature.
+   */
+#ifndef NO_GETENV
+  {
+    char memenv[30] = { 0 };
+
+    if (!GETENV_S(memenv, 30, "JPEGMEM") && strlen(memenv) > 0) {
+      char ch = 'x';
+
+#ifdef _MSC_VER
+      if (sscanf_s(memenv, "%ld%c", &max_to_use, &ch, 1) > 0) {
+#else
+      if (sscanf(memenv, "%ld%c", &max_to_use, &ch) > 0) {
+#endif
+        if (ch == 'm' || ch == 'M')
+          max_to_use *= 1000L;
+        mem->pub.max_memory_to_use = max_to_use * 1000L;
+      }
+    }
+  }
+#endif
+
+}
diff --git a/thirdparty/libjpeg-turbo/src/jmemnobs.c b/thirdparty/libjpeg-turbo/src/jmemnobs.c
new file mode 100644
index 00000000000..692775f5a85
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jmemnobs.c
@@ -0,0 +1,110 @@
+/*
+ * jmemnobs.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1992-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2017-2018, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file provides a really simple implementation of the system-
+ * dependent portion of the JPEG memory manager.  This implementation
+ * assumes that no backing-store files are needed: all required space
+ * can be obtained from malloc().
+ * This is very portable in the sense that it'll compile on almost anything,
+ * but you'd better have lots of main memory (or virtual memory) if you want
+ * to process big images.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jmemsys.h"            /* import the system-dependent declarations */
+
+
+/*
+ * Memory allocation and freeing are controlled by the regular library
+ * routines malloc() and free().
+ */
+
+GLOBAL(void *)
+jpeg_get_small(j_common_ptr cinfo, size_t sizeofobject)
+{
+  return (void *)MALLOC(sizeofobject);
+}
+
+GLOBAL(void)
+jpeg_free_small(j_common_ptr cinfo, void *object, size_t sizeofobject)
+{
+  free(object);
+}
+
+
+/*
+ * "Large" objects are treated the same as "small" ones.
+ */
+
+GLOBAL(void *)
+jpeg_get_large(j_common_ptr cinfo, size_t sizeofobject)
+{
+  return (void *)MALLOC(sizeofobject);
+}
+
+GLOBAL(void)
+jpeg_free_large(j_common_ptr cinfo, void *object, size_t sizeofobject)
+{
+  free(object);
+}
+
+
+/*
+ * This routine computes the total memory space available for allocation.
+ */
+
+GLOBAL(size_t)
+jpeg_mem_available(j_common_ptr cinfo, size_t min_bytes_needed,
+                   size_t max_bytes_needed, size_t already_allocated)
+{
+  if (cinfo->mem->max_memory_to_use) {
+    if ((size_t)cinfo->mem->max_memory_to_use > already_allocated)
+      return cinfo->mem->max_memory_to_use - already_allocated;
+    else
+      return 0;
+  } else {
+    /* Here we always say, "we got all you want bud!" */
+    return max_bytes_needed;
+  }
+}
+
+
+/*
+ * Backing store (temporary file) management.
+ * Since jpeg_mem_available always promised the moon,
+ * this should never be called and we can just error out.
+ */
+
+GLOBAL(void)
+jpeg_open_backing_store(j_common_ptr cinfo, backing_store_ptr info,
+                        long total_bytes_needed)
+{
+  ERREXIT(cinfo, JERR_NO_BACKING_STORE);
+}
+
+
+/*
+ * These routines take care of any system-dependent initialization and
+ * cleanup required.  Here, there isn't any.
+ */
+
+GLOBAL(long)
+jpeg_mem_init(j_common_ptr cinfo)
+{
+  return 0;                     /* just set max_memory_to_use to 0 */
+}
+
+GLOBAL(void)
+jpeg_mem_term(j_common_ptr cinfo)
+{
+  /* no work */
+}
diff --git a/thirdparty/libjpeg-turbo/src/jmemsys.h b/thirdparty/libjpeg-turbo/src/jmemsys.h
new file mode 100644
index 00000000000..ac09ef4c36d
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jmemsys.h
@@ -0,0 +1,147 @@
+/*
+ * jmemsys.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1992-1997, Thomas G. Lane.
+ * It was modified by The libjpeg-turbo Project to include only code and
+ * information relevant to libjpeg-turbo.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This include file defines the interface between the system-independent
+ * and system-dependent portions of the JPEG memory manager.  No other
+ * modules need include it.  (The system-independent portion is jmemmgr.c;
+ * there are several different versions of the system-dependent portion.)
+ *
+ * This file works as-is for the system-dependent memory managers supplied
+ * in the IJG distribution.  You may need to modify it if you write a
+ * custom memory manager.  If system-dependent changes are needed in
+ * this file, the best method is to #ifdef them based on a configuration
+ * symbol supplied in jconfig.h.
+ */
+
+
+/*
+ * These two functions are used to allocate and release small chunks of
+ * memory.  (Typically the total amount requested through jpeg_get_small is
+ * no more than 20K or so; this will be requested in chunks of a few K each.)
+ * Behavior should be the same as for the standard library functions malloc
+ * and free; in particular, jpeg_get_small must return NULL on failure.
+ * On most systems, these ARE malloc and free.  jpeg_free_small is passed the
+ * size of the object being freed, just in case it's needed.
+ */
+
+EXTERN(void *) jpeg_get_small(j_common_ptr cinfo, size_t sizeofobject);
+EXTERN(void) jpeg_free_small(j_common_ptr cinfo, void *object,
+                             size_t sizeofobject);
+
+/*
+ * These two functions are used to allocate and release large chunks of
+ * memory (up to the total free space designated by jpeg_mem_available).
+ * These are identical to the jpeg_get/free_small routines; but we keep them
+ * separate anyway, in case a different allocation strategy is desirable for
+ * large chunks.
+ */
+
+EXTERN(void *) jpeg_get_large(j_common_ptr cinfo, size_t sizeofobject);
+EXTERN(void) jpeg_free_large(j_common_ptr cinfo, void *object,
+                             size_t sizeofobject);
+
+/*
+ * The macro MAX_ALLOC_CHUNK designates the maximum number of bytes that may
+ * be requested in a single call to jpeg_get_large (and jpeg_get_small for that
+ * matter, but that case should never come into play).  This macro was needed
+ * to model the 64Kb-segment-size limit of far addressing on 80x86 machines.
+ * On machines with flat address spaces, any large constant may be used.
+ *
+ * NB: jmemmgr.c expects that MAX_ALLOC_CHUNK will be representable as type
+ * size_t and will be a multiple of sizeof(align_type).
+ */
+
+#ifndef MAX_ALLOC_CHUNK         /* may be overridden in jconfig.h */
+#define MAX_ALLOC_CHUNK  1000000000L
+#endif
+
+/*
+ * This routine computes the total space still available for allocation by
+ * jpeg_get_large.  If more space than this is needed, backing store will be
+ * used.  NOTE: any memory already allocated must not be counted.
+ *
+ * There is a minimum space requirement, corresponding to the minimum
+ * feasible buffer sizes; jmemmgr.c will request that much space even if
+ * jpeg_mem_available returns zero.  The maximum space needed, enough to hold
+ * all working storage in memory, is also passed in case it is useful.
+ * Finally, the total space already allocated is passed.  If no better
+ * method is available, cinfo->mem->max_memory_to_use - already_allocated
+ * is often a suitable calculation.
+ *
+ * It is OK for jpeg_mem_available to underestimate the space available
+ * (that'll just lead to more backing-store access than is really necessary).
+ * However, an overestimate will lead to failure.  Hence it's wise to subtract
+ * a slop factor from the true available space.  5% should be enough.
+ *
+ * On machines with lots of virtual memory, any large constant may be returned.
+ * Conversely, zero may be returned to always use the minimum amount of memory.
+ */
+
+EXTERN(size_t) jpeg_mem_available(j_common_ptr cinfo, size_t min_bytes_needed,
+                                  size_t max_bytes_needed,
+                                  size_t already_allocated);
+
+
+/*
+ * This structure holds whatever state is needed to access a single
+ * backing-store object.  The read/write/close method pointers are called
+ * by jmemmgr.c to manipulate the backing-store object; all other fields
+ * are private to the system-dependent backing store routines.
+ */
+
+#define TEMP_NAME_LENGTH   64   /* max length of a temporary file's name */
+
+
+typedef struct backing_store_struct *backing_store_ptr;
+
+typedef struct backing_store_struct {
+  /* Methods for reading/writing/closing this backing-store object */
+  void (*read_backing_store) (j_common_ptr cinfo, backing_store_ptr info,
+                              void *buffer_address, long file_offset,
+                              long byte_count);
+  void (*write_backing_store) (j_common_ptr cinfo, backing_store_ptr info,
+                               void *buffer_address, long file_offset,
+                               long byte_count);
+  void (*close_backing_store) (j_common_ptr cinfo, backing_store_ptr info);
+
+  /* Private fields for system-dependent backing-store management */
+  /* For a typical implementation with temp files, we need: */
+  FILE *temp_file;              /* stdio reference to temp file */
+  char temp_name[TEMP_NAME_LENGTH]; /* name of temp file */
+} backing_store_info;
+
+
+/*
+ * Initial opening of a backing-store object.  This must fill in the
+ * read/write/close pointers in the object.  The read/write routines
+ * may take an error exit if the specified maximum file size is exceeded.
+ * (If jpeg_mem_available always returns a large value, this routine can
+ * just take an error exit.)
+ */
+
+EXTERN(void) jpeg_open_backing_store(j_common_ptr cinfo,
+                                     backing_store_ptr info,
+                                     long total_bytes_needed);
+
+
+/*
+ * These routines take care of any system-dependent initialization and
+ * cleanup required.  jpeg_mem_init will be called before anything is
+ * allocated (and, therefore, nothing in cinfo is of use except the error
+ * manager pointer).  It should return a suitable default value for
+ * max_memory_to_use; this may subsequently be overridden by the surrounding
+ * application.  (Note that max_memory_to_use is only important if
+ * jpeg_mem_available chooses to consult it ... no one else will.)
+ * jpeg_mem_term may assume that all requested memory has been freed and that
+ * all opened backing-store objects have been closed.
+ */
+
+EXTERN(long) jpeg_mem_init(j_common_ptr cinfo);
+EXTERN(void) jpeg_mem_term(j_common_ptr cinfo);
diff --git a/thirdparty/libjpeg-turbo/src/jmorecfg.h b/thirdparty/libjpeg-turbo/src/jmorecfg.h
new file mode 100644
index 00000000000..f7de737edbe
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jmorecfg.h
@@ -0,0 +1,389 @@
+// Originally generated by libjpeg-turbo's cmake build, then modified to support multiple platforms.
+
+/*
+ * jmorecfg.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 1997-2009 by Guido Vollbeding.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009, 2011, 2014-2015, 2018, 2020, 2022, D. R. Commander.
+ * Godot modifications:
+ * Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md).
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains additional configuration options that customize the
+ * JPEG software for special applications or support machine-dependent
+ * optimizations.  Most users will not need to touch this file.
+ */
+
+
+/*
+ * Maximum number of components (color channels) allowed in JPEG image.
+ * To meet the letter of Rec. ITU-T T.81 | ISO/IEC 10918-1, set this to 255.
+ * However, darn few applications need more than 4 channels (maybe 5 for CMYK +
+ * alpha mask).  We recommend 10 as a reasonable compromise; use 4 if you are
+ * really short on memory.  (Each allowed component costs a hundred or so
+ * bytes of storage, whether actually used in an image or not.)
+ */
+
+#define MAX_COMPONENTS  10      /* maximum number of image components */
+
+
+/*
+ * Basic data types.
+ * You may need to change these if you have a machine with unusual data
+ * type sizes; for example, "char" not 8 bits, "short" not 16 bits,
+ * or "long" not 32 bits.  We don't care whether "int" is 16 or 32 bits,
+ * but it had better be at least 16.
+ */
+
+/* Representation of a single sample (pixel element value).
+ * We frequently allocate large arrays of these, so it's important to keep
+ * them small.  But if you have memory to burn and access to char or short
+ * arrays is very slow on your hardware, you might want to change these.
+ */
+
+/* JSAMPLE should be the smallest type that will hold the values 0..255. */
+
+typedef unsigned char JSAMPLE;
+#define GETJSAMPLE(value)  ((int)(value))
+
+#define MAXJSAMPLE       255
+#define CENTERJSAMPLE    128
+
+
+/* J12SAMPLE should be the smallest type that will hold the values 0..4095. */
+
+typedef short J12SAMPLE;
+
+#define MAXJ12SAMPLE     4095
+#define CENTERJ12SAMPLE  2048
+
+
+/* J16SAMPLE should be the smallest type that will hold the values 0..65535. */
+
+typedef unsigned short J16SAMPLE;
+
+#define MAXJ16SAMPLE     65535
+#define CENTERJ16SAMPLE  32768
+
+
+/* Representation of a DCT frequency coefficient.
+ * This should be a signed value of at least 16 bits; "short" is usually OK.
+ * Again, we allocate large arrays of these, but you can change to int
+ * if you have memory to burn and "short" is really slow.
+ */
+
+typedef short JCOEF;
+
+
+/* Compressed datastreams are represented as arrays of JOCTET.
+ * These must be EXACTLY 8 bits wide, at least once they are written to
+ * external storage.  Note that when using the stdio data source/destination
+ * managers, this is also the data type passed to fread/fwrite.
+ */
+
+typedef unsigned char JOCTET;
+#define GETJOCTET(value)  (value)
+
+
+/* These typedefs are used for various table entries and so forth.
+ * They must be at least as wide as specified; but making them too big
+ * won't cost a huge amount of memory, so we don't provide special
+ * extraction code like we did for JSAMPLE.  (In other words, these
+ * typedefs live at a different point on the speed/space tradeoff curve.)
+ */
+
+/* UINT8 must hold at least the values 0..255. */
+
+typedef unsigned char UINT8;
+
+/* UINT16 must hold at least the values 0..65535. */
+
+typedef unsigned short UINT16;
+
+/* INT16 must hold at least the values -32768..32767. */
+
+#ifndef XMD_H                   /* X11/xmd.h correctly defines INT16 */
+typedef short INT16;
+#endif
+
+/* INT32 must hold at least signed 32-bit values.
+ *
+ * NOTE: The INT32 typedef dates back to libjpeg v5 (1994.)  Integers were
+ * sometimes 16-bit back then (MS-DOS), which is why INT32 is typedef'd to
+ * long.  It also wasn't common (or at least as common) in 1994 for INT32 to be
+ * defined by platform headers.  Since then, however, INT32 is defined in
+ * several other common places:
+ *
+ * Xmd.h (X11 header) typedefs INT32 to int on 64-bit platforms and long on
+ * 32-bit platforms (i.e always a 32-bit signed type.)
+ *
+ * basetsd.h (Win32 header) typedefs INT32 to int (always a 32-bit signed type
+ * on modern platforms.)
+ *
+ * qglobal.h (Qt header) typedefs INT32 to int (always a 32-bit signed type on
+ * modern platforms.)
+ *
+ * This is a recipe for conflict, since "long" and "int" aren't always
+ * compatible types.  Since the definition of INT32 has technically been part
+ * of the libjpeg API for more than 20 years, we can't remove it, but we do not
+ * use it internally any longer.  We instead define a separate type (JLONG)
+ * for internal use, which ensures that internal behavior will always be the
+ * same regardless of any external headers that may be included.
+ */
+
+#ifndef XMD_H                   /* X11/xmd.h correctly defines INT32 */
+#ifndef _BASETSD_H_             /* Microsoft defines it in basetsd.h */
+#ifndef _BASETSD_H              /* MinGW is slightly different */
+#ifndef QGLOBAL_H               /* Qt defines it in qglobal.h */
+typedef long INT32;
+#endif
+#endif
+#endif
+#endif
+
+/* Datatype used for image dimensions.  The JPEG standard only supports
+ * images up to 64K*64K due to 16-bit fields in SOF markers.  Therefore
+ * "unsigned int" is sufficient on all machines.  However, if you need to
+ * handle larger images and you don't mind deviating from the spec, you
+ * can change this datatype.  (Note that changing this datatype will
+ * potentially require modifying the SIMD code.  The x86-64 SIMD extensions,
+ * in particular, assume a 32-bit JDIMENSION.)
+ */
+
+typedef unsigned int JDIMENSION;
+
+#define JPEG_MAX_DIMENSION  65500L  /* a tad under 64K to prevent overflows */
+
+
+/* These macros are used in all function definitions and extern declarations.
+ * You could modify them if you need to change function linkage conventions;
+ * in particular, you'll need to do that to make the library a Windows DLL.
+ * Another application is to make all functions global for use with debuggers
+ * or code profilers that require it.
+ */
+
+/* a function called through method pointers: */
+#define METHODDEF(type)         static type
+/* a function used only in its module: */
+#define LOCAL(type)             static type
+/* a function referenced thru EXTERNs: */
+#define GLOBAL(type)            type
+/* a reference to a GLOBAL function: */
+#define EXTERN(type)            extern type
+
+
+/* Originally, this macro was used as a way of defining function prototypes
+ * for both modern compilers as well as older compilers that did not support
+ * prototype parameters.  libjpeg-turbo has never supported these older,
+ * non-ANSI compilers, but the macro is still included because there is some
+ * software out there that uses it.
+ */
+
+#define JMETHOD(type, methodname, arglist)  type (*methodname) arglist
+
+
+/* libjpeg-turbo no longer supports platforms that have far symbols (MS-DOS),
+ * but again, some software relies on this macro.
+ */
+
+#undef FAR
+#define FAR
+
+
+/*
+ * On a few systems, type boolean and/or its values FALSE, TRUE may appear
+ * in standard header files.  Or you may have conflicts with application-
+ * specific header files that you want to include together with these files.
+ * Defining HAVE_BOOLEAN before including jpeglib.h should make it work.
+ */
+
+#ifndef HAVE_BOOLEAN
+typedef int boolean;
+#endif
+#ifndef FALSE                   /* in case these macros already exist */
+#define FALSE   0               /* values of boolean */
+#endif
+#ifndef TRUE
+#define TRUE    1
+#endif
+
+
+/*
+ * The remaining options affect code selection within the JPEG library,
+ * but they don't need to be visible to most applications using the library.
+ * To minimize application namespace pollution, the symbols won't be
+ * defined unless JPEG_INTERNALS or JPEG_INTERNAL_OPTIONS has been defined.
+ */
+
+#ifdef JPEG_INTERNALS
+#define JPEG_INTERNAL_OPTIONS
+#endif
+
+#ifdef JPEG_INTERNAL_OPTIONS
+
+
+/*
+ * These defines indicate whether to include various optional functions.
+ * Undefining some of these symbols will produce a smaller but less capable
+ * library.  Note that you can leave certain source files out of the
+ * compilation/linking process if you've #undef'd the corresponding symbols.
+ * (You may HAVE to do that if your compiler doesn't like null source files.)
+ */
+
+/* Capability options common to encoder and decoder: */
+
+#define DCT_ISLOW_SUPPORTED     /* accurate integer method */
+#define DCT_IFAST_SUPPORTED     /* less accurate int method [legacy feature] */
+#define DCT_FLOAT_SUPPORTED     /* floating-point method [legacy feature] */
+
+/* Encoder capability options: */
+
+#define C_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
+#define C_PROGRESSIVE_SUPPORTED     /* Progressive JPEG? (Requires MULTISCAN)*/
+//#define C_LOSSLESS_SUPPORTED        /* Lossless JPEG? */
+#define ENTROPY_OPT_SUPPORTED       /* Optimization of entropy coding parms? */
+/* Note: if you selected 12-bit data precision, it is dangerous to turn off
+ * ENTROPY_OPT_SUPPORTED.  The standard Huffman tables are only good for 8-bit
+ * precision, so jchuff.c normally uses entropy optimization to compute
+ * usable tables for higher precision.  If you don't want to do optimization,
+ * you'll have to supply different default Huffman tables.
+ * The exact same statements apply for progressive and lossless JPEG:
+ * the default tables don't work for progressive mode or lossless mode.
+ * (This may get fixed, however.)
+ */
+#define INPUT_SMOOTHING_SUPPORTED   /* Input image smoothing option? */
+
+/* Decoder capability options: */
+
+#define D_MULTISCAN_FILES_SUPPORTED /* Multiple-scan JPEG files? */
+#define D_PROGRESSIVE_SUPPORTED     /* Progressive JPEG? (Requires MULTISCAN)*/
+//#define D_LOSSLESS_SUPPORTED        /* Lossless JPEG? */
+#define SAVE_MARKERS_SUPPORTED      /* jpeg_save_markers() needed? */
+#define BLOCK_SMOOTHING_SUPPORTED   /* Block smoothing? (Progressive only) */
+#define IDCT_SCALING_SUPPORTED      /* Output rescaling via IDCT? */
+#undef  UPSAMPLE_SCALING_SUPPORTED  /* Output rescaling at upsample stage? */
+#define UPSAMPLE_MERGING_SUPPORTED  /* Fast path for sloppy upsampling? */
+#define QUANT_1PASS_SUPPORTED       /* 1-pass color quantization? */
+#define QUANT_2PASS_SUPPORTED       /* 2-pass color quantization? */
+
+/* more capability options later, no doubt */
+
+
+/*
+ * The RGB_RED, RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE macros are a vestigial
+ * feature of libjpeg.  The idea was that, if an application developer needed
+ * to compress from/decompress to a BGR/BGRX/RGBX/XBGR/XRGB buffer, they could
+ * change these macros, rebuild libjpeg, and link their application statically
+ * with it.  In reality, few people ever did this, because there were some
+ * severe restrictions involved (cjpeg and djpeg no longer worked properly,
+ * compressing/decompressing RGB JPEGs no longer worked properly, and the color
+ * quantizer wouldn't work with pixel sizes other than 3.)  Furthermore, since
+ * all of the O/S-supplied versions of libjpeg were built with the default
+ * values of RGB_RED, RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE, many applications
+ * have come to regard these values as immutable.
+ *
+ * The libjpeg-turbo colorspace extensions provide a much cleaner way of
+ * compressing from/decompressing to buffers with arbitrary component orders
+ * and pixel sizes.  Thus, we do not support changing the values of RGB_RED,
+ * RGB_GREEN, RGB_BLUE, or RGB_PIXELSIZE.  In addition to the restrictions
+ * listed above, changing these values will also break the SIMD extensions and
+ * the regression tests.
+ */
+
+#define RGB_RED         0       /* Offset of Red in an RGB scanline element */
+#define RGB_GREEN       1       /* Offset of Green */
+#define RGB_BLUE        2       /* Offset of Blue */
+#define RGB_PIXELSIZE   3       /* JSAMPLEs per RGB scanline element */
+
+#define JPEG_NUMCS  17
+
+#define EXT_RGB_RED         0
+#define EXT_RGB_GREEN       1
+#define EXT_RGB_BLUE        2
+#define EXT_RGB_PIXELSIZE   3
+
+#define EXT_RGBX_RED        0
+#define EXT_RGBX_GREEN      1
+#define EXT_RGBX_BLUE       2
+#define EXT_RGBX_PIXELSIZE  4
+
+#define EXT_BGR_RED         2
+#define EXT_BGR_GREEN       1
+#define EXT_BGR_BLUE        0
+#define EXT_BGR_PIXELSIZE   3
+
+#define EXT_BGRX_RED        2
+#define EXT_BGRX_GREEN      1
+#define EXT_BGRX_BLUE       0
+#define EXT_BGRX_PIXELSIZE  4
+
+#define EXT_XBGR_RED        3
+#define EXT_XBGR_GREEN      2
+#define EXT_XBGR_BLUE       1
+#define EXT_XBGR_PIXELSIZE  4
+
+#define EXT_XRGB_RED        1
+#define EXT_XRGB_GREEN      2
+#define EXT_XRGB_BLUE       3
+#define EXT_XRGB_PIXELSIZE  4
+
+static const int rgb_red[JPEG_NUMCS] = {
+  -1, -1, RGB_RED, -1, -1, -1, EXT_RGB_RED, EXT_RGBX_RED,
+  EXT_BGR_RED, EXT_BGRX_RED, EXT_XBGR_RED, EXT_XRGB_RED,
+  EXT_RGBX_RED, EXT_BGRX_RED, EXT_XBGR_RED, EXT_XRGB_RED,
+  -1
+};
+
+static const int rgb_green[JPEG_NUMCS] = {
+  -1, -1, RGB_GREEN, -1, -1, -1, EXT_RGB_GREEN, EXT_RGBX_GREEN,
+  EXT_BGR_GREEN, EXT_BGRX_GREEN, EXT_XBGR_GREEN, EXT_XRGB_GREEN,
+  EXT_RGBX_GREEN, EXT_BGRX_GREEN, EXT_XBGR_GREEN, EXT_XRGB_GREEN,
+  -1
+};
+
+static const int rgb_blue[JPEG_NUMCS] = {
+  -1, -1, RGB_BLUE, -1, -1, -1, EXT_RGB_BLUE, EXT_RGBX_BLUE,
+  EXT_BGR_BLUE, EXT_BGRX_BLUE, EXT_XBGR_BLUE, EXT_XRGB_BLUE,
+  EXT_RGBX_BLUE, EXT_BGRX_BLUE, EXT_XBGR_BLUE, EXT_XRGB_BLUE,
+  -1
+};
+
+static const int rgb_pixelsize[JPEG_NUMCS] = {
+  -1, -1, RGB_PIXELSIZE, -1, -1, -1, EXT_RGB_PIXELSIZE, EXT_RGBX_PIXELSIZE,
+  EXT_BGR_PIXELSIZE, EXT_BGRX_PIXELSIZE, EXT_XBGR_PIXELSIZE, EXT_XRGB_PIXELSIZE,
+  EXT_RGBX_PIXELSIZE, EXT_BGRX_PIXELSIZE, EXT_XBGR_PIXELSIZE, EXT_XRGB_PIXELSIZE,
+  -1
+};
+
+/* Definitions for speed-related optimizations. */
+
+/* On some machines (notably 68000 series) "int" is 32 bits, but multiplying
+ * two 16-bit shorts is faster than multiplying two ints.  Define MULTIPLIER
+ * as short on such a machine.  MULTIPLIER must be at least 16 bits wide.
+ */
+
+#ifndef MULTIPLIER
+#ifndef WITH_SIMD
+#define MULTIPLIER  int         /* type for fastest integer multiply */
+#else
+#define MULTIPLIER  short       /* prefer 16-bit with SIMD for parellelism */
+#endif
+#endif
+
+
+/* FAST_FLOAT should be either float or double, whichever is done faster
+ * by your compiler.  (Note that this type is only used in the floating point
+ * DCT routines, so it only matters if you've defined DCT_FLOAT_SUPPORTED.)
+ */
+
+#ifndef FAST_FLOAT
+#define FAST_FLOAT  float
+#endif
+
+#endif /* JPEG_INTERNAL_OPTIONS */
diff --git a/thirdparty/libjpeg-turbo/src/jpeg_nbits.c b/thirdparty/libjpeg-turbo/src/jpeg_nbits.c
new file mode 100644
index 00000000000..c8ee6b056cb
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jpeg_nbits.c
@@ -0,0 +1,4134 @@
+/*
+ * Copyright (C) 2024, D. R. Commander.
+ *
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ */
+
+#include "jpeg_nbits.h"
+#include "jconfigint.h"
+
+
+#ifndef USE_CLZ_INTRINSIC
+
+#define INCLUDE_JPEG_NBITS_TABLE
+
+/* When building for x86[-64] with the SIMD extensions enabled, the C Huffman
+ * encoders can reuse jpeg_nbits_table from the SSE2 baseline Huffman encoder.
+ */
+#if (defined(__x86_64__) || defined(__i386__) || defined(_M_IX86) || \
+     defined(_M_X64)) && defined(WITH_SIMD)
+#undef INCLUDE_JPEG_NBITS_TABLE
+#endif
+
+#endif
+
+
+#ifdef INCLUDE_JPEG_NBITS_TABLE
+
+const unsigned char HIDDEN jpeg_nbits_table[65536] = {
+   0,  1,  2,  2,  3,  3,  3,  3,  4,  4,  4,  4,  4,  4,  4,  4,
+   5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,  5,
+   6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+   6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,  6,
+   7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
+   7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
+   7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
+   7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,  7,
+   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+   8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,  8,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+   9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,  9,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14, 14,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
+  16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16
+};
+
+#else
+
+/* Suppress compiler warnings about empty translation unit. */
+
+typedef int dummy_jpeg_nbits_table;
+
+#endif
diff --git a/thirdparty/libjpeg-turbo/src/jpeg_nbits.h b/thirdparty/libjpeg-turbo/src/jpeg_nbits.h
new file mode 100644
index 00000000000..6481a1228d1
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jpeg_nbits.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (C) 2014, 2021, 2024, D. R. Commander.
+ * Copyright (C) 2014, Olle Liljenzin.
+ * Copyright (C) 2020, Arm Limited.
+ *
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ */
+
+/*
+ * NOTE: If USE_CLZ_INTRINSIC is defined, then clz/bsr instructions will be
+ * used for bit counting rather than the lookup table.  This will reduce the
+ * memory footprint by 64k, which is important for some mobile applications
+ * that create many isolated instances of libjpeg-turbo (web browsers, for
+ * instance.)  This may improve performance on some mobile platforms as well.
+ * This feature is enabled by default only on Arm processors, because some x86
+ * chips have a slow implementation of bsr, and the use of clz/bsr cannot be
+ * shown to have a significant performance impact even on the x86 chips that
+ * have a fast implementation of it.  When building for Armv6, you can
+ * explicitly disable the use of clz/bsr by adding -mthumb to the compiler
+ * flags (this defines __thumb__).
+ */
+
+/* NOTE: Both GCC and Clang define __GNUC__ */
+#if (defined(__GNUC__) && (defined(__arm__) || defined(__aarch64__))) || \
+    defined(_M_ARM) || defined(_M_ARM64)
+#if !defined(__thumb__) || defined(__thumb2__)
+#define USE_CLZ_INTRINSIC
+#endif
+#endif
+
+#ifdef USE_CLZ_INTRINSIC
+#if defined(_MSC_VER) && !defined(__clang__)
+#define JPEG_NBITS_NONZERO(x)  (32 - _CountLeadingZeros(x))
+#else
+#define JPEG_NBITS_NONZERO(x)  (32 - __builtin_clz(x))
+#endif
+#define JPEG_NBITS(x)          (x ? JPEG_NBITS_NONZERO(x) : 0)
+#else
+extern const unsigned char jpeg_nbits_table[65536];
+#define JPEG_NBITS(x)          (jpeg_nbits_table[x])
+#define JPEG_NBITS_NONZERO(x)  JPEG_NBITS(x)
+#endif
diff --git a/thirdparty/libjpeg-turbo/src/jpegapicomp.h b/thirdparty/libjpeg-turbo/src/jpegapicomp.h
new file mode 100644
index 00000000000..bb3912eb2f1
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jpegapicomp.h
@@ -0,0 +1,32 @@
+/*
+ * jpegapicomp.h
+ *
+ * Copyright (C) 2010, 2020, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * JPEG compatibility macros
+ * These declarations are considered internal to the JPEG library; most
+ * applications using the library shouldn't need to include this file.
+ */
+
+#if JPEG_LIB_VERSION >= 70
+#define _DCT_scaled_size  DCT_h_scaled_size
+#define _DCT_h_scaled_size  DCT_h_scaled_size
+#define _DCT_v_scaled_size  DCT_v_scaled_size
+#define _min_DCT_scaled_size  min_DCT_h_scaled_size
+#define _min_DCT_h_scaled_size  min_DCT_h_scaled_size
+#define _min_DCT_v_scaled_size  min_DCT_v_scaled_size
+#define _jpeg_width  jpeg_width
+#define _jpeg_height  jpeg_height
+#define JERR_ARITH_NOTIMPL  JERR_NOT_COMPILED
+#else
+#define _DCT_scaled_size  DCT_scaled_size
+#define _DCT_h_scaled_size  DCT_scaled_size
+#define _DCT_v_scaled_size  DCT_scaled_size
+#define _min_DCT_scaled_size  min_DCT_scaled_size
+#define _min_DCT_h_scaled_size  min_DCT_scaled_size
+#define _min_DCT_v_scaled_size  min_DCT_scaled_size
+#define _jpeg_width  image_width
+#define _jpeg_height  image_height
+#endif
diff --git a/thirdparty/libjpeg-turbo/src/jpegint.h b/thirdparty/libjpeg-turbo/src/jpegint.h
new file mode 100644
index 00000000000..a90493f9e08
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jpegint.h
@@ -0,0 +1,600 @@
+/*
+ * jpegint.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1997, Thomas G. Lane.
+ * Modified 1997-2009 by Guido Vollbeding.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2015-2017, 2019, 2021-2022, 2024, D. R. Commander.
+ * Copyright (C) 2015, Google, Inc.
+ * Copyright (C) 2021, Alex Richardson.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file provides common declarations for the various JPEG modules.
+ * These declarations are considered internal to the JPEG library; most
+ * applications using the library shouldn't need to include this file.
+ */
+
+
+/* Representation of a spatial difference value.
+ * This should be a signed value of at least 16 bits; int is usually OK.
+ */
+
+typedef int JDIFF;
+
+typedef JDIFF FAR *JDIFFROW;    /* pointer to one row of difference values */
+typedef JDIFFROW *JDIFFARRAY;   /* ptr to some rows (a 2-D diff array) */
+typedef JDIFFARRAY *JDIFFIMAGE; /* a 3-D diff array: top index is color */
+
+
+/* Declarations for both compression & decompression */
+
+typedef enum {            /* Operating modes for buffer controllers */
+  JBUF_PASS_THRU,         /* Plain stripwise operation */
+  /* Remaining modes require a full-image buffer to have been created */
+  JBUF_SAVE_SOURCE,       /* Run source subobject only, save output */
+  JBUF_CRANK_DEST,        /* Run dest subobject only, using saved data */
+  JBUF_SAVE_AND_PASS      /* Run both subobjects, save output */
+} J_BUF_MODE;
+
+/* Values of global_state field (jdapi.c has some dependencies on ordering!) */
+#define CSTATE_START     100    /* after create_compress */
+#define CSTATE_SCANNING  101    /* start_compress done, write_scanlines OK */
+#define CSTATE_RAW_OK    102    /* start_compress done, write_raw_data OK */
+#define CSTATE_WRCOEFS   103    /* jpeg_write_coefficients done */
+#define DSTATE_START     200    /* after create_decompress */
+#define DSTATE_INHEADER  201    /* reading header markers, no SOS yet */
+#define DSTATE_READY     202    /* found SOS, ready for start_decompress */
+#define DSTATE_PRELOAD   203    /* reading multiscan file in start_decompress*/
+#define DSTATE_PRESCAN   204    /* performing dummy pass for 2-pass quant */
+#define DSTATE_SCANNING  205    /* start_decompress done, read_scanlines OK */
+#define DSTATE_RAW_OK    206    /* start_decompress done, read_raw_data OK */
+#define DSTATE_BUFIMAGE  207    /* expecting jpeg_start_output */
+#define DSTATE_BUFPOST   208    /* looking for SOS/EOI in jpeg_finish_output */
+#define DSTATE_RDCOEFS   209    /* reading file in jpeg_read_coefficients */
+#define DSTATE_STOPPING  210    /* looking for EOI in jpeg_finish_decompress */
+
+
+/* JLONG must hold at least signed 32-bit values. */
+typedef long JLONG;
+
+/* JUINTPTR must hold pointer values. */
+#ifdef __UINTPTR_TYPE__
+/*
+ * __UINTPTR_TYPE__ is GNU-specific and available in GCC 4.6+ and Clang 3.0+.
+ * Fortunately, that is sufficient to support the few architectures for which
+ * sizeof(void *) != sizeof(size_t).  The only other options would require C99
+ * or Clang-specific builtins.
+ */
+typedef __UINTPTR_TYPE__ JUINTPTR;
+#else
+typedef size_t JUINTPTR;
+#endif
+
+#define IsExtRGB(cs) \
+  (cs == JCS_RGB || (cs >= JCS_EXT_RGB && cs <= JCS_EXT_ARGB))
+
+/*
+ * Left shift macro that handles a negative operand without causing any
+ * sanitizer warnings
+ */
+
+#define LEFT_SHIFT(a, b)  ((JLONG)((unsigned long)(a) << (b)))
+
+
+/* Declarations for compression modules */
+
+/* Master control module */
+struct jpeg_comp_master {
+  void (*prepare_for_pass) (j_compress_ptr cinfo);
+  void (*pass_startup) (j_compress_ptr cinfo);
+  void (*finish_pass) (j_compress_ptr cinfo);
+
+  /* State variables made visible to other modules */
+  boolean call_pass_startup;    /* True if pass_startup must be called */
+  boolean is_last_pass;         /* True during last pass */
+  boolean lossless;             /* True if lossless mode is enabled */
+};
+
+/* Main buffer control (downsampled-data buffer) */
+struct jpeg_c_main_controller {
+  void (*start_pass) (j_compress_ptr cinfo, J_BUF_MODE pass_mode);
+  void (*process_data) (j_compress_ptr cinfo, JSAMPARRAY input_buf,
+                        JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail);
+  void (*process_data_12) (j_compress_ptr cinfo, J12SAMPARRAY input_buf,
+                           JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail);
+#ifdef C_LOSSLESS_SUPPORTED
+  void (*process_data_16) (j_compress_ptr cinfo, J16SAMPARRAY input_buf,
+                           JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail);
+#endif
+};
+
+/* Compression preprocessing (downsampling input buffer control) */
+struct jpeg_c_prep_controller {
+  void (*start_pass) (j_compress_ptr cinfo, J_BUF_MODE pass_mode);
+  void (*pre_process_data) (j_compress_ptr cinfo, JSAMPARRAY input_buf,
+                            JDIMENSION *in_row_ctr, JDIMENSION in_rows_avail,
+                            JSAMPIMAGE output_buf,
+                            JDIMENSION *out_row_group_ctr,
+                            JDIMENSION out_row_groups_avail);
+  void (*pre_process_data_12) (j_compress_ptr cinfo, J12SAMPARRAY input_buf,
+                               JDIMENSION *in_row_ctr,
+                               JDIMENSION in_rows_avail,
+                               J12SAMPIMAGE output_buf,
+                               JDIMENSION *out_row_group_ctr,
+                               JDIMENSION out_row_groups_avail);
+#ifdef C_LOSSLESS_SUPPORTED
+  void (*pre_process_data_16) (j_compress_ptr cinfo, J16SAMPARRAY input_buf,
+                               JDIMENSION *in_row_ctr,
+                               JDIMENSION in_rows_avail,
+                               J16SAMPIMAGE output_buf,
+                               JDIMENSION *out_row_group_ctr,
+                               JDIMENSION out_row_groups_avail);
+#endif
+};
+
+/* Lossy mode: Coefficient buffer control
+ * Lossless mode: Difference buffer control
+ */
+struct jpeg_c_coef_controller {
+  void (*start_pass) (j_compress_ptr cinfo, J_BUF_MODE pass_mode);
+  boolean (*compress_data) (j_compress_ptr cinfo, JSAMPIMAGE input_buf);
+  boolean (*compress_data_12) (j_compress_ptr cinfo, J12SAMPIMAGE input_buf);
+#ifdef C_LOSSLESS_SUPPORTED
+  boolean (*compress_data_16) (j_compress_ptr cinfo, J16SAMPIMAGE input_buf);
+#endif
+};
+
+/* Colorspace conversion */
+struct jpeg_color_converter {
+  void (*start_pass) (j_compress_ptr cinfo);
+  void (*color_convert) (j_compress_ptr cinfo, JSAMPARRAY input_buf,
+                         JSAMPIMAGE output_buf, JDIMENSION output_row,
+                         int num_rows);
+  void (*color_convert_12) (j_compress_ptr cinfo, J12SAMPARRAY input_buf,
+                            J12SAMPIMAGE output_buf, JDIMENSION output_row,
+                            int num_rows);
+#ifdef C_LOSSLESS_SUPPORTED
+  void (*color_convert_16) (j_compress_ptr cinfo, J16SAMPARRAY input_buf,
+                            J16SAMPIMAGE output_buf, JDIMENSION output_row,
+                            int num_rows);
+#endif
+};
+
+/* Downsampling */
+struct jpeg_downsampler {
+  void (*start_pass) (j_compress_ptr cinfo);
+  void (*downsample) (j_compress_ptr cinfo, JSAMPIMAGE input_buf,
+                      JDIMENSION in_row_index, JSAMPIMAGE output_buf,
+                      JDIMENSION out_row_group_index);
+  void (*downsample_12) (j_compress_ptr cinfo, J12SAMPIMAGE input_buf,
+                         JDIMENSION in_row_index, J12SAMPIMAGE output_buf,
+                         JDIMENSION out_row_group_index);
+#ifdef C_LOSSLESS_SUPPORTED
+  void (*downsample_16) (j_compress_ptr cinfo, J16SAMPIMAGE input_buf,
+                         JDIMENSION in_row_index, J16SAMPIMAGE output_buf,
+                         JDIMENSION out_row_group_index);
+#endif
+
+  boolean need_context_rows;    /* TRUE if need rows above & below */
+};
+
+/* Lossy mode: Forward DCT (also controls coefficient quantization)
+ * Lossless mode: Prediction, sample differencing, and point transform
+ */
+struct jpeg_forward_dct {
+  void (*start_pass) (j_compress_ptr cinfo);
+
+  /* Lossy mode */
+  /* perhaps this should be an array??? */
+  void (*forward_DCT) (j_compress_ptr cinfo, jpeg_component_info *compptr,
+                       JSAMPARRAY sample_data, JBLOCKROW coef_blocks,
+                       JDIMENSION start_row, JDIMENSION start_col,
+                       JDIMENSION num_blocks);
+  void (*forward_DCT_12) (j_compress_ptr cinfo, jpeg_component_info *compptr,
+                          J12SAMPARRAY sample_data, JBLOCKROW coef_blocks,
+                          JDIMENSION start_row, JDIMENSION start_col,
+                          JDIMENSION num_blocks);
+};
+
+/* Entropy encoding */
+struct jpeg_entropy_encoder {
+  void (*start_pass) (j_compress_ptr cinfo, boolean gather_statistics);
+
+  /* Lossy mode */
+  boolean (*encode_mcu) (j_compress_ptr cinfo, JBLOCKROW *MCU_data);
+  /* Lossless mode */
+  JDIMENSION (*encode_mcus) (j_compress_ptr cinfo, JDIFFIMAGE diff_buf,
+                             JDIMENSION MCU_row_num, JDIMENSION MCU_col_num,
+                             JDIMENSION nMCU);
+
+  void (*finish_pass) (j_compress_ptr cinfo);
+};
+
+/* Marker writing */
+struct jpeg_marker_writer {
+  void (*write_file_header) (j_compress_ptr cinfo);
+  void (*write_frame_header) (j_compress_ptr cinfo);
+  void (*write_scan_header) (j_compress_ptr cinfo);
+  void (*write_file_trailer) (j_compress_ptr cinfo);
+  void (*write_tables_only) (j_compress_ptr cinfo);
+  /* These routines are exported to allow insertion of extra markers */
+  /* Probably only COM and APPn markers should be written this way */
+  void (*write_marker_header) (j_compress_ptr cinfo, int marker,
+                               unsigned int datalen);
+  void (*write_marker_byte) (j_compress_ptr cinfo, int val);
+};
+
+
+/* Declarations for decompression modules */
+
+/* Master control module */
+struct jpeg_decomp_master {
+  void (*prepare_for_output_pass) (j_decompress_ptr cinfo);
+  void (*finish_output_pass) (j_decompress_ptr cinfo);
+
+  /* State variables made visible to other modules */
+  boolean is_dummy_pass;        /* True during 1st pass for 2-pass quant */
+  boolean lossless;             /* True if decompressing a lossless image */
+
+  /* Partial decompression variables */
+  JDIMENSION first_iMCU_col;
+  JDIMENSION last_iMCU_col;
+  JDIMENSION first_MCU_col[MAX_COMPONENTS];
+  JDIMENSION last_MCU_col[MAX_COMPONENTS];
+  boolean jinit_upsampler_no_alloc;
+
+  /* Last iMCU row that was successfully decoded */
+  JDIMENSION last_good_iMCU_row;
+
+  /* Tail of list of saved markers */
+  jpeg_saved_marker_ptr marker_list_end;
+};
+
+/* Input control module */
+struct jpeg_input_controller {
+  int (*consume_input) (j_decompress_ptr cinfo);
+  void (*reset_input_controller) (j_decompress_ptr cinfo);
+  void (*start_input_pass) (j_decompress_ptr cinfo);
+  void (*finish_input_pass) (j_decompress_ptr cinfo);
+
+  /* State variables made visible to other modules */
+  boolean has_multiple_scans;   /* True if file has multiple scans */
+  boolean eoi_reached;          /* True when EOI has been consumed */
+};
+
+/* Main buffer control (downsampled-data buffer) */
+struct jpeg_d_main_controller {
+  void (*start_pass) (j_decompress_ptr cinfo, J_BUF_MODE pass_mode);
+  void (*process_data) (j_decompress_ptr cinfo, JSAMPARRAY output_buf,
+                        JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
+  void (*process_data_12) (j_decompress_ptr cinfo, J12SAMPARRAY output_buf,
+                           JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
+#ifdef D_LOSSLESS_SUPPORTED
+  void (*process_data_16) (j_decompress_ptr cinfo, J16SAMPARRAY output_buf,
+                           JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
+#endif
+};
+
+/* Lossy mode: Coefficient buffer control
+ * Lossless mode: Difference buffer control
+ */
+struct jpeg_d_coef_controller {
+  void (*start_input_pass) (j_decompress_ptr cinfo);
+  int (*consume_data) (j_decompress_ptr cinfo);
+  void (*start_output_pass) (j_decompress_ptr cinfo);
+  int (*decompress_data) (j_decompress_ptr cinfo, JSAMPIMAGE output_buf);
+  int (*decompress_data_12) (j_decompress_ptr cinfo, J12SAMPIMAGE output_buf);
+#ifdef D_LOSSLESS_SUPPORTED
+  int (*decompress_data_16) (j_decompress_ptr cinfo, J16SAMPIMAGE output_buf);
+#endif
+
+  /* These variables keep track of the current location of the input side. */
+  /* cinfo->input_iMCU_row is also used for this. */
+  JDIMENSION MCU_ctr;           /* counts MCUs processed in current row */
+  int MCU_vert_offset;          /* counts MCU rows within iMCU row */
+  int MCU_rows_per_iMCU_row;    /* number of such rows needed */
+
+  /* The output side's location is represented by cinfo->output_iMCU_row. */
+
+  /* Lossy mode */
+  /* Pointer to array of coefficient virtual arrays, or NULL if none */
+  jvirt_barray_ptr *coef_arrays;
+};
+
+/* Decompression postprocessing (color quantization buffer control) */
+struct jpeg_d_post_controller {
+  void (*start_pass) (j_decompress_ptr cinfo, J_BUF_MODE pass_mode);
+  void (*post_process_data) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+                             JDIMENSION *in_row_group_ctr,
+                             JDIMENSION in_row_groups_avail,
+                             JSAMPARRAY output_buf, JDIMENSION *out_row_ctr,
+                             JDIMENSION out_rows_avail);
+  void (*post_process_data_12) (j_decompress_ptr cinfo, J12SAMPIMAGE input_buf,
+                                JDIMENSION *in_row_group_ctr,
+                                JDIMENSION in_row_groups_avail,
+                                J12SAMPARRAY output_buf,
+                                JDIMENSION *out_row_ctr,
+                                JDIMENSION out_rows_avail);
+#ifdef D_LOSSLESS_SUPPORTED
+  void (*post_process_data_16) (j_decompress_ptr cinfo, J16SAMPIMAGE input_buf,
+                                JDIMENSION *in_row_group_ctr,
+                                JDIMENSION in_row_groups_avail,
+                                J16SAMPARRAY output_buf,
+                                JDIMENSION *out_row_ctr,
+                                JDIMENSION out_rows_avail);
+#endif
+};
+
+/* Marker reading & parsing */
+struct jpeg_marker_reader {
+  void (*reset_marker_reader) (j_decompress_ptr cinfo);
+  /* Read markers until SOS or EOI.
+   * Returns same codes as are defined for jpeg_consume_input:
+   * JPEG_SUSPENDED, JPEG_REACHED_SOS, or JPEG_REACHED_EOI.
+   */
+  int (*read_markers) (j_decompress_ptr cinfo);
+  /* Read a restart marker --- exported for use by entropy decoder only */
+  jpeg_marker_parser_method read_restart_marker;
+
+  /* State of marker reader --- nominally internal, but applications
+   * supplying COM or APPn handlers might like to know the state.
+   */
+  boolean saw_SOI;              /* found SOI? */
+  boolean saw_SOF;              /* found SOF? */
+  int next_restart_num;         /* next restart number expected (0-7) */
+  unsigned int discarded_bytes; /* # of bytes skipped looking for a marker */
+};
+
+/* Entropy decoding */
+struct jpeg_entropy_decoder {
+  void (*start_pass) (j_decompress_ptr cinfo);
+
+  /* Lossy mode */
+  boolean (*decode_mcu) (j_decompress_ptr cinfo, JBLOCKROW *MCU_data);
+  /* Lossless mode */
+  JDIMENSION (*decode_mcus) (j_decompress_ptr cinfo, JDIFFIMAGE diff_buf,
+                             JDIMENSION MCU_row_num, JDIMENSION MCU_col_num,
+                             JDIMENSION nMCU);
+  boolean (*process_restart) (j_decompress_ptr cinfo);
+
+  /* This is here to share code between baseline and progressive decoders; */
+  /* other modules probably should not use it */
+  boolean insufficient_data;    /* set TRUE after emitting warning */
+};
+
+/* Lossy mode: Inverse DCT (also performs dequantization)
+ * Lossless mode: Prediction, sample undifferencing, point transform, and
+ * sample size scaling
+ */
+typedef void (*inverse_DCT_method_ptr) (j_decompress_ptr cinfo,
+                                        jpeg_component_info *compptr,
+                                        JCOEFPTR coef_block,
+                                        JSAMPARRAY output_buf,
+                                        JDIMENSION output_col);
+typedef void (*inverse_DCT_12_method_ptr) (j_decompress_ptr cinfo,
+                                           jpeg_component_info *compptr,
+                                           JCOEFPTR coef_block,
+                                           J12SAMPARRAY output_buf,
+                                           JDIMENSION output_col);
+
+struct jpeg_inverse_dct {
+  void (*start_pass) (j_decompress_ptr cinfo);
+
+  /* Lossy mode */
+  /* It is useful to allow each component to have a separate IDCT method. */
+  inverse_DCT_method_ptr inverse_DCT[MAX_COMPONENTS];
+  inverse_DCT_12_method_ptr inverse_DCT_12[MAX_COMPONENTS];
+};
+
+/* Upsampling (note that upsampler must also call color converter) */
+struct jpeg_upsampler {
+  void (*start_pass) (j_decompress_ptr cinfo);
+  void (*upsample) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+                    JDIMENSION *in_row_group_ctr,
+                    JDIMENSION in_row_groups_avail, JSAMPARRAY output_buf,
+                    JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
+  void (*upsample_12) (j_decompress_ptr cinfo, J12SAMPIMAGE input_buf,
+                       JDIMENSION *in_row_group_ctr,
+                       JDIMENSION in_row_groups_avail, J12SAMPARRAY output_buf,
+                       JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
+#ifdef D_LOSSLESS_SUPPORTED
+  void (*upsample_16) (j_decompress_ptr cinfo, J16SAMPIMAGE input_buf,
+                       JDIMENSION *in_row_group_ctr,
+                       JDIMENSION in_row_groups_avail, J16SAMPARRAY output_buf,
+                       JDIMENSION *out_row_ctr, JDIMENSION out_rows_avail);
+#endif
+
+  boolean need_context_rows;    /* TRUE if need rows above & below */
+};
+
+/* Colorspace conversion */
+struct jpeg_color_deconverter {
+  void (*start_pass) (j_decompress_ptr cinfo);
+  void (*color_convert) (j_decompress_ptr cinfo, JSAMPIMAGE input_buf,
+                         JDIMENSION input_row, JSAMPARRAY output_buf,
+                         int num_rows);
+  void (*color_convert_12) (j_decompress_ptr cinfo, J12SAMPIMAGE input_buf,
+                            JDIMENSION input_row, J12SAMPARRAY output_buf,
+                            int num_rows);
+#ifdef D_LOSSLESS_SUPPORTED
+  void (*color_convert_16) (j_decompress_ptr cinfo, J16SAMPIMAGE input_buf,
+                            JDIMENSION input_row, J16SAMPARRAY output_buf,
+                            int num_rows);
+#endif
+};
+
+/* Color quantization or color precision reduction */
+struct jpeg_color_quantizer {
+  void (*start_pass) (j_decompress_ptr cinfo, boolean is_pre_scan);
+  void (*color_quantize) (j_decompress_ptr cinfo, JSAMPARRAY input_buf,
+                          JSAMPARRAY output_buf, int num_rows);
+  void (*color_quantize_12) (j_decompress_ptr cinfo, J12SAMPARRAY input_buf,
+                             J12SAMPARRAY output_buf, int num_rows);
+  void (*finish_pass) (j_decompress_ptr cinfo);
+  void (*new_color_map) (j_decompress_ptr cinfo);
+};
+
+
+/* Miscellaneous useful macros */
+
+#undef MAX
+#define MAX(a, b)       ((a) > (b) ? (a) : (b))
+#undef MIN
+#define MIN(a, b)       ((a) < (b) ? (a) : (b))
+
+#ifdef ZERO_BUFFERS
+#define MALLOC(size)  calloc(1, size)
+#else
+#define MALLOC(size)  malloc(size)
+#endif
+
+
+/* We assume that right shift corresponds to signed division by 2 with
+ * rounding towards minus infinity.  This is correct for typical "arithmetic
+ * shift" instructions that shift in copies of the sign bit.  But some
+ * C compilers implement >> with an unsigned shift.  For these machines you
+ * must define RIGHT_SHIFT_IS_UNSIGNED.
+ * RIGHT_SHIFT provides a proper signed right shift of a JLONG quantity.
+ * It is only applied with constant shift counts.  SHIFT_TEMPS must be
+ * included in the variables of any routine using RIGHT_SHIFT.
+ */
+
+#ifdef RIGHT_SHIFT_IS_UNSIGNED
+#define SHIFT_TEMPS     JLONG shift_temp;
+#define RIGHT_SHIFT(x, shft) \
+  ((shift_temp = (x)) < 0 ? \
+   (shift_temp >> (shft)) | ((~((JLONG)0)) << (32 - (shft))) : \
+   (shift_temp >> (shft)))
+#else
+#define SHIFT_TEMPS
+#define RIGHT_SHIFT(x, shft)    ((x) >> (shft))
+#endif
+
+
+/* Compression module initialization routines */
+EXTERN(void) jinit_compress_master(j_compress_ptr cinfo);
+EXTERN(void) jinit_c_master_control(j_compress_ptr cinfo,
+                                    boolean transcode_only);
+EXTERN(void) jinit_c_main_controller(j_compress_ptr cinfo,
+                                     boolean need_full_buffer);
+EXTERN(void) j12init_c_main_controller(j_compress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) jinit_c_prep_controller(j_compress_ptr cinfo,
+                                     boolean need_full_buffer);
+EXTERN(void) j12init_c_prep_controller(j_compress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) jinit_c_coef_controller(j_compress_ptr cinfo,
+                                     boolean need_full_buffer);
+EXTERN(void) j12init_c_coef_controller(j_compress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) jinit_color_converter(j_compress_ptr cinfo);
+EXTERN(void) j12init_color_converter(j_compress_ptr cinfo);
+EXTERN(void) jinit_downsampler(j_compress_ptr cinfo);
+EXTERN(void) j12init_downsampler(j_compress_ptr cinfo);
+EXTERN(void) jinit_forward_dct(j_compress_ptr cinfo);
+EXTERN(void) j12init_forward_dct(j_compress_ptr cinfo);
+EXTERN(void) jinit_huff_encoder(j_compress_ptr cinfo);
+EXTERN(void) jinit_phuff_encoder(j_compress_ptr cinfo);
+EXTERN(void) jinit_arith_encoder(j_compress_ptr cinfo);
+EXTERN(void) jinit_marker_writer(j_compress_ptr cinfo);
+#ifdef C_LOSSLESS_SUPPORTED
+EXTERN(void) j16init_c_main_controller(j_compress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) j16init_c_prep_controller(j_compress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) j16init_color_converter(j_compress_ptr cinfo);
+EXTERN(void) j16init_downsampler(j_compress_ptr cinfo);
+EXTERN(void) jinit_c_diff_controller(j_compress_ptr cinfo,
+                                     boolean need_full_buffer);
+EXTERN(void) j12init_c_diff_controller(j_compress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) j16init_c_diff_controller(j_compress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) jinit_lhuff_encoder(j_compress_ptr cinfo);
+EXTERN(void) jinit_lossless_compressor(j_compress_ptr cinfo);
+EXTERN(void) j12init_lossless_compressor(j_compress_ptr cinfo);
+EXTERN(void) j16init_lossless_compressor(j_compress_ptr cinfo);
+#endif
+
+/* Decompression module initialization routines */
+EXTERN(void) jinit_master_decompress(j_decompress_ptr cinfo);
+EXTERN(void) jinit_d_main_controller(j_decompress_ptr cinfo,
+                                     boolean need_full_buffer);
+EXTERN(void) j12init_d_main_controller(j_decompress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) jinit_d_coef_controller(j_decompress_ptr cinfo,
+                                     boolean need_full_buffer);
+EXTERN(void) j12init_d_coef_controller(j_decompress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) jinit_d_post_controller(j_decompress_ptr cinfo,
+                                     boolean need_full_buffer);
+EXTERN(void) j12init_d_post_controller(j_decompress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) jinit_input_controller(j_decompress_ptr cinfo);
+EXTERN(void) jinit_marker_reader(j_decompress_ptr cinfo);
+EXTERN(void) jinit_huff_decoder(j_decompress_ptr cinfo);
+EXTERN(void) jinit_phuff_decoder(j_decompress_ptr cinfo);
+EXTERN(void) jinit_arith_decoder(j_decompress_ptr cinfo);
+EXTERN(void) jinit_inverse_dct(j_decompress_ptr cinfo);
+EXTERN(void) j12init_inverse_dct(j_decompress_ptr cinfo);
+EXTERN(void) jinit_upsampler(j_decompress_ptr cinfo);
+EXTERN(void) j12init_upsampler(j_decompress_ptr cinfo);
+EXTERN(void) jinit_color_deconverter(j_decompress_ptr cinfo);
+EXTERN(void) j12init_color_deconverter(j_decompress_ptr cinfo);
+EXTERN(void) jinit_1pass_quantizer(j_decompress_ptr cinfo);
+EXTERN(void) j12init_1pass_quantizer(j_decompress_ptr cinfo);
+EXTERN(void) jinit_2pass_quantizer(j_decompress_ptr cinfo);
+EXTERN(void) j12init_2pass_quantizer(j_decompress_ptr cinfo);
+EXTERN(void) jinit_merged_upsampler(j_decompress_ptr cinfo);
+EXTERN(void) j12init_merged_upsampler(j_decompress_ptr cinfo);
+#ifdef D_LOSSLESS_SUPPORTED
+EXTERN(void) j16init_d_main_controller(j_decompress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) j16init_d_post_controller(j_decompress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) j16init_upsampler(j_decompress_ptr cinfo);
+EXTERN(void) j16init_color_deconverter(j_decompress_ptr cinfo);
+EXTERN(void) jinit_d_diff_controller(j_decompress_ptr cinfo,
+                                     boolean need_full_buffer);
+EXTERN(void) j12init_d_diff_controller(j_decompress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) j16init_d_diff_controller(j_decompress_ptr cinfo,
+                                       boolean need_full_buffer);
+EXTERN(void) jinit_lhuff_decoder(j_decompress_ptr cinfo);
+EXTERN(void) jinit_lossless_decompressor(j_decompress_ptr cinfo);
+EXTERN(void) j12init_lossless_decompressor(j_decompress_ptr cinfo);
+EXTERN(void) j16init_lossless_decompressor(j_decompress_ptr cinfo);
+#endif
+
+/* Memory manager initialization */
+EXTERN(void) jinit_memory_mgr(j_common_ptr cinfo);
+
+/* Utility routines in jutils.c */
+EXTERN(long) jdiv_round_up(long a, long b);
+EXTERN(long) jround_up(long a, long b);
+EXTERN(void) jcopy_sample_rows(JSAMPARRAY input_array, int source_row,
+                               JSAMPARRAY output_array, int dest_row,
+                               int num_rows, JDIMENSION num_cols);
+EXTERN(void) j12copy_sample_rows(J12SAMPARRAY input_array, int source_row,
+                                 J12SAMPARRAY output_array, int dest_row,
+                                 int num_rows, JDIMENSION num_cols);
+#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
+EXTERN(void) j16copy_sample_rows(J16SAMPARRAY input_array, int source_row,
+                                 J16SAMPARRAY output_array, int dest_row,
+                                 int num_rows, JDIMENSION num_cols);
+#endif
+EXTERN(void) jcopy_block_row(JBLOCKROW input_row, JBLOCKROW output_row,
+                             JDIMENSION num_blocks);
+EXTERN(void) jzero_far(void *target, size_t bytestozero);
+/* Constant tables in jutils.c */
+#if 0                           /* This table is not actually needed in v6a */
+extern const int jpeg_zigzag_order[]; /* natural coef order to zigzag order */
+#endif
+extern const int jpeg_natural_order[]; /* zigzag coef order to natural order */
+
+/* Arithmetic coding probability estimation tables in jaricom.c */
+extern const JLONG jpeg_aritab[];
diff --git a/thirdparty/libjpeg-turbo/src/jpeglib.h b/thirdparty/libjpeg-turbo/src/jpeglib.h
new file mode 100644
index 00000000000..f7076a18d01
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jpeglib.h
@@ -0,0 +1,1223 @@
+/*
+ * jpeglib.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * Modified 2002-2009 by Guido Vollbeding.
+ * Lossless JPEG Modifications:
+ * Copyright (C) 1999, Ken Murchison.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009-2011, 2013-2014, 2016-2017, 2020, 2022-2024,
+             D. R. Commander.
+ * Copyright (C) 2015, Google, Inc.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file defines the application interface for the JPEG library.
+ * Most applications using the library need only include this file,
+ * and perhaps jerror.h if they want to know the exact error codes.
+ */
+
+/* NOTE: This header file does not include stdio.h, despite the fact that it
+ * uses FILE and size_t.  That is by design, since the libjpeg API predates the
+ * widespread adoption of ANSI/ISO C.  Referring to libjpeg.txt, it is a
+ * documented requirement that calling programs "include system headers that
+ * define at least the typedefs FILE and size_t" before including jpeglib.h.
+ * Technically speaking, changing that requirement by including stdio.h here
+ * would break backward API compatibility.  Please do not file bug reports,
+ * feature requests, or pull requests regarding this.
+ */
+
+#ifndef JPEGLIB_H
+#define JPEGLIB_H
+
+/*
+ * First we include the configuration files that record how this
+ * installation of the JPEG library is set up.  jconfig.h can be
+ * generated automatically for many systems.  jmorecfg.h contains
+ * manual configuration options that most people need not worry about.
+ */
+
+#ifndef JCONFIG_INCLUDED        /* in case jinclude.h already did */
+#include "jconfig.h"            /* widely used configuration options */
+#endif
+#include "jmorecfg.h"           /* seldom changed options */
+
+
+#ifdef __cplusplus
+#ifndef DONT_USE_EXTERN_C
+extern "C" {
+#endif
+#endif
+
+
+/* Various constants determining the sizes of things.
+ * All of these are specified by the JPEG standard, so don't change them
+ * if you want to be compatible.
+ */
+
+/* NOTE: In lossless mode, an MCU contains one or more samples rather than one
+ * or more 8x8 DCT blocks, so the term "data unit" is used to generically
+ * describe a sample in lossless mode or an 8x8 DCT block in lossy mode.  To
+ * preserve backward API/ABI compatibility, the field and macro names retain
+ * the "block" terminology.
+ */
+
+#define DCTSIZE             8   /* The basic DCT block is 8x8 samples */
+#define DCTSIZE2            64  /* DCTSIZE squared; # of elements in a block */
+#define NUM_QUANT_TBLS      4   /* Quantization tables are numbered 0..3 */
+#define NUM_HUFF_TBLS       4   /* Huffman tables are numbered 0..3 */
+#define NUM_ARITH_TBLS      16  /* Arith-coding tables are numbered 0..15 */
+#define MAX_COMPS_IN_SCAN   4   /* JPEG limit on # of components in one scan */
+#define MAX_SAMP_FACTOR     4   /* JPEG limit on sampling factors */
+/* Unfortunately, some bozo at Adobe saw no reason to be bound by the standard;
+ * the PostScript DCT filter can emit files with many more than 10 blocks/MCU.
+ * If you happen to run across such a file, you can up D_MAX_BLOCKS_IN_MCU
+ * to handle it.  We even let you do this from the jconfig.h file.  However,
+ * we strongly discourage changing C_MAX_BLOCKS_IN_MCU; just because Adobe
+ * sometimes emits noncompliant files doesn't mean you should too.
+ */
+#define C_MAX_BLOCKS_IN_MCU   10 /* compressor's limit on data units/MCU */
+#ifndef D_MAX_BLOCKS_IN_MCU
+#define D_MAX_BLOCKS_IN_MCU   10 /* decompressor's limit on data units/MCU */
+#endif
+
+
+/* Data structures for images (arrays of samples and of DCT coefficients).
+ */
+
+typedef JSAMPLE *JSAMPROW;      /* ptr to one image row of pixel samples with
+                                   2-bit through 8-bit data precision. */
+typedef JSAMPROW *JSAMPARRAY;   /* ptr to some JSAMPLE rows (a 2-D JSAMPLE
+                                   array) */
+typedef JSAMPARRAY *JSAMPIMAGE; /* a 3-D JSAMPLE array: top index is color */
+
+typedef J12SAMPLE *J12SAMPROW;      /* ptr to one image row of pixel samples
+                                       with 9-bit through 12-bit data
+                                       precision. */
+typedef J12SAMPROW *J12SAMPARRAY;   /* ptr to some J12SAMPLE rows (a 2-D
+                                       J12SAMPLE array) */
+typedef J12SAMPARRAY *J12SAMPIMAGE; /* a 3-D J12SAMPLE array: top index is
+                                       color */
+
+typedef J16SAMPLE *J16SAMPROW;      /* ptr to one image row of pixel samples
+                                       with 13-bit through 16-bit data
+                                       precision. */
+typedef J16SAMPROW *J16SAMPARRAY;   /* ptr to some J16SAMPLE rows (a 2-D
+                                       J16SAMPLE array) */
+typedef J16SAMPARRAY *J16SAMPIMAGE; /* a 3-D J16SAMPLE array: top index is
+                                       color */
+
+typedef JCOEF JBLOCK[DCTSIZE2]; /* one block of coefficients */
+typedef JBLOCK *JBLOCKROW;      /* pointer to one row of coefficient blocks */
+typedef JBLOCKROW *JBLOCKARRAY;         /* a 2-D array of coefficient blocks */
+typedef JBLOCKARRAY *JBLOCKIMAGE;       /* a 3-D array of coefficient blocks */
+
+typedef JCOEF *JCOEFPTR;        /* useful in a couple of places */
+
+
+/* Types for JPEG compression parameters and working tables. */
+
+
+/* DCT coefficient quantization tables. */
+
+typedef struct {
+  /* This array gives the coefficient quantizers in natural array order
+   * (not the zigzag order in which they are stored in a JPEG DQT marker).
+   * CAUTION: IJG versions prior to v6a kept this array in zigzag order.
+   */
+  UINT16 quantval[DCTSIZE2];    /* quantization step for each coefficient */
+  /* This field is used only during compression.  It's initialized FALSE when
+   * the table is created, and set TRUE when it's been output to the file.
+   * You could suppress output of a table by setting this to TRUE.
+   * (See jpeg_suppress_tables for an example.)
+   */
+  boolean sent_table;           /* TRUE when table has been output */
+} JQUANT_TBL;
+
+
+/* Huffman coding tables. */
+
+typedef struct {
+  /* These two fields directly represent the contents of a JPEG DHT marker */
+  UINT8 bits[17];               /* bits[k] = # of symbols with codes of */
+                                /* length k bits; bits[0] is unused */
+  UINT8 huffval[256];           /* The symbols, in order of incr code length */
+  /* This field is used only during compression.  It's initialized FALSE when
+   * the table is created, and set TRUE when it's been output to the file.
+   * You could suppress output of a table by setting this to TRUE.
+   * (See jpeg_suppress_tables for an example.)
+   */
+  boolean sent_table;           /* TRUE when table has been output */
+} JHUFF_TBL;
+
+
+/* Basic info about one component (color channel). */
+
+typedef struct {
+  /* These values are fixed over the whole image. */
+  /* For compression, they must be supplied by parameter setup; */
+  /* for decompression, they are read from the SOF marker. */
+  int component_id;             /* identifier for this component (0..255) */
+  int component_index;          /* its index in SOF or cinfo->comp_info[] */
+  int h_samp_factor;            /* horizontal sampling factor (1..4) */
+  int v_samp_factor;            /* vertical sampling factor (1..4) */
+  int quant_tbl_no;             /* quantization table selector (0..3) */
+  /* These values may vary between scans. */
+  /* For compression, they must be supplied by parameter setup; */
+  /* for decompression, they are read from the SOS marker. */
+  /* The decompressor output side may not use these variables. */
+  int dc_tbl_no;                /* DC entropy table selector (0..3) */
+  int ac_tbl_no;                /* AC entropy table selector (0..3) */
+
+  /* Remaining fields should be treated as private by applications. */
+
+  /* These values are computed during compression or decompression startup: */
+  /* Component's size in data units.
+   * In lossy mode, any dummy blocks added to complete an MCU are not counted;
+   * therefore these values do not depend on whether a scan is interleaved or
+   * not.  In lossless mode, these are always equal to the image width and
+   * height.
+   */
+  JDIMENSION width_in_blocks;
+  JDIMENSION height_in_blocks;
+  /* Size of a data unit in samples.  Always DCTSIZE for lossy compression.
+   * For lossy decompression this is the size of the output from one DCT block,
+   * reflecting any scaling we choose to apply during the IDCT step.
+   * Values from 1 to 16 are supported.  Note that different components may
+   * receive different IDCT scalings.  In lossless mode, this is always equal
+   * to 1.
+   */
+#if JPEG_LIB_VERSION >= 70
+  int DCT_h_scaled_size;
+  int DCT_v_scaled_size;
+#else
+  int DCT_scaled_size;
+#endif
+  /* The downsampled dimensions are the component's actual, unpadded number
+   * of samples at the main buffer (preprocessing/compression interface), thus
+   * downsampled_width = ceil(image_width * Hi/Hmax)
+   * and similarly for height.  For lossy decompression, IDCT scaling is
+   * included, so
+   * downsampled_width = ceil(image_width * Hi/Hmax * DCT_[h_]scaled_size/DCTSIZE)
+   * In lossless mode, these are always equal to the image width and height.
+   */
+  JDIMENSION downsampled_width;  /* actual width in samples */
+  JDIMENSION downsampled_height; /* actual height in samples */
+  /* This flag is used only for decompression.  In cases where some of the
+   * components will be ignored (eg grayscale output from YCbCr image),
+   * we can skip most computations for the unused components.
+   */
+  boolean component_needed;     /* do we need the value of this component? */
+
+  /* These values are computed before starting a scan of the component. */
+  /* The decompressor output side may not use these variables. */
+  int MCU_width;                /* number of data units per MCU, horizontally */
+  int MCU_height;               /* number of data units per MCU, vertically */
+  int MCU_blocks;               /* MCU_width * MCU_height */
+  int MCU_sample_width;         /* MCU width in samples, MCU_width*DCT_[h_]scaled_size */
+  int last_col_width;           /* # of non-dummy data units across in last MCU */
+  int last_row_height;          /* # of non-dummy data units down in last MCU */
+
+  /* Saved quantization table for component; NULL if none yet saved.
+   * See jdinput.c comments about the need for this information.
+   * This field is currently used only for decompression.
+   */
+  JQUANT_TBL *quant_table;
+
+  /* Private per-component storage for DCT or IDCT subsystem. */
+  void *dct_table;
+} jpeg_component_info;
+
+
+/* The script for encoding a multiple-scan file is an array of these: */
+
+typedef struct {
+  int comps_in_scan;            /* number of components encoded in this scan */
+  int component_index[MAX_COMPS_IN_SCAN]; /* their SOF/comp_info[] indexes */
+  int Ss, Se;                   /* progressive JPEG spectral selection parms
+                                   (Ss is the predictor selection value in
+                                   lossless mode) */
+  int Ah, Al;                   /* progressive JPEG successive approx. parms
+                                   (Al is the point transform value in lossless
+                                   mode) */
+} jpeg_scan_info;
+
+/* The decompressor can save APPn and COM markers in a list of these: */
+
+typedef struct jpeg_marker_struct *jpeg_saved_marker_ptr;
+
+struct jpeg_marker_struct {
+  jpeg_saved_marker_ptr next;   /* next in list, or NULL */
+  UINT8 marker;                 /* marker code: JPEG_COM, or JPEG_APP0+n */
+  unsigned int original_length; /* # bytes of data in the file */
+  unsigned int data_length;     /* # bytes of data saved at data[] */
+  JOCTET *data;                 /* the data contained in the marker */
+  /* the marker length word is not counted in data_length or original_length */
+};
+
+/* Known color spaces. */
+
+#define JCS_EXTENSIONS  1
+#define JCS_ALPHA_EXTENSIONS  1
+
+typedef enum {
+  JCS_UNKNOWN,            /* error/unspecified */
+  JCS_GRAYSCALE,          /* monochrome */
+  JCS_RGB,                /* red/green/blue as specified by the RGB_RED,
+                             RGB_GREEN, RGB_BLUE, and RGB_PIXELSIZE macros */
+  JCS_YCbCr,              /* Y/Cb/Cr (also known as YUV) */
+  JCS_CMYK,               /* C/M/Y/K */
+  JCS_YCCK,               /* Y/Cb/Cr/K */
+  JCS_EXT_RGB,            /* red/green/blue */
+  JCS_EXT_RGBX,           /* red/green/blue/x */
+  JCS_EXT_BGR,            /* blue/green/red */
+  JCS_EXT_BGRX,           /* blue/green/red/x */
+  JCS_EXT_XBGR,           /* x/blue/green/red */
+  JCS_EXT_XRGB,           /* x/red/green/blue */
+  /* When out_color_space it set to JCS_EXT_RGBX, JCS_EXT_BGRX, JCS_EXT_XBGR,
+     or JCS_EXT_XRGB during decompression, the X byte is undefined, and in
+     order to ensure the best performance, libjpeg-turbo can set that byte to
+     whatever value it wishes.  Use the following colorspace constants to
+     ensure that the X byte is set to 0xFF, so that it can be interpreted as an
+     opaque alpha channel. */
+  JCS_EXT_RGBA,           /* red/green/blue/alpha */
+  JCS_EXT_BGRA,           /* blue/green/red/alpha */
+  JCS_EXT_ABGR,           /* alpha/blue/green/red */
+  JCS_EXT_ARGB,           /* alpha/red/green/blue */
+  JCS_RGB565              /* 5-bit red/6-bit green/5-bit blue
+                             [decompression only] */
+} J_COLOR_SPACE;
+
+/* DCT/IDCT algorithm options. */
+
+typedef enum {
+  JDCT_ISLOW,             /* accurate integer method */
+  JDCT_IFAST,             /* less accurate integer method [legacy feature] */
+  JDCT_FLOAT              /* floating-point method [legacy feature] */
+} J_DCT_METHOD;
+
+#ifndef JDCT_DEFAULT            /* may be overridden in jconfig.h */
+#define JDCT_DEFAULT  JDCT_ISLOW
+#endif
+#ifndef JDCT_FASTEST            /* may be overridden in jconfig.h */
+#define JDCT_FASTEST  JDCT_IFAST
+#endif
+
+/* Dithering options for decompression. */
+
+typedef enum {
+  JDITHER_NONE,           /* no dithering */
+  JDITHER_ORDERED,        /* simple ordered dither */
+  JDITHER_FS              /* Floyd-Steinberg error diffusion dither */
+} J_DITHER_MODE;
+
+
+/* Common fields between JPEG compression and decompression master structs. */
+
+#define jpeg_common_fields \
+  struct jpeg_error_mgr *err;   /* Error handler module */ \
+  struct jpeg_memory_mgr *mem;  /* Memory manager module */ \
+  struct jpeg_progress_mgr *progress; /* Progress monitor, or NULL if none */ \
+  void *client_data;            /* Available for use by application */ \
+  boolean is_decompressor;      /* So common code can tell which is which */ \
+  int global_state              /* For checking call sequence validity */
+
+/* Routines that are to be used by both halves of the library are declared
+ * to receive a pointer to this structure.  There are no actual instances of
+ * jpeg_common_struct, only of jpeg_compress_struct and jpeg_decompress_struct.
+ */
+struct jpeg_common_struct {
+  jpeg_common_fields;           /* Fields common to both master struct types */
+  /* Additional fields follow in an actual jpeg_compress_struct or
+   * jpeg_decompress_struct.  All three structs must agree on these
+   * initial fields!  (This would be a lot cleaner in C++.)
+   */
+};
+
+typedef struct jpeg_common_struct *j_common_ptr;
+typedef struct jpeg_compress_struct *j_compress_ptr;
+typedef struct jpeg_decompress_struct *j_decompress_ptr;
+
+
+/* Master record for a compression instance */
+
+struct jpeg_compress_struct {
+  jpeg_common_fields;           /* Fields shared with jpeg_decompress_struct */
+
+  /* Destination for compressed data */
+  struct jpeg_destination_mgr *dest;
+
+  /* Description of source image --- these fields must be filled in by
+   * outer application before starting compression.  in_color_space must
+   * be correct before you can even call jpeg_set_defaults().
+   */
+
+  JDIMENSION image_width;       /* input image width */
+  JDIMENSION image_height;      /* input image height */
+  int input_components;         /* # of color components in input image */
+  J_COLOR_SPACE in_color_space; /* colorspace of input image */
+
+  double input_gamma;           /* image gamma of input image */
+
+  /* Compression parameters --- these fields must be set before calling
+   * jpeg_start_compress().  We recommend calling jpeg_set_defaults() to
+   * initialize everything to reasonable defaults, then changing anything
+   * the application specifically wants to change.  That way you won't get
+   * burnt when new parameters are added.  Also note that there are several
+   * helper routines to simplify changing parameters.
+   */
+
+#if JPEG_LIB_VERSION >= 70
+  unsigned int scale_num, scale_denom; /* fraction by which to scale image */
+
+  JDIMENSION jpeg_width;        /* scaled JPEG image width */
+  JDIMENSION jpeg_height;       /* scaled JPEG image height */
+  /* Dimensions of actual JPEG image that will be written to file,
+   * derived from input dimensions by scaling factors above.
+   * These fields are computed by jpeg_start_compress().
+   * You can also use jpeg_calc_jpeg_dimensions() to determine these values
+   * in advance of calling jpeg_start_compress().
+   */
+#endif
+
+  int data_precision;           /* bits of precision in image data */
+
+  int num_components;           /* # of color components in JPEG image */
+  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
+
+  jpeg_component_info *comp_info;
+  /* comp_info[i] describes component that appears i'th in SOF */
+
+  JQUANT_TBL *quant_tbl_ptrs[NUM_QUANT_TBLS];
+#if JPEG_LIB_VERSION >= 70
+  int q_scale_factor[NUM_QUANT_TBLS];
+#endif
+  /* ptrs to coefficient quantization tables, or NULL if not defined,
+   * and corresponding scale factors (percentage, initialized 100).
+   */
+
+  JHUFF_TBL *dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  JHUFF_TBL *ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  /* ptrs to Huffman coding tables, or NULL if not defined */
+
+  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
+  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
+  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
+
+  int num_scans;                /* # of entries in scan_info array */
+  const jpeg_scan_info *scan_info; /* script for multi-scan file, or NULL */
+  /* The default value of scan_info is NULL, which causes a single-scan
+   * sequential JPEG file to be emitted.  To create a multi-scan file,
+   * set num_scans and scan_info to point to an array of scan definitions.
+   */
+
+  boolean raw_data_in;          /* TRUE=caller supplies downsampled data */
+  boolean arith_code;           /* TRUE=arithmetic coding, FALSE=Huffman */
+  boolean optimize_coding;      /* TRUE=optimize entropy encoding parms */
+  boolean CCIR601_sampling;     /* TRUE=first samples are cosited */
+#if JPEG_LIB_VERSION >= 70
+  boolean do_fancy_downsampling; /* TRUE=apply fancy downsampling */
+#endif
+  int smoothing_factor;         /* 1..100, or 0 for no input smoothing */
+  J_DCT_METHOD dct_method;      /* DCT algorithm selector */
+
+  /* The restart interval can be specified in absolute MCUs by setting
+   * restart_interval, or in MCU rows by setting restart_in_rows
+   * (in which case the correct restart_interval will be figured
+   * for each scan).
+   */
+  unsigned int restart_interval; /* MCUs per restart, or 0 for no restart */
+  int restart_in_rows;          /* if > 0, MCU rows per restart interval */
+
+  /* Parameters controlling emission of special markers. */
+
+  boolean write_JFIF_header;    /* should a JFIF marker be written? */
+  UINT8 JFIF_major_version;     /* What to write for the JFIF version number */
+  UINT8 JFIF_minor_version;
+  /* These three values are not used by the JPEG code, merely copied */
+  /* into the JFIF APP0 marker.  density_unit can be 0 for unknown, */
+  /* 1 for dots/inch, or 2 for dots/cm.  Note that the pixel aspect */
+  /* ratio is defined by X_density/Y_density even when density_unit=0. */
+  UINT8 density_unit;           /* JFIF code for pixel size units */
+  UINT16 X_density;             /* Horizontal pixel density */
+  UINT16 Y_density;             /* Vertical pixel density */
+  boolean write_Adobe_marker;   /* should an Adobe marker be written? */
+
+  /* State variable: index of next scanline to be written to
+   * jpeg_write_scanlines().  Application may use this to control its
+   * processing loop, e.g., "while (next_scanline < image_height)".
+   */
+
+  JDIMENSION next_scanline;     /* 0 .. image_height-1  */
+
+  /* Remaining fields are known throughout compressor, but generally
+   * should not be touched by a surrounding application.
+   */
+
+  /*
+   * These fields are computed during compression startup
+   */
+  boolean progressive_mode;     /* TRUE if scan script uses progressive mode */
+  int max_h_samp_factor;        /* largest h_samp_factor */
+  int max_v_samp_factor;        /* largest v_samp_factor */
+
+#if JPEG_LIB_VERSION >= 70
+  int min_DCT_h_scaled_size;    /* smallest DCT_h_scaled_size of any component */
+  int min_DCT_v_scaled_size;    /* smallest DCT_v_scaled_size of any component */
+#endif
+
+  JDIMENSION total_iMCU_rows;   /* # of iMCU rows to be input to coefficient or
+                                   difference controller */
+  /* The coefficient or difference controller receives data in units of MCU
+   * rows as defined for fully interleaved scans (whether the JPEG file is
+   * interleaved or not).  In lossy mode, there are v_samp_factor * DCTSIZE
+   * sample rows of each component in an "iMCU" (interleaved MCU) row.  In
+   * lossless mode, total_iMCU_rows is always equal to the image height.
+   */
+
+  /*
+   * These fields are valid during any one scan.
+   * They describe the components and MCUs actually appearing in the scan.
+   */
+  int comps_in_scan;            /* # of JPEG components in this scan */
+  jpeg_component_info *cur_comp_info[MAX_COMPS_IN_SCAN];
+  /* *cur_comp_info[i] describes component that appears i'th in SOS */
+
+  JDIMENSION MCUs_per_row;      /* # of MCUs across the image */
+  JDIMENSION MCU_rows_in_scan;  /* # of MCU rows in the image */
+
+  int blocks_in_MCU;            /* # of data units per MCU */
+  int MCU_membership[C_MAX_BLOCKS_IN_MCU];
+  /* MCU_membership[i] is index in cur_comp_info of component owning */
+  /* i'th data unit in an MCU */
+
+  int Ss, Se, Ah, Al;           /* progressive/lossless JPEG parameters for
+                                   scan */
+
+#if JPEG_LIB_VERSION >= 80
+  int block_size;               /* the basic DCT block size: 1..16 */
+  const int *natural_order;     /* natural-order position array */
+  int lim_Se;                   /* min( Se, DCTSIZE2-1 ) */
+#endif
+
+  /*
+   * Links to compression subobjects (methods and private variables of modules)
+   */
+  struct jpeg_comp_master *master;
+  struct jpeg_c_main_controller *main;
+  struct jpeg_c_prep_controller *prep;
+  struct jpeg_c_coef_controller *coef;
+  struct jpeg_marker_writer *marker;
+  struct jpeg_color_converter *cconvert;
+  struct jpeg_downsampler *downsample;
+  struct jpeg_forward_dct *fdct;
+  struct jpeg_entropy_encoder *entropy;
+  jpeg_scan_info *script_space; /* workspace for jpeg_simple_progression */
+  int script_space_size;
+};
+
+
+/* Master record for a decompression instance */
+
+struct jpeg_decompress_struct {
+  jpeg_common_fields;           /* Fields shared with jpeg_compress_struct */
+
+  /* Source of compressed data */
+  struct jpeg_source_mgr *src;
+
+  /* Basic description of image --- filled in by jpeg_read_header(). */
+  /* Application may inspect these values to decide how to process image. */
+
+  JDIMENSION image_width;       /* nominal image width (from SOF marker) */
+  JDIMENSION image_height;      /* nominal image height */
+  int num_components;           /* # of color components in JPEG image */
+  J_COLOR_SPACE jpeg_color_space; /* colorspace of JPEG image */
+
+  /* Decompression processing parameters --- these fields must be set before
+   * calling jpeg_start_decompress().  Note that jpeg_read_header() initializes
+   * them to default values.
+   */
+
+  J_COLOR_SPACE out_color_space; /* colorspace for output */
+
+  unsigned int scale_num, scale_denom; /* fraction by which to scale image */
+
+  double output_gamma;          /* image gamma wanted in output */
+
+  boolean buffered_image;       /* TRUE=multiple output passes */
+  boolean raw_data_out;         /* TRUE=downsampled data wanted */
+
+  J_DCT_METHOD dct_method;      /* IDCT algorithm selector */
+  boolean do_fancy_upsampling;  /* TRUE=apply fancy upsampling */
+  boolean do_block_smoothing;   /* TRUE=apply interblock smoothing */
+
+  boolean quantize_colors;      /* TRUE=colormapped output wanted */
+  /* the following are ignored if not quantize_colors: */
+  J_DITHER_MODE dither_mode;    /* type of color dithering to use */
+  boolean two_pass_quantize;    /* TRUE=use two-pass color quantization */
+  int desired_number_of_colors; /* max # colors to use in created colormap */
+  /* these are significant only in buffered-image mode: */
+  boolean enable_1pass_quant;   /* enable future use of 1-pass quantizer */
+  boolean enable_external_quant;/* enable future use of external colormap */
+  boolean enable_2pass_quant;   /* enable future use of 2-pass quantizer */
+
+  /* Description of actual output image that will be returned to application.
+   * These fields are computed by jpeg_start_decompress().
+   * You can also use jpeg_calc_output_dimensions() to determine these values
+   * in advance of calling jpeg_start_decompress().
+   */
+
+  JDIMENSION output_width;      /* scaled image width */
+  JDIMENSION output_height;     /* scaled image height */
+  int out_color_components;     /* # of color components in out_color_space */
+  int output_components;        /* # of color components returned */
+  /* output_components is 1 (a colormap index) when quantizing colors;
+   * otherwise it equals out_color_components.
+   */
+  int rec_outbuf_height;        /* min recommended height of scanline buffer */
+  /* If the buffer passed to jpeg_read_scanlines() is less than this many rows
+   * high, space and time will be wasted due to unnecessary data copying.
+   * Usually rec_outbuf_height will be 1 or 2, at most 4.
+   */
+
+  /* When quantizing colors, the output colormap is described by these fields.
+   * The application can supply a colormap by setting colormap non-NULL before
+   * calling jpeg_start_decompress; otherwise a colormap is created during
+   * jpeg_start_decompress or jpeg_start_output.
+   * The map has out_color_components rows and actual_number_of_colors columns.
+   */
+  int actual_number_of_colors;  /* number of entries in use */
+  JSAMPARRAY colormap;          /* The color map as a 2-D pixel array
+                                   If data_precision is 12, then this is
+                                   actually a J12SAMPARRAY, so callers must
+                                   type-cast it in order to read/write 12-bit
+                                   samples from/to the array. */
+
+  /* State variables: these variables indicate the progress of decompression.
+   * The application may examine these but must not modify them.
+   */
+
+  /* Row index of next scanline to be read from jpeg_read_scanlines().
+   * Application may use this to control its processing loop, e.g.,
+   * "while (output_scanline < output_height)".
+   */
+  JDIMENSION output_scanline;   /* 0 .. output_height-1  */
+
+  /* Current input scan number and number of iMCU rows completed in scan.
+   * These indicate the progress of the decompressor input side.
+   */
+  int input_scan_number;        /* Number of SOS markers seen so far */
+  JDIMENSION input_iMCU_row;    /* Number of iMCU rows completed */
+
+  /* The "output scan number" is the notional scan being displayed by the
+   * output side.  The decompressor will not allow output scan/row number
+   * to get ahead of input scan/row, but it can fall arbitrarily far behind.
+   */
+  int output_scan_number;       /* Nominal scan number being displayed */
+  JDIMENSION output_iMCU_row;   /* Number of iMCU rows read */
+
+  /* Current progression status.  coef_bits[c][i] indicates the precision
+   * with which component c's DCT coefficient i (in zigzag order) is known.
+   * It is -1 when no data has yet been received, otherwise it is the point
+   * transform (shift) value for the most recent scan of the coefficient
+   * (thus, 0 at completion of the progression).
+   * This pointer is NULL when reading a non-progressive file.
+   */
+  int (*coef_bits)[DCTSIZE2];   /* -1 or current Al value for each coef */
+
+  /* Internal JPEG parameters --- the application usually need not look at
+   * these fields.  Note that the decompressor output side may not use
+   * any parameters that can change between scans.
+   */
+
+  /* Quantization and Huffman tables are carried forward across input
+   * datastreams when processing abbreviated JPEG datastreams.
+   */
+
+  JQUANT_TBL *quant_tbl_ptrs[NUM_QUANT_TBLS];
+  /* ptrs to coefficient quantization tables, or NULL if not defined */
+
+  JHUFF_TBL *dc_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  JHUFF_TBL *ac_huff_tbl_ptrs[NUM_HUFF_TBLS];
+  /* ptrs to Huffman coding tables, or NULL if not defined */
+
+  /* These parameters are never carried across datastreams, since they
+   * are given in SOF/SOS markers or defined to be reset by SOI.
+   */
+
+  int data_precision;           /* bits of precision in image data */
+
+  jpeg_component_info *comp_info;
+  /* comp_info[i] describes component that appears i'th in SOF */
+
+#if JPEG_LIB_VERSION >= 80
+  boolean is_baseline;          /* TRUE if Baseline SOF0 encountered */
+#endif
+  boolean progressive_mode;     /* TRUE if SOFn specifies progressive mode */
+  boolean arith_code;           /* TRUE=arithmetic coding, FALSE=Huffman */
+
+  UINT8 arith_dc_L[NUM_ARITH_TBLS]; /* L values for DC arith-coding tables */
+  UINT8 arith_dc_U[NUM_ARITH_TBLS]; /* U values for DC arith-coding tables */
+  UINT8 arith_ac_K[NUM_ARITH_TBLS]; /* Kx values for AC arith-coding tables */
+
+  unsigned int restart_interval; /* MCUs per restart interval, or 0 for no restart */
+
+  /* These fields record data obtained from optional markers recognized by
+   * the JPEG library.
+   */
+  boolean saw_JFIF_marker;      /* TRUE iff a JFIF APP0 marker was found */
+  /* Data copied from JFIF marker; only valid if saw_JFIF_marker is TRUE: */
+  UINT8 JFIF_major_version;     /* JFIF version number */
+  UINT8 JFIF_minor_version;
+  UINT8 density_unit;           /* JFIF code for pixel size units */
+  UINT16 X_density;             /* Horizontal pixel density */
+  UINT16 Y_density;             /* Vertical pixel density */
+  boolean saw_Adobe_marker;     /* TRUE iff an Adobe APP14 marker was found */
+  UINT8 Adobe_transform;        /* Color transform code from Adobe marker */
+
+  boolean CCIR601_sampling;     /* TRUE=first samples are cosited */
+
+  /* Aside from the specific data retained from APPn markers known to the
+   * library, the uninterpreted contents of any or all APPn and COM markers
+   * can be saved in a list for examination by the application.
+   */
+  jpeg_saved_marker_ptr marker_list; /* Head of list of saved markers */
+
+  /* Remaining fields are known throughout decompressor, but generally
+   * should not be touched by a surrounding application.
+   */
+
+  /*
+   * These fields are computed during decompression startup
+   */
+  int max_h_samp_factor;        /* largest h_samp_factor */
+  int max_v_samp_factor;        /* largest v_samp_factor */
+
+#if JPEG_LIB_VERSION >= 70
+  int min_DCT_h_scaled_size;    /* smallest DCT_h_scaled_size of any component */
+  int min_DCT_v_scaled_size;    /* smallest DCT_v_scaled_size of any component */
+#else
+  int min_DCT_scaled_size;      /* smallest DCT_scaled_size of any component */
+#endif
+
+  JDIMENSION total_iMCU_rows;   /* # of iMCU rows in image */
+  /* The coefficient or difference controller's input and output progress is
+   * measured in units of "iMCU" (interleaved MCU) rows.  These are the same as
+   * MCU rows in fully interleaved JPEG scans, but are used whether the scan is
+   * interleaved or not.  In lossy mode, we define an iMCU row as v_samp_factor
+   * DCT block rows of each component.  Therefore, the IDCT output contains
+   * v_samp_factor*DCT_[v_]scaled_size sample rows of a component per iMCU row.
+   * In lossless mode, total_iMCU_rows is always equal to the image height.
+   */
+
+  JSAMPLE *sample_range_limit;  /* table for fast range-limiting
+                                   If data_precision is 9 to 12, then this is
+                                   actually a J12SAMPLE pointer, and if
+                                   data_precision is 13 to 16, then this is
+                                   actually a J16SAMPLE pointer, so callers
+                                   must type-cast it in order to read samples
+                                   from the array. */
+
+  /*
+   * These fields are valid during any one scan.
+   * They describe the components and MCUs actually appearing in the scan.
+   * Note that the decompressor output side must not use these fields.
+   */
+  int comps_in_scan;            /* # of JPEG components in this scan */
+  jpeg_component_info *cur_comp_info[MAX_COMPS_IN_SCAN];
+  /* *cur_comp_info[i] describes component that appears i'th in SOS */
+
+  JDIMENSION MCUs_per_row;      /* # of MCUs across the image */
+  JDIMENSION MCU_rows_in_scan;  /* # of MCU rows in the image */
+
+  int blocks_in_MCU;            /* # of data units per MCU */
+  int MCU_membership[D_MAX_BLOCKS_IN_MCU];
+  /* MCU_membership[i] is index in cur_comp_info of component owning */
+  /* i'th data unit in an MCU */
+
+  int Ss, Se, Ah, Al;           /* progressive/lossless JPEG parameters for
+                                   scan */
+
+#if JPEG_LIB_VERSION >= 80
+  /* These fields are derived from Se of first SOS marker.
+   */
+  int block_size;               /* the basic DCT block size: 1..16 */
+  const int *natural_order; /* natural-order position array for entropy decode */
+  int lim_Se;                   /* min( Se, DCTSIZE2-1 ) for entropy decode */
+#endif
+
+  /* This field is shared between entropy decoder and marker parser.
+   * It is either zero or the code of a JPEG marker that has been
+   * read from the data source, but has not yet been processed.
+   */
+  int unread_marker;
+
+  /*
+   * Links to decompression subobjects (methods, private variables of modules)
+   */
+  struct jpeg_decomp_master *master;
+  struct jpeg_d_main_controller *main;
+  struct jpeg_d_coef_controller *coef;
+  struct jpeg_d_post_controller *post;
+  struct jpeg_input_controller *inputctl;
+  struct jpeg_marker_reader *marker;
+  struct jpeg_entropy_decoder *entropy;
+  struct jpeg_inverse_dct *idct;
+  struct jpeg_upsampler *upsample;
+  struct jpeg_color_deconverter *cconvert;
+  struct jpeg_color_quantizer *cquantize;
+};
+
+
+/* "Object" declarations for JPEG modules that may be supplied or called
+ * directly by the surrounding application.
+ * As with all objects in the JPEG library, these structs only define the
+ * publicly visible methods and state variables of a module.  Additional
+ * private fields may exist after the public ones.
+ */
+
+
+/* Error handler object */
+
+struct jpeg_error_mgr {
+  /* Error exit handler: does not return to caller */
+  void (*error_exit) (j_common_ptr cinfo);
+  /* Conditionally emit a trace or warning message */
+  void (*emit_message) (j_common_ptr cinfo, int msg_level);
+  /* Routine that actually outputs a trace or error message */
+  void (*output_message) (j_common_ptr cinfo);
+  /* Format a message string for the most recent JPEG error or message */
+  void (*format_message) (j_common_ptr cinfo, char *buffer);
+#define JMSG_LENGTH_MAX  200    /* recommended size of format_message buffer */
+  /* Reset error state variables at start of a new image */
+  void (*reset_error_mgr) (j_common_ptr cinfo);
+
+  /* The message ID code and any parameters are saved here.
+   * A message can have one string parameter or up to 8 int parameters.
+   */
+  int msg_code;
+#define JMSG_STR_PARM_MAX  80
+  union {
+    int i[8];
+    char s[JMSG_STR_PARM_MAX];
+  } msg_parm;
+
+  /* Standard state variables for error facility */
+
+  int trace_level;              /* max msg_level that will be displayed */
+
+  /* For recoverable corrupt-data errors, we emit a warning message,
+   * but keep going unless emit_message chooses to abort.  emit_message
+   * should count warnings in num_warnings.  The surrounding application
+   * can check for bad data by seeing if num_warnings is nonzero at the
+   * end of processing.
+   */
+  long num_warnings;            /* number of corrupt-data warnings */
+
+  /* These fields point to the table(s) of error message strings.
+   * An application can change the table pointer to switch to a different
+   * message list (typically, to change the language in which errors are
+   * reported).  Some applications may wish to add additional error codes
+   * that will be handled by the JPEG library error mechanism; the second
+   * table pointer is used for this purpose.
+   *
+   * First table includes all errors generated by JPEG library itself.
+   * Error code 0 is reserved for a "no such error string" message.
+   */
+  const char * const *jpeg_message_table; /* Library errors */
+  int last_jpeg_message;    /* Table contains strings 0..last_jpeg_message */
+  /* Second table can be added by application (see cjpeg/djpeg for example).
+   * It contains strings numbered first_addon_message..last_addon_message.
+   */
+  const char * const *addon_message_table; /* Non-library errors */
+  int first_addon_message;      /* code for first string in addon table */
+  int last_addon_message;       /* code for last string in addon table */
+};
+
+
+/* Progress monitor object */
+
+struct jpeg_progress_mgr {
+  void (*progress_monitor) (j_common_ptr cinfo);
+
+  long pass_counter;            /* work units completed in this pass */
+  long pass_limit;              /* total number of work units in this pass */
+  int completed_passes;         /* passes completed so far */
+  int total_passes;             /* total number of passes expected */
+};
+
+
+/* Data destination object for compression */
+
+struct jpeg_destination_mgr {
+  JOCTET *next_output_byte;     /* => next byte to write in buffer */
+  size_t free_in_buffer;        /* # of byte spaces remaining in buffer */
+
+  void (*init_destination) (j_compress_ptr cinfo);
+  boolean (*empty_output_buffer) (j_compress_ptr cinfo);
+  void (*term_destination) (j_compress_ptr cinfo);
+};
+
+
+/* Data source object for decompression */
+
+struct jpeg_source_mgr {
+  const JOCTET *next_input_byte; /* => next byte to read from buffer */
+  size_t bytes_in_buffer;       /* # of bytes remaining in buffer */
+
+  void (*init_source) (j_decompress_ptr cinfo);
+  boolean (*fill_input_buffer) (j_decompress_ptr cinfo);
+  void (*skip_input_data) (j_decompress_ptr cinfo, long num_bytes);
+  boolean (*resync_to_restart) (j_decompress_ptr cinfo, int desired);
+  void (*term_source) (j_decompress_ptr cinfo);
+};
+
+
+/* Memory manager object.
+ * Allocates "small" objects (a few K total), "large" objects (tens of K),
+ * and "really big" objects (virtual arrays with backing store if needed).
+ * The memory manager does not allow individual objects to be freed; rather,
+ * each created object is assigned to a pool, and whole pools can be freed
+ * at once.  This is faster and more convenient than remembering exactly what
+ * to free, especially where malloc()/free() are not too speedy.
+ * NB: alloc routines never return NULL.  They exit to error_exit if not
+ * successful.
+ */
+
+#define JPOOL_PERMANENT  0      /* lasts until master record is destroyed */
+#define JPOOL_IMAGE      1      /* lasts until done with image/datastream */
+#define JPOOL_NUMPOOLS   2
+
+typedef struct jvirt_sarray_control *jvirt_sarray_ptr;
+typedef struct jvirt_barray_control *jvirt_barray_ptr;
+
+
+struct jpeg_memory_mgr {
+  /* Method pointers */
+  void *(*alloc_small) (j_common_ptr cinfo, int pool_id, size_t sizeofobject);
+  void *(*alloc_large) (j_common_ptr cinfo, int pool_id,
+                        size_t sizeofobject);
+  /* If cinfo->data_precision is 12 or 16, then this method and the
+   * access_virt_sarray method actually return a J12SAMPARRAY or a
+   * J16SAMPARRAY, so callers must type-cast the return value in order to
+   * read/write 12-bit or 16-bit samples from/to the array.
+   */
+  JSAMPARRAY (*alloc_sarray) (j_common_ptr cinfo, int pool_id,
+                              JDIMENSION samplesperrow, JDIMENSION numrows);
+  JBLOCKARRAY (*alloc_barray) (j_common_ptr cinfo, int pool_id,
+                               JDIMENSION blocksperrow, JDIMENSION numrows);
+  jvirt_sarray_ptr (*request_virt_sarray) (j_common_ptr cinfo, int pool_id,
+                                           boolean pre_zero,
+                                           JDIMENSION samplesperrow,
+                                           JDIMENSION numrows,
+                                           JDIMENSION maxaccess);
+  jvirt_barray_ptr (*request_virt_barray) (j_common_ptr cinfo, int pool_id,
+                                           boolean pre_zero,
+                                           JDIMENSION blocksperrow,
+                                           JDIMENSION numrows,
+                                           JDIMENSION maxaccess);
+  void (*realize_virt_arrays) (j_common_ptr cinfo);
+  JSAMPARRAY (*access_virt_sarray) (j_common_ptr cinfo, jvirt_sarray_ptr ptr,
+                                    JDIMENSION start_row, JDIMENSION num_rows,
+                                    boolean writable);
+  JBLOCKARRAY (*access_virt_barray) (j_common_ptr cinfo, jvirt_barray_ptr ptr,
+                                     JDIMENSION start_row, JDIMENSION num_rows,
+                                     boolean writable);
+  void (*free_pool) (j_common_ptr cinfo, int pool_id);
+  void (*self_destruct) (j_common_ptr cinfo);
+
+  /* Limit on memory allocation for this JPEG object.  (Note that this is
+   * merely advisory, not a guaranteed maximum; it only affects the space
+   * used for virtual-array buffers.)  May be changed by outer application
+   * after creating the JPEG object.
+   */
+  long max_memory_to_use;
+
+  /* Maximum allocation request accepted by alloc_large. */
+  long max_alloc_chunk;
+};
+
+
+/* Routine signature for application-supplied marker processing methods.
+ * Need not pass marker code since it is stored in cinfo->unread_marker.
+ */
+typedef boolean (*jpeg_marker_parser_method) (j_decompress_ptr cinfo);
+
+
+/* Originally, this macro was used as a way of defining function prototypes
+ * for both modern compilers as well as older compilers that did not support
+ * prototype parameters.  libjpeg-turbo has never supported these older,
+ * non-ANSI compilers, but the macro is still included because there is some
+ * software out there that uses it.
+ */
+
+#define JPP(arglist)    arglist
+
+
+/* Default error-management setup */
+EXTERN(struct jpeg_error_mgr *) jpeg_std_error(struct jpeg_error_mgr *err);
+
+/* Initialization of JPEG compression objects.
+ * jpeg_create_compress() and jpeg_create_decompress() are the exported
+ * names that applications should call.  These expand to calls on
+ * jpeg_CreateCompress and jpeg_CreateDecompress with additional information
+ * passed for version mismatch checking.
+ * NB: you must set up the error-manager BEFORE calling jpeg_create_xxx.
+ */
+#define jpeg_create_compress(cinfo) \
+  jpeg_CreateCompress((cinfo), JPEG_LIB_VERSION, \
+                      (size_t)sizeof(struct jpeg_compress_struct))
+#define jpeg_create_decompress(cinfo) \
+  jpeg_CreateDecompress((cinfo), JPEG_LIB_VERSION, \
+                        (size_t)sizeof(struct jpeg_decompress_struct))
+EXTERN(void) jpeg_CreateCompress(j_compress_ptr cinfo, int version,
+                                 size_t structsize);
+EXTERN(void) jpeg_CreateDecompress(j_decompress_ptr cinfo, int version,
+                                   size_t structsize);
+/* Destruction of JPEG compression objects */
+EXTERN(void) jpeg_destroy_compress(j_compress_ptr cinfo);
+EXTERN(void) jpeg_destroy_decompress(j_decompress_ptr cinfo);
+
+/* Standard data source and destination managers: stdio streams. */
+/* Caller is responsible for opening the file before and closing after. */
+EXTERN(void) jpeg_stdio_dest(j_compress_ptr cinfo, FILE *outfile);
+EXTERN(void) jpeg_stdio_src(j_decompress_ptr cinfo, FILE *infile);
+
+/* Data source and destination managers: memory buffers. */
+EXTERN(void) jpeg_mem_dest(j_compress_ptr cinfo, unsigned char **outbuffer,
+                           unsigned long *outsize);
+EXTERN(void) jpeg_mem_src(j_decompress_ptr cinfo,
+                          const unsigned char *inbuffer, unsigned long insize);
+
+/* Default parameter setup for compression */
+EXTERN(void) jpeg_set_defaults(j_compress_ptr cinfo);
+/* Compression parameter setup aids */
+EXTERN(void) jpeg_set_colorspace(j_compress_ptr cinfo,
+                                 J_COLOR_SPACE colorspace);
+EXTERN(void) jpeg_default_colorspace(j_compress_ptr cinfo);
+EXTERN(void) jpeg_set_quality(j_compress_ptr cinfo, int quality,
+                              boolean force_baseline);
+EXTERN(void) jpeg_set_linear_quality(j_compress_ptr cinfo, int scale_factor,
+                                     boolean force_baseline);
+#if JPEG_LIB_VERSION >= 70
+EXTERN(void) jpeg_default_qtables(j_compress_ptr cinfo,
+                                  boolean force_baseline);
+#endif
+EXTERN(void) jpeg_add_quant_table(j_compress_ptr cinfo, int which_tbl,
+                                  const unsigned int *basic_table,
+                                  int scale_factor, boolean force_baseline);
+EXTERN(int) jpeg_quality_scaling(int quality);
+EXTERN(void) jpeg_enable_lossless(j_compress_ptr cinfo,
+                                  int predictor_selection_value,
+                                  int point_transform);
+EXTERN(void) jpeg_simple_progression(j_compress_ptr cinfo);
+EXTERN(void) jpeg_suppress_tables(j_compress_ptr cinfo, boolean suppress);
+EXTERN(JQUANT_TBL *) jpeg_alloc_quant_table(j_common_ptr cinfo);
+EXTERN(JHUFF_TBL *) jpeg_alloc_huff_table(j_common_ptr cinfo);
+
+/* Main entry points for compression */
+EXTERN(void) jpeg_start_compress(j_compress_ptr cinfo,
+                                 boolean write_all_tables);
+EXTERN(JDIMENSION) jpeg_write_scanlines(j_compress_ptr cinfo,
+                                        JSAMPARRAY scanlines,
+                                        JDIMENSION num_lines);
+EXTERN(JDIMENSION) jpeg12_write_scanlines(j_compress_ptr cinfo,
+                                          J12SAMPARRAY scanlines,
+                                          JDIMENSION num_lines);
+EXTERN(JDIMENSION) jpeg16_write_scanlines(j_compress_ptr cinfo,
+                                          J16SAMPARRAY scanlines,
+                                          JDIMENSION num_lines);
+EXTERN(void) jpeg_finish_compress(j_compress_ptr cinfo);
+
+#if JPEG_LIB_VERSION >= 70
+/* Precalculate JPEG dimensions for current compression parameters. */
+EXTERN(void) jpeg_calc_jpeg_dimensions(j_compress_ptr cinfo);
+#endif
+
+/* Replaces jpeg_write_scanlines when writing raw downsampled data. */
+EXTERN(JDIMENSION) jpeg_write_raw_data(j_compress_ptr cinfo, JSAMPIMAGE data,
+                                       JDIMENSION num_lines);
+EXTERN(JDIMENSION) jpeg12_write_raw_data(j_compress_ptr cinfo,
+                                         J12SAMPIMAGE data,
+                                         JDIMENSION num_lines);
+
+/* Write a special marker.  See libjpeg.txt concerning safe usage. */
+EXTERN(void) jpeg_write_marker(j_compress_ptr cinfo, int marker,
+                               const JOCTET *dataptr, unsigned int datalen);
+/* Same, but piecemeal. */
+EXTERN(void) jpeg_write_m_header(j_compress_ptr cinfo, int marker,
+                                 unsigned int datalen);
+EXTERN(void) jpeg_write_m_byte(j_compress_ptr cinfo, int val);
+
+/* Alternate compression function: just write an abbreviated table file */
+EXTERN(void) jpeg_write_tables(j_compress_ptr cinfo);
+
+/* Write ICC profile.  See libjpeg.txt for usage information. */
+EXTERN(void) jpeg_write_icc_profile(j_compress_ptr cinfo,
+                                    const JOCTET *icc_data_ptr,
+                                    unsigned int icc_data_len);
+
+
+/* Decompression startup: read start of JPEG datastream to see what's there */
+EXTERN(int) jpeg_read_header(j_decompress_ptr cinfo, boolean require_image);
+/* Return value is one of: */
+#define JPEG_SUSPENDED           0 /* Suspended due to lack of input data */
+#define JPEG_HEADER_OK           1 /* Found valid image datastream */
+#define JPEG_HEADER_TABLES_ONLY  2 /* Found valid table-specs-only datastream */
+/* If you pass require_image = TRUE (normal case), you need not check for
+ * a TABLES_ONLY return code; an abbreviated file will cause an error exit.
+ * JPEG_SUSPENDED is only possible if you use a data source module that can
+ * give a suspension return (the stdio source module doesn't).
+ */
+
+/* Main entry points for decompression */
+EXTERN(boolean) jpeg_start_decompress(j_decompress_ptr cinfo);
+EXTERN(JDIMENSION) jpeg_read_scanlines(j_decompress_ptr cinfo,
+                                       JSAMPARRAY scanlines,
+                                       JDIMENSION max_lines);
+EXTERN(JDIMENSION) jpeg12_read_scanlines(j_decompress_ptr cinfo,
+                                         J12SAMPARRAY scanlines,
+                                         JDIMENSION max_lines);
+EXTERN(JDIMENSION) jpeg16_read_scanlines(j_decompress_ptr cinfo,
+                                         J16SAMPARRAY scanlines,
+                                         JDIMENSION max_lines);
+EXTERN(JDIMENSION) jpeg_skip_scanlines(j_decompress_ptr cinfo,
+                                       JDIMENSION num_lines);
+EXTERN(JDIMENSION) jpeg12_skip_scanlines(j_decompress_ptr cinfo,
+                                         JDIMENSION num_lines);
+EXTERN(void) jpeg_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
+                                JDIMENSION *width);
+EXTERN(void) jpeg12_crop_scanline(j_decompress_ptr cinfo, JDIMENSION *xoffset,
+                                  JDIMENSION *width);
+EXTERN(boolean) jpeg_finish_decompress(j_decompress_ptr cinfo);
+
+/* Replaces jpeg_read_scanlines when reading raw downsampled data. */
+EXTERN(JDIMENSION) jpeg_read_raw_data(j_decompress_ptr cinfo, JSAMPIMAGE data,
+                                      JDIMENSION max_lines);
+EXTERN(JDIMENSION) jpeg12_read_raw_data(j_decompress_ptr cinfo,
+                                        J12SAMPIMAGE data,
+                                        JDIMENSION max_lines);
+
+/* Additional entry points for buffered-image mode. */
+EXTERN(boolean) jpeg_has_multiple_scans(j_decompress_ptr cinfo);
+EXTERN(boolean) jpeg_start_output(j_decompress_ptr cinfo, int scan_number);
+EXTERN(boolean) jpeg_finish_output(j_decompress_ptr cinfo);
+EXTERN(boolean) jpeg_input_complete(j_decompress_ptr cinfo);
+EXTERN(void) jpeg_new_colormap(j_decompress_ptr cinfo);
+EXTERN(int) jpeg_consume_input(j_decompress_ptr cinfo);
+/* Return value is one of: */
+/* #define JPEG_SUSPENDED       0    Suspended due to lack of input data */
+#define JPEG_REACHED_SOS        1 /* Reached start of new scan */
+#define JPEG_REACHED_EOI        2 /* Reached end of image */
+#define JPEG_ROW_COMPLETED      3 /* Completed one iMCU row */
+#define JPEG_SCAN_COMPLETED     4 /* Completed last iMCU row of a scan */
+
+/* Precalculate output dimensions for current decompression parameters. */
+#if JPEG_LIB_VERSION >= 80
+EXTERN(void) jpeg_core_output_dimensions(j_decompress_ptr cinfo);
+#endif
+EXTERN(void) jpeg_calc_output_dimensions(j_decompress_ptr cinfo);
+
+/* Control saving of COM and APPn markers into marker_list. */
+EXTERN(void) jpeg_save_markers(j_decompress_ptr cinfo, int marker_code,
+                               unsigned int length_limit);
+
+/* Install a special processing method for COM or APPn markers. */
+EXTERN(void) jpeg_set_marker_processor(j_decompress_ptr cinfo,
+                                       int marker_code,
+                                       jpeg_marker_parser_method routine);
+
+/* Read or write raw DCT coefficients --- useful for lossless transcoding. */
+EXTERN(jvirt_barray_ptr *) jpeg_read_coefficients(j_decompress_ptr cinfo);
+EXTERN(void) jpeg_write_coefficients(j_compress_ptr cinfo,
+                                     jvirt_barray_ptr *coef_arrays);
+EXTERN(void) jpeg_copy_critical_parameters(j_decompress_ptr srcinfo,
+                                           j_compress_ptr dstinfo);
+
+/* If you choose to abort compression or decompression before completing
+ * jpeg_finish_(de)compress, then you need to clean up to release memory,
+ * temporary files, etc.  You can just call jpeg_destroy_(de)compress
+ * if you're done with the JPEG object, but if you want to clean it up and
+ * reuse it, call this:
+ */
+EXTERN(void) jpeg_abort_compress(j_compress_ptr cinfo);
+EXTERN(void) jpeg_abort_decompress(j_decompress_ptr cinfo);
+
+/* Generic versions of jpeg_abort and jpeg_destroy that work on either
+ * flavor of JPEG object.  These may be more convenient in some places.
+ */
+EXTERN(void) jpeg_abort(j_common_ptr cinfo);
+EXTERN(void) jpeg_destroy(j_common_ptr cinfo);
+
+/* Default restart-marker-resync procedure for use by data source modules */
+EXTERN(boolean) jpeg_resync_to_restart(j_decompress_ptr cinfo, int desired);
+
+/* Read ICC profile.  See libjpeg.txt for usage information. */
+EXTERN(boolean) jpeg_read_icc_profile(j_decompress_ptr cinfo,
+                                      JOCTET **icc_data_ptr,
+                                      unsigned int *icc_data_len);
+
+
+/* These marker codes are exported since applications and data source modules
+ * are likely to want to use them.
+ */
+
+#define JPEG_RST0       0xD0    /* RST0 marker code */
+#define JPEG_EOI        0xD9    /* EOI marker code */
+#define JPEG_APP0       0xE0    /* APP0 marker code */
+#define JPEG_COM        0xFE    /* COM marker code */
+
+
+/* If we have a brain-damaged compiler that emits warnings (or worse, errors)
+ * for structure definitions that are never filled in, keep it quiet by
+ * supplying dummy definitions for the various substructures.
+ */
+
+#ifdef INCOMPLETE_TYPES_BROKEN
+#ifndef JPEG_INTERNALS          /* will be defined in jpegint.h */
+struct jvirt_sarray_control { long dummy; };
+struct jvirt_barray_control { long dummy; };
+struct jpeg_comp_master { long dummy; };
+struct jpeg_c_main_controller { long dummy; };
+struct jpeg_c_prep_controller { long dummy; };
+struct jpeg_c_coef_controller { long dummy; };
+struct jpeg_marker_writer { long dummy; };
+struct jpeg_color_converter { long dummy; };
+struct jpeg_downsampler { long dummy; };
+struct jpeg_forward_dct { long dummy; };
+struct jpeg_entropy_encoder { long dummy; };
+struct jpeg_decomp_master { long dummy; };
+struct jpeg_d_main_controller { long dummy; };
+struct jpeg_d_coef_controller { long dummy; };
+struct jpeg_d_post_controller { long dummy; };
+struct jpeg_input_controller { long dummy; };
+struct jpeg_marker_reader { long dummy; };
+struct jpeg_entropy_decoder { long dummy; };
+struct jpeg_inverse_dct { long dummy; };
+struct jpeg_upsampler { long dummy; };
+struct jpeg_color_deconverter { long dummy; };
+struct jpeg_color_quantizer { long dummy; };
+#endif /* JPEG_INTERNALS */
+#endif /* INCOMPLETE_TYPES_BROKEN */
+
+
+/*
+ * The JPEG library modules define JPEG_INTERNALS before including this file.
+ * The internal structure declarations are read only when that is true.
+ * Applications using the library should not include jpegint.h, but may wish
+ * to include jerror.h.
+ */
+
+#ifdef JPEG_INTERNALS
+#include "jpegint.h"            /* fetch private declarations */
+#include "jerror.h"             /* fetch error codes too */
+#endif
+
+#ifdef __cplusplus
+#ifndef DONT_USE_EXTERN_C
+}
+#endif
+#endif
+
+#endif /* JPEGLIB_H */
diff --git a/thirdparty/libjpeg-turbo/src/jquant1.c b/thirdparty/libjpeg-turbo/src/jquant1.c
new file mode 100644
index 00000000000..bd68dcb8c6a
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jquant1.c
@@ -0,0 +1,864 @@
+/*
+ * jquant1.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009, 2015, 2022-2023, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains 1-pass color quantization (color mapping) routines.
+ * These routines provide mapping to a fixed color map using equally spaced
+ * color values.  Optional Floyd-Steinberg or ordered dithering is available.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jsamplecomp.h"
+
+#if defined(QUANT_1PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16
+
+
+/*
+ * The main purpose of 1-pass quantization is to provide a fast, if not very
+ * high quality, colormapped output capability.  A 2-pass quantizer usually
+ * gives better visual quality; however, for quantized grayscale output this
+ * quantizer is perfectly adequate.  Dithering is highly recommended with this
+ * quantizer, though you can turn it off if you really want to.
+ *
+ * In 1-pass quantization the colormap must be chosen in advance of seeing the
+ * image.  We use a map consisting of all combinations of Ncolors[i] color
+ * values for the i'th component.  The Ncolors[] values are chosen so that
+ * their product, the total number of colors, is no more than that requested.
+ * (In most cases, the product will be somewhat less.)
+ *
+ * Since the colormap is orthogonal, the representative value for each color
+ * component can be determined without considering the other components;
+ * then these indexes can be combined into a colormap index by a standard
+ * N-dimensional-array-subscript calculation.  Most of the arithmetic involved
+ * can be precalculated and stored in the lookup table colorindex[].
+ * colorindex[i][j] maps pixel value j in component i to the nearest
+ * representative value (grid plane) for that component; this index is
+ * multiplied by the array stride for component i, so that the
+ * index of the colormap entry closest to a given pixel value is just
+ *    sum( colorindex[component-number][pixel-component-value] )
+ * Aside from being fast, this scheme allows for variable spacing between
+ * representative values with no additional lookup cost.
+ *
+ * If gamma correction has been applied in color conversion, it might be wise
+ * to adjust the color grid spacing so that the representative colors are
+ * equidistant in linear space.  At this writing, gamma correction is not
+ * implemented by jdcolor, so nothing is done here.
+ */
+
+
+/* Declarations for ordered dithering.
+ *
+ * We use a standard 16x16 ordered dither array.  The basic concept of ordered
+ * dithering is described in many references, for instance Dale Schumacher's
+ * chapter II.2 of Graphics Gems II (James Arvo, ed. Academic Press, 1991).
+ * In place of Schumacher's comparisons against a "threshold" value, we add a
+ * "dither" value to the input pixel and then round the result to the nearest
+ * output value.  The dither value is equivalent to (0.5 - threshold) times
+ * the distance between output values.  For ordered dithering, we assume that
+ * the output colors are equally spaced; if not, results will probably be
+ * worse, since the dither may be too much or too little at a given point.
+ *
+ * The normal calculation would be to form pixel value + dither, range-limit
+ * this to 0.._MAXJSAMPLE, and then index into the colorindex table as usual.
+ * We can skip the separate range-limiting step by extending the colorindex
+ * table in both directions.
+ */
+
+#define ODITHER_SIZE  16        /* dimension of dither matrix */
+/* NB: if ODITHER_SIZE is not a power of 2, ODITHER_MASK uses will break */
+#define ODITHER_CELLS  (ODITHER_SIZE * ODITHER_SIZE) /* # cells in matrix */
+#define ODITHER_MASK  (ODITHER_SIZE - 1) /* mask for wrapping around
+                                            counters */
+
+typedef int ODITHER_MATRIX[ODITHER_SIZE][ODITHER_SIZE];
+typedef int (*ODITHER_MATRIX_PTR)[ODITHER_SIZE];
+
+static const UINT8 base_dither_matrix[ODITHER_SIZE][ODITHER_SIZE] = {
+  /* Bayer's order-4 dither array.  Generated by the code given in
+   * Stephen Hawley's article "Ordered Dithering" in Graphics Gems I.
+   * The values in this array must range from 0 to ODITHER_CELLS-1.
+   */
+  {   0, 192,  48, 240,  12, 204,  60, 252,   3, 195,  51, 243,  15, 207,  63, 255 },
+  { 128,  64, 176, 112, 140,  76, 188, 124, 131,  67, 179, 115, 143,  79, 191, 127 },
+  {  32, 224,  16, 208,  44, 236,  28, 220,  35, 227,  19, 211,  47, 239,  31, 223 },
+  { 160,  96, 144,  80, 172, 108, 156,  92, 163,  99, 147,  83, 175, 111, 159,  95 },
+  {   8, 200,  56, 248,   4, 196,  52, 244,  11, 203,  59, 251,   7, 199,  55, 247 },
+  { 136,  72, 184, 120, 132,  68, 180, 116, 139,  75, 187, 123, 135,  71, 183, 119 },
+  {  40, 232,  24, 216,  36, 228,  20, 212,  43, 235,  27, 219,  39, 231,  23, 215 },
+  { 168, 104, 152,  88, 164, 100, 148,  84, 171, 107, 155,  91, 167, 103, 151,  87 },
+  {   2, 194,  50, 242,  14, 206,  62, 254,   1, 193,  49, 241,  13, 205,  61, 253 },
+  { 130,  66, 178, 114, 142,  78, 190, 126, 129,  65, 177, 113, 141,  77, 189, 125 },
+  {  34, 226,  18, 210,  46, 238,  30, 222,  33, 225,  17, 209,  45, 237,  29, 221 },
+  { 162,  98, 146,  82, 174, 110, 158,  94, 161,  97, 145,  81, 173, 109, 157,  93 },
+  {  10, 202,  58, 250,   6, 198,  54, 246,   9, 201,  57, 249,   5, 197,  53, 245 },
+  { 138,  74, 186, 122, 134,  70, 182, 118, 137,  73, 185, 121, 133,  69, 181, 117 },
+  {  42, 234,  26, 218,  38, 230,  22, 214,  41, 233,  25, 217,  37, 229,  21, 213 },
+  { 170, 106, 154,  90, 166, 102, 150,  86, 169, 105, 153,  89, 165, 101, 149,  85 }
+};
+
+
+/* Declarations for Floyd-Steinberg dithering.
+ *
+ * Errors are accumulated into the array fserrors[], at a resolution of
+ * 1/16th of a pixel count.  The error at a given pixel is propagated
+ * to its not-yet-processed neighbors using the standard F-S fractions,
+ *              ...     (here)  7/16
+ *              3/16    5/16    1/16
+ * We work left-to-right on even rows, right-to-left on odd rows.
+ *
+ * We can get away with a single array (holding one row's worth of errors)
+ * by using it to store the current row's errors at pixel columns not yet
+ * processed, but the next row's errors at columns already processed.  We
+ * need only a few extra variables to hold the errors immediately around the
+ * current column.  (If we are lucky, those variables are in registers, but
+ * even if not, they're probably cheaper to access than array elements are.)
+ *
+ * The fserrors[] array is indexed [component#][position].
+ * We provide (#columns + 2) entries per component; the extra entry at each
+ * end saves us from special-casing the first and last pixels.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef INT16 FSERROR;          /* 16 bits should be enough */
+typedef int LOCFSERROR;         /* use 'int' for calculation temps */
+#else
+typedef JLONG FSERROR;          /* may need more than 16 bits */
+typedef JLONG LOCFSERROR;       /* be sure calculation temps are big enough */
+#endif
+
+typedef FSERROR *FSERRPTR;      /* pointer to error array */
+
+
+/* Private subobject */
+
+#define MAX_Q_COMPS  4          /* max components I can handle */
+
+typedef struct {
+  struct jpeg_color_quantizer pub; /* public fields */
+
+  /* Initially allocated colormap is saved here */
+  _JSAMPARRAY sv_colormap;      /* The color map as a 2-D pixel array */
+  int sv_actual;                /* number of entries in use */
+
+  _JSAMPARRAY colorindex;       /* Precomputed mapping for speed */
+  /* colorindex[i][j] = index of color closest to pixel value j in component i,
+   * premultiplied as described above.  Since colormap indexes must fit into
+   * _JSAMPLEs, the entries of this array will too.
+   */
+  boolean is_padded;            /* is the colorindex padded for odither? */
+
+  int Ncolors[MAX_Q_COMPS];     /* # of values allocated to each component */
+
+  /* Variables for ordered dithering */
+  int row_index;                /* cur row's vertical index in dither matrix */
+  ODITHER_MATRIX_PTR odither[MAX_Q_COMPS]; /* one dither array per component */
+
+  /* Variables for Floyd-Steinberg dithering */
+  FSERRPTR fserrors[MAX_Q_COMPS]; /* accumulated errors */
+  boolean on_odd_row;           /* flag to remember which row we are on */
+} my_cquantizer;
+
+typedef my_cquantizer *my_cquantize_ptr;
+
+
+/*
+ * Policy-making subroutines for create_colormap and create_colorindex.
+ * These routines determine the colormap to be used.  The rest of the module
+ * only assumes that the colormap is orthogonal.
+ *
+ *  * select_ncolors decides how to divvy up the available colors
+ *    among the components.
+ *  * output_value defines the set of representative values for a component.
+ *  * largest_input_value defines the mapping from input values to
+ *    representative values for a component.
+ * Note that the latter two routines may impose different policies for
+ * different components, though this is not currently done.
+ */
+
+
+LOCAL(int)
+select_ncolors(j_decompress_ptr cinfo, int Ncolors[])
+/* Determine allocation of desired colors to components, */
+/* and fill in Ncolors[] array to indicate choice. */
+/* Return value is total number of colors (product of Ncolors[] values). */
+{
+  int nc = cinfo->out_color_components; /* number of color components */
+  int max_colors = cinfo->desired_number_of_colors;
+  int total_colors, iroot, i, j;
+  boolean changed;
+  long temp;
+  int RGB_order[3] = { RGB_GREEN, RGB_RED, RGB_BLUE };
+  RGB_order[0] = rgb_green[cinfo->out_color_space];
+  RGB_order[1] = rgb_red[cinfo->out_color_space];
+  RGB_order[2] = rgb_blue[cinfo->out_color_space];
+
+  /* We can allocate at least the nc'th root of max_colors per component. */
+  /* Compute floor(nc'th root of max_colors). */
+  iroot = 1;
+  do {
+    iroot++;
+    temp = iroot;               /* set temp = iroot ** nc */
+    for (i = 1; i < nc; i++)
+      temp *= iroot;
+  } while (temp <= (long)max_colors); /* repeat till iroot exceeds root */
+  iroot--;                      /* now iroot = floor(root) */
+
+  /* Must have at least 2 color values per component */
+  if (iroot < 2)
+    ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, (int)temp);
+
+  /* Initialize to iroot color values for each component */
+  total_colors = 1;
+  for (i = 0; i < nc; i++) {
+    Ncolors[i] = iroot;
+    total_colors *= iroot;
+  }
+  /* We may be able to increment the count for one or more components without
+   * exceeding max_colors, though we know not all can be incremented.
+   * Sometimes, the first component can be incremented more than once!
+   * (Example: for 16 colors, we start at 2*2*2, go to 3*2*2, then 4*2*2.)
+   * In RGB colorspace, try to increment G first, then R, then B.
+   */
+  do {
+    changed = FALSE;
+    for (i = 0; i < nc; i++) {
+      j = (cinfo->out_color_space == JCS_RGB ? RGB_order[i] : i);
+      /* calculate new total_colors if Ncolors[j] is incremented */
+      temp = total_colors / Ncolors[j];
+      temp *= Ncolors[j] + 1;   /* done in long arith to avoid oflo */
+      if (temp > (long)max_colors)
+        break;                  /* won't fit, done with this pass */
+      Ncolors[j]++;             /* OK, apply the increment */
+      total_colors = (int)temp;
+      changed = TRUE;
+    }
+  } while (changed);
+
+  return total_colors;
+}
+
+
+LOCAL(int)
+output_value(j_decompress_ptr cinfo, int ci, int j, int maxj)
+/* Return j'th output value, where j will range from 0 to maxj */
+/* The output values must fall in 0.._MAXJSAMPLE in increasing order */
+{
+  /* We always provide values 0 and _MAXJSAMPLE for each component;
+   * any additional values are equally spaced between these limits.
+   * (Forcing the upper and lower values to the limits ensures that
+   * dithering can't produce a color outside the selected gamut.)
+   */
+  return (int)(((JLONG)j * _MAXJSAMPLE + maxj / 2) / maxj);
+}
+
+
+LOCAL(int)
+largest_input_value(j_decompress_ptr cinfo, int ci, int j, int maxj)
+/* Return largest input value that should map to j'th output value */
+/* Must have largest(j=0) >= 0, and largest(j=maxj) >= _MAXJSAMPLE */
+{
+  /* Breakpoints are halfway between values returned by output_value */
+  return (int)(((JLONG)(2 * j + 1) * _MAXJSAMPLE + maxj) / (2 * maxj));
+}
+
+
+/*
+ * Create the colormap.
+ */
+
+LOCAL(void)
+create_colormap(j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+  _JSAMPARRAY colormap;         /* Created colormap */
+  int total_colors;             /* Number of distinct output colors */
+  int i, j, k, nci, blksize, blkdist, ptr, val;
+
+  /* Select number of colors for each component */
+  total_colors = select_ncolors(cinfo, cquantize->Ncolors);
+
+  /* Report selected color counts */
+  if (cinfo->out_color_components == 3)
+    TRACEMS4(cinfo, 1, JTRC_QUANT_3_NCOLORS, total_colors,
+             cquantize->Ncolors[0], cquantize->Ncolors[1],
+             cquantize->Ncolors[2]);
+  else
+    TRACEMS1(cinfo, 1, JTRC_QUANT_NCOLORS, total_colors);
+
+  /* Allocate and fill in the colormap. */
+  /* The colors are ordered in the map in standard row-major order, */
+  /* i.e. rightmost (highest-indexed) color changes most rapidly. */
+
+  colormap = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
+    ((j_common_ptr)cinfo, JPOOL_IMAGE,
+     (JDIMENSION)total_colors, (JDIMENSION)cinfo->out_color_components);
+
+  /* blksize is number of adjacent repeated entries for a component */
+  /* blkdist is distance between groups of identical entries for a component */
+  blkdist = total_colors;
+
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    /* fill in colormap entries for i'th color component */
+    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
+    blksize = blkdist / nci;
+    for (j = 0; j < nci; j++) {
+      /* Compute j'th output value (out of nci) for component */
+      val = output_value(cinfo, i, j, nci - 1);
+      /* Fill in all colormap entries that have this value of this component */
+      for (ptr = j * blksize; ptr < total_colors; ptr += blkdist) {
+        /* fill in blksize entries beginning at ptr */
+        for (k = 0; k < blksize; k++)
+          colormap[i][ptr + k] = (_JSAMPLE)val;
+      }
+    }
+    blkdist = blksize;          /* blksize of this color is blkdist of next */
+  }
+
+  /* Save the colormap in private storage,
+   * where it will survive color quantization mode changes.
+   */
+  cquantize->sv_colormap = colormap;
+  cquantize->sv_actual = total_colors;
+}
+
+
+/*
+ * Create the color index table.
+ */
+
+LOCAL(void)
+create_colorindex(j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+  _JSAMPROW indexptr;
+  int i, j, k, nci, blksize, val, pad;
+
+  /* For ordered dither, we pad the color index tables by _MAXJSAMPLE in
+   * each direction (input index values can be -_MAXJSAMPLE .. 2*_MAXJSAMPLE).
+   * This is not necessary in the other dithering modes.  However, we
+   * flag whether it was done in case user changes dithering mode.
+   */
+  if (cinfo->dither_mode == JDITHER_ORDERED) {
+    pad = _MAXJSAMPLE * 2;
+    cquantize->is_padded = TRUE;
+  } else {
+    pad = 0;
+    cquantize->is_padded = FALSE;
+  }
+
+  cquantize->colorindex = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
+    ((j_common_ptr)cinfo, JPOOL_IMAGE,
+     (JDIMENSION)(_MAXJSAMPLE + 1 + pad),
+     (JDIMENSION)cinfo->out_color_components);
+
+  /* blksize is number of adjacent repeated entries for a component */
+  blksize = cquantize->sv_actual;
+
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    /* fill in colorindex entries for i'th color component */
+    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
+    blksize = blksize / nci;
+
+    /* adjust colorindex pointers to provide padding at negative indexes. */
+    if (pad)
+      cquantize->colorindex[i] += _MAXJSAMPLE;
+
+    /* in loop, val = index of current output value, */
+    /* and k = largest j that maps to current val */
+    indexptr = cquantize->colorindex[i];
+    val = 0;
+    k = largest_input_value(cinfo, i, 0, nci - 1);
+    for (j = 0; j <= _MAXJSAMPLE; j++) {
+      while (j > k)             /* advance val if past boundary */
+        k = largest_input_value(cinfo, i, ++val, nci - 1);
+      /* premultiply so that no multiplication needed in main processing */
+      indexptr[j] = (_JSAMPLE)(val * blksize);
+    }
+    /* Pad at both ends if necessary */
+    if (pad)
+      for (j = 1; j <= _MAXJSAMPLE; j++) {
+        indexptr[-j] = indexptr[0];
+        indexptr[_MAXJSAMPLE + j] = indexptr[_MAXJSAMPLE];
+      }
+  }
+}
+
+
+/*
+ * Create an ordered-dither array for a component having ncolors
+ * distinct output values.
+ */
+
+LOCAL(ODITHER_MATRIX_PTR)
+make_odither_array(j_decompress_ptr cinfo, int ncolors)
+{
+  ODITHER_MATRIX_PTR odither;
+  int j, k;
+  JLONG num, den;
+
+  odither = (ODITHER_MATRIX_PTR)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(ODITHER_MATRIX));
+  /* The inter-value distance for this color is _MAXJSAMPLE/(ncolors-1).
+   * Hence the dither value for the matrix cell with fill order f
+   * (f=0..N-1) should be (N-1-2*f)/(2*N) * _MAXJSAMPLE/(ncolors-1).
+   * On 16-bit-int machine, be careful to avoid overflow.
+   */
+  den = 2 * ODITHER_CELLS * ((JLONG)(ncolors - 1));
+  for (j = 0; j < ODITHER_SIZE; j++) {
+    for (k = 0; k < ODITHER_SIZE; k++) {
+      num = ((JLONG)(ODITHER_CELLS - 1 -
+                     2 * ((int)base_dither_matrix[j][k]))) * _MAXJSAMPLE;
+      /* Ensure round towards zero despite C's lack of consistency
+       * about rounding negative values in integer division...
+       */
+      odither[j][k] = (int)(num < 0 ? -((-num) / den) : num / den);
+    }
+  }
+  return odither;
+}
+
+
+/*
+ * Create the ordered-dither tables.
+ * Components having the same number of representative colors may
+ * share a dither table.
+ */
+
+LOCAL(void)
+create_odither_tables(j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+  ODITHER_MATRIX_PTR odither;
+  int i, j, nci;
+
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    nci = cquantize->Ncolors[i]; /* # of distinct values for this color */
+    odither = NULL;             /* search for matching prior component */
+    for (j = 0; j < i; j++) {
+      if (nci == cquantize->Ncolors[j]) {
+        odither = cquantize->odither[j];
+        break;
+      }
+    }
+    if (odither == NULL)        /* need a new table? */
+      odither = make_odither_array(cinfo, nci);
+    cquantize->odither[i] = odither;
+  }
+}
+
+
+/*
+ * Map some rows of pixels to the output colormapped representation.
+ */
+
+METHODDEF(void)
+color_quantize(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+               _JSAMPARRAY output_buf, int num_rows)
+/* General case, no dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+  _JSAMPARRAY colorindex = cquantize->colorindex;
+  register int pixcode, ci;
+  register _JSAMPROW ptrin, ptrout;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  register int nc = cinfo->out_color_components;
+
+  for (row = 0; row < num_rows; row++) {
+    ptrin = input_buf[row];
+    ptrout = output_buf[row];
+    for (col = width; col > 0; col--) {
+      pixcode = 0;
+      for (ci = 0; ci < nc; ci++) {
+        pixcode += colorindex[ci][*ptrin++];
+      }
+      *ptrout++ = (_JSAMPLE)pixcode;
+    }
+  }
+}
+
+
+METHODDEF(void)
+color_quantize3(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+                _JSAMPARRAY output_buf, int num_rows)
+/* Fast path for out_color_components==3, no dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+  register int pixcode;
+  register _JSAMPROW ptrin, ptrout;
+  _JSAMPROW colorindex0 = cquantize->colorindex[0];
+  _JSAMPROW colorindex1 = cquantize->colorindex[1];
+  _JSAMPROW colorindex2 = cquantize->colorindex[2];
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    ptrin = input_buf[row];
+    ptrout = output_buf[row];
+    for (col = width; col > 0; col--) {
+      pixcode  = colorindex0[*ptrin++];
+      pixcode += colorindex1[*ptrin++];
+      pixcode += colorindex2[*ptrin++];
+      *ptrout++ = (_JSAMPLE)pixcode;
+    }
+  }
+}
+
+
+METHODDEF(void)
+quantize_ord_dither(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+                    _JSAMPARRAY output_buf, int num_rows)
+/* General case, with ordered dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+  register _JSAMPROW input_ptr;
+  register _JSAMPROW output_ptr;
+  _JSAMPROW colorindex_ci;
+  int *dither;                  /* points to active row of dither matrix */
+  int row_index, col_index;     /* current indexes into dither matrix */
+  int nc = cinfo->out_color_components;
+  int ci;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    /* Initialize output values to 0 so can process components separately */
+    jzero_far((void *)output_buf[row], (size_t)(width * sizeof(_JSAMPLE)));
+    row_index = cquantize->row_index;
+    for (ci = 0; ci < nc; ci++) {
+      input_ptr = input_buf[row] + ci;
+      output_ptr = output_buf[row];
+      colorindex_ci = cquantize->colorindex[ci];
+      dither = cquantize->odither[ci][row_index];
+      col_index = 0;
+
+      for (col = width; col > 0; col--) {
+        /* Form pixel value + dither, range-limit to 0.._MAXJSAMPLE,
+         * select output value, accumulate into output code for this pixel.
+         * Range-limiting need not be done explicitly, as we have extended
+         * the colorindex table to produce the right answers for out-of-range
+         * inputs.  The maximum dither is +- _MAXJSAMPLE; this sets the
+         * required amount of padding.
+         */
+        *output_ptr +=
+          colorindex_ci[*input_ptr + dither[col_index]];
+        input_ptr += nc;
+        output_ptr++;
+        col_index = (col_index + 1) & ODITHER_MASK;
+      }
+    }
+    /* Advance row index for next row */
+    row_index = (row_index + 1) & ODITHER_MASK;
+    cquantize->row_index = row_index;
+  }
+}
+
+
+METHODDEF(void)
+quantize3_ord_dither(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+                     _JSAMPARRAY output_buf, int num_rows)
+/* Fast path for out_color_components==3, with ordered dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+  register int pixcode;
+  register _JSAMPROW input_ptr;
+  register _JSAMPROW output_ptr;
+  _JSAMPROW colorindex0 = cquantize->colorindex[0];
+  _JSAMPROW colorindex1 = cquantize->colorindex[1];
+  _JSAMPROW colorindex2 = cquantize->colorindex[2];
+  int *dither0;                 /* points to active row of dither matrix */
+  int *dither1;
+  int *dither2;
+  int row_index, col_index;     /* current indexes into dither matrix */
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    row_index = cquantize->row_index;
+    input_ptr = input_buf[row];
+    output_ptr = output_buf[row];
+    dither0 = cquantize->odither[0][row_index];
+    dither1 = cquantize->odither[1][row_index];
+    dither2 = cquantize->odither[2][row_index];
+    col_index = 0;
+
+    for (col = width; col > 0; col--) {
+      pixcode  = colorindex0[(*input_ptr++) + dither0[col_index]];
+      pixcode += colorindex1[(*input_ptr++) + dither1[col_index]];
+      pixcode += colorindex2[(*input_ptr++) + dither2[col_index]];
+      *output_ptr++ = (_JSAMPLE)pixcode;
+      col_index = (col_index + 1) & ODITHER_MASK;
+    }
+    row_index = (row_index + 1) & ODITHER_MASK;
+    cquantize->row_index = row_index;
+  }
+}
+
+
+METHODDEF(void)
+quantize_fs_dither(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+                   _JSAMPARRAY output_buf, int num_rows)
+/* General case, with Floyd-Steinberg dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+  register LOCFSERROR cur;      /* current error or pixel value */
+  LOCFSERROR belowerr;          /* error for pixel below cur */
+  LOCFSERROR bpreverr;          /* error for below/prev col */
+  LOCFSERROR bnexterr;          /* error for below/next col */
+  LOCFSERROR delta;
+  register FSERRPTR errorptr;   /* => fserrors[] at column before current */
+  register _JSAMPROW input_ptr;
+  register _JSAMPROW output_ptr;
+  _JSAMPROW colorindex_ci;
+  _JSAMPROW colormap_ci;
+  int pixcode;
+  int nc = cinfo->out_color_components;
+  int dir;                      /* 1 for left-to-right, -1 for right-to-left */
+  int dirnc;                    /* dir * nc */
+  int ci;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
+  SHIFT_TEMPS
+
+  for (row = 0; row < num_rows; row++) {
+    /* Initialize output values to 0 so can process components separately */
+    jzero_far((void *)output_buf[row], (size_t)(width * sizeof(_JSAMPLE)));
+    for (ci = 0; ci < nc; ci++) {
+      input_ptr = input_buf[row] + ci;
+      output_ptr = output_buf[row];
+      if (cquantize->on_odd_row) {
+        /* work right to left in this row */
+        input_ptr += (width - 1) * nc; /* so point to rightmost pixel */
+        output_ptr += width - 1;
+        dir = -1;
+        dirnc = -nc;
+        errorptr = cquantize->fserrors[ci] + (width + 1); /* => entry after last column */
+      } else {
+        /* work left to right in this row */
+        dir = 1;
+        dirnc = nc;
+        errorptr = cquantize->fserrors[ci]; /* => entry before first column */
+      }
+      colorindex_ci = cquantize->colorindex[ci];
+      colormap_ci = cquantize->sv_colormap[ci];
+      /* Preset error values: no error propagated to first pixel from left */
+      cur = 0;
+      /* and no error propagated to row below yet */
+      belowerr = bpreverr = 0;
+
+      for (col = width; col > 0; col--) {
+        /* cur holds the error propagated from the previous pixel on the
+         * current line.  Add the error propagated from the previous line
+         * to form the complete error correction term for this pixel, and
+         * round the error term (which is expressed * 16) to an integer.
+         * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
+         * for either sign of the error value.
+         * Note: errorptr points to *previous* column's array entry.
+         */
+        cur = RIGHT_SHIFT(cur + errorptr[dir] + 8, 4);
+        /* Form pixel value + error, and range-limit to 0.._MAXJSAMPLE.
+         * The maximum error is +- _MAXJSAMPLE; this sets the required size
+         * of the range_limit array.
+         */
+        cur += *input_ptr;
+        cur = range_limit[cur];
+        /* Select output value, accumulate into output code for this pixel */
+        pixcode = colorindex_ci[cur];
+        *output_ptr += (_JSAMPLE)pixcode;
+        /* Compute actual representation error at this pixel */
+        /* Note: we can do this even though we don't have the final */
+        /* pixel code, because the colormap is orthogonal. */
+        cur -= colormap_ci[pixcode];
+        /* Compute error fractions to be propagated to adjacent pixels.
+         * Add these into the running sums, and simultaneously shift the
+         * next-line error sums left by 1 column.
+         */
+        bnexterr = cur;
+        delta = cur * 2;
+        cur += delta;           /* form error * 3 */
+        errorptr[0] = (FSERROR)(bpreverr + cur);
+        cur += delta;           /* form error * 5 */
+        bpreverr = belowerr + cur;
+        belowerr = bnexterr;
+        cur += delta;           /* form error * 7 */
+        /* At this point cur contains the 7/16 error value to be propagated
+         * to the next pixel on the current line, and all the errors for the
+         * next line have been shifted over. We are therefore ready to move on.
+         */
+        input_ptr += dirnc;     /* advance input ptr to next column */
+        output_ptr += dir;      /* advance output ptr to next column */
+        errorptr += dir;        /* advance errorptr to current column */
+      }
+      /* Post-loop cleanup: we must unload the final error value into the
+       * final fserrors[] entry.  Note we need not unload belowerr because
+       * it is for the dummy column before or after the actual array.
+       */
+      errorptr[0] = (FSERROR)bpreverr; /* unload prev err into array */
+    }
+    cquantize->on_odd_row = (cquantize->on_odd_row ? FALSE : TRUE);
+  }
+}
+
+
+/*
+ * Allocate workspace for Floyd-Steinberg errors.
+ */
+
+LOCAL(void)
+alloc_fs_workspace(j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+  size_t arraysize;
+  int i;
+
+  arraysize = (size_t)((cinfo->output_width + 2) * sizeof(FSERROR));
+  for (i = 0; i < cinfo->out_color_components; i++) {
+    cquantize->fserrors[i] = (FSERRPTR)
+      (*cinfo->mem->alloc_large) ((j_common_ptr)cinfo, JPOOL_IMAGE, arraysize);
+  }
+}
+
+
+/*
+ * Initialize for one-pass color quantization.
+ */
+
+METHODDEF(void)
+start_pass_1_quant(j_decompress_ptr cinfo, boolean is_pre_scan)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+  size_t arraysize;
+  int i;
+
+  /* Install my colormap. */
+  cinfo->colormap = (JSAMPARRAY)cquantize->sv_colormap;
+  cinfo->actual_number_of_colors = cquantize->sv_actual;
+
+  /* Initialize for desired dithering mode. */
+  switch (cinfo->dither_mode) {
+  case JDITHER_NONE:
+    if (cinfo->out_color_components == 3)
+      cquantize->pub._color_quantize = color_quantize3;
+    else
+      cquantize->pub._color_quantize = color_quantize;
+    break;
+  case JDITHER_ORDERED:
+    if (cinfo->out_color_components == 3)
+      cquantize->pub._color_quantize = quantize3_ord_dither;
+    else
+      cquantize->pub._color_quantize = quantize_ord_dither;
+    cquantize->row_index = 0;   /* initialize state for ordered dither */
+    /* If user changed to ordered dither from another mode,
+     * we must recreate the color index table with padding.
+     * This will cost extra space, but probably isn't very likely.
+     */
+    if (!cquantize->is_padded)
+      create_colorindex(cinfo);
+    /* Create ordered-dither tables if we didn't already. */
+    if (cquantize->odither[0] == NULL)
+      create_odither_tables(cinfo);
+    break;
+  case JDITHER_FS:
+    cquantize->pub._color_quantize = quantize_fs_dither;
+    cquantize->on_odd_row = FALSE; /* initialize state for F-S dither */
+    /* Allocate Floyd-Steinberg workspace if didn't already. */
+    if (cquantize->fserrors[0] == NULL)
+      alloc_fs_workspace(cinfo);
+    /* Initialize the propagated errors to zero. */
+    arraysize = (size_t)((cinfo->output_width + 2) * sizeof(FSERROR));
+    for (i = 0; i < cinfo->out_color_components; i++)
+      jzero_far((void *)cquantize->fserrors[i], arraysize);
+    break;
+  default:
+    ERREXIT(cinfo, JERR_NOT_COMPILED);
+    break;
+  }
+}
+
+
+/*
+ * Finish up at the end of the pass.
+ */
+
+METHODDEF(void)
+finish_pass_1_quant(j_decompress_ptr cinfo)
+{
+  /* no work in 1-pass case */
+}
+
+
+/*
+ * Switch to a new external colormap between output passes.
+ * Shouldn't get to this module!
+ */
+
+METHODDEF(void)
+new_color_map_1_quant(j_decompress_ptr cinfo)
+{
+  ERREXIT(cinfo, JERR_MODE_CHANGE);
+}
+
+
+/*
+ * Module initialization routine for 1-pass color quantization.
+ */
+
+GLOBAL(void)
+_jinit_1pass_quantizer(j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize;
+
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  /* Color quantization is not supported with lossless JPEG images */
+  if (cinfo->master->lossless)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
+  cquantize = (my_cquantize_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(my_cquantizer));
+  cinfo->cquantize = (struct jpeg_color_quantizer *)cquantize;
+  cquantize->pub.start_pass = start_pass_1_quant;
+  cquantize->pub.finish_pass = finish_pass_1_quant;
+  cquantize->pub.new_color_map = new_color_map_1_quant;
+  cquantize->fserrors[0] = NULL; /* Flag FS workspace not allocated */
+  cquantize->odither[0] = NULL; /* Also flag odither arrays not allocated */
+
+  /* Make sure my internal arrays won't overflow */
+  if (cinfo->out_color_components > MAX_Q_COMPS)
+    ERREXIT1(cinfo, JERR_QUANT_COMPONENTS, MAX_Q_COMPS);
+  /* Make sure colormap indexes can be represented by _JSAMPLEs */
+  if (cinfo->desired_number_of_colors > (_MAXJSAMPLE + 1))
+    ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, _MAXJSAMPLE + 1);
+
+  /* Create the colormap and color index table. */
+  create_colormap(cinfo);
+  create_colorindex(cinfo);
+
+  /* Allocate Floyd-Steinberg workspace now if requested.
+   * We do this now since it may affect the memory manager's space
+   * calculations.  If the user changes to FS dither mode in a later pass, we
+   * will allocate the space then, and will possibly overrun the
+   * max_memory_to_use setting.
+   */
+  if (cinfo->dither_mode == JDITHER_FS)
+    alloc_fs_workspace(cinfo);
+}
+
+#endif /* defined(QUANT_1PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16 */
diff --git a/thirdparty/libjpeg-turbo/src/jquant2.c b/thirdparty/libjpeg-turbo/src/jquant2.c
new file mode 100644
index 00000000000..9ba51fa8872
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jquant2.c
@@ -0,0 +1,1293 @@
+/*
+ * jquant2.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2009, 2014-2015, 2020, 2022-2023, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains 2-pass color quantization (color mapping) routines.
+ * These routines provide selection of a custom color map for an image,
+ * followed by mapping of the image to that color map, with optional
+ * Floyd-Steinberg dithering.
+ * It is also possible to use just the second pass to map to an arbitrary
+ * externally-given color map.
+ *
+ * Note: ordered dithering is not supported, since there isn't any fast
+ * way to compute intercolor distances; it's unclear that ordered dither's
+ * fundamental assumptions even hold with an irregularly spaced color map.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jsamplecomp.h"
+
+#if defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16
+
+
+/*
+ * This module implements the well-known Heckbert paradigm for color
+ * quantization.  Most of the ideas used here can be traced back to
+ * Heckbert's seminal paper
+ *   Heckbert, Paul.  "Color Image Quantization for Frame Buffer Display",
+ *   Proc. SIGGRAPH '82, Computer Graphics v.16 #3 (July 1982), pp 297-304.
+ *
+ * In the first pass over the image, we accumulate a histogram showing the
+ * usage count of each possible color.  To keep the histogram to a reasonable
+ * size, we reduce the precision of the input; typical practice is to retain
+ * 5 or 6 bits per color, so that 8 or 4 different input values are counted
+ * in the same histogram cell.
+ *
+ * Next, the color-selection step begins with a box representing the whole
+ * color space, and repeatedly splits the "largest" remaining box until we
+ * have as many boxes as desired colors.  Then the mean color in each
+ * remaining box becomes one of the possible output colors.
+ *
+ * The second pass over the image maps each input pixel to the closest output
+ * color (optionally after applying a Floyd-Steinberg dithering correction).
+ * This mapping is logically trivial, but making it go fast enough requires
+ * considerable care.
+ *
+ * Heckbert-style quantizers vary a good deal in their policies for choosing
+ * the "largest" box and deciding where to cut it.  The particular policies
+ * used here have proved out well in experimental comparisons, but better ones
+ * may yet be found.
+ *
+ * In earlier versions of the IJG code, this module quantized in YCbCr color
+ * space, processing the raw upsampled data without a color conversion step.
+ * This allowed the color conversion math to be done only once per colormap
+ * entry, not once per pixel.  However, that optimization precluded other
+ * useful optimizations (such as merging color conversion with upsampling)
+ * and it also interfered with desired capabilities such as quantizing to an
+ * externally-supplied colormap.  We have therefore abandoned that approach.
+ * The present code works in the post-conversion color space, typically RGB.
+ *
+ * To improve the visual quality of the results, we actually work in scaled
+ * RGB space, giving G distances more weight than R, and R in turn more than
+ * B.  To do everything in integer math, we must use integer scale factors.
+ * The 2/3/1 scale factors used here correspond loosely to the relative
+ * weights of the colors in the NTSC grayscale equation.
+ * If you want to use this code to quantize a non-RGB color space, you'll
+ * probably need to change these scale factors.
+ */
+
+#define R_SCALE  2              /* scale R distances by this much */
+#define G_SCALE  3              /* scale G distances by this much */
+#define B_SCALE  1              /* and B by this much */
+
+static const int c_scales[3] = { R_SCALE, G_SCALE, B_SCALE };
+#define C0_SCALE  c_scales[rgb_red[cinfo->out_color_space]]
+#define C1_SCALE  c_scales[rgb_green[cinfo->out_color_space]]
+#define C2_SCALE  c_scales[rgb_blue[cinfo->out_color_space]]
+
+/*
+ * First we have the histogram data structure and routines for creating it.
+ *
+ * The number of bits of precision can be adjusted by changing these symbols.
+ * We recommend keeping 6 bits for G and 5 each for R and B.
+ * If you have plenty of memory and cycles, 6 bits all around gives marginally
+ * better results; if you are short of memory, 5 bits all around will save
+ * some space but degrade the results.
+ * To maintain a fully accurate histogram, we'd need to allocate a "long"
+ * (preferably unsigned long) for each cell.  In practice this is overkill;
+ * we can get by with 16 bits per cell.  Few of the cell counts will overflow,
+ * and clamping those that do overflow to the maximum value will give close-
+ * enough results.  This reduces the recommended histogram size from 256Kb
+ * to 128Kb, which is a useful savings on PC-class machines.
+ * (In the second pass the histogram space is re-used for pixel mapping data;
+ * in that capacity, each cell must be able to store zero to the number of
+ * desired colors.  16 bits/cell is plenty for that too.)
+ * Since the JPEG code is intended to run in small memory model on 80x86
+ * machines, we can't just allocate the histogram in one chunk.  Instead
+ * of a true 3-D array, we use a row of pointers to 2-D arrays.  Each
+ * pointer corresponds to a C0 value (typically 2^5 = 32 pointers) and
+ * each 2-D array has 2^6*2^5 = 2048 or 2^6*2^6 = 4096 entries.
+ */
+
+#define MAXNUMCOLORS  (_MAXJSAMPLE + 1) /* maximum size of colormap */
+
+/* These will do the right thing for either R,G,B or B,G,R color order,
+ * but you may not like the results for other color orders.
+ */
+#define HIST_C0_BITS  5         /* bits of precision in R/B histogram */
+#define HIST_C1_BITS  6         /* bits of precision in G histogram */
+#define HIST_C2_BITS  5         /* bits of precision in B/R histogram */
+
+/* Number of elements along histogram axes. */
+#define HIST_C0_ELEMS  (1 << HIST_C0_BITS)
+#define HIST_C1_ELEMS  (1 << HIST_C1_BITS)
+#define HIST_C2_ELEMS  (1 << HIST_C2_BITS)
+
+/* These are the amounts to shift an input value to get a histogram index. */
+#define C0_SHIFT  (BITS_IN_JSAMPLE - HIST_C0_BITS)
+#define C1_SHIFT  (BITS_IN_JSAMPLE - HIST_C1_BITS)
+#define C2_SHIFT  (BITS_IN_JSAMPLE - HIST_C2_BITS)
+
+
+typedef UINT16 histcell;        /* histogram cell; prefer an unsigned type */
+
+typedef histcell *histptr;      /* for pointers to histogram cells */
+
+typedef histcell hist1d[HIST_C2_ELEMS]; /* typedefs for the array */
+typedef hist1d *hist2d;         /* type for the 2nd-level pointers */
+typedef hist2d *hist3d;         /* type for top-level pointer */
+
+
+/* Declarations for Floyd-Steinberg dithering.
+ *
+ * Errors are accumulated into the array fserrors[], at a resolution of
+ * 1/16th of a pixel count.  The error at a given pixel is propagated
+ * to its not-yet-processed neighbors using the standard F-S fractions,
+ *              ...     (here)  7/16
+ *              3/16    5/16    1/16
+ * We work left-to-right on even rows, right-to-left on odd rows.
+ *
+ * We can get away with a single array (holding one row's worth of errors)
+ * by using it to store the current row's errors at pixel columns not yet
+ * processed, but the next row's errors at columns already processed.  We
+ * need only a few extra variables to hold the errors immediately around the
+ * current column.  (If we are lucky, those variables are in registers, but
+ * even if not, they're probably cheaper to access than array elements are.)
+ *
+ * The fserrors[] array has (#columns + 2) entries; the extra entry at
+ * each end saves us from special-casing the first and last pixels.
+ * Each entry is three values long, one value for each color component.
+ */
+
+#if BITS_IN_JSAMPLE == 8
+typedef INT16 FSERROR;          /* 16 bits should be enough */
+typedef int LOCFSERROR;         /* use 'int' for calculation temps */
+#else
+typedef JLONG FSERROR;          /* may need more than 16 bits */
+typedef JLONG LOCFSERROR;       /* be sure calculation temps are big enough */
+#endif
+
+typedef FSERROR *FSERRPTR;      /* pointer to error array */
+
+
+/* Private subobject */
+
+typedef struct {
+  struct jpeg_color_quantizer pub; /* public fields */
+
+  /* Space for the eventually created colormap is stashed here */
+  _JSAMPARRAY sv_colormap;      /* colormap allocated at init time */
+  int desired;                  /* desired # of colors = size of colormap */
+
+  /* Variables for accumulating image statistics */
+  hist3d histogram;             /* pointer to the histogram */
+
+  boolean needs_zeroed;         /* TRUE if next pass must zero histogram */
+
+  /* Variables for Floyd-Steinberg dithering */
+  FSERRPTR fserrors;            /* accumulated errors */
+  boolean on_odd_row;           /* flag to remember which row we are on */
+  int *error_limiter;           /* table for clamping the applied error */
+} my_cquantizer;
+
+typedef my_cquantizer *my_cquantize_ptr;
+
+
+/*
+ * Prescan some rows of pixels.
+ * In this module the prescan simply updates the histogram, which has been
+ * initialized to zeroes by start_pass.
+ * An output_buf parameter is required by the method signature, but no data
+ * is actually output (in fact the buffer controller is probably passing a
+ * NULL pointer).
+ */
+
+METHODDEF(void)
+prescan_quantize(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+                 _JSAMPARRAY output_buf, int num_rows)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+  register _JSAMPROW ptr;
+  register histptr histp;
+  register hist3d histogram = cquantize->histogram;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    ptr = input_buf[row];
+    for (col = width; col > 0; col--) {
+      /* get pixel value and index into the histogram */
+      histp = &histogram[ptr[0] >> C0_SHIFT]
+                        [ptr[1] >> C1_SHIFT]
+                        [ptr[2] >> C2_SHIFT];
+      /* increment, check for overflow and undo increment if so. */
+      if (++(*histp) <= 0)
+        (*histp)--;
+      ptr += 3;
+    }
+  }
+}
+
+
+/*
+ * Next we have the really interesting routines: selection of a colormap
+ * given the completed histogram.
+ * These routines work with a list of "boxes", each representing a rectangular
+ * subset of the input color space (to histogram precision).
+ */
+
+typedef struct {
+  /* The bounds of the box (inclusive); expressed as histogram indexes */
+  int c0min, c0max;
+  int c1min, c1max;
+  int c2min, c2max;
+  /* The volume (actually 2-norm) of the box */
+  JLONG volume;
+  /* The number of nonzero histogram cells within this box */
+  long colorcount;
+} box;
+
+typedef box *boxptr;
+
+
+LOCAL(boxptr)
+find_biggest_color_pop(boxptr boxlist, int numboxes)
+/* Find the splittable box with the largest color population */
+/* Returns NULL if no splittable boxes remain */
+{
+  register boxptr boxp;
+  register int i;
+  register long maxc = 0;
+  boxptr which = NULL;
+
+  for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
+    if (boxp->colorcount > maxc && boxp->volume > 0) {
+      which = boxp;
+      maxc = boxp->colorcount;
+    }
+  }
+  return which;
+}
+
+
+LOCAL(boxptr)
+find_biggest_volume(boxptr boxlist, int numboxes)
+/* Find the splittable box with the largest (scaled) volume */
+/* Returns NULL if no splittable boxes remain */
+{
+  register boxptr boxp;
+  register int i;
+  register JLONG maxv = 0;
+  boxptr which = NULL;
+
+  for (i = 0, boxp = boxlist; i < numboxes; i++, boxp++) {
+    if (boxp->volume > maxv) {
+      which = boxp;
+      maxv = boxp->volume;
+    }
+  }
+  return which;
+}
+
+
+LOCAL(void)
+update_box(j_decompress_ptr cinfo, boxptr boxp)
+/* Shrink the min/max bounds of a box to enclose only nonzero elements, */
+/* and recompute its volume and population */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  histptr histp;
+  int c0, c1, c2;
+  int c0min, c0max, c1min, c1max, c2min, c2max;
+  JLONG dist0, dist1, dist2;
+  long ccount;
+
+  c0min = boxp->c0min;  c0max = boxp->c0max;
+  c1min = boxp->c1min;  c1max = boxp->c1max;
+  c2min = boxp->c2min;  c2max = boxp->c2max;
+
+  if (c0max > c0min)
+    for (c0 = c0min; c0 <= c0max; c0++)
+      for (c1 = c1min; c1 <= c1max; c1++) {
+        histp = &histogram[c0][c1][c2min];
+        for (c2 = c2min; c2 <= c2max; c2++)
+          if (*histp++ != 0) {
+            boxp->c0min = c0min = c0;
+            goto have_c0min;
+          }
+      }
+have_c0min:
+  if (c0max > c0min)
+    for (c0 = c0max; c0 >= c0min; c0--)
+      for (c1 = c1min; c1 <= c1max; c1++) {
+        histp = &histogram[c0][c1][c2min];
+        for (c2 = c2min; c2 <= c2max; c2++)
+          if (*histp++ != 0) {
+            boxp->c0max = c0max = c0;
+            goto have_c0max;
+          }
+      }
+have_c0max:
+  if (c1max > c1min)
+    for (c1 = c1min; c1 <= c1max; c1++)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+        histp = &histogram[c0][c1][c2min];
+        for (c2 = c2min; c2 <= c2max; c2++)
+          if (*histp++ != 0) {
+            boxp->c1min = c1min = c1;
+            goto have_c1min;
+          }
+      }
+have_c1min:
+  if (c1max > c1min)
+    for (c1 = c1max; c1 >= c1min; c1--)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+        histp = &histogram[c0][c1][c2min];
+        for (c2 = c2min; c2 <= c2max; c2++)
+          if (*histp++ != 0) {
+            boxp->c1max = c1max = c1;
+            goto have_c1max;
+          }
+      }
+have_c1max:
+  if (c2max > c2min)
+    for (c2 = c2min; c2 <= c2max; c2++)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+        histp = &histogram[c0][c1min][c2];
+        for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+          if (*histp != 0) {
+            boxp->c2min = c2min = c2;
+            goto have_c2min;
+          }
+      }
+have_c2min:
+  if (c2max > c2min)
+    for (c2 = c2max; c2 >= c2min; c2--)
+      for (c0 = c0min; c0 <= c0max; c0++) {
+        histp = &histogram[c0][c1min][c2];
+        for (c1 = c1min; c1 <= c1max; c1++, histp += HIST_C2_ELEMS)
+          if (*histp != 0) {
+            boxp->c2max = c2max = c2;
+            goto have_c2max;
+          }
+      }
+have_c2max:
+
+  /* Update box volume.
+   * We use 2-norm rather than real volume here; this biases the method
+   * against making long narrow boxes, and it has the side benefit that
+   * a box is splittable iff norm > 0.
+   * Since the differences are expressed in histogram-cell units,
+   * we have to shift back to _JSAMPLE units to get consistent distances;
+   * after which, we scale according to the selected distance scale factors.
+   */
+  dist0 = ((c0max - c0min) << C0_SHIFT) * C0_SCALE;
+  dist1 = ((c1max - c1min) << C1_SHIFT) * C1_SCALE;
+  dist2 = ((c2max - c2min) << C2_SHIFT) * C2_SCALE;
+  boxp->volume = dist0 * dist0 + dist1 * dist1 + dist2 * dist2;
+
+  /* Now scan remaining volume of box and compute population */
+  ccount = 0;
+  for (c0 = c0min; c0 <= c0max; c0++)
+    for (c1 = c1min; c1 <= c1max; c1++) {
+      histp = &histogram[c0][c1][c2min];
+      for (c2 = c2min; c2 <= c2max; c2++, histp++)
+        if (*histp != 0) {
+          ccount++;
+        }
+    }
+  boxp->colorcount = ccount;
+}
+
+
+LOCAL(int)
+median_cut(j_decompress_ptr cinfo, boxptr boxlist, int numboxes,
+           int desired_colors)
+/* Repeatedly select and split the largest box until we have enough boxes */
+{
+  int n, lb;
+  int c0, c1, c2, cmax;
+  register boxptr b1, b2;
+
+  while (numboxes < desired_colors) {
+    /* Select box to split.
+     * Current algorithm: by population for first half, then by volume.
+     */
+    if (numboxes * 2 <= desired_colors) {
+      b1 = find_biggest_color_pop(boxlist, numboxes);
+    } else {
+      b1 = find_biggest_volume(boxlist, numboxes);
+    }
+    if (b1 == NULL)             /* no splittable boxes left! */
+      break;
+    b2 = &boxlist[numboxes];    /* where new box will go */
+    /* Copy the color bounds to the new box. */
+    b2->c0max = b1->c0max;  b2->c1max = b1->c1max;  b2->c2max = b1->c2max;
+    b2->c0min = b1->c0min;  b2->c1min = b1->c1min;  b2->c2min = b1->c2min;
+    /* Choose which axis to split the box on.
+     * Current algorithm: longest scaled axis.
+     * See notes in update_box about scaling distances.
+     */
+    c0 = ((b1->c0max - b1->c0min) << C0_SHIFT) * C0_SCALE;
+    c1 = ((b1->c1max - b1->c1min) << C1_SHIFT) * C1_SCALE;
+    c2 = ((b1->c2max - b1->c2min) << C2_SHIFT) * C2_SCALE;
+    /* We want to break any ties in favor of green, then red, blue last.
+     * This code does the right thing for R,G,B or B,G,R color orders only.
+     */
+    if (rgb_red[cinfo->out_color_space] == 0) {
+      cmax = c1;  n = 1;
+      if (c0 > cmax) { cmax = c0;  n = 0; }
+      if (c2 > cmax) { n = 2; }
+    } else {
+      cmax = c1;  n = 1;
+      if (c2 > cmax) { cmax = c2;  n = 2; }
+      if (c0 > cmax) { n = 0; }
+    }
+    /* Choose split point along selected axis, and update box bounds.
+     * Current algorithm: split at halfway point.
+     * (Since the box has been shrunk to minimum volume,
+     * any split will produce two nonempty subboxes.)
+     * Note that lb value is max for lower box, so must be < old max.
+     */
+    switch (n) {
+    case 0:
+      lb = (b1->c0max + b1->c0min) / 2;
+      b1->c0max = lb;
+      b2->c0min = lb + 1;
+      break;
+    case 1:
+      lb = (b1->c1max + b1->c1min) / 2;
+      b1->c1max = lb;
+      b2->c1min = lb + 1;
+      break;
+    case 2:
+      lb = (b1->c2max + b1->c2min) / 2;
+      b1->c2max = lb;
+      b2->c2min = lb + 1;
+      break;
+    }
+    /* Update stats for boxes */
+    update_box(cinfo, b1);
+    update_box(cinfo, b2);
+    numboxes++;
+  }
+  return numboxes;
+}
+
+
+LOCAL(void)
+compute_color(j_decompress_ptr cinfo, boxptr boxp, int icolor)
+/* Compute representative color for a box, put it in colormap[icolor] */
+{
+  /* Current algorithm: mean weighted by pixels (not colors) */
+  /* Note it is important to get the rounding correct! */
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  histptr histp;
+  int c0, c1, c2;
+  int c0min, c0max, c1min, c1max, c2min, c2max;
+  long count;
+  long total = 0;
+  long c0total = 0;
+  long c1total = 0;
+  long c2total = 0;
+
+  c0min = boxp->c0min;  c0max = boxp->c0max;
+  c1min = boxp->c1min;  c1max = boxp->c1max;
+  c2min = boxp->c2min;  c2max = boxp->c2max;
+
+  for (c0 = c0min; c0 <= c0max; c0++)
+    for (c1 = c1min; c1 <= c1max; c1++) {
+      histp = &histogram[c0][c1][c2min];
+      for (c2 = c2min; c2 <= c2max; c2++) {
+        if ((count = *histp++) != 0) {
+          total += count;
+          c0total += ((c0 << C0_SHIFT) + ((1 << C0_SHIFT) >> 1)) * count;
+          c1total += ((c1 << C1_SHIFT) + ((1 << C1_SHIFT) >> 1)) * count;
+          c2total += ((c2 << C2_SHIFT) + ((1 << C2_SHIFT) >> 1)) * count;
+        }
+      }
+    }
+
+  ((_JSAMPARRAY)cinfo->colormap)[0][icolor] =
+    (_JSAMPLE)((c0total + (total >> 1)) / total);
+  ((_JSAMPARRAY)cinfo->colormap)[1][icolor] =
+    (_JSAMPLE)((c1total + (total >> 1)) / total);
+  ((_JSAMPARRAY)cinfo->colormap)[2][icolor] =
+    (_JSAMPLE)((c2total + (total >> 1)) / total);
+}
+
+
+LOCAL(void)
+select_colors(j_decompress_ptr cinfo, int desired_colors)
+/* Master routine for color selection */
+{
+  boxptr boxlist;
+  int numboxes;
+  int i;
+
+  /* Allocate workspace for box list */
+  boxlist = (boxptr)(*cinfo->mem->alloc_small)
+    ((j_common_ptr)cinfo, JPOOL_IMAGE, desired_colors * sizeof(box));
+  /* Initialize one box containing whole space */
+  numboxes = 1;
+  boxlist[0].c0min = 0;
+  boxlist[0].c0max = _MAXJSAMPLE >> C0_SHIFT;
+  boxlist[0].c1min = 0;
+  boxlist[0].c1max = _MAXJSAMPLE >> C1_SHIFT;
+  boxlist[0].c2min = 0;
+  boxlist[0].c2max = _MAXJSAMPLE >> C2_SHIFT;
+  /* Shrink it to actually-used volume and set its statistics */
+  update_box(cinfo, &boxlist[0]);
+  /* Perform median-cut to produce final box list */
+  numboxes = median_cut(cinfo, boxlist, numboxes, desired_colors);
+  /* Compute the representative color for each box, fill colormap */
+  for (i = 0; i < numboxes; i++)
+    compute_color(cinfo, &boxlist[i], i);
+  cinfo->actual_number_of_colors = numboxes;
+  TRACEMS1(cinfo, 1, JTRC_QUANT_SELECTED, numboxes);
+}
+
+
+/*
+ * These routines are concerned with the time-critical task of mapping input
+ * colors to the nearest color in the selected colormap.
+ *
+ * We re-use the histogram space as an "inverse color map", essentially a
+ * cache for the results of nearest-color searches.  All colors within a
+ * histogram cell will be mapped to the same colormap entry, namely the one
+ * closest to the cell's center.  This may not be quite the closest entry to
+ * the actual input color, but it's almost as good.  A zero in the cache
+ * indicates we haven't found the nearest color for that cell yet; the array
+ * is cleared to zeroes before starting the mapping pass.  When we find the
+ * nearest color for a cell, its colormap index plus one is recorded in the
+ * cache for future use.  The pass2 scanning routines call fill_inverse_cmap
+ * when they need to use an unfilled entry in the cache.
+ *
+ * Our method of efficiently finding nearest colors is based on the "locally
+ * sorted search" idea described by Heckbert and on the incremental distance
+ * calculation described by Spencer W. Thomas in chapter III.1 of Graphics
+ * Gems II (James Arvo, ed.  Academic Press, 1991).  Thomas points out that
+ * the distances from a given colormap entry to each cell of the histogram can
+ * be computed quickly using an incremental method: the differences between
+ * distances to adjacent cells themselves differ by a constant.  This allows a
+ * fairly fast implementation of the "brute force" approach of computing the
+ * distance from every colormap entry to every histogram cell.  Unfortunately,
+ * it needs a work array to hold the best-distance-so-far for each histogram
+ * cell (because the inner loop has to be over cells, not colormap entries).
+ * The work array elements have to be JLONGs, so the work array would need
+ * 256Kb at our recommended precision.  This is not feasible in DOS machines.
+ *
+ * To get around these problems, we apply Thomas' method to compute the
+ * nearest colors for only the cells within a small subbox of the histogram.
+ * The work array need be only as big as the subbox, so the memory usage
+ * problem is solved.  Furthermore, we need not fill subboxes that are never
+ * referenced in pass2; many images use only part of the color gamut, so a
+ * fair amount of work is saved.  An additional advantage of this
+ * approach is that we can apply Heckbert's locality criterion to quickly
+ * eliminate colormap entries that are far away from the subbox; typically
+ * three-fourths of the colormap entries are rejected by Heckbert's criterion,
+ * and we need not compute their distances to individual cells in the subbox.
+ * The speed of this approach is heavily influenced by the subbox size: too
+ * small means too much overhead, too big loses because Heckbert's criterion
+ * can't eliminate as many colormap entries.  Empirically the best subbox
+ * size seems to be about 1/512th of the histogram (1/8th in each direction).
+ *
+ * Thomas' article also describes a refined method which is asymptotically
+ * faster than the brute-force method, but it is also far more complex and
+ * cannot efficiently be applied to small subboxes.  It is therefore not
+ * useful for programs intended to be portable to DOS machines.  On machines
+ * with plenty of memory, filling the whole histogram in one shot with Thomas'
+ * refined method might be faster than the present code --- but then again,
+ * it might not be any faster, and it's certainly more complicated.
+ */
+
+
+/* log2(histogram cells in update box) for each axis; this can be adjusted */
+#define BOX_C0_LOG  (HIST_C0_BITS - 3)
+#define BOX_C1_LOG  (HIST_C1_BITS - 3)
+#define BOX_C2_LOG  (HIST_C2_BITS - 3)
+
+#define BOX_C0_ELEMS  (1 << BOX_C0_LOG) /* # of hist cells in update box */
+#define BOX_C1_ELEMS  (1 << BOX_C1_LOG)
+#define BOX_C2_ELEMS  (1 << BOX_C2_LOG)
+
+#define BOX_C0_SHIFT  (C0_SHIFT + BOX_C0_LOG)
+#define BOX_C1_SHIFT  (C1_SHIFT + BOX_C1_LOG)
+#define BOX_C2_SHIFT  (C2_SHIFT + BOX_C2_LOG)
+
+
+/*
+ * The next three routines implement inverse colormap filling.  They could
+ * all be folded into one big routine, but splitting them up this way saves
+ * some stack space (the mindist[] and bestdist[] arrays need not coexist)
+ * and may allow some compilers to produce better code by registerizing more
+ * inner-loop variables.
+ */
+
+LOCAL(int)
+find_nearby_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
+                   _JSAMPLE colorlist[])
+/* Locate the colormap entries close enough to an update box to be candidates
+ * for the nearest entry to some cell(s) in the update box.  The update box
+ * is specified by the center coordinates of its first cell.  The number of
+ * candidate colormap entries is returned, and their colormap indexes are
+ * placed in colorlist[].
+ * This routine uses Heckbert's "locally sorted search" criterion to select
+ * the colors that need further consideration.
+ */
+{
+  int numcolors = cinfo->actual_number_of_colors;
+  int maxc0, maxc1, maxc2;
+  int centerc0, centerc1, centerc2;
+  int i, x, ncolors;
+  JLONG minmaxdist, min_dist, max_dist, tdist;
+  JLONG mindist[MAXNUMCOLORS];  /* min distance to colormap entry i */
+
+  /* Compute true coordinates of update box's upper corner and center.
+   * Actually we compute the coordinates of the center of the upper-corner
+   * histogram cell, which are the upper bounds of the volume we care about.
+   * Note that since ">>" rounds down, the "center" values may be closer to
+   * min than to max; hence comparisons to them must be "<=", not "<".
+   */
+  maxc0 = minc0 + ((1 << BOX_C0_SHIFT) - (1 << C0_SHIFT));
+  centerc0 = (minc0 + maxc0) >> 1;
+  maxc1 = minc1 + ((1 << BOX_C1_SHIFT) - (1 << C1_SHIFT));
+  centerc1 = (minc1 + maxc1) >> 1;
+  maxc2 = minc2 + ((1 << BOX_C2_SHIFT) - (1 << C2_SHIFT));
+  centerc2 = (minc2 + maxc2) >> 1;
+
+  /* For each color in colormap, find:
+   *  1. its minimum squared-distance to any point in the update box
+   *     (zero if color is within update box);
+   *  2. its maximum squared-distance to any point in the update box.
+   * Both of these can be found by considering only the corners of the box.
+   * We save the minimum distance for each color in mindist[];
+   * only the smallest maximum distance is of interest.
+   */
+  minmaxdist = 0x7FFFFFFFL;
+
+  for (i = 0; i < numcolors; i++) {
+    /* We compute the squared-c0-distance term, then add in the other two. */
+    x = ((_JSAMPARRAY)cinfo->colormap)[0][i];
+    if (x < minc0) {
+      tdist = (x - minc0) * C0_SCALE;
+      min_dist = tdist * tdist;
+      tdist = (x - maxc0) * C0_SCALE;
+      max_dist = tdist * tdist;
+    } else if (x > maxc0) {
+      tdist = (x - maxc0) * C0_SCALE;
+      min_dist = tdist * tdist;
+      tdist = (x - minc0) * C0_SCALE;
+      max_dist = tdist * tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      min_dist = 0;
+      if (x <= centerc0) {
+        tdist = (x - maxc0) * C0_SCALE;
+        max_dist = tdist * tdist;
+      } else {
+        tdist = (x - minc0) * C0_SCALE;
+        max_dist = tdist * tdist;
+      }
+    }
+
+    x = ((_JSAMPARRAY)cinfo->colormap)[1][i];
+    if (x < minc1) {
+      tdist = (x - minc1) * C1_SCALE;
+      min_dist += tdist * tdist;
+      tdist = (x - maxc1) * C1_SCALE;
+      max_dist += tdist * tdist;
+    } else if (x > maxc1) {
+      tdist = (x - maxc1) * C1_SCALE;
+      min_dist += tdist * tdist;
+      tdist = (x - minc1) * C1_SCALE;
+      max_dist += tdist * tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      if (x <= centerc1) {
+        tdist = (x - maxc1) * C1_SCALE;
+        max_dist += tdist * tdist;
+      } else {
+        tdist = (x - minc1) * C1_SCALE;
+        max_dist += tdist * tdist;
+      }
+    }
+
+    x = ((_JSAMPARRAY)cinfo->colormap)[2][i];
+    if (x < minc2) {
+      tdist = (x - minc2) * C2_SCALE;
+      min_dist += tdist * tdist;
+      tdist = (x - maxc2) * C2_SCALE;
+      max_dist += tdist * tdist;
+    } else if (x > maxc2) {
+      tdist = (x - maxc2) * C2_SCALE;
+      min_dist += tdist * tdist;
+      tdist = (x - minc2) * C2_SCALE;
+      max_dist += tdist * tdist;
+    } else {
+      /* within cell range so no contribution to min_dist */
+      if (x <= centerc2) {
+        tdist = (x - maxc2) * C2_SCALE;
+        max_dist += tdist * tdist;
+      } else {
+        tdist = (x - minc2) * C2_SCALE;
+        max_dist += tdist * tdist;
+      }
+    }
+
+    mindist[i] = min_dist;      /* save away the results */
+    if (max_dist < minmaxdist)
+      minmaxdist = max_dist;
+  }
+
+  /* Now we know that no cell in the update box is more than minmaxdist
+   * away from some colormap entry.  Therefore, only colors that are
+   * within minmaxdist of some part of the box need be considered.
+   */
+  ncolors = 0;
+  for (i = 0; i < numcolors; i++) {
+    if (mindist[i] <= minmaxdist)
+      colorlist[ncolors++] = (_JSAMPLE)i;
+  }
+  return ncolors;
+}
+
+
+LOCAL(void)
+find_best_colors(j_decompress_ptr cinfo, int minc0, int minc1, int minc2,
+                 int numcolors, _JSAMPLE colorlist[], _JSAMPLE bestcolor[])
+/* Find the closest colormap entry for each cell in the update box,
+ * given the list of candidate colors prepared by find_nearby_colors.
+ * Return the indexes of the closest entries in the bestcolor[] array.
+ * This routine uses Thomas' incremental distance calculation method to
+ * find the distance from a colormap entry to successive cells in the box.
+ */
+{
+  int ic0, ic1, ic2;
+  int i, icolor;
+  register JLONG *bptr;         /* pointer into bestdist[] array */
+  _JSAMPLE *cptr;               /* pointer into bestcolor[] array */
+  JLONG dist0, dist1;           /* initial distance values */
+  register JLONG dist2;         /* current distance in inner loop */
+  JLONG xx0, xx1;               /* distance increments */
+  register JLONG xx2;
+  JLONG inc0, inc1, inc2;       /* initial values for increments */
+  /* This array holds the distance to the nearest-so-far color for each cell */
+  JLONG bestdist[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
+
+  /* Initialize best-distance for each cell of the update box */
+  bptr = bestdist;
+  for (i = BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS - 1; i >= 0; i--)
+    *bptr++ = 0x7FFFFFFFL;
+
+  /* For each color selected by find_nearby_colors,
+   * compute its distance to the center of each cell in the box.
+   * If that's less than best-so-far, update best distance and color number.
+   */
+
+  /* Nominal steps between cell centers ("x" in Thomas article) */
+#define STEP_C0  ((1 << C0_SHIFT) * C0_SCALE)
+#define STEP_C1  ((1 << C1_SHIFT) * C1_SCALE)
+#define STEP_C2  ((1 << C2_SHIFT) * C2_SCALE)
+
+  for (i = 0; i < numcolors; i++) {
+    icolor = colorlist[i];
+    /* Compute (square of) distance from minc0/c1/c2 to this color */
+    inc0 = (minc0 - ((_JSAMPARRAY)cinfo->colormap)[0][icolor]) * C0_SCALE;
+    dist0 = inc0 * inc0;
+    inc1 = (minc1 - ((_JSAMPARRAY)cinfo->colormap)[1][icolor]) * C1_SCALE;
+    dist0 += inc1 * inc1;
+    inc2 = (minc2 - ((_JSAMPARRAY)cinfo->colormap)[2][icolor]) * C2_SCALE;
+    dist0 += inc2 * inc2;
+    /* Form the initial difference increments */
+    inc0 = inc0 * (2 * STEP_C0) + STEP_C0 * STEP_C0;
+    inc1 = inc1 * (2 * STEP_C1) + STEP_C1 * STEP_C1;
+    inc2 = inc2 * (2 * STEP_C2) + STEP_C2 * STEP_C2;
+    /* Now loop over all cells in box, updating distance per Thomas method */
+    bptr = bestdist;
+    cptr = bestcolor;
+    xx0 = inc0;
+    for (ic0 = BOX_C0_ELEMS - 1; ic0 >= 0; ic0--) {
+      dist1 = dist0;
+      xx1 = inc1;
+      for (ic1 = BOX_C1_ELEMS - 1; ic1 >= 0; ic1--) {
+        dist2 = dist1;
+        xx2 = inc2;
+        for (ic2 = BOX_C2_ELEMS - 1; ic2 >= 0; ic2--) {
+          if (dist2 < *bptr) {
+            *bptr = dist2;
+            *cptr = (_JSAMPLE)icolor;
+          }
+          dist2 += xx2;
+          xx2 += 2 * STEP_C2 * STEP_C2;
+          bptr++;
+          cptr++;
+        }
+        dist1 += xx1;
+        xx1 += 2 * STEP_C1 * STEP_C1;
+      }
+      dist0 += xx0;
+      xx0 += 2 * STEP_C0 * STEP_C0;
+    }
+  }
+}
+
+
+LOCAL(void)
+fill_inverse_cmap(j_decompress_ptr cinfo, int c0, int c1, int c2)
+/* Fill the inverse-colormap entries in the update box that contains */
+/* histogram cell c0/c1/c2.  (Only that one cell MUST be filled, but */
+/* we can fill as many others as we wish.) */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  int minc0, minc1, minc2;      /* lower left corner of update box */
+  int ic0, ic1, ic2;
+  register _JSAMPLE *cptr;      /* pointer into bestcolor[] array */
+  register histptr cachep;      /* pointer into main cache array */
+  /* This array lists the candidate colormap indexes. */
+  _JSAMPLE colorlist[MAXNUMCOLORS];
+  int numcolors;                /* number of candidate colors */
+  /* This array holds the actually closest colormap index for each cell. */
+  _JSAMPLE bestcolor[BOX_C0_ELEMS * BOX_C1_ELEMS * BOX_C2_ELEMS];
+
+  /* Convert cell coordinates to update box ID */
+  c0 >>= BOX_C0_LOG;
+  c1 >>= BOX_C1_LOG;
+  c2 >>= BOX_C2_LOG;
+
+  /* Compute true coordinates of update box's origin corner.
+   * Actually we compute the coordinates of the center of the corner
+   * histogram cell, which are the lower bounds of the volume we care about.
+   */
+  minc0 = (c0 << BOX_C0_SHIFT) + ((1 << C0_SHIFT) >> 1);
+  minc1 = (c1 << BOX_C1_SHIFT) + ((1 << C1_SHIFT) >> 1);
+  minc2 = (c2 << BOX_C2_SHIFT) + ((1 << C2_SHIFT) >> 1);
+
+  /* Determine which colormap entries are close enough to be candidates
+   * for the nearest entry to some cell in the update box.
+   */
+  numcolors = find_nearby_colors(cinfo, minc0, minc1, minc2, colorlist);
+
+  /* Determine the actually nearest colors. */
+  find_best_colors(cinfo, minc0, minc1, minc2, numcolors, colorlist,
+                   bestcolor);
+
+  /* Save the best color numbers (plus 1) in the main cache array */
+  c0 <<= BOX_C0_LOG;            /* convert ID back to base cell indexes */
+  c1 <<= BOX_C1_LOG;
+  c2 <<= BOX_C2_LOG;
+  cptr = bestcolor;
+  for (ic0 = 0; ic0 < BOX_C0_ELEMS; ic0++) {
+    for (ic1 = 0; ic1 < BOX_C1_ELEMS; ic1++) {
+      cachep = &histogram[c0 + ic0][c1 + ic1][c2];
+      for (ic2 = 0; ic2 < BOX_C2_ELEMS; ic2++) {
+        *cachep++ = (histcell)((*cptr++) + 1);
+      }
+    }
+  }
+}
+
+
+/*
+ * Map some rows of pixels to the output colormapped representation.
+ */
+
+METHODDEF(void)
+pass2_no_dither(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+                _JSAMPARRAY output_buf, int num_rows)
+/* This version performs no dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  register _JSAMPROW inptr, outptr;
+  register histptr cachep;
+  register int c0, c1, c2;
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+
+  for (row = 0; row < num_rows; row++) {
+    inptr = input_buf[row];
+    outptr = output_buf[row];
+    for (col = width; col > 0; col--) {
+      /* get pixel value and index into the cache */
+      c0 = (*inptr++) >> C0_SHIFT;
+      c1 = (*inptr++) >> C1_SHIFT;
+      c2 = (*inptr++) >> C2_SHIFT;
+      cachep = &histogram[c0][c1][c2];
+      /* If we have not seen this color before, find nearest colormap entry */
+      /* and update the cache */
+      if (*cachep == 0)
+        fill_inverse_cmap(cinfo, c0, c1, c2);
+      /* Now emit the colormap index for this cell */
+      *outptr++ = (_JSAMPLE)(*cachep - 1);
+    }
+  }
+}
+
+
+METHODDEF(void)
+pass2_fs_dither(j_decompress_ptr cinfo, _JSAMPARRAY input_buf,
+                _JSAMPARRAY output_buf, int num_rows)
+/* This version performs Floyd-Steinberg dithering */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  register LOCFSERROR cur0, cur1, cur2; /* current error or pixel value */
+  LOCFSERROR belowerr0, belowerr1, belowerr2; /* error for pixel below cur */
+  LOCFSERROR bpreverr0, bpreverr1, bpreverr2; /* error for below/prev col */
+  register FSERRPTR errorptr;   /* => fserrors[] at column before current */
+  _JSAMPROW inptr;              /* => current input pixel */
+  _JSAMPROW outptr;             /* => current output pixel */
+  histptr cachep;
+  int dir;                      /* +1 or -1 depending on direction */
+  int dir3;                     /* 3*dir, for advancing inptr & errorptr */
+  int row;
+  JDIMENSION col;
+  JDIMENSION width = cinfo->output_width;
+  _JSAMPLE *range_limit = (_JSAMPLE *)cinfo->sample_range_limit;
+  int *error_limit = cquantize->error_limiter;
+  _JSAMPROW colormap0 = ((_JSAMPARRAY)cinfo->colormap)[0];
+  _JSAMPROW colormap1 = ((_JSAMPARRAY)cinfo->colormap)[1];
+  _JSAMPROW colormap2 = ((_JSAMPARRAY)cinfo->colormap)[2];
+  SHIFT_TEMPS
+
+  for (row = 0; row < num_rows; row++) {
+    inptr = input_buf[row];
+    outptr = output_buf[row];
+    if (cquantize->on_odd_row) {
+      /* work right to left in this row */
+      inptr += (width - 1) * 3; /* so point to rightmost pixel */
+      outptr += width - 1;
+      dir = -1;
+      dir3 = -3;
+      errorptr = cquantize->fserrors + (width + 1) * 3; /* => entry after last column */
+      cquantize->on_odd_row = FALSE; /* flip for next time */
+    } else {
+      /* work left to right in this row */
+      dir = 1;
+      dir3 = 3;
+      errorptr = cquantize->fserrors; /* => entry before first real column */
+      cquantize->on_odd_row = TRUE; /* flip for next time */
+    }
+    /* Preset error values: no error propagated to first pixel from left */
+    cur0 = cur1 = cur2 = 0;
+    /* and no error propagated to row below yet */
+    belowerr0 = belowerr1 = belowerr2 = 0;
+    bpreverr0 = bpreverr1 = bpreverr2 = 0;
+
+    for (col = width; col > 0; col--) {
+      /* curN holds the error propagated from the previous pixel on the
+       * current line.  Add the error propagated from the previous line
+       * to form the complete error correction term for this pixel, and
+       * round the error term (which is expressed * 16) to an integer.
+       * RIGHT_SHIFT rounds towards minus infinity, so adding 8 is correct
+       * for either sign of the error value.
+       * Note: errorptr points to *previous* column's array entry.
+       */
+      cur0 = RIGHT_SHIFT(cur0 + errorptr[dir3 + 0] + 8, 4);
+      cur1 = RIGHT_SHIFT(cur1 + errorptr[dir3 + 1] + 8, 4);
+      cur2 = RIGHT_SHIFT(cur2 + errorptr[dir3 + 2] + 8, 4);
+      /* Limit the error using transfer function set by init_error_limit.
+       * See comments with init_error_limit for rationale.
+       */
+      cur0 = error_limit[cur0];
+      cur1 = error_limit[cur1];
+      cur2 = error_limit[cur2];
+      /* Form pixel value + error, and range-limit to 0.._MAXJSAMPLE.
+       * The maximum error is +- _MAXJSAMPLE (or less with error limiting);
+       * this sets the required size of the range_limit array.
+       */
+      cur0 += inptr[0];
+      cur1 += inptr[1];
+      cur2 += inptr[2];
+      cur0 = range_limit[cur0];
+      cur1 = range_limit[cur1];
+      cur2 = range_limit[cur2];
+      /* Index into the cache with adjusted pixel value */
+      cachep =
+        &histogram[cur0 >> C0_SHIFT][cur1 >> C1_SHIFT][cur2 >> C2_SHIFT];
+      /* If we have not seen this color before, find nearest colormap */
+      /* entry and update the cache */
+      if (*cachep == 0)
+        fill_inverse_cmap(cinfo, cur0 >> C0_SHIFT, cur1 >> C1_SHIFT,
+                          cur2 >> C2_SHIFT);
+      /* Now emit the colormap index for this cell */
+      {
+        register int pixcode = *cachep - 1;
+        *outptr = (_JSAMPLE)pixcode;
+        /* Compute representation error for this pixel */
+        cur0 -= colormap0[pixcode];
+        cur1 -= colormap1[pixcode];
+        cur2 -= colormap2[pixcode];
+      }
+      /* Compute error fractions to be propagated to adjacent pixels.
+       * Add these into the running sums, and simultaneously shift the
+       * next-line error sums left by 1 column.
+       */
+      {
+        register LOCFSERROR bnexterr;
+
+        bnexterr = cur0;        /* Process component 0 */
+        errorptr[0] = (FSERROR)(bpreverr0 + cur0 * 3);
+        bpreverr0 = belowerr0 + cur0 * 5;
+        belowerr0 = bnexterr;
+        cur0 *= 7;
+        bnexterr = cur1;        /* Process component 1 */
+        errorptr[1] = (FSERROR)(bpreverr1 + cur1 * 3);
+        bpreverr1 = belowerr1 + cur1 * 5;
+        belowerr1 = bnexterr;
+        cur1 *= 7;
+        bnexterr = cur2;        /* Process component 2 */
+        errorptr[2] = (FSERROR)(bpreverr2 + cur2 * 3);
+        bpreverr2 = belowerr2 + cur2 * 5;
+        belowerr2 = bnexterr;
+        cur2 *= 7;
+      }
+      /* At this point curN contains the 7/16 error value to be propagated
+       * to the next pixel on the current line, and all the errors for the
+       * next line have been shifted over.  We are therefore ready to move on.
+       */
+      inptr += dir3;            /* Advance pixel pointers to next column */
+      outptr += dir;
+      errorptr += dir3;         /* advance errorptr to current column */
+    }
+    /* Post-loop cleanup: we must unload the final error values into the
+     * final fserrors[] entry.  Note we need not unload belowerrN because
+     * it is for the dummy column before or after the actual array.
+     */
+    errorptr[0] = (FSERROR)bpreverr0; /* unload prev errs into array */
+    errorptr[1] = (FSERROR)bpreverr1;
+    errorptr[2] = (FSERROR)bpreverr2;
+  }
+}
+
+
+/*
+ * Initialize the error-limiting transfer function (lookup table).
+ * The raw F-S error computation can potentially compute error values of up to
+ * +- _MAXJSAMPLE.  But we want the maximum correction applied to a pixel to be
+ * much less, otherwise obviously wrong pixels will be created.  (Typical
+ * effects include weird fringes at color-area boundaries, isolated bright
+ * pixels in a dark area, etc.)  The standard advice for avoiding this problem
+ * is to ensure that the "corners" of the color cube are allocated as output
+ * colors; then repeated errors in the same direction cannot cause cascading
+ * error buildup.  However, that only prevents the error from getting
+ * completely out of hand; Aaron Giles reports that error limiting improves
+ * the results even with corner colors allocated.
+ * A simple clamping of the error values to about +- _MAXJSAMPLE/8 works pretty
+ * well, but the smoother transfer function used below is even better.  Thanks
+ * to Aaron Giles for this idea.
+ */
+
+LOCAL(void)
+init_error_limit(j_decompress_ptr cinfo)
+/* Allocate and fill in the error_limiter table */
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+  int *table;
+  int in, out;
+
+  table = (int *)(*cinfo->mem->alloc_small)
+    ((j_common_ptr)cinfo, JPOOL_IMAGE, (_MAXJSAMPLE * 2 + 1) * sizeof(int));
+  table += _MAXJSAMPLE;         /* so can index -_MAXJSAMPLE .. +_MAXJSAMPLE */
+  cquantize->error_limiter = table;
+
+#define STEPSIZE  ((_MAXJSAMPLE + 1) / 16)
+  /* Map errors 1:1 up to +- _MAXJSAMPLE/16 */
+  out = 0;
+  for (in = 0; in < STEPSIZE; in++, out++) {
+    table[in] = out;  table[-in] = -out;
+  }
+  /* Map errors 1:2 up to +- 3*_MAXJSAMPLE/16 */
+  for (; in < STEPSIZE * 3; in++, out += (in & 1) ? 0 : 1) {
+    table[in] = out;  table[-in] = -out;
+  }
+  /* Clamp the rest to final out value (which is (_MAXJSAMPLE+1)/8) */
+  for (; in <= _MAXJSAMPLE; in++) {
+    table[in] = out;  table[-in] = -out;
+  }
+#undef STEPSIZE
+}
+
+
+/*
+ * Finish up at the end of each pass.
+ */
+
+METHODDEF(void)
+finish_pass1(j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+
+  /* Select the representative colors and fill in cinfo->colormap */
+  cinfo->colormap = (JSAMPARRAY)cquantize->sv_colormap;
+  select_colors(cinfo, cquantize->desired);
+  /* Force next pass to zero the color index table */
+  cquantize->needs_zeroed = TRUE;
+}
+
+
+METHODDEF(void)
+finish_pass2(j_decompress_ptr cinfo)
+{
+  /* no work */
+}
+
+
+/*
+ * Initialize for each processing pass.
+ */
+
+METHODDEF(void)
+start_pass_2_quant(j_decompress_ptr cinfo, boolean is_pre_scan)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+  hist3d histogram = cquantize->histogram;
+  int i;
+
+  /* Only F-S dithering or no dithering is supported. */
+  /* If user asks for ordered dither, give them F-S. */
+  if (cinfo->dither_mode != JDITHER_NONE)
+    cinfo->dither_mode = JDITHER_FS;
+
+  if (is_pre_scan) {
+    /* Set up method pointers */
+    cquantize->pub._color_quantize = prescan_quantize;
+    cquantize->pub.finish_pass = finish_pass1;
+    cquantize->needs_zeroed = TRUE; /* Always zero histogram */
+  } else {
+    /* Set up method pointers */
+    if (cinfo->dither_mode == JDITHER_FS)
+      cquantize->pub._color_quantize = pass2_fs_dither;
+    else
+      cquantize->pub._color_quantize = pass2_no_dither;
+    cquantize->pub.finish_pass = finish_pass2;
+
+    /* Make sure color count is acceptable */
+    i = cinfo->actual_number_of_colors;
+    if (i < 1)
+      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 1);
+    if (i > MAXNUMCOLORS)
+      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
+
+    if (cinfo->dither_mode == JDITHER_FS) {
+      size_t arraysize =
+        (size_t)((cinfo->output_width + 2) * (3 * sizeof(FSERROR)));
+      /* Allocate Floyd-Steinberg workspace if we didn't already. */
+      if (cquantize->fserrors == NULL)
+        cquantize->fserrors = (FSERRPTR)(*cinfo->mem->alloc_large)
+          ((j_common_ptr)cinfo, JPOOL_IMAGE, arraysize);
+      /* Initialize the propagated errors to zero. */
+      jzero_far((void *)cquantize->fserrors, arraysize);
+      /* Make the error-limit table if we didn't already. */
+      if (cquantize->error_limiter == NULL)
+        init_error_limit(cinfo);
+      cquantize->on_odd_row = FALSE;
+    }
+
+  }
+  /* Zero the histogram or inverse color map, if necessary */
+  if (cquantize->needs_zeroed) {
+    for (i = 0; i < HIST_C0_ELEMS; i++) {
+      jzero_far((void *)histogram[i],
+                HIST_C1_ELEMS * HIST_C2_ELEMS * sizeof(histcell));
+    }
+    cquantize->needs_zeroed = FALSE;
+  }
+}
+
+
+/*
+ * Switch to a new external colormap between output passes.
+ */
+
+METHODDEF(void)
+new_color_map_2_quant(j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize = (my_cquantize_ptr)cinfo->cquantize;
+
+  /* Reset the inverse color map */
+  cquantize->needs_zeroed = TRUE;
+}
+
+
+/*
+ * Module initialization routine for 2-pass color quantization.
+ */
+
+GLOBAL(void)
+_jinit_2pass_quantizer(j_decompress_ptr cinfo)
+{
+  my_cquantize_ptr cquantize;
+  int i;
+
+  if (cinfo->data_precision != BITS_IN_JSAMPLE)
+    ERREXIT1(cinfo, JERR_BAD_PRECISION, cinfo->data_precision);
+
+  cquantize = (my_cquantize_ptr)
+    (*cinfo->mem->alloc_small) ((j_common_ptr)cinfo, JPOOL_IMAGE,
+                                sizeof(my_cquantizer));
+  cinfo->cquantize = (struct jpeg_color_quantizer *)cquantize;
+  cquantize->pub.start_pass = start_pass_2_quant;
+  cquantize->pub.new_color_map = new_color_map_2_quant;
+  cquantize->fserrors = NULL;   /* flag optional arrays not allocated */
+  cquantize->error_limiter = NULL;
+
+  /* Make sure jdmaster didn't give me a case I can't handle */
+  if (cinfo->out_color_components != 3 ||
+      cinfo->out_color_space == JCS_RGB565 || cinfo->master->lossless)
+    ERREXIT(cinfo, JERR_NOTIMPL);
+
+  /* Allocate the histogram/inverse colormap storage */
+  cquantize->histogram = (hist3d)(*cinfo->mem->alloc_small)
+    ((j_common_ptr)cinfo, JPOOL_IMAGE, HIST_C0_ELEMS * sizeof(hist2d));
+  for (i = 0; i < HIST_C0_ELEMS; i++) {
+    cquantize->histogram[i] = (hist2d)(*cinfo->mem->alloc_large)
+      ((j_common_ptr)cinfo, JPOOL_IMAGE,
+       HIST_C1_ELEMS * HIST_C2_ELEMS * sizeof(histcell));
+  }
+  cquantize->needs_zeroed = TRUE; /* histogram is garbage now */
+
+  /* Allocate storage for the completed colormap, if required.
+   * We do this now since it may affect the memory manager's space
+   * calculations.
+   */
+  if (cinfo->enable_2pass_quant) {
+    /* Make sure color count is acceptable */
+    int desired = cinfo->desired_number_of_colors;
+    /* Lower bound on # of colors ... somewhat arbitrary as long as > 0 */
+    if (desired < 8)
+      ERREXIT1(cinfo, JERR_QUANT_FEW_COLORS, 8);
+    /* Make sure colormap indexes can be represented by _JSAMPLEs */
+    if (desired > MAXNUMCOLORS)
+      ERREXIT1(cinfo, JERR_QUANT_MANY_COLORS, MAXNUMCOLORS);
+    cquantize->sv_colormap = (_JSAMPARRAY)(*cinfo->mem->alloc_sarray)
+      ((j_common_ptr)cinfo, JPOOL_IMAGE, (JDIMENSION)desired, (JDIMENSION)3);
+    cquantize->desired = desired;
+  } else
+    cquantize->sv_colormap = NULL;
+
+  /* Only F-S dithering or no dithering is supported. */
+  /* If user asks for ordered dither, give them F-S. */
+  if (cinfo->dither_mode != JDITHER_NONE)
+    cinfo->dither_mode = JDITHER_FS;
+
+  /* Allocate Floyd-Steinberg workspace if necessary.
+   * This isn't really needed until pass 2, but again it may affect the memory
+   * manager's space calculations.  Although we will cope with a later change
+   * in dither_mode, we do not promise to honor max_memory_to_use if
+   * dither_mode changes.
+   */
+  if (cinfo->dither_mode == JDITHER_FS) {
+    cquantize->fserrors = (FSERRPTR)(*cinfo->mem->alloc_large)
+      ((j_common_ptr)cinfo, JPOOL_IMAGE,
+       (size_t)((cinfo->output_width + 2) * (3 * sizeof(FSERROR))));
+    /* Might as well create the error-limiting table too. */
+    init_error_limit(cinfo);
+  }
+}
+
+#endif /* defined(QUANT_2PASS_SUPPORTED) && BITS_IN_JSAMPLE != 16 */
diff --git a/thirdparty/libjpeg-turbo/src/jsamplecomp.h b/thirdparty/libjpeg-turbo/src/jsamplecomp.h
new file mode 100644
index 00000000000..3a1f2960299
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jsamplecomp.h
@@ -0,0 +1,333 @@
+/*
+ * jsamplecomp.h
+ *
+ * Copyright (C) 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ */
+
+/* In source files that must be compiled for multiple data precisions, we
+ * prefix all precision-dependent data types, macros, methods, fields, and
+ * function names with an underscore.  Including this file replaces those
+ * precision-independent tokens with their precision-dependent equivalents,
+ * based on the value of BITS_IN_JSAMPLE.
+ */
+
+#ifndef JSAMPLECOMP_H
+#define JSAMPLECOMP_H
+
+#if BITS_IN_JSAMPLE == 16
+
+/* Sample data types and macros (jmorecfg.h) */
+#define _JSAMPLE  J16SAMPLE
+
+#define _MAXJSAMPLE  MAXJ16SAMPLE
+#define _CENTERJSAMPLE   CENTERJ16SAMPLE
+
+#define _JSAMPROW  J16SAMPROW
+#define _JSAMPARRAY  J16SAMPARRAY
+#define _JSAMPIMAGE  J16SAMPIMAGE
+
+/* External functions (jpeglib.h) */
+#define _jpeg_write_scanlines  jpeg16_write_scanlines
+#define _jpeg_read_scanlines  jpeg16_read_scanlines
+
+/* Internal methods (jpegint.h) */
+
+#ifdef C_LOSSLESS_SUPPORTED
+/* Use the 16-bit method in the jpeg_c_main_controller structure. */
+#define _process_data  process_data_16
+/* Use the 16-bit method in the jpeg_c_prep_controller structure. */
+#define _pre_process_data  pre_process_data_16
+/* Use the 16-bit method in the jpeg_c_coef_controller structure. */
+#define _compress_data  compress_data_16
+/* Use the 16-bit method in the jpeg_color_converter structure. */
+#define _color_convert  color_convert_16
+/* Use the 16-bit method in the jpeg_downsampler structure. */
+#define _downsample  downsample_16
+#endif
+#ifdef D_LOSSLESS_SUPPORTED
+/* Use the 16-bit method in the jpeg_d_main_controller structure. */
+#define _process_data  process_data_16
+/* Use the 16-bit method in the jpeg_d_coef_controller structure. */
+#define _decompress_data  decompress_data_16
+/* Use the 16-bit method in the jpeg_d_post_controller structure. */
+#define _post_process_data  post_process_data_16
+/* Use the 16-bit method in the jpeg_upsampler structure. */
+#define _upsample  upsample_16
+/* Use the 16-bit method in the jpeg_color_converter structure. */
+#define _color_convert  color_convert_16
+#endif
+
+/* Global internal functions (jpegint.h) */
+#ifdef C_LOSSLESS_SUPPORTED
+#define _jinit_c_main_controller  j16init_c_main_controller
+#define _jinit_c_prep_controller  j16init_c_prep_controller
+#define _jinit_color_converter  j16init_color_converter
+#define _jinit_downsampler  j16init_downsampler
+#define _jinit_c_diff_controller  j16init_c_diff_controller
+#define _jinit_lossless_compressor  j16init_lossless_compressor
+#endif
+
+#ifdef D_LOSSLESS_SUPPORTED
+#define _jinit_d_main_controller  j16init_d_main_controller
+#define _jinit_d_post_controller  j16init_d_post_controller
+#define _jinit_upsampler  j16init_upsampler
+#define _jinit_color_deconverter  j16init_color_deconverter
+#define _jinit_merged_upsampler  j16init_merged_upsampler
+#define _jinit_d_diff_controller  j16init_d_diff_controller
+#define _jinit_lossless_decompressor  j16init_lossless_decompressor
+#endif
+
+#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
+#define _jcopy_sample_rows  j16copy_sample_rows
+#endif
+
+/* Internal fields (cdjpeg.h) */
+
+#if defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
+/* Use the 16-bit buffer in the cjpeg_source_struct and djpeg_dest_struct
+   structures. */
+#define _buffer  buffer16
+#endif
+
+/* Image I/O functions (cdjpeg.h) */
+#ifdef C_LOSSLESS_SUPPORTED
+#define _jinit_read_ppm  j16init_read_ppm
+#endif
+
+#ifdef D_LOSSLESS_SUPPORTED
+#define _jinit_write_ppm  j16init_write_ppm
+#endif
+
+#elif BITS_IN_JSAMPLE == 12
+
+/* Sample data types and macros (jmorecfg.h) */
+#define _JSAMPLE  J12SAMPLE
+
+#define _MAXJSAMPLE  MAXJ12SAMPLE
+#define _CENTERJSAMPLE   CENTERJ12SAMPLE
+
+#define _JSAMPROW  J12SAMPROW
+#define _JSAMPARRAY  J12SAMPARRAY
+#define _JSAMPIMAGE  J12SAMPIMAGE
+
+/* External functions (jpeglib.h) */
+#define _jpeg_write_scanlines  jpeg12_write_scanlines
+#define _jpeg_write_raw_data  jpeg12_write_raw_data
+#define _jpeg_read_scanlines  jpeg12_read_scanlines
+#define _jpeg_skip_scanlines  jpeg12_skip_scanlines
+#define _jpeg_crop_scanline  jpeg12_crop_scanline
+#define _jpeg_read_raw_data  jpeg12_read_raw_data
+
+/* Internal methods (jpegint.h) */
+
+/* Use the 12-bit method in the jpeg_c_main_controller structure. */
+#define _process_data  process_data_12
+/* Use the 12-bit method in the jpeg_c_prep_controller structure. */
+#define _pre_process_data  pre_process_data_12
+/* Use the 12-bit method in the jpeg_c_coef_controller structure. */
+#define _compress_data  compress_data_12
+/* Use the 12-bit method in the jpeg_color_converter structure. */
+#define _color_convert  color_convert_12
+/* Use the 12-bit method in the jpeg_downsampler structure. */
+#define _downsample  downsample_12
+/* Use the 12-bit method in the jpeg_forward_dct structure. */
+#define _forward_DCT  forward_DCT_12
+/* Use the 12-bit method in the jpeg_d_main_controller structure. */
+#define _process_data  process_data_12
+/* Use the 12-bit method in the jpeg_d_coef_controller structure. */
+#define _decompress_data  decompress_data_12
+/* Use the 12-bit method in the jpeg_d_post_controller structure. */
+#define _post_process_data  post_process_data_12
+/* Use the 12-bit method in the jpeg_inverse_dct structure. */
+#define _inverse_DCT_method_ptr  inverse_DCT_12_method_ptr
+#define _inverse_DCT  inverse_DCT_12
+/* Use the 12-bit method in the jpeg_upsampler structure. */
+#define _upsample  upsample_12
+/* Use the 12-bit method in the jpeg_color_converter structure. */
+#define _color_convert  color_convert_12
+/* Use the 12-bit method in the jpeg_color_quantizer structure. */
+#define _color_quantize  color_quantize_12
+
+/* Global internal functions (jpegint.h) */
+#define _jinit_c_main_controller  j12init_c_main_controller
+#define _jinit_c_prep_controller  j12init_c_prep_controller
+#define _jinit_c_coef_controller  j12init_c_coef_controller
+#define _jinit_color_converter  j12init_color_converter
+#define _jinit_downsampler  j12init_downsampler
+#define _jinit_forward_dct  j12init_forward_dct
+#ifdef C_LOSSLESS_SUPPORTED
+#define _jinit_c_diff_controller  j12init_c_diff_controller
+#define _jinit_lossless_compressor  j12init_lossless_compressor
+#endif
+
+#define _jinit_d_main_controller  j12init_d_main_controller
+#define _jinit_d_coef_controller  j12init_d_coef_controller
+#define _jinit_d_post_controller  j12init_d_post_controller
+#define _jinit_inverse_dct  j12init_inverse_dct
+#define _jinit_upsampler  j12init_upsampler
+#define _jinit_color_deconverter  j12init_color_deconverter
+#define _jinit_1pass_quantizer  j12init_1pass_quantizer
+#define _jinit_2pass_quantizer  j12init_2pass_quantizer
+#define _jinit_merged_upsampler  j12init_merged_upsampler
+#ifdef D_LOSSLESS_SUPPORTED
+#define _jinit_d_diff_controller  j12init_d_diff_controller
+#define _jinit_lossless_decompressor  j12init_lossless_decompressor
+#endif
+
+#define _jcopy_sample_rows  j12copy_sample_rows
+
+/* Global internal functions (jdct.h) */
+#define _jpeg_fdct_islow  jpeg12_fdct_islow
+#define _jpeg_fdct_ifast  jpeg12_fdct_ifast
+
+#define _jpeg_idct_islow  jpeg12_idct_islow
+#define _jpeg_idct_ifast  jpeg12_idct_ifast
+#define _jpeg_idct_float  jpeg12_idct_float
+#define _jpeg_idct_7x7  jpeg12_idct_7x7
+#define _jpeg_idct_6x6  jpeg12_idct_6x6
+#define _jpeg_idct_5x5  jpeg12_idct_5x5
+#define _jpeg_idct_4x4  jpeg12_idct_4x4
+#define _jpeg_idct_3x3  jpeg12_idct_3x3
+#define _jpeg_idct_2x2  jpeg12_idct_2x2
+#define _jpeg_idct_1x1  jpeg12_idct_1x1
+#define _jpeg_idct_9x9  jpeg12_idct_9x9
+#define _jpeg_idct_10x10  jpeg12_idct_10x10
+#define _jpeg_idct_11x11  jpeg12_idct_11x11
+#define _jpeg_idct_12x12  jpeg12_idct_12x12
+#define _jpeg_idct_13x13  jpeg12_idct_13x13
+#define _jpeg_idct_14x14  jpeg12_idct_14x14
+#define _jpeg_idct_15x15  jpeg12_idct_15x15
+#define _jpeg_idct_16x16  jpeg12_idct_16x16
+
+/* Internal fields (cdjpeg.h) */
+
+/* Use the 12-bit buffer in the cjpeg_source_struct and djpeg_dest_struct
+   structures. */
+#define _buffer  buffer12
+
+/* Image I/O functions (cdjpeg.h) */
+#define _jinit_write_gif  j12init_write_gif
+#define _jinit_read_ppm  j12init_read_ppm
+#define _jinit_write_ppm  j12init_write_ppm
+
+#define _read_color_map  read_color_map_12
+
+#else /* BITS_IN_JSAMPLE */
+
+/* Sample data types and macros (jmorecfg.h) */
+#define _JSAMPLE  JSAMPLE
+
+#define _MAXJSAMPLE  MAXJSAMPLE
+#define _CENTERJSAMPLE   CENTERJSAMPLE
+
+#define _JSAMPROW  JSAMPROW
+#define _JSAMPARRAY  JSAMPARRAY
+#define _JSAMPIMAGE  JSAMPIMAGE
+
+/* External functions (jpeglib.h) */
+#define _jpeg_write_scanlines  jpeg_write_scanlines
+#define _jpeg_write_raw_data  jpeg_write_raw_data
+#define _jpeg_read_scanlines  jpeg_read_scanlines
+#define _jpeg_skip_scanlines  jpeg_skip_scanlines
+#define _jpeg_crop_scanline  jpeg_crop_scanline
+#define _jpeg_read_raw_data  jpeg_read_raw_data
+
+/* Internal methods (jpegint.h) */
+
+/* Use the 8-bit method in the jpeg_c_main_controller structure. */
+#define _process_data  process_data
+/* Use the 8-bit method in the jpeg_c_prep_controller structure. */
+#define _pre_process_data  pre_process_data
+/* Use the 8-bit method in the jpeg_c_coef_controller structure. */
+#define _compress_data  compress_data
+/* Use the 8-bit method in the jpeg_color_converter structure. */
+#define _color_convert  color_convert
+/* Use the 8-bit method in the jpeg_downsampler structure. */
+#define _downsample  downsample
+/* Use the 8-bit method in the jpeg_forward_dct structure. */
+#define _forward_DCT  forward_DCT
+/* Use the 8-bit method in the jpeg_d_main_controller structure. */
+#define _process_data  process_data
+/* Use the 8-bit method in the jpeg_d_coef_controller structure. */
+#define _decompress_data  decompress_data
+/* Use the 8-bit method in the jpeg_d_post_controller structure. */
+#define _post_process_data  post_process_data
+/* Use the 8-bit method in the jpeg_inverse_dct structure. */
+#define _inverse_DCT_method_ptr  inverse_DCT_method_ptr
+#define _inverse_DCT  inverse_DCT
+/* Use the 8-bit method in the jpeg_upsampler structure. */
+#define _upsample  upsample
+/* Use the 8-bit method in the jpeg_color_converter structure. */
+#define _color_convert  color_convert
+/* Use the 8-bit method in the jpeg_color_quantizer structure. */
+#define _color_quantize  color_quantize
+
+/* Global internal functions (jpegint.h) */
+#define _jinit_c_main_controller  jinit_c_main_controller
+#define _jinit_c_prep_controller  jinit_c_prep_controller
+#define _jinit_c_coef_controller  jinit_c_coef_controller
+#define _jinit_color_converter  jinit_color_converter
+#define _jinit_downsampler  jinit_downsampler
+#define _jinit_forward_dct  jinit_forward_dct
+#ifdef C_LOSSLESS_SUPPORTED
+#define _jinit_c_diff_controller  jinit_c_diff_controller
+#define _jinit_lossless_compressor  jinit_lossless_compressor
+#endif
+
+#define _jinit_d_main_controller  jinit_d_main_controller
+#define _jinit_d_coef_controller  jinit_d_coef_controller
+#define _jinit_d_post_controller  jinit_d_post_controller
+#define _jinit_inverse_dct  jinit_inverse_dct
+#define _jinit_upsampler  jinit_upsampler
+#define _jinit_color_deconverter  jinit_color_deconverter
+#define _jinit_1pass_quantizer  jinit_1pass_quantizer
+#define _jinit_2pass_quantizer  jinit_2pass_quantizer
+#define _jinit_merged_upsampler  jinit_merged_upsampler
+#ifdef D_LOSSLESS_SUPPORTED
+#define _jinit_d_diff_controller  jinit_d_diff_controller
+#define _jinit_lossless_decompressor  jinit_lossless_decompressor
+#endif
+
+#define _jcopy_sample_rows  jcopy_sample_rows
+
+/* Global internal functions (jdct.h) */
+#define _jpeg_fdct_islow  jpeg_fdct_islow
+#define _jpeg_fdct_ifast  jpeg_fdct_ifast
+
+#define _jpeg_idct_islow  jpeg_idct_islow
+#define _jpeg_idct_ifast  jpeg_idct_ifast
+#define _jpeg_idct_float  jpeg_idct_float
+#define _jpeg_idct_7x7  jpeg_idct_7x7
+#define _jpeg_idct_6x6  jpeg_idct_6x6
+#define _jpeg_idct_5x5  jpeg_idct_5x5
+#define _jpeg_idct_4x4  jpeg_idct_4x4
+#define _jpeg_idct_3x3  jpeg_idct_3x3
+#define _jpeg_idct_2x2  jpeg_idct_2x2
+#define _jpeg_idct_1x1  jpeg_idct_1x1
+#define _jpeg_idct_9x9  jpeg_idct_9x9
+#define _jpeg_idct_10x10  jpeg_idct_10x10
+#define _jpeg_idct_11x11  jpeg_idct_11x11
+#define _jpeg_idct_12x12  jpeg_idct_12x12
+#define _jpeg_idct_13x13  jpeg_idct_13x13
+#define _jpeg_idct_14x14  jpeg_idct_14x14
+#define _jpeg_idct_15x15  jpeg_idct_15x15
+#define _jpeg_idct_16x16  jpeg_idct_16x16
+
+/* Internal fields (cdjpeg.h) */
+
+/* Use the 8-bit buffer in the cjpeg_source_struct and djpeg_dest_struct
+   structures. */
+#define _buffer  buffer
+
+/* Image I/O functions (cdjpeg.h) */
+#define _jinit_write_gif  jinit_write_gif
+#define _jinit_read_ppm  jinit_read_ppm
+#define _jinit_write_ppm  jinit_write_ppm
+
+#define _read_color_map  read_color_map
+
+#endif /* BITS_IN_JSAMPLE */
+
+#endif /* JSAMPLECOMP_H */
diff --git a/thirdparty/libjpeg-turbo/src/jsimd.h b/thirdparty/libjpeg-turbo/src/jsimd.h
new file mode 100644
index 00000000000..6ae021a651d
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jsimd.h
@@ -0,0 +1,127 @@
+/*
+ * jsimd.h
+ *
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ * Copyright (C) 2011, 2014, 2022, D. R. Commander.
+ * Copyright (C) 2015-2016, 2018, 2022, Matthieu Darbois.
+ * Copyright (C) 2020, Arm Limited.
+ *
+ * Based on the x86 SIMD extension for IJG JPEG library,
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
+ *
+ */
+
+#ifdef WITH_SIMD
+
+#include "jchuff.h"             /* Declarations shared with jcphuff.c */
+
+EXTERN(int) jsimd_can_rgb_ycc(void);
+EXTERN(int) jsimd_can_rgb_gray(void);
+EXTERN(int) jsimd_can_ycc_rgb(void);
+EXTERN(int) jsimd_can_ycc_rgb565(void);
+EXTERN(int) jsimd_c_can_null_convert(void);
+
+EXTERN(void) jsimd_rgb_ycc_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
+                                   JSAMPIMAGE output_buf,
+                                   JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_rgb_gray_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
+                                    JSAMPIMAGE output_buf,
+                                    JDIMENSION output_row, int num_rows);
+EXTERN(void) jsimd_ycc_rgb_convert(j_decompress_ptr cinfo,
+                                   JSAMPIMAGE input_buf, JDIMENSION input_row,
+                                   JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_ycc_rgb565_convert(j_decompress_ptr cinfo,
+                                      JSAMPIMAGE input_buf,
+                                      JDIMENSION input_row,
+                                      JSAMPARRAY output_buf, int num_rows);
+EXTERN(void) jsimd_c_null_convert(j_compress_ptr cinfo, JSAMPARRAY input_buf,
+                                  JSAMPIMAGE output_buf, JDIMENSION output_row,
+                                  int num_rows);
+
+EXTERN(int) jsimd_can_h2v2_downsample(void);
+EXTERN(int) jsimd_can_h2v1_downsample(void);
+
+EXTERN(void) jsimd_h2v2_downsample(j_compress_ptr cinfo,
+                                   jpeg_component_info *compptr,
+                                   JSAMPARRAY input_data,
+                                   JSAMPARRAY output_data);
+
+EXTERN(int) jsimd_can_h2v2_smooth_downsample(void);
+
+EXTERN(void) jsimd_h2v2_smooth_downsample(j_compress_ptr cinfo,
+                                          jpeg_component_info *compptr,
+                                          JSAMPARRAY input_data,
+                                          JSAMPARRAY output_data);
+
+EXTERN(void) jsimd_h2v1_downsample(j_compress_ptr cinfo,
+                                   jpeg_component_info *compptr,
+                                   JSAMPARRAY input_data,
+                                   JSAMPARRAY output_data);
+
+EXTERN(int) jsimd_can_h2v2_upsample(void);
+EXTERN(int) jsimd_can_h2v1_upsample(void);
+EXTERN(int) jsimd_can_int_upsample(void);
+
+EXTERN(void) jsimd_h2v2_upsample(j_decompress_ptr cinfo,
+                                 jpeg_component_info *compptr,
+                                 JSAMPARRAY input_data,
+                                 JSAMPARRAY *output_data_ptr);
+EXTERN(void) jsimd_h2v1_upsample(j_decompress_ptr cinfo,
+                                 jpeg_component_info *compptr,
+                                 JSAMPARRAY input_data,
+                                 JSAMPARRAY *output_data_ptr);
+EXTERN(void) jsimd_int_upsample(j_decompress_ptr cinfo,
+                                jpeg_component_info *compptr,
+                                JSAMPARRAY input_data,
+                                JSAMPARRAY *output_data_ptr);
+
+EXTERN(int) jsimd_can_h2v2_fancy_upsample(void);
+EXTERN(int) jsimd_can_h2v1_fancy_upsample(void);
+EXTERN(int) jsimd_can_h1v2_fancy_upsample(void);
+
+EXTERN(void) jsimd_h2v2_fancy_upsample(j_decompress_ptr cinfo,
+                                       jpeg_component_info *compptr,
+                                       JSAMPARRAY input_data,
+                                       JSAMPARRAY *output_data_ptr);
+EXTERN(void) jsimd_h2v1_fancy_upsample(j_decompress_ptr cinfo,
+                                       jpeg_component_info *compptr,
+                                       JSAMPARRAY input_data,
+                                       JSAMPARRAY *output_data_ptr);
+EXTERN(void) jsimd_h1v2_fancy_upsample(j_decompress_ptr cinfo,
+                                       jpeg_component_info *compptr,
+                                       JSAMPARRAY input_data,
+                                       JSAMPARRAY *output_data_ptr);
+
+EXTERN(int) jsimd_can_h2v2_merged_upsample(void);
+EXTERN(int) jsimd_can_h2v1_merged_upsample(void);
+
+EXTERN(void) jsimd_h2v2_merged_upsample(j_decompress_ptr cinfo,
+                                        JSAMPIMAGE input_buf,
+                                        JDIMENSION in_row_group_ctr,
+                                        JSAMPARRAY output_buf);
+EXTERN(void) jsimd_h2v1_merged_upsample(j_decompress_ptr cinfo,
+                                        JSAMPIMAGE input_buf,
+                                        JDIMENSION in_row_group_ctr,
+                                        JSAMPARRAY output_buf);
+
+EXTERN(int) jsimd_can_huff_encode_one_block(void);
+
+EXTERN(JOCTET *) jsimd_huff_encode_one_block(void *state, JOCTET *buffer,
+                                             JCOEFPTR block, int last_dc_val,
+                                             c_derived_tbl *dctbl,
+                                             c_derived_tbl *actbl);
+
+EXTERN(int) jsimd_can_encode_mcu_AC_first_prepare(void);
+
+EXTERN(void) jsimd_encode_mcu_AC_first_prepare
+  (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
+   UJCOEF *values, size_t *zerobits);
+
+EXTERN(int) jsimd_can_encode_mcu_AC_refine_prepare(void);
+
+EXTERN(int) jsimd_encode_mcu_AC_refine_prepare
+  (const JCOEF *block, const int *jpeg_natural_order_start, int Sl, int Al,
+   UJCOEF *absvalues, size_t *bits);
+
+#endif /* WITH_SIMD */
diff --git a/thirdparty/libjpeg-turbo/src/jsimddct.h b/thirdparty/libjpeg-turbo/src/jsimddct.h
new file mode 100644
index 00000000000..55ee8cf67f5
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jsimddct.h
@@ -0,0 +1,70 @@
+/*
+ * jsimddct.h
+ *
+ * Copyright 2009 Pierre Ossman <ossman@cendio.se> for Cendio AB
+ *
+ * Based on the x86 SIMD extension for IJG JPEG library,
+ * Copyright (C) 1999-2006, MIYASAKA Masaru.
+ * For conditions of distribution and use, see copyright notice in jsimdext.inc
+ *
+ */
+
+EXTERN(int) jsimd_can_convsamp(void);
+EXTERN(int) jsimd_can_convsamp_float(void);
+
+EXTERN(void) jsimd_convsamp(JSAMPARRAY sample_data, JDIMENSION start_col,
+                            DCTELEM *workspace);
+EXTERN(void) jsimd_convsamp_float(JSAMPARRAY sample_data, JDIMENSION start_col,
+                                  FAST_FLOAT *workspace);
+
+EXTERN(int) jsimd_can_fdct_islow(void);
+EXTERN(int) jsimd_can_fdct_ifast(void);
+EXTERN(int) jsimd_can_fdct_float(void);
+
+EXTERN(void) jsimd_fdct_islow(DCTELEM *data);
+EXTERN(void) jsimd_fdct_ifast(DCTELEM *data);
+EXTERN(void) jsimd_fdct_float(FAST_FLOAT *data);
+
+EXTERN(int) jsimd_can_quantize(void);
+EXTERN(int) jsimd_can_quantize_float(void);
+
+EXTERN(void) jsimd_quantize(JCOEFPTR coef_block, DCTELEM *divisors,
+                            DCTELEM *workspace);
+EXTERN(void) jsimd_quantize_float(JCOEFPTR coef_block, FAST_FLOAT *divisors,
+                                  FAST_FLOAT *workspace);
+
+EXTERN(int) jsimd_can_idct_2x2(void);
+EXTERN(int) jsimd_can_idct_4x4(void);
+EXTERN(int) jsimd_can_idct_6x6(void);
+EXTERN(int) jsimd_can_idct_12x12(void);
+
+EXTERN(void) jsimd_idct_2x2(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) jsimd_idct_4x4(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) jsimd_idct_6x6(j_decompress_ptr cinfo,
+                            jpeg_component_info *compptr, JCOEFPTR coef_block,
+                            JSAMPARRAY output_buf, JDIMENSION output_col);
+EXTERN(void) jsimd_idct_12x12(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+
+EXTERN(int) jsimd_can_idct_islow(void);
+EXTERN(int) jsimd_can_idct_ifast(void);
+EXTERN(int) jsimd_can_idct_float(void);
+
+EXTERN(void) jsimd_idct_islow(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) jsimd_idct_ifast(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
+EXTERN(void) jsimd_idct_float(j_decompress_ptr cinfo,
+                              jpeg_component_info *compptr,
+                              JCOEFPTR coef_block, JSAMPARRAY output_buf,
+                              JDIMENSION output_col);
diff --git a/thirdparty/libjpeg-turbo/src/jstdhuff.c b/thirdparty/libjpeg-turbo/src/jstdhuff.c
new file mode 100644
index 00000000000..39459012055
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jstdhuff.c
@@ -0,0 +1,144 @@
+/*
+ * jstdhuff.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1998, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2013, 2022, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains routines to set the default Huffman tables, if they are
+ * not already set.
+ */
+
+/*
+ * Huffman table setup routines
+ */
+
+LOCAL(void)
+add_huff_table(j_common_ptr cinfo, JHUFF_TBL **htblptr, const UINT8 *bits,
+               const UINT8 *val)
+/* Define a Huffman table */
+{
+  int nsymbols, len;
+
+  if (*htblptr == NULL)
+    *htblptr = jpeg_alloc_huff_table(cinfo);
+  else if (cinfo->is_decompressor)
+    return;
+
+  /* Copy the number-of-symbols-of-each-code-length counts */
+  memcpy((*htblptr)->bits, bits, sizeof((*htblptr)->bits));
+
+  /* Validate the counts.  We do this here mainly so we can copy the right
+   * number of symbols from the val[] array, without risking marching off
+   * the end of memory.  jchuff.c will do a more thorough test later.
+   */
+  nsymbols = 0;
+  for (len = 1; len <= 16; len++)
+    nsymbols += bits[len];
+  if (nsymbols < 1 || nsymbols > 256)
+    ERREXIT(cinfo, JERR_BAD_HUFF_TABLE);
+
+  memcpy((*htblptr)->huffval, val, nsymbols * sizeof(UINT8));
+  memset(&((*htblptr)->huffval[nsymbols]), 0,
+         (256 - nsymbols) * sizeof(UINT8));
+
+  /* Initialize sent_table FALSE so table will be written to JPEG file. */
+  (*htblptr)->sent_table = FALSE;
+}
+
+
+LOCAL(void)
+std_huff_tables(j_common_ptr cinfo)
+/* Set up the standard Huffman tables (cf. JPEG standard section K.3) */
+/* IMPORTANT: these are only valid for 8-bit data precision! */
+{
+  JHUFF_TBL **dc_huff_tbl_ptrs, **ac_huff_tbl_ptrs;
+
+  static const UINT8 bits_dc_luminance[17] = {
+    /* 0-base */ 0, 0, 1, 5, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0
+  };
+  static const UINT8 val_dc_luminance[] = {
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
+  };
+
+  static const UINT8 bits_dc_chrominance[17] = {
+    /* 0-base */ 0, 0, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0
+  };
+  static const UINT8 val_dc_chrominance[] = {
+    0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11
+  };
+
+  static const UINT8 bits_ac_luminance[17] = {
+    /* 0-base */ 0, 0, 2, 1, 3, 3, 2, 4, 3, 5, 5, 4, 4, 0, 0, 1, 0x7d
+  };
+  static const UINT8 val_ac_luminance[] = {
+    0x01, 0x02, 0x03, 0x00, 0x04, 0x11, 0x05, 0x12,
+    0x21, 0x31, 0x41, 0x06, 0x13, 0x51, 0x61, 0x07,
+    0x22, 0x71, 0x14, 0x32, 0x81, 0x91, 0xa1, 0x08,
+    0x23, 0x42, 0xb1, 0xc1, 0x15, 0x52, 0xd1, 0xf0,
+    0x24, 0x33, 0x62, 0x72, 0x82, 0x09, 0x0a, 0x16,
+    0x17, 0x18, 0x19, 0x1a, 0x25, 0x26, 0x27, 0x28,
+    0x29, 0x2a, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
+    0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
+    0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
+    0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
+    0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
+    0x7a, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
+    0x8a, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98,
+    0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7,
+    0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6,
+    0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3, 0xc4, 0xc5,
+    0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2, 0xd3, 0xd4,
+    0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xe1, 0xe2,
+    0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9, 0xea,
+    0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+    0xf9, 0xfa
+  };
+
+  static const UINT8 bits_ac_chrominance[17] = {
+    /* 0-base */ 0, 0, 2, 1, 2, 4, 4, 3, 4, 7, 5, 4, 4, 0, 1, 2, 0x77
+  };
+  static const UINT8 val_ac_chrominance[] = {
+    0x00, 0x01, 0x02, 0x03, 0x11, 0x04, 0x05, 0x21,
+    0x31, 0x06, 0x12, 0x41, 0x51, 0x07, 0x61, 0x71,
+    0x13, 0x22, 0x32, 0x81, 0x08, 0x14, 0x42, 0x91,
+    0xa1, 0xb1, 0xc1, 0x09, 0x23, 0x33, 0x52, 0xf0,
+    0x15, 0x62, 0x72, 0xd1, 0x0a, 0x16, 0x24, 0x34,
+    0xe1, 0x25, 0xf1, 0x17, 0x18, 0x19, 0x1a, 0x26,
+    0x27, 0x28, 0x29, 0x2a, 0x35, 0x36, 0x37, 0x38,
+    0x39, 0x3a, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
+    0x49, 0x4a, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58,
+    0x59, 0x5a, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
+    0x69, 0x6a, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78,
+    0x79, 0x7a, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87,
+    0x88, 0x89, 0x8a, 0x92, 0x93, 0x94, 0x95, 0x96,
+    0x97, 0x98, 0x99, 0x9a, 0xa2, 0xa3, 0xa4, 0xa5,
+    0xa6, 0xa7, 0xa8, 0xa9, 0xaa, 0xb2, 0xb3, 0xb4,
+    0xb5, 0xb6, 0xb7, 0xb8, 0xb9, 0xba, 0xc2, 0xc3,
+    0xc4, 0xc5, 0xc6, 0xc7, 0xc8, 0xc9, 0xca, 0xd2,
+    0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda,
+    0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, 0xe8, 0xe9,
+    0xea, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, 0xf8,
+    0xf9, 0xfa
+  };
+
+  if (cinfo->is_decompressor) {
+    dc_huff_tbl_ptrs = ((j_decompress_ptr)cinfo)->dc_huff_tbl_ptrs;
+    ac_huff_tbl_ptrs = ((j_decompress_ptr)cinfo)->ac_huff_tbl_ptrs;
+  } else {
+    dc_huff_tbl_ptrs = ((j_compress_ptr)cinfo)->dc_huff_tbl_ptrs;
+    ac_huff_tbl_ptrs = ((j_compress_ptr)cinfo)->ac_huff_tbl_ptrs;
+  }
+
+  add_huff_table(cinfo, &dc_huff_tbl_ptrs[0], bits_dc_luminance,
+                 val_dc_luminance);
+  add_huff_table(cinfo, &ac_huff_tbl_ptrs[0], bits_ac_luminance,
+                 val_ac_luminance);
+  add_huff_table(cinfo, &dc_huff_tbl_ptrs[1], bits_dc_chrominance,
+                 val_dc_chrominance);
+  add_huff_table(cinfo, &ac_huff_tbl_ptrs[1], bits_ac_chrominance,
+                 val_ac_chrominance);
+}
diff --git a/thirdparty/libjpeg-turbo/src/jutils.c b/thirdparty/libjpeg-turbo/src/jutils.c
new file mode 100644
index 00000000000..24caac19021
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jutils.c
@@ -0,0 +1,148 @@
+/*
+ * jutils.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-1996, Thomas G. Lane.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2022, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains tables and miscellaneous utility routines needed
+ * for both compression and decompression.
+ * Note we prefix all global names with "j" to minimize conflicts with
+ * a surrounding application.
+ */
+
+#define JPEG_INTERNALS
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "jsamplecomp.h"
+
+
+#if BITS_IN_JSAMPLE == 8
+
+/*
+ * jpeg_zigzag_order[i] is the zigzag-order position of the i'th element
+ * of a DCT block read in natural order (left to right, top to bottom).
+ */
+
+#if 0                           /* This table is not actually needed in v6a */
+
+const int jpeg_zigzag_order[DCTSIZE2] = {
+   0,  1,  5,  6, 14, 15, 27, 28,
+   2,  4,  7, 13, 16, 26, 29, 42,
+   3,  8, 12, 17, 25, 30, 41, 43,
+   9, 11, 18, 24, 31, 40, 44, 53,
+  10, 19, 23, 32, 39, 45, 52, 54,
+  20, 22, 33, 38, 46, 51, 55, 60,
+  21, 34, 37, 47, 50, 56, 59, 61,
+  35, 36, 48, 49, 57, 58, 62, 63
+};
+
+#endif
+
+/*
+ * jpeg_natural_order[i] is the natural-order position of the i'th element
+ * of zigzag order.
+ *
+ * When reading corrupted data, the Huffman decoders could attempt
+ * to reference an entry beyond the end of this array (if the decoded
+ * zero run length reaches past the end of the block).  To prevent
+ * wild stores without adding an inner-loop test, we put some extra
+ * "63"s after the real entries.  This will cause the extra coefficient
+ * to be stored in location 63 of the block, not somewhere random.
+ * The worst case would be a run-length of 15, which means we need 16
+ * fake entries.
+ */
+
+const int jpeg_natural_order[DCTSIZE2 + 16] = {
+  0,  1,  8, 16,  9,  2,  3, 10,
+ 17, 24, 32, 25, 18, 11,  4,  5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13,  6,  7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63,
+ 63, 63, 63, 63, 63, 63, 63, 63, /* extra entries for safety in decoder */
+ 63, 63, 63, 63, 63, 63, 63, 63
+};
+
+
+/*
+ * Arithmetic utilities
+ */
+
+GLOBAL(long)
+jdiv_round_up(long a, long b)
+/* Compute a/b rounded up to next integer, ie, ceil(a/b) */
+/* Assumes a >= 0, b > 0 */
+{
+  return (a + b - 1L) / b;
+}
+
+
+GLOBAL(long)
+jround_up(long a, long b)
+/* Compute a rounded up to next multiple of b, ie, ceil(a/b)*b */
+/* Assumes a >= 0, b > 0 */
+{
+  a += b - 1L;
+  return a - (a % b);
+}
+
+#endif /* BITS_IN_JSAMPLE == 8 */
+
+
+#if BITS_IN_JSAMPLE != 16 || \
+    defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED)
+
+GLOBAL(void)
+_jcopy_sample_rows(_JSAMPARRAY input_array, int source_row,
+                   _JSAMPARRAY output_array, int dest_row, int num_rows,
+                   JDIMENSION num_cols)
+/* Copy some rows of samples from one place to another.
+ * num_rows rows are copied from input_array[source_row++]
+ * to output_array[dest_row++]; these areas may overlap for duplication.
+ * The source and destination arrays must be at least as wide as num_cols.
+ */
+{
+  register _JSAMPROW inptr, outptr;
+  register size_t count = (size_t)(num_cols * sizeof(_JSAMPLE));
+  register int row;
+
+  input_array += source_row;
+  output_array += dest_row;
+
+  for (row = num_rows; row > 0; row--) {
+    inptr = *input_array++;
+    outptr = *output_array++;
+    memcpy(outptr, inptr, count);
+  }
+}
+
+#endif /* BITS_IN_JSAMPLE != 16 ||
+          defined(C_LOSSLESS_SUPPORTED) || defined(D_LOSSLESS_SUPPORTED) */
+
+
+#if BITS_IN_JSAMPLE == 8
+
+GLOBAL(void)
+jcopy_block_row(JBLOCKROW input_row, JBLOCKROW output_row,
+                JDIMENSION num_blocks)
+/* Copy a row of coefficient blocks from one place to another. */
+{
+  memcpy(output_row, input_row, num_blocks * (DCTSIZE2 * sizeof(JCOEF)));
+}
+
+
+GLOBAL(void)
+jzero_far(void *target, size_t bytestozero)
+/* Zero out a chunk of memory. */
+/* This might be sample-array data, block-array data, or alloc_large data. */
+{
+  memset(target, 0, bytestozero);
+}
+
+#endif /* BITS_IN_JSAMPLE == 8 */
diff --git a/thirdparty/libjpeg-turbo/src/jversion.h b/thirdparty/libjpeg-turbo/src/jversion.h
new file mode 100644
index 00000000000..8e4f4ef7496
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/jversion.h
@@ -0,0 +1,60 @@
+// Originally generated by libjpeg-turbo's cmake build, then modified to support multiple platforms.
+
+/*
+ * jversion.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1991-2020, Thomas G. Lane, Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, 2012-2024, D. R. Commander.
+ * Godot modifications:
+ * Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md).
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains software version identification.
+ */
+
+
+#if JPEG_LIB_VERSION >= 80
+
+#define JVERSION        "8d  15-Jan-2012"
+
+#elif JPEG_LIB_VERSION >= 70
+
+#define JVERSION        "7  27-Jun-2009"
+
+#else
+
+#define JVERSION        "6b  27-Mar-1998"
+
+#endif
+
+/*
+ * NOTE: It is our convention to place the authors in the following order:
+ * - libjpeg-turbo authors (2009-) in descending order of the date of their
+ *   most recent contribution to the project, then in ascending order of the
+ *   date of their first contribution to the project, then in alphabetical
+ *   order
+ * - Upstream authors in descending order of the date of the first inclusion of
+ *   their code
+ */
+
+#define JCOPYRIGHT1 \
+  "Copyright (C) 2009-2024 D. R. Commander\n" \
+  "Copyright (C) 2015, 2020 Google, Inc.\n" \
+  "Copyright (C) 2019-2020 Arm Limited\n" \
+  "Copyright (C) 2015-2016, 2018 Matthieu Darbois\n" \
+  "Copyright (C) 2011-2016 Siarhei Siamashka\n" \
+  "Copyright (C) 2015 Intel Corporation\n"
+#define JCOPYRIGHT2 \
+  "Copyright (C) 2013-2014 Linaro Limited\n" \
+  "Copyright (C) 2013-2014 MIPS Technologies, Inc.\n" \
+  "Copyright (C) 2009, 2012 Pierre Ossman for Cendio AB\n" \
+  "Copyright (C) 2009-2011 Nokia Corporation and/or its subsidiary(-ies)\n" \
+  "Copyright (C) 1999-2006 MIYASAKA Masaru\n" \
+  "Copyright (C) 1999 Ken Murchison\n" \
+  "Copyright (C) 1991-2020 Thomas G. Lane, Guido Vollbeding\n"
+
+#define JCOPYRIGHT_SHORT \
+  "Copyright (C) 1991-2024 The libjpeg-turbo Project and many others"
diff --git a/thirdparty/libjpeg-turbo/src/tjutil.h b/thirdparty/libjpeg-turbo/src/tjutil.h
new file mode 100644
index 00000000000..10272e98867
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/tjutil.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (C)2011, 2022 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifdef _WIN32
+#ifndef strcasecmp
+#define strcasecmp  stricmp
+#endif
+#ifndef strncasecmp
+#define strncasecmp  strnicmp
+#endif
+#endif
+
+#ifdef _MSC_VER
+#define SNPRINTF(str, n, format, ...) \
+  _snprintf_s(str, n, _TRUNCATE, format, ##__VA_ARGS__)
+#else
+#define SNPRINTF  snprintf
+#endif
+
+#ifndef min
+#define min(a, b)  ((a) < (b) ? (a) : (b))
+#endif
+
+#ifndef max
+#define max(a, b)  ((a) > (b) ? (a) : (b))
+#endif
+
+extern double getTime(void);
diff --git a/thirdparty/libjpeg-turbo/src/transupp.c b/thirdparty/libjpeg-turbo/src/transupp.c
new file mode 100644
index 00000000000..0a92413a7f8
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/transupp.c
@@ -0,0 +1,2388 @@
+/*
+ * transupp.c
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1997-2019, Thomas G. Lane, Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2010, 2017, 2021-2022, 2024, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains image transformation routines and other utility code
+ * used by the jpegtran sample application.  These are NOT part of the core
+ * JPEG library.  But we keep these routines separate from jpegtran.c to
+ * ease the task of maintaining jpegtran-like programs that have other user
+ * interfaces.
+ */
+
+/* Although this file really shouldn't have access to the library internals,
+ * it's helpful to let it call jround_up() and jcopy_block_row().
+ */
+#define JPEG_INTERNALS
+
+#include "jinclude.h"
+#include "jpeglib.h"
+#include "transupp.h"           /* My own external interface */
+#include "jpegapicomp.h"
+#include <ctype.h>              /* to declare isdigit() */
+
+
+#if JPEG_LIB_VERSION >= 70
+#define dstinfo_min_DCT_h_scaled_size  dstinfo->min_DCT_h_scaled_size
+#define dstinfo_min_DCT_v_scaled_size  dstinfo->min_DCT_v_scaled_size
+#else
+#define dstinfo_min_DCT_h_scaled_size  DCTSIZE
+#define dstinfo_min_DCT_v_scaled_size  DCTSIZE
+#endif
+
+
+#if TRANSFORMS_SUPPORTED
+
+/*
+ * Lossless image transformation routines.  These routines work on DCT
+ * coefficient arrays and thus do not require any lossy decompression
+ * or recompression of the image.
+ * Thanks to Guido Vollbeding for the initial design and code of this feature,
+ * and to Ben Jackson for introducing the cropping feature.
+ *
+ * Horizontal flipping is done in-place, using a single top-to-bottom
+ * pass through the virtual source array.  It will thus be much the
+ * fastest option for images larger than main memory.
+ *
+ * The other routines require a set of destination virtual arrays, so they
+ * need twice as much memory as jpegtran normally does.  The destination
+ * arrays are always written in normal scan order (top to bottom) because
+ * the virtual array manager expects this.  The source arrays will be scanned
+ * in the corresponding order, which means multiple passes through the source
+ * arrays for most of the transforms.  That could result in much thrashing
+ * if the image is larger than main memory.
+ *
+ * If cropping or trimming is involved, the destination arrays may be smaller
+ * than the source arrays.  Note it is not possible to do horizontal flip
+ * in-place when a nonzero Y crop offset is specified, since we'd have to move
+ * data from one block row to another but the virtual array manager doesn't
+ * guarantee we can touch more than one row at a time.  So in that case,
+ * we have to use a separate destination array.
+ *
+ * Some notes about the operating environment of the individual transform
+ * routines:
+ * 1. Both the source and destination virtual arrays are allocated from the
+ *    source JPEG object, and therefore should be manipulated by calling the
+ *    source's memory manager.
+ * 2. The destination's component count should be used.  It may be smaller
+ *    than the source's when forcing to grayscale.
+ * 3. Likewise the destination's sampling factors should be used.  When
+ *    forcing to grayscale the destination's sampling factors will be all 1,
+ *    and we may as well take that as the effective iMCU size.
+ * 4. When "trim" is in effect, the destination's dimensions will be the
+ *    trimmed values but the source's will be untrimmed.
+ * 5. When "crop" is in effect, the destination's dimensions will be the
+ *    cropped values but the source's will be uncropped.  Each transform
+ *    routine is responsible for picking up source data starting at the
+ *    correct X and Y offset for the crop region.  (The X and Y offsets
+ *    passed to the transform routines are measured in iMCU blocks of the
+ *    destination.)
+ * 6. All the routines assume that the source and destination buffers are
+ *    padded out to a full iMCU boundary.  This is true, although for the
+ *    source buffer it is an undocumented property of jdcoefct.c.
+ */
+
+
+LOCAL(void)
+dequant_comp(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+             jvirt_barray_ptr coef_array, JQUANT_TBL *qtblptr1)
+{
+  JDIMENSION blk_x, blk_y;
+  int offset_y, k;
+  JQUANT_TBL *qtblptr;
+  JBLOCKARRAY buffer;
+  JBLOCKROW block;
+  JCOEFPTR ptr;
+
+  qtblptr = compptr->quant_table;
+  for (blk_y = 0; blk_y < compptr->height_in_blocks;
+       blk_y += compptr->v_samp_factor) {
+    buffer = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr)cinfo, coef_array, blk_y,
+       (JDIMENSION)compptr->v_samp_factor, TRUE);
+    for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+      block = buffer[offset_y];
+      for (blk_x = 0; blk_x < compptr->width_in_blocks; blk_x++) {
+        ptr = block[blk_x];
+        for (k = 0; k < DCTSIZE2; k++)
+          if (qtblptr->quantval[k] != qtblptr1->quantval[k])
+            ptr[k] *= qtblptr->quantval[k] / qtblptr1->quantval[k];
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+requant_comp(j_decompress_ptr cinfo, jpeg_component_info *compptr,
+             jvirt_barray_ptr coef_array, JQUANT_TBL *qtblptr1)
+{
+  JDIMENSION blk_x, blk_y;
+  int offset_y, k;
+  JQUANT_TBL *qtblptr;
+  JBLOCKARRAY buffer;
+  JBLOCKROW block;
+  JCOEFPTR ptr;
+  JCOEF temp, qval;
+
+  qtblptr = compptr->quant_table;
+  for (blk_y = 0; blk_y < compptr->height_in_blocks;
+       blk_y += compptr->v_samp_factor) {
+    buffer = (*cinfo->mem->access_virt_barray)
+      ((j_common_ptr)cinfo, coef_array, blk_y,
+       (JDIMENSION)compptr->v_samp_factor, TRUE);
+    for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+      block = buffer[offset_y];
+      for (blk_x = 0; blk_x < compptr->width_in_blocks; blk_x++) {
+        ptr = block[blk_x];
+        for (k = 0; k < DCTSIZE2; k++) {
+          temp = qtblptr->quantval[k];
+          qval = qtblptr1->quantval[k];
+          if (temp != qval && qval != 0) {
+            temp *= ptr[k];
+            /* The following quantization code is copied from jcdctmgr.c */
+#ifdef FAST_DIVIDE
+#define DIVIDE_BY(a, b)  a /= b
+#else
+#define DIVIDE_BY(a, b)  if (a >= b) a /= b;  else a = 0
+#endif
+            if (temp < 0) {
+              temp = -temp;
+              temp += qval >> 1; /* for rounding */
+              DIVIDE_BY(temp, qval);
+              temp = -temp;
+            } else {
+              temp += qval >> 1; /* for rounding */
+              DIVIDE_BY(temp, qval);
+            }
+            ptr[k] = temp;
+          }
+        }
+      }
+    }
+  }
+}
+
+
+/*
+ * Calculate largest common denominator using Euclid's algorithm.
+ */
+LOCAL(JCOEF)
+largest_common_denominator(JCOEF a, JCOEF b)
+{
+  JCOEF c;
+
+  do {
+    c = a % b;
+    a = b;
+    b = c;
+  } while (c);
+
+  return a;
+}
+
+
+LOCAL(void)
+adjust_quant(j_decompress_ptr srcinfo, jvirt_barray_ptr *src_coef_arrays,
+             j_decompress_ptr dropinfo, jvirt_barray_ptr *drop_coef_arrays,
+             boolean trim, j_compress_ptr dstinfo)
+{
+  jpeg_component_info *compptr1, *compptr2;
+  JQUANT_TBL *qtblptr1, *qtblptr2, *qtblptr3;
+  int ci, k;
+
+  for (ci = 0; ci < dstinfo->num_components && ci < dropinfo->num_components;
+       ci++) {
+    compptr1 = srcinfo->comp_info + ci;
+    compptr2 = dropinfo->comp_info + ci;
+    qtblptr1 = compptr1->quant_table;
+    if (qtblptr1 == NULL)
+      ERREXIT1(srcinfo, JERR_NO_QUANT_TABLE, compptr1->quant_tbl_no);
+    qtblptr2 = compptr2->quant_table;
+    if (qtblptr2 == NULL)
+      ERREXIT1(dropinfo, JERR_NO_QUANT_TABLE, compptr2->quant_tbl_no);
+    for (k = 0; k < DCTSIZE2; k++) {
+      if (qtblptr1->quantval[k] != qtblptr2->quantval[k]) {
+        if (trim)
+          requant_comp(dropinfo, compptr2, drop_coef_arrays[ci], qtblptr1);
+        else {
+          qtblptr3 = dstinfo->quant_tbl_ptrs[compptr1->quant_tbl_no];
+          for (k = 0; k < DCTSIZE2; k++)
+            if (qtblptr1->quantval[k] != qtblptr2->quantval[k])
+              qtblptr3->quantval[k] =
+                largest_common_denominator(qtblptr1->quantval[k],
+                                           qtblptr2->quantval[k]);
+          dequant_comp(srcinfo, compptr1, src_coef_arrays[ci], qtblptr3);
+          dequant_comp(dropinfo, compptr2, drop_coef_arrays[ci], qtblptr3);
+        }
+        break;
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_drop(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+        JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+        jvirt_barray_ptr *src_coef_arrays,
+        j_decompress_ptr dropinfo, jvirt_barray_ptr *drop_coef_arrays,
+        JDIMENSION drop_width, JDIMENSION drop_height)
+/* Drop (insert) the contents of another image into the source image.  If the
+ * number of components in the drop image is smaller than the number of
+ * components in the destination image, then we fill in the remaining
+ * components with zero.  This allows for dropping the contents of grayscale
+ * images into (arbitrarily sampled) color images.
+ */
+{
+  JDIMENSION comp_width, comp_height;
+  JDIMENSION blk_y, x_drop_blocks, y_drop_blocks;
+  int ci, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  jpeg_component_info *compptr;
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = drop_width * compptr->h_samp_factor;
+    comp_height = drop_height * compptr->v_samp_factor;
+    x_drop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_drop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (blk_y = 0; blk_y < comp_height; blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, src_coef_arrays[ci], blk_y + y_drop_blocks,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      if (ci < dropinfo->num_components) {
+        src_buffer = (*dropinfo->mem->access_virt_barray)
+          ((j_common_ptr)dropinfo, drop_coef_arrays[ci], blk_y,
+           (JDIMENSION)compptr->v_samp_factor, FALSE);
+        for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+          jcopy_block_row(src_buffer[offset_y],
+                          dst_buffer[offset_y] + x_drop_blocks, comp_width);
+        }
+      } else {
+        for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+          memset(dst_buffer[offset_y] + x_drop_blocks, 0,
+                 comp_width * sizeof(JBLOCK));
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_crop(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+        JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+        jvirt_barray_ptr *src_coef_arrays,
+        jvirt_barray_ptr *dst_coef_arrays)
+/* Crop.  This is only used when no rotate/flip is requested with the crop. */
+{
+  JDIMENSION dst_blk_y, x_crop_blocks, y_crop_blocks;
+  int ci, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  jpeg_component_info *compptr;
+
+  /* We simply have to copy the right amount of data (the destination's
+   * image size) starting at the given X and Y offsets in the source.
+   */
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      src_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, src_coef_arrays[ci], dst_blk_y + y_crop_blocks,
+         (JDIMENSION)compptr->v_samp_factor, FALSE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
+                        dst_buffer[offset_y], compptr->width_in_blocks);
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_crop_ext_zero(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+                 JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+                 jvirt_barray_ptr *src_coef_arrays,
+                 jvirt_barray_ptr *dst_coef_arrays)
+/* Crop.  This is only used when no rotate/flip is requested with the crop.
+ * Extension: If the destination size is larger than the source, we fill in the
+ * expanded region with zero (neutral gray).  Note that we also have to zero
+ * partial iMCUs at the right and bottom edge of the source image area in this
+ * case.
+ */
+{
+  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height;
+  JDIMENSION dst_blk_y, x_crop_blocks, y_crop_blocks;
+  int ci, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  jpeg_component_info *compptr;
+
+  MCU_cols = srcinfo->output_width /
+             (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+  MCU_rows = srcinfo->output_height /
+             (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      if (dstinfo->_jpeg_height > srcinfo->output_height) {
+        if (dst_blk_y < y_crop_blocks ||
+            dst_blk_y >= y_crop_blocks + comp_height) {
+          for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+            memset(dst_buffer[offset_y], 0,
+                   compptr->width_in_blocks * sizeof(JBLOCK));
+          }
+          continue;
+        }
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           dst_blk_y - y_crop_blocks, (JDIMENSION)compptr->v_samp_factor,
+           FALSE);
+      } else {
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           dst_blk_y + y_crop_blocks, (JDIMENSION)compptr->v_samp_factor,
+           FALSE);
+      }
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        if (dstinfo->_jpeg_width > srcinfo->output_width) {
+          if (x_crop_blocks > 0) {
+            memset(dst_buffer[offset_y], 0, x_crop_blocks * sizeof(JBLOCK));
+          }
+          jcopy_block_row(src_buffer[offset_y],
+                          dst_buffer[offset_y] + x_crop_blocks, comp_width);
+          if (compptr->width_in_blocks > x_crop_blocks + comp_width) {
+            memset(dst_buffer[offset_y] + x_crop_blocks + comp_width, 0,
+                   (compptr->width_in_blocks - x_crop_blocks - comp_width) *
+                   sizeof(JBLOCK));
+          }
+        } else {
+          jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
+                          dst_buffer[offset_y], compptr->width_in_blocks);
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_crop_ext_flat(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+                 JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+                 jvirt_barray_ptr *src_coef_arrays,
+                 jvirt_barray_ptr *dst_coef_arrays)
+/* Crop.  This is only used when no rotate/flip is requested with the crop.
+ * Extension: The destination width is larger than the source, and we fill in
+ * the expanded region with the DC coefficient of the adjacent block.  Note
+ * that we also have to fill partial iMCUs at the right and bottom edge of the
+ * source image area in this case.
+ */
+{
+  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height;
+  JDIMENSION dst_blk_x, dst_blk_y, x_crop_blocks, y_crop_blocks;
+  int ci, offset_y;
+  JCOEF dc;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  jpeg_component_info *compptr;
+
+  MCU_cols = srcinfo->output_width /
+             (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+  MCU_rows = srcinfo->output_height /
+             (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      if (dstinfo->_jpeg_height > srcinfo->output_height) {
+        if (dst_blk_y < y_crop_blocks ||
+            dst_blk_y >= y_crop_blocks + comp_height) {
+          for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+            memset(dst_buffer[offset_y], 0,
+                   compptr->width_in_blocks * sizeof(JBLOCK));
+          }
+          continue;
+        }
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           dst_blk_y - y_crop_blocks, (JDIMENSION)compptr->v_samp_factor,
+           FALSE);
+      } else {
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           dst_blk_y + y_crop_blocks, (JDIMENSION)compptr->v_samp_factor,
+          FALSE);
+      }
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        if (x_crop_blocks > 0) {
+          memset(dst_buffer[offset_y], 0, x_crop_blocks * sizeof(JBLOCK));
+          dc = src_buffer[offset_y][0][0];
+          for (dst_blk_x = 0; dst_blk_x < x_crop_blocks; dst_blk_x++) {
+            dst_buffer[offset_y][dst_blk_x][0] = dc;
+          }
+        }
+        jcopy_block_row(src_buffer[offset_y],
+                        dst_buffer[offset_y] + x_crop_blocks, comp_width);
+        if (compptr->width_in_blocks > x_crop_blocks + comp_width) {
+          memset(dst_buffer[offset_y] + x_crop_blocks + comp_width, 0,
+                 (compptr->width_in_blocks - x_crop_blocks - comp_width) *
+                 sizeof(JBLOCK));
+          dc = src_buffer[offset_y][comp_width - 1][0];
+          for (dst_blk_x = x_crop_blocks + comp_width;
+               dst_blk_x < compptr->width_in_blocks; dst_blk_x++) {
+            dst_buffer[offset_y][dst_blk_x][0] = dc;
+          }
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_crop_ext_reflect(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+                    JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+                    jvirt_barray_ptr *src_coef_arrays,
+                    jvirt_barray_ptr *dst_coef_arrays)
+/* Crop.  This is only used when no rotate/flip is requested with the crop.
+ * Extension: The destination width is larger than the source, and we fill in
+ * the expanded region with repeated reflections of the source image.  Note
+ * that we also have to fill partial iMCUs at the right and bottom edge of the
+ * source image area in this case.
+ */
+{
+  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, src_blk_x;
+  JDIMENSION dst_blk_x, dst_blk_y, x_crop_blocks, y_crop_blocks;
+  int ci, k, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JBLOCKROW src_row_ptr, dst_row_ptr;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  MCU_cols = srcinfo->output_width /
+             (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+  MCU_rows = srcinfo->output_height /
+             (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      if (dstinfo->_jpeg_height > srcinfo->output_height) {
+        if (dst_blk_y < y_crop_blocks ||
+            dst_blk_y >= y_crop_blocks + comp_height) {
+          for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+            memset(dst_buffer[offset_y], 0,
+                   compptr->width_in_blocks * sizeof(JBLOCK));
+          }
+          continue;
+        }
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           dst_blk_y - y_crop_blocks, (JDIMENSION)compptr->v_samp_factor,
+           FALSE);
+      } else {
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           dst_blk_y + y_crop_blocks, (JDIMENSION)compptr->v_samp_factor,
+           FALSE);
+      }
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        /* Copy source region */
+        jcopy_block_row(src_buffer[offset_y],
+                        dst_buffer[offset_y] + x_crop_blocks, comp_width);
+        if (x_crop_blocks > 0) {
+          /* Reflect to left */
+          dst_row_ptr = dst_buffer[offset_y] + x_crop_blocks;
+          for (dst_blk_x = x_crop_blocks; dst_blk_x > 0;) {
+            src_row_ptr = dst_row_ptr;      /* (re)set axis of reflection */
+            for (src_blk_x = comp_width; src_blk_x > 0 && dst_blk_x > 0;
+                 src_blk_x--, dst_blk_x--) {
+              dst_ptr = *(--dst_row_ptr);   /* destination goes left */
+              src_ptr = *src_row_ptr++;     /* source goes right */
+              /* This unrolled loop doesn't need to know which row it's on. */
+              for (k = 0; k < DCTSIZE2; k += 2) {
+                *dst_ptr++ = *src_ptr++;    /* copy even column */
+                *dst_ptr++ = -(*src_ptr++); /* copy odd column with sign
+                                               change */
+              }
+            }
+          }
+        }
+        if (compptr->width_in_blocks > x_crop_blocks + comp_width) {
+          /* Reflect to right */
+          dst_row_ptr = dst_buffer[offset_y] + x_crop_blocks + comp_width;
+          for (dst_blk_x = compptr->width_in_blocks - x_crop_blocks - comp_width;
+               dst_blk_x > 0;) {
+            src_row_ptr = dst_row_ptr;      /* (re)set axis of reflection */
+            for (src_blk_x = comp_width; src_blk_x > 0 && dst_blk_x > 0;
+                 src_blk_x--, dst_blk_x--) {
+              dst_ptr = *dst_row_ptr++;     /* destination goes right */
+              src_ptr = *(--src_row_ptr);   /* source goes left */
+              /* This unrolled loop doesn't need to know which row it's on. */
+              for (k = 0; k < DCTSIZE2; k += 2) {
+                *dst_ptr++ = *src_ptr++;    /* copy even column */
+                *dst_ptr++ = -(*src_ptr++); /* copy odd column with sign
+                                               change */
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_wipe(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+        JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+        jvirt_barray_ptr *src_coef_arrays,
+        JDIMENSION drop_width, JDIMENSION drop_height)
+/* Wipe - discard image contents of specified region and fill with zero
+ * (neutral gray)
+ */
+{
+  JDIMENSION x_wipe_blocks, wipe_width;
+  JDIMENSION y_wipe_blocks, wipe_bottom;
+  int ci, offset_y;
+  JBLOCKARRAY buffer;
+  jpeg_component_info *compptr;
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    x_wipe_blocks = x_crop_offset * compptr->h_samp_factor;
+    wipe_width = drop_width * compptr->h_samp_factor;
+    y_wipe_blocks = y_crop_offset * compptr->v_samp_factor;
+    wipe_bottom = drop_height * compptr->v_samp_factor + y_wipe_blocks;
+    for (; y_wipe_blocks < wipe_bottom;
+         y_wipe_blocks += compptr->v_samp_factor) {
+      buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, src_coef_arrays[ci], y_wipe_blocks,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        memset(buffer[offset_y] + x_wipe_blocks, 0,
+               wipe_width * sizeof(JBLOCK));
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_flatten(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+           JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+           jvirt_barray_ptr *src_coef_arrays,
+           JDIMENSION drop_width, JDIMENSION drop_height)
+/* Flatten - discard image contents of specified region, similarly to wipe,
+ * but fill with the average of adjacent blocks instead of zero.
+ */
+{
+  JDIMENSION x_wipe_blocks, wipe_width, wipe_right;
+  JDIMENSION y_wipe_blocks, wipe_bottom, blk_x;
+  int ci, offset_y, dc_left_value, dc_right_value, average;
+  JBLOCKARRAY buffer;
+  jpeg_component_info *compptr;
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    x_wipe_blocks = x_crop_offset * compptr->h_samp_factor;
+    wipe_width = drop_width * compptr->h_samp_factor;
+    wipe_right = wipe_width + x_wipe_blocks;
+    y_wipe_blocks = y_crop_offset * compptr->v_samp_factor;
+    wipe_bottom = drop_height * compptr->v_samp_factor + y_wipe_blocks;
+    for (; y_wipe_blocks < wipe_bottom;
+         y_wipe_blocks += compptr->v_samp_factor) {
+      buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, src_coef_arrays[ci], y_wipe_blocks,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        memset(buffer[offset_y] + x_wipe_blocks, 0,
+               wipe_width * sizeof(JBLOCK));
+        if (x_wipe_blocks > 0) {
+          dc_left_value = buffer[offset_y][x_wipe_blocks - 1][0];
+          if (wipe_right < compptr->width_in_blocks) {
+            dc_right_value = buffer[offset_y][wipe_right][0];
+            average = (dc_left_value + dc_right_value) >> 1;
+          } else {
+            average = dc_left_value;
+          }
+        } else if (wipe_right < compptr->width_in_blocks) {
+          average = buffer[offset_y][wipe_right][0];
+        } else continue;
+        for (blk_x = x_wipe_blocks; blk_x < wipe_right; blk_x++) {
+          buffer[offset_y][blk_x][0] = (JCOEF)average;
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_reflect(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+           JDIMENSION x_crop_offset, jvirt_barray_ptr *src_coef_arrays,
+           JDIMENSION drop_width, JDIMENSION drop_height)
+/* Reflect - discard image contents of specified region, similarly to wipe,
+ * but fill with repeated reflections of the outside region instead of zero.
+ * NB: y_crop_offset is assumed to be zero.
+ */
+{
+  JDIMENSION x_wipe_blocks, wipe_width;
+  JDIMENSION y_wipe_blocks, wipe_bottom;
+  JDIMENSION src_blk_x, dst_blk_x;
+  int ci, k, offset_y;
+  JBLOCKARRAY buffer;
+  JBLOCKROW src_row_ptr, dst_row_ptr;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    x_wipe_blocks = x_crop_offset * compptr->h_samp_factor;
+    wipe_width = drop_width * compptr->h_samp_factor;
+    wipe_bottom = drop_height * compptr->v_samp_factor;
+    for (y_wipe_blocks = 0; y_wipe_blocks < wipe_bottom;
+         y_wipe_blocks += compptr->v_samp_factor) {
+      buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, src_coef_arrays[ci], y_wipe_blocks,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        if (x_wipe_blocks > 0) {
+          /* Reflect from left */
+          dst_row_ptr = buffer[offset_y] + x_wipe_blocks;
+          for (dst_blk_x = wipe_width; dst_blk_x > 0;) {
+            src_row_ptr = dst_row_ptr;     /* (re)set axis of reflection */
+            for (src_blk_x = x_wipe_blocks;
+                 src_blk_x > 0 && dst_blk_x > 0; src_blk_x--, dst_blk_x--) {
+              dst_ptr = *dst_row_ptr++;    /* destination goes right */
+              src_ptr = *(--src_row_ptr);  /* source goes left */
+              /* this unrolled loop doesn't need to know which row it's on... */
+              for (k = 0; k < DCTSIZE2; k += 2) {
+                *dst_ptr++ = *src_ptr++;   /* copy even column */
+                *dst_ptr++ = -(*src_ptr++); /* copy odd column with sign change */
+              }
+            }
+          }
+        } else if (compptr->width_in_blocks > x_wipe_blocks + wipe_width) {
+          /* Reflect from right */
+          dst_row_ptr = buffer[offset_y] + x_wipe_blocks + wipe_width;
+          for (dst_blk_x = wipe_width; dst_blk_x > 0;) {
+            src_row_ptr = dst_row_ptr;     /* (re)set axis of reflection */
+            src_blk_x = compptr->width_in_blocks - x_wipe_blocks - wipe_width;
+            for (; src_blk_x > 0 && dst_blk_x > 0; src_blk_x--, dst_blk_x--) {
+              dst_ptr = *(--dst_row_ptr);  /* destination goes left */
+              src_ptr = *src_row_ptr++;    /* source goes right */
+              /* this unrolled loop doesn't need to know which row it's on... */
+              for (k = 0; k < DCTSIZE2; k += 2) {
+                *dst_ptr++ = *src_ptr++;   /* copy even column */
+                *dst_ptr++ = -(*src_ptr++); /* copy odd column with sign change */
+              }
+            }
+          }
+        } else {
+          memset(buffer[offset_y] + x_wipe_blocks, 0,
+                 wipe_width * sizeof(JBLOCK));
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_flip_h_no_crop(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+                  JDIMENSION x_crop_offset, jvirt_barray_ptr *src_coef_arrays)
+/* Horizontal flip; done in-place, so no separate dest array is required.
+ * NB: this only works when y_crop_offset is zero.
+ */
+{
+  JDIMENSION MCU_cols, comp_width, blk_x, blk_y, x_crop_blocks;
+  int ci, k, offset_y;
+  JBLOCKARRAY buffer;
+  JCOEFPTR ptr1, ptr2;
+  JCOEF temp1, temp2;
+  jpeg_component_info *compptr;
+
+  /* Horizontal mirroring of DCT blocks is accomplished by swapping
+   * pairs of blocks in-place.  Within a DCT block, we perform horizontal
+   * mirroring by changing the signs of odd-numbered columns.
+   * Partial iMCUs at the right edge are left untouched.
+   */
+  MCU_cols = srcinfo->output_width /
+             (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    for (blk_y = 0; blk_y < compptr->height_in_blocks;
+         blk_y += compptr->v_samp_factor) {
+      buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, src_coef_arrays[ci], blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        /* Do the mirroring */
+        for (blk_x = 0; blk_x * 2 < comp_width; blk_x++) {
+          ptr1 = buffer[offset_y][blk_x];
+          ptr2 = buffer[offset_y][comp_width - blk_x - 1];
+          /* this unrolled loop doesn't need to know which row it's on... */
+          for (k = 0; k < DCTSIZE2; k += 2) {
+            temp1 = *ptr1;      /* swap even column */
+            temp2 = *ptr2;
+            *ptr1++ = temp2;
+            *ptr2++ = temp1;
+            temp1 = *ptr1;      /* swap odd column with sign change */
+            temp2 = *ptr2;
+            *ptr1++ = -temp2;
+            *ptr2++ = -temp1;
+          }
+        }
+        if (x_crop_blocks > 0) {
+          /* Now left-justify the portion of the data to be kept.
+           * We can't use a single jcopy_block_row() call because that routine
+           * depends on memcpy(), whose behavior is unspecified for overlapping
+           * source and destination areas.  Sigh.
+           */
+          for (blk_x = 0; blk_x < compptr->width_in_blocks; blk_x++) {
+            jcopy_block_row(buffer[offset_y] + blk_x + x_crop_blocks,
+                            buffer[offset_y] + blk_x, (JDIMENSION)1);
+          }
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_flip_h(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+          JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+          jvirt_barray_ptr *src_coef_arrays,
+          jvirt_barray_ptr *dst_coef_arrays)
+/* Horizontal flip in general cropping case */
+{
+  JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, k, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JBLOCKROW src_row_ptr, dst_row_ptr;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  /* Here we must output into a separate array because we can't touch
+   * different rows of a single virtual array simultaneously.  Otherwise,
+   * this is essentially the same as the routine above.
+   */
+  MCU_cols = srcinfo->output_width /
+             (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      src_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, src_coef_arrays[ci], dst_blk_y + y_crop_blocks,
+         (JDIMENSION)compptr->v_samp_factor, FALSE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        dst_row_ptr = dst_buffer[offset_y];
+        src_row_ptr = src_buffer[offset_y];
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+             dst_blk_x++) {
+          if (x_crop_blocks + dst_blk_x < comp_width) {
+            /* Do the mirrorable blocks */
+            dst_ptr = dst_row_ptr[dst_blk_x];
+            src_ptr = src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
+            /* this unrolled loop doesn't need to know which row it's on... */
+            for (k = 0; k < DCTSIZE2; k += 2) {
+              *dst_ptr++ = *src_ptr++;    /* copy even column */
+              *dst_ptr++ = -(*src_ptr++); /* copy odd column with sign
+                                             change */
+            }
+          } else {
+            /* Copy last partial block(s) verbatim */
+            jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks,
+                            dst_row_ptr + dst_blk_x, (JDIMENSION)1);
+          }
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_flip_v(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+          JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+          jvirt_barray_ptr *src_coef_arrays,
+          jvirt_barray_ptr *dst_coef_arrays)
+/* Vertical flip */
+{
+  JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, i, j, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JBLOCKROW src_row_ptr, dst_row_ptr;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  /* We output into a separate array because we can't touch different
+   * rows of the source virtual array simultaneously.  Otherwise, this
+   * is a pretty straightforward analog of horizontal flip.
+   * Within a DCT block, vertical mirroring is done by changing the signs
+   * of odd-numbered rows.
+   * Partial iMCUs at the bottom edge are copied verbatim.
+   */
+  MCU_rows = srcinfo->output_height /
+             (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      if (y_crop_blocks + dst_blk_y < comp_height) {
+        /* Row is within the mirrorable area. */
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           comp_height - y_crop_blocks - dst_blk_y -
+           (JDIMENSION)compptr->v_samp_factor,
+           (JDIMENSION)compptr->v_samp_factor, FALSE);
+      } else {
+        /* Bottom-edge blocks will be copied verbatim. */
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           dst_blk_y + y_crop_blocks,
+           (JDIMENSION)compptr->v_samp_factor, FALSE);
+      }
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        if (y_crop_blocks + dst_blk_y < comp_height) {
+          /* Row is within the mirrorable area. */
+          dst_row_ptr = dst_buffer[offset_y];
+          src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
+          src_row_ptr += x_crop_blocks;
+          for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+               dst_blk_x++) {
+            dst_ptr = dst_row_ptr[dst_blk_x];
+            src_ptr = src_row_ptr[dst_blk_x];
+            for (i = 0; i < DCTSIZE; i += 2) {
+              /* copy even row */
+              for (j = 0; j < DCTSIZE; j++)
+                *dst_ptr++ = *src_ptr++;
+              /* copy odd row with sign change */
+              for (j = 0; j < DCTSIZE; j++)
+                *dst_ptr++ = -(*src_ptr++);
+            }
+          }
+        } else {
+          /* Just copy row verbatim. */
+          jcopy_block_row(src_buffer[offset_y] + x_crop_blocks,
+                          dst_buffer[offset_y], compptr->width_in_blocks);
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_transpose(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+             JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+             jvirt_barray_ptr *src_coef_arrays,
+             jvirt_barray_ptr *dst_coef_arrays)
+/* Transpose source into destination */
+{
+  JDIMENSION dst_blk_x, dst_blk_y, x_crop_blocks, y_crop_blocks;
+  int ci, i, j, offset_x, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  /* Transposing pixels within a block just requires transposing the
+   * DCT coefficients.
+   * Partial iMCUs at the edges require no special treatment; we simply
+   * process all the available DCT blocks for every component.
+   */
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+             dst_blk_x += compptr->h_samp_factor) {
+          src_buffer = (*srcinfo->mem->access_virt_barray)
+            ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+             dst_blk_x + x_crop_blocks,
+             (JDIMENSION)compptr->h_samp_factor, FALSE);
+          for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+            dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+            src_ptr =
+              src_buffer[offset_x][dst_blk_y + offset_y + y_crop_blocks];
+            for (i = 0; i < DCTSIZE; i++)
+              for (j = 0; j < DCTSIZE; j++)
+                dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+          }
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_rot_90(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+          JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+          jvirt_barray_ptr *src_coef_arrays,
+          jvirt_barray_ptr *dst_coef_arrays)
+/* 90 degree rotation is equivalent to
+ *   1. Transposing the image;
+ *   2. Horizontal mirroring.
+ * These two steps are merged into a single processing routine.
+ */
+{
+  JDIMENSION MCU_cols, comp_width, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, i, j, offset_x, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  /* Because of the horizontal mirror step, we can't process partial iMCUs
+   * at the (output) right edge properly.  They just get transposed and
+   * not mirrored.
+   */
+  MCU_cols = srcinfo->output_height /
+             (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+             dst_blk_x += compptr->h_samp_factor) {
+          if (x_crop_blocks + dst_blk_x < comp_width) {
+            /* Block is within the mirrorable area. */
+            src_buffer = (*srcinfo->mem->access_virt_barray)
+              ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+               comp_width - x_crop_blocks - dst_blk_x -
+               (JDIMENSION)compptr->h_samp_factor,
+               (JDIMENSION)compptr->h_samp_factor, FALSE);
+          } else {
+            /* Edge blocks are transposed but not mirrored. */
+            src_buffer = (*srcinfo->mem->access_virt_barray)
+              ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+               dst_blk_x + x_crop_blocks,
+               (JDIMENSION)compptr->h_samp_factor, FALSE);
+          }
+          for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+            dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+            if (x_crop_blocks + dst_blk_x < comp_width) {
+              /* Block is within the mirrorable area. */
+              src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
+                [dst_blk_y + offset_y + y_crop_blocks];
+              for (i = 0; i < DCTSIZE; i++) {
+                for (j = 0; j < DCTSIZE; j++)
+                  dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+                i++;
+                for (j = 0; j < DCTSIZE; j++)
+                  dst_ptr[j * DCTSIZE + i] = -src_ptr[i * DCTSIZE + j];
+              }
+            } else {
+              /* Edge blocks are transposed but not mirrored. */
+              src_ptr = src_buffer[offset_x]
+                [dst_blk_y + offset_y + y_crop_blocks];
+              for (i = 0; i < DCTSIZE; i++)
+                for (j = 0; j < DCTSIZE; j++)
+                  dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_rot_270(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+           JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+           jvirt_barray_ptr *src_coef_arrays,
+           jvirt_barray_ptr *dst_coef_arrays)
+/* 270 degree rotation is equivalent to
+ *   1. Horizontal mirroring;
+ *   2. Transposing the image.
+ * These two steps are merged into a single processing routine.
+ */
+{
+  JDIMENSION MCU_rows, comp_height, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, i, j, offset_x, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  /* Because of the horizontal mirror step, we can't process partial iMCUs
+   * at the (output) bottom edge properly.  They just get transposed and
+   * not mirrored.
+   */
+  MCU_rows = srcinfo->output_width /
+             (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+             dst_blk_x += compptr->h_samp_factor) {
+          src_buffer = (*srcinfo->mem->access_virt_barray)
+            ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+             dst_blk_x + x_crop_blocks,
+             (JDIMENSION)compptr->h_samp_factor, FALSE);
+          for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+            dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+            if (y_crop_blocks + dst_blk_y < comp_height) {
+              /* Block is within the mirrorable area. */
+              src_ptr = src_buffer[offset_x]
+                [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
+              for (i = 0; i < DCTSIZE; i++) {
+                for (j = 0; j < DCTSIZE; j++) {
+                  dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+                  j++;
+                  dst_ptr[j * DCTSIZE + i] = -src_ptr[i * DCTSIZE + j];
+                }
+              }
+            } else {
+              /* Edge blocks are transposed but not mirrored. */
+              src_ptr = src_buffer[offset_x]
+                [dst_blk_y + offset_y + y_crop_blocks];
+              for (i = 0; i < DCTSIZE; i++)
+                for (j = 0; j < DCTSIZE; j++)
+                  dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_rot_180(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+           JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+           jvirt_barray_ptr *src_coef_arrays,
+           jvirt_barray_ptr *dst_coef_arrays)
+/* 180 degree rotation is equivalent to
+ *   1. Vertical mirroring;
+ *   2. Horizontal mirroring.
+ * These two steps are merged into a single processing routine.
+ */
+{
+  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, i, j, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JBLOCKROW src_row_ptr, dst_row_ptr;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  MCU_cols = srcinfo->output_width /
+             (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+  MCU_rows = srcinfo->output_height /
+             (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      if (y_crop_blocks + dst_blk_y < comp_height) {
+        /* Row is within the vertically mirrorable area. */
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           comp_height - y_crop_blocks - dst_blk_y -
+           (JDIMENSION)compptr->v_samp_factor,
+           (JDIMENSION)compptr->v_samp_factor, FALSE);
+      } else {
+        /* Bottom-edge rows are only mirrored horizontally. */
+        src_buffer = (*srcinfo->mem->access_virt_barray)
+          ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+           dst_blk_y + y_crop_blocks,
+           (JDIMENSION)compptr->v_samp_factor, FALSE);
+      }
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        dst_row_ptr = dst_buffer[offset_y];
+        if (y_crop_blocks + dst_blk_y < comp_height) {
+          /* Row is within the mirrorable area. */
+          src_row_ptr = src_buffer[compptr->v_samp_factor - offset_y - 1];
+          for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+               dst_blk_x++) {
+            dst_ptr = dst_row_ptr[dst_blk_x];
+            if (x_crop_blocks + dst_blk_x < comp_width) {
+              /* Process the blocks that can be mirrored both ways. */
+              src_ptr =
+                src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
+              for (i = 0; i < DCTSIZE; i += 2) {
+                /* For even row, negate every odd column. */
+                for (j = 0; j < DCTSIZE; j += 2) {
+                  *dst_ptr++ = *src_ptr++;
+                  *dst_ptr++ = -(*src_ptr++);
+                }
+                /* For odd row, negate every even column. */
+                for (j = 0; j < DCTSIZE; j += 2) {
+                  *dst_ptr++ = -(*src_ptr++);
+                  *dst_ptr++ = *src_ptr++;
+                }
+              }
+            } else {
+              /* Any remaining right-edge blocks are only mirrored vertically. */
+              src_ptr = src_row_ptr[x_crop_blocks + dst_blk_x];
+              for (i = 0; i < DCTSIZE; i += 2) {
+                for (j = 0; j < DCTSIZE; j++)
+                  *dst_ptr++ = *src_ptr++;
+                for (j = 0; j < DCTSIZE; j++)
+                  *dst_ptr++ = -(*src_ptr++);
+              }
+            }
+          }
+        } else {
+          /* Remaining rows are just mirrored horizontally. */
+          src_row_ptr = src_buffer[offset_y];
+          for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+               dst_blk_x++) {
+            if (x_crop_blocks + dst_blk_x < comp_width) {
+              /* Process the blocks that can be mirrored. */
+              dst_ptr = dst_row_ptr[dst_blk_x];
+              src_ptr =
+                src_row_ptr[comp_width - x_crop_blocks - dst_blk_x - 1];
+              for (i = 0; i < DCTSIZE2; i += 2) {
+                *dst_ptr++ = *src_ptr++;
+                *dst_ptr++ = -(*src_ptr++);
+              }
+            } else {
+              /* Any remaining right-edge blocks are only copied. */
+              jcopy_block_row(src_row_ptr + dst_blk_x + x_crop_blocks,
+                              dst_row_ptr + dst_blk_x, (JDIMENSION)1);
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+
+LOCAL(void)
+do_transverse(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+              JDIMENSION x_crop_offset, JDIMENSION y_crop_offset,
+              jvirt_barray_ptr *src_coef_arrays,
+              jvirt_barray_ptr *dst_coef_arrays)
+/* Transverse transpose is equivalent to
+ *   1. 180 degree rotation;
+ *   2. Transposition;
+ * or
+ *   1. Horizontal mirroring;
+ *   2. Transposition;
+ *   3. Horizontal mirroring.
+ * These steps are merged into a single processing routine.
+ */
+{
+  JDIMENSION MCU_cols, MCU_rows, comp_width, comp_height, dst_blk_x, dst_blk_y;
+  JDIMENSION x_crop_blocks, y_crop_blocks;
+  int ci, i, j, offset_x, offset_y;
+  JBLOCKARRAY src_buffer, dst_buffer;
+  JCOEFPTR src_ptr, dst_ptr;
+  jpeg_component_info *compptr;
+
+  MCU_cols = srcinfo->output_height /
+             (dstinfo->max_h_samp_factor * dstinfo_min_DCT_h_scaled_size);
+  MCU_rows = srcinfo->output_width /
+             (dstinfo->max_v_samp_factor * dstinfo_min_DCT_v_scaled_size);
+
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    comp_width = MCU_cols * compptr->h_samp_factor;
+    comp_height = MCU_rows * compptr->v_samp_factor;
+    x_crop_blocks = x_crop_offset * compptr->h_samp_factor;
+    y_crop_blocks = y_crop_offset * compptr->v_samp_factor;
+    for (dst_blk_y = 0; dst_blk_y < compptr->height_in_blocks;
+         dst_blk_y += compptr->v_samp_factor) {
+      dst_buffer = (*srcinfo->mem->access_virt_barray)
+        ((j_common_ptr)srcinfo, dst_coef_arrays[ci], dst_blk_y,
+         (JDIMENSION)compptr->v_samp_factor, TRUE);
+      for (offset_y = 0; offset_y < compptr->v_samp_factor; offset_y++) {
+        for (dst_blk_x = 0; dst_blk_x < compptr->width_in_blocks;
+             dst_blk_x += compptr->h_samp_factor) {
+          if (x_crop_blocks + dst_blk_x < comp_width) {
+            /* Block is within the mirrorable area. */
+            src_buffer = (*srcinfo->mem->access_virt_barray)
+              ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+               comp_width - x_crop_blocks - dst_blk_x -
+               (JDIMENSION)compptr->h_samp_factor,
+               (JDIMENSION)compptr->h_samp_factor, FALSE);
+          } else {
+            src_buffer = (*srcinfo->mem->access_virt_barray)
+              ((j_common_ptr)srcinfo, src_coef_arrays[ci],
+               dst_blk_x + x_crop_blocks,
+               (JDIMENSION)compptr->h_samp_factor, FALSE);
+          }
+          for (offset_x = 0; offset_x < compptr->h_samp_factor; offset_x++) {
+            dst_ptr = dst_buffer[offset_y][dst_blk_x + offset_x];
+            if (y_crop_blocks + dst_blk_y < comp_height) {
+              if (x_crop_blocks + dst_blk_x < comp_width) {
+                /* Block is within the mirrorable area. */
+                src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
+                  [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
+                for (i = 0; i < DCTSIZE; i++) {
+                  for (j = 0; j < DCTSIZE; j++) {
+                    dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+                    j++;
+                    dst_ptr[j * DCTSIZE + i] = -src_ptr[i * DCTSIZE + j];
+                  }
+                  i++;
+                  for (j = 0; j < DCTSIZE; j++) {
+                    dst_ptr[j * DCTSIZE + i] = -src_ptr[i * DCTSIZE + j];
+                    j++;
+                    dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+                  }
+                }
+              } else {
+                /* Right-edge blocks are mirrored in y only */
+                src_ptr = src_buffer[offset_x]
+                  [comp_height - y_crop_blocks - dst_blk_y - offset_y - 1];
+                for (i = 0; i < DCTSIZE; i++) {
+                  for (j = 0; j < DCTSIZE; j++) {
+                    dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+                    j++;
+                    dst_ptr[j * DCTSIZE + i] = -src_ptr[i * DCTSIZE + j];
+                  }
+                }
+              }
+            } else {
+              if (x_crop_blocks + dst_blk_x < comp_width) {
+                /* Bottom-edge blocks are mirrored in x only */
+                src_ptr = src_buffer[compptr->h_samp_factor - offset_x - 1]
+                  [dst_blk_y + offset_y + y_crop_blocks];
+                for (i = 0; i < DCTSIZE; i++) {
+                  for (j = 0; j < DCTSIZE; j++)
+                    dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+                  i++;
+                  for (j = 0; j < DCTSIZE; j++)
+                    dst_ptr[j * DCTSIZE + i] = -src_ptr[i * DCTSIZE + j];
+                }
+              } else {
+                /* At lower right corner, just transpose, no mirroring */
+                src_ptr = src_buffer[offset_x]
+                  [dst_blk_y + offset_y + y_crop_blocks];
+                for (i = 0; i < DCTSIZE; i++)
+                  for (j = 0; j < DCTSIZE; j++)
+                    dst_ptr[j * DCTSIZE + i] = src_ptr[i * DCTSIZE + j];
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
+
+/* Parse an unsigned integer: subroutine for jtransform_parse_crop_spec.
+ * Returns TRUE if valid integer found, FALSE if not.
+ * *strptr is advanced over the digit string, and *result is set to its value.
+ */
+
+LOCAL(boolean)
+jt_read_integer(const char **strptr, JDIMENSION *result)
+{
+  const char *ptr = *strptr;
+  JDIMENSION val = 0;
+
+  for (; isdigit(*ptr); ptr++) {
+    val = val * 10 + (JDIMENSION)(*ptr - '0');
+  }
+  *result = val;
+  if (ptr == *strptr)
+    return FALSE;               /* oops, no digits */
+  *strptr = ptr;
+  return TRUE;
+}
+
+
+/* Parse a crop specification (written in X11 geometry style).
+ * The routine returns TRUE if the spec string is valid, FALSE if not.
+ *
+ * The crop spec string should have the format
+ *      <width>[{fr}]x<height>[{fr}]{+-}<xoffset>{+-}<yoffset>
+ * where width, height, xoffset, and yoffset are unsigned integers.
+ * Each of the elements can be omitted to indicate a default value.
+ * (A weakness of this style is that it is not possible to omit xoffset
+ * while specifying yoffset, since they look alike.)
+ *
+ * This code is loosely based on XParseGeometry from the X11 distribution.
+ */
+
+GLOBAL(boolean)
+jtransform_parse_crop_spec(jpeg_transform_info *info, const char *spec)
+{
+  info->crop = FALSE;
+  info->crop_width_set = JCROP_UNSET;
+  info->crop_height_set = JCROP_UNSET;
+  info->crop_xoffset_set = JCROP_UNSET;
+  info->crop_yoffset_set = JCROP_UNSET;
+
+  if (isdigit(*spec)) {
+    /* fetch width */
+    if (!jt_read_integer(&spec, &info->crop_width))
+      return FALSE;
+    if (*spec == 'f' || *spec == 'F') {
+      spec++;
+      info->crop_width_set = JCROP_FORCE;
+    } else if (*spec == 'r' || *spec == 'R') {
+      spec++;
+      info->crop_width_set = JCROP_REFLECT;
+    } else
+      info->crop_width_set = JCROP_POS;
+  }
+  if (*spec == 'x' || *spec == 'X') {
+    /* fetch height */
+    spec++;
+    if (!jt_read_integer(&spec, &info->crop_height))
+      return FALSE;
+    if (*spec == 'f' || *spec == 'F') {
+      spec++;
+      info->crop_height_set = JCROP_FORCE;
+    } else if (*spec == 'r' || *spec == 'R') {
+      spec++;
+      info->crop_height_set = JCROP_REFLECT;
+    } else
+      info->crop_height_set = JCROP_POS;
+  }
+  if (*spec == '+' || *spec == '-') {
+    /* fetch xoffset */
+    info->crop_xoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS;
+    spec++;
+    if (!jt_read_integer(&spec, &info->crop_xoffset))
+      return FALSE;
+  }
+  if (*spec == '+' || *spec == '-') {
+    /* fetch yoffset */
+    info->crop_yoffset_set = (*spec == '-') ? JCROP_NEG : JCROP_POS;
+    spec++;
+    if (!jt_read_integer(&spec, &info->crop_yoffset))
+      return FALSE;
+  }
+  /* We had better have gotten to the end of the string. */
+  if (*spec != '\0')
+    return FALSE;
+  info->crop = TRUE;
+  return TRUE;
+}
+
+
+/* Trim off any partial iMCUs on the indicated destination edge */
+
+LOCAL(void)
+trim_right_edge(jpeg_transform_info *info, JDIMENSION full_width)
+{
+  JDIMENSION MCU_cols;
+
+  MCU_cols = info->output_width / info->iMCU_sample_width;
+  if (MCU_cols > 0 && info->x_crop_offset + MCU_cols ==
+      full_width / info->iMCU_sample_width)
+    info->output_width = MCU_cols * info->iMCU_sample_width;
+}
+
+LOCAL(void)
+trim_bottom_edge(jpeg_transform_info *info, JDIMENSION full_height)
+{
+  JDIMENSION MCU_rows;
+
+  MCU_rows = info->output_height / info->iMCU_sample_height;
+  if (MCU_rows > 0 && info->y_crop_offset + MCU_rows ==
+      full_height / info->iMCU_sample_height)
+    info->output_height = MCU_rows * info->iMCU_sample_height;
+}
+
+
+/* Request any required workspace.
+ *
+ * This routine figures out the size that the output image will be
+ * (which implies that all the transform parameters must be set before
+ * it is called).
+ *
+ * We allocate the workspace virtual arrays from the source decompression
+ * object, so that all the arrays (both the original data and the workspace)
+ * will be taken into account while making memory management decisions.
+ * Hence, this routine must be called after jpeg_read_header (which reads
+ * the image dimensions) and before jpeg_read_coefficients (which realizes
+ * the source's virtual arrays).
+ *
+ * This function returns FALSE right away if -perfect is given
+ * and transformation is not perfect.  Otherwise returns TRUE.
+ */
+
+GLOBAL(boolean)
+jtransform_request_workspace(j_decompress_ptr srcinfo,
+                             jpeg_transform_info *info)
+{
+  jvirt_barray_ptr *coef_arrays;
+  boolean need_workspace, transpose_it;
+  jpeg_component_info *compptr;
+  JDIMENSION xoffset, yoffset, dtemp;
+  JDIMENSION width_in_iMCUs, height_in_iMCUs;
+  JDIMENSION width_in_blocks, height_in_blocks;
+  int itemp, ci, h_samp_factor, v_samp_factor;
+
+  /* Determine number of components in output image */
+  if (info->force_grayscale &&
+      srcinfo->jpeg_color_space == JCS_YCbCr &&
+      srcinfo->num_components == 3)
+    /* We'll only process the first component */
+    info->num_components = 1;
+  else
+    /* Process all the components */
+    info->num_components = srcinfo->num_components;
+
+  /* Compute output image dimensions and related values. */
+#if JPEG_LIB_VERSION >= 80
+  jpeg_core_output_dimensions(srcinfo);
+#else
+  srcinfo->output_width = srcinfo->image_width;
+  srcinfo->output_height = srcinfo->image_height;
+#endif
+
+  /* Return right away if -perfect is given and transformation is not perfect.
+   */
+  if (info->perfect) {
+    if (info->num_components == 1) {
+      if (!jtransform_perfect_transform(srcinfo->output_width,
+          srcinfo->output_height,
+          srcinfo->_min_DCT_h_scaled_size,
+          srcinfo->_min_DCT_v_scaled_size,
+          info->transform))
+        return FALSE;
+    } else {
+      if (!jtransform_perfect_transform(srcinfo->output_width,
+          srcinfo->output_height,
+          srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size,
+          srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size,
+          info->transform))
+        return FALSE;
+    }
+  }
+
+  /* If there is only one output component, force the iMCU size to be 1;
+   * else use the source iMCU size.  (This allows us to do the right thing
+   * when reducing color to grayscale, and also provides a handy way of
+   * cleaning up "funny" grayscale images whose sampling factors are not 1x1.)
+   */
+  switch (info->transform) {
+  case JXFORM_TRANSPOSE:
+  case JXFORM_TRANSVERSE:
+  case JXFORM_ROT_90:
+  case JXFORM_ROT_270:
+    info->output_width = srcinfo->output_height;
+    info->output_height = srcinfo->output_width;
+    if (info->num_components == 1) {
+      info->iMCU_sample_width = srcinfo->_min_DCT_v_scaled_size;
+      info->iMCU_sample_height = srcinfo->_min_DCT_h_scaled_size;
+    } else {
+      info->iMCU_sample_width =
+        srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size;
+      info->iMCU_sample_height =
+        srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size;
+    }
+    break;
+  default:
+    info->output_width = srcinfo->output_width;
+    info->output_height = srcinfo->output_height;
+    if (info->num_components == 1) {
+      info->iMCU_sample_width = srcinfo->_min_DCT_h_scaled_size;
+      info->iMCU_sample_height = srcinfo->_min_DCT_v_scaled_size;
+    } else {
+      info->iMCU_sample_width =
+        srcinfo->max_h_samp_factor * srcinfo->_min_DCT_h_scaled_size;
+      info->iMCU_sample_height =
+        srcinfo->max_v_samp_factor * srcinfo->_min_DCT_v_scaled_size;
+    }
+    break;
+  }
+
+  /* If cropping has been requested, compute the crop area's position and
+   * dimensions, ensuring that its upper left corner falls at an iMCU boundary.
+   */
+  if (info->crop) {
+    /* Insert default values for unset crop parameters */
+    if (info->crop_xoffset_set == JCROP_UNSET)
+      info->crop_xoffset = 0;   /* default to +0 */
+    if (info->crop_yoffset_set == JCROP_UNSET)
+      info->crop_yoffset = 0;   /* default to +0 */
+    if (info->crop_width_set == JCROP_UNSET) {
+      if (info->crop_xoffset >= info->output_width)
+        ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      info->crop_width = info->output_width - info->crop_xoffset;
+    } else {
+      /* Check for crop extension */
+      if (info->crop_width > info->output_width) {
+        /* Crop extension does not work when transforming! */
+        if (info->transform != JXFORM_NONE ||
+            info->crop_xoffset >= info->crop_width ||
+            info->crop_xoffset > info->crop_width - info->output_width)
+          ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      } else {
+        if (info->crop_xoffset >= info->output_width ||
+            info->crop_width <= 0 ||
+            info->crop_xoffset > info->output_width - info->crop_width)
+          ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      }
+    }
+    if (info->crop_height_set == JCROP_UNSET) {
+      if (info->crop_yoffset >= info->output_height)
+        ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      info->crop_height = info->output_height - info->crop_yoffset;
+    } else {
+      /* Check for crop extension */
+      if (info->crop_height > info->output_height) {
+        /* Crop extension does not work when transforming! */
+        if (info->transform != JXFORM_NONE ||
+            info->crop_yoffset >= info->crop_height ||
+            info->crop_yoffset > info->crop_height - info->output_height)
+          ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      } else {
+        if (info->crop_yoffset >= info->output_height ||
+            info->crop_height <= 0 ||
+            info->crop_yoffset > info->output_height - info->crop_height)
+          ERREXIT(srcinfo, JERR_BAD_CROP_SPEC);
+      }
+    }
+    /* Convert negative crop offsets into regular offsets */
+    if (info->crop_xoffset_set != JCROP_NEG)
+      xoffset = info->crop_xoffset;
+    else if (info->crop_width > info->output_width) /* crop extension */
+      xoffset = info->crop_width - info->output_width - info->crop_xoffset;
+    else
+      xoffset = info->output_width - info->crop_width - info->crop_xoffset;
+    if (info->crop_yoffset_set != JCROP_NEG)
+      yoffset = info->crop_yoffset;
+    else if (info->crop_height > info->output_height) /* crop extension */
+      yoffset = info->crop_height - info->output_height - info->crop_yoffset;
+    else
+      yoffset = info->output_height - info->crop_height - info->crop_yoffset;
+    /* Now adjust so that upper left corner falls at an iMCU boundary */
+    switch (info->transform) {
+    case JXFORM_DROP:
+      /* Ensure the effective drop region will not exceed the requested */
+      itemp = info->iMCU_sample_width;
+      dtemp = itemp - 1 - ((xoffset + itemp - 1) % itemp);
+      xoffset += dtemp;
+      if (info->crop_width <= dtemp)
+        info->drop_width = 0;
+      else if (xoffset + info->crop_width - dtemp == info->output_width)
+        /* Matching right edge: include partial iMCU */
+        info->drop_width = (info->crop_width - dtemp + itemp - 1) / itemp;
+      else
+        info->drop_width = (info->crop_width - dtemp) / itemp;
+      itemp = info->iMCU_sample_height;
+      dtemp = itemp - 1 - ((yoffset + itemp - 1) % itemp);
+      yoffset += dtemp;
+      if (info->crop_height <= dtemp)
+        info->drop_height = 0;
+      else if (yoffset + info->crop_height - dtemp == info->output_height)
+        /* Matching bottom edge: include partial iMCU */
+        info->drop_height = (info->crop_height - dtemp + itemp - 1) / itemp;
+      else
+        info->drop_height = (info->crop_height - dtemp) / itemp;
+      /* Check if sampling factors match for dropping */
+      if (info->drop_width != 0 && info->drop_height != 0)
+        for (ci = 0; ci < info->num_components &&
+                     ci < info->drop_ptr->num_components; ci++) {
+          if (info->drop_ptr->comp_info[ci].h_samp_factor *
+              srcinfo->max_h_samp_factor !=
+              srcinfo->comp_info[ci].h_samp_factor *
+              info->drop_ptr->max_h_samp_factor)
+            ERREXIT6(srcinfo, JERR_BAD_DROP_SAMPLING, ci,
+              info->drop_ptr->comp_info[ci].h_samp_factor,
+              info->drop_ptr->max_h_samp_factor,
+              srcinfo->comp_info[ci].h_samp_factor,
+              srcinfo->max_h_samp_factor, 'h');
+          if (info->drop_ptr->comp_info[ci].v_samp_factor *
+              srcinfo->max_v_samp_factor !=
+              srcinfo->comp_info[ci].v_samp_factor *
+              info->drop_ptr->max_v_samp_factor)
+            ERREXIT6(srcinfo, JERR_BAD_DROP_SAMPLING, ci,
+              info->drop_ptr->comp_info[ci].v_samp_factor,
+              info->drop_ptr->max_v_samp_factor,
+              srcinfo->comp_info[ci].v_samp_factor,
+              srcinfo->max_v_samp_factor, 'v');
+        }
+      break;
+    case JXFORM_WIPE:
+      /* Ensure the effective wipe region will cover the requested */
+      info->drop_width = (JDIMENSION)jdiv_round_up
+        ((long)(info->crop_width + (xoffset % info->iMCU_sample_width)),
+         (long)info->iMCU_sample_width);
+      info->drop_height = (JDIMENSION)jdiv_round_up
+        ((long)(info->crop_height + (yoffset % info->iMCU_sample_height)),
+         (long)info->iMCU_sample_height);
+      break;
+    default:
+      /* Ensure the effective crop region will cover the requested */
+      if (info->crop_width_set == JCROP_FORCE ||
+          info->crop_width > info->output_width)
+        info->output_width = info->crop_width;
+      else
+        info->output_width =
+          info->crop_width + (xoffset % info->iMCU_sample_width);
+      if (info->crop_height_set == JCROP_FORCE ||
+          info->crop_height > info->output_height)
+        info->output_height = info->crop_height;
+      else
+        info->output_height =
+          info->crop_height + (yoffset % info->iMCU_sample_height);
+    }
+    /* Save x/y offsets measured in iMCUs */
+    info->x_crop_offset = xoffset / info->iMCU_sample_width;
+    info->y_crop_offset = yoffset / info->iMCU_sample_height;
+  } else {
+    info->x_crop_offset = 0;
+    info->y_crop_offset = 0;
+  }
+
+  /* Figure out whether we need workspace arrays,
+   * and if so whether they are transposed relative to the source.
+   */
+  need_workspace = FALSE;
+  transpose_it = FALSE;
+  switch (info->transform) {
+  case JXFORM_NONE:
+    if (info->x_crop_offset != 0 || info->y_crop_offset != 0 ||
+        info->output_width > srcinfo->output_width ||
+        info->output_height > srcinfo->output_height)
+      need_workspace = TRUE;
+    /* No workspace needed if neither cropping nor transforming */
+    break;
+  case JXFORM_FLIP_H:
+    if (info->trim)
+      trim_right_edge(info, srcinfo->output_width);
+    if (info->y_crop_offset != 0 || info->slow_hflip)
+      need_workspace = TRUE;
+    /* do_flip_h_no_crop doesn't need a workspace array */
+    break;
+  case JXFORM_FLIP_V:
+    if (info->trim)
+      trim_bottom_edge(info, srcinfo->output_height);
+    /* Need workspace arrays having same dimensions as source image. */
+    need_workspace = TRUE;
+    break;
+  case JXFORM_TRANSPOSE:
+    /* transpose does NOT have to trim anything */
+    /* Need workspace arrays having transposed dimensions. */
+    need_workspace = TRUE;
+    transpose_it = TRUE;
+    break;
+  case JXFORM_TRANSVERSE:
+    if (info->trim) {
+      trim_right_edge(info, srcinfo->output_height);
+      trim_bottom_edge(info, srcinfo->output_width);
+    }
+    /* Need workspace arrays having transposed dimensions. */
+    need_workspace = TRUE;
+    transpose_it = TRUE;
+    break;
+  case JXFORM_ROT_90:
+    if (info->trim)
+      trim_right_edge(info, srcinfo->output_height);
+    /* Need workspace arrays having transposed dimensions. */
+    need_workspace = TRUE;
+    transpose_it = TRUE;
+    break;
+  case JXFORM_ROT_180:
+    if (info->trim) {
+      trim_right_edge(info, srcinfo->output_width);
+      trim_bottom_edge(info, srcinfo->output_height);
+    }
+    /* Need workspace arrays having same dimensions as source image. */
+    need_workspace = TRUE;
+    break;
+  case JXFORM_ROT_270:
+    if (info->trim)
+      trim_bottom_edge(info, srcinfo->output_width);
+    /* Need workspace arrays having transposed dimensions. */
+    need_workspace = TRUE;
+    transpose_it = TRUE;
+    break;
+  case JXFORM_WIPE:
+    break;
+  case JXFORM_DROP:
+    break;
+  }
+
+  /* Allocate workspace if needed.
+   * Note that we allocate arrays padded out to the next iMCU boundary,
+   * so that transform routines need not worry about missing edge blocks.
+   */
+  if (need_workspace) {
+    coef_arrays = (jvirt_barray_ptr *)
+      (*srcinfo->mem->alloc_small) ((j_common_ptr)srcinfo, JPOOL_IMAGE,
+                sizeof(jvirt_barray_ptr) * info->num_components);
+    width_in_iMCUs = (JDIMENSION)
+      jdiv_round_up((long)info->output_width, (long)info->iMCU_sample_width);
+    height_in_iMCUs = (JDIMENSION)
+      jdiv_round_up((long)info->output_height, (long)info->iMCU_sample_height);
+    for (ci = 0; ci < info->num_components; ci++) {
+      compptr = srcinfo->comp_info + ci;
+      if (info->num_components == 1) {
+        /* we're going to force samp factors to 1x1 in this case */
+        h_samp_factor = v_samp_factor = 1;
+      } else if (transpose_it) {
+        h_samp_factor = compptr->v_samp_factor;
+        v_samp_factor = compptr->h_samp_factor;
+      } else {
+        h_samp_factor = compptr->h_samp_factor;
+        v_samp_factor = compptr->v_samp_factor;
+      }
+      width_in_blocks = width_in_iMCUs * h_samp_factor;
+      height_in_blocks = height_in_iMCUs * v_samp_factor;
+      coef_arrays[ci] = (*srcinfo->mem->request_virt_barray)
+        ((j_common_ptr)srcinfo, JPOOL_IMAGE, FALSE,
+         width_in_blocks, height_in_blocks, (JDIMENSION)v_samp_factor);
+    }
+    info->workspace_coef_arrays = coef_arrays;
+  } else
+    info->workspace_coef_arrays = NULL;
+
+  return TRUE;
+}
+
+
+/* Transpose destination image parameters */
+
+LOCAL(void)
+transpose_critical_parameters(j_compress_ptr dstinfo)
+{
+  int tblno, i, j, ci, itemp;
+  jpeg_component_info *compptr;
+  JQUANT_TBL *qtblptr;
+  JDIMENSION jtemp;
+  UINT16 qtemp;
+
+  /* Transpose image dimensions */
+  jtemp = dstinfo->image_width;
+  dstinfo->image_width = dstinfo->image_height;
+  dstinfo->image_height = jtemp;
+#if JPEG_LIB_VERSION >= 70
+  itemp = dstinfo->min_DCT_h_scaled_size;
+  dstinfo->min_DCT_h_scaled_size = dstinfo->min_DCT_v_scaled_size;
+  dstinfo->min_DCT_v_scaled_size = itemp;
+#endif
+
+  /* Transpose sampling factors */
+  for (ci = 0; ci < dstinfo->num_components; ci++) {
+    compptr = dstinfo->comp_info + ci;
+    itemp = compptr->h_samp_factor;
+    compptr->h_samp_factor = compptr->v_samp_factor;
+    compptr->v_samp_factor = itemp;
+  }
+
+  /* Transpose quantization tables */
+  for (tblno = 0; tblno < NUM_QUANT_TBLS; tblno++) {
+    qtblptr = dstinfo->quant_tbl_ptrs[tblno];
+    if (qtblptr != NULL) {
+      for (i = 0; i < DCTSIZE; i++) {
+        for (j = 0; j < i; j++) {
+          qtemp = qtblptr->quantval[i * DCTSIZE + j];
+          qtblptr->quantval[i * DCTSIZE + j] =
+            qtblptr->quantval[j * DCTSIZE + i];
+          qtblptr->quantval[j * DCTSIZE + i] = qtemp;
+        }
+      }
+    }
+  }
+}
+
+
+/* Adjust Exif image parameters.
+ *
+ * We try to adjust the Tags ExifImageWidth and ExifImageHeight if possible.
+ */
+
+LOCAL(void)
+adjust_exif_parameters(JOCTET *data, unsigned int length, JDIMENSION new_width,
+                       JDIMENSION new_height)
+{
+  boolean is_motorola; /* Flag for byte order */
+  unsigned int number_of_tags, tagnum;
+  unsigned int firstoffset, offset;
+  JDIMENSION new_value;
+
+  if (length < 12) return; /* Length of an IFD entry */
+
+  /* Discover byte order */
+  if (data[0] == 0x49 && data[1] == 0x49)
+    is_motorola = FALSE;
+  else if (data[0] == 0x4D && data[1] == 0x4D)
+    is_motorola = TRUE;
+  else
+    return;
+
+  /* Check Tag Mark */
+  if (is_motorola) {
+    if (data[2] != 0) return;
+    if (data[3] != 0x2A) return;
+  } else {
+    if (data[3] != 0) return;
+    if (data[2] != 0x2A) return;
+  }
+
+  /* Get first IFD offset (offset to IFD0) */
+  if (is_motorola) {
+    if (data[4] != 0) return;
+    if (data[5] != 0) return;
+    firstoffset = data[6];
+    firstoffset <<= 8;
+    firstoffset += data[7];
+  } else {
+    if (data[7] != 0) return;
+    if (data[6] != 0) return;
+    firstoffset = data[5];
+    firstoffset <<= 8;
+    firstoffset += data[4];
+  }
+  if (firstoffset > length - 2) return; /* check end of data segment */
+
+  /* Get the number of directory entries contained in this IFD */
+  if (is_motorola) {
+    number_of_tags = data[firstoffset];
+    number_of_tags <<= 8;
+    number_of_tags += data[firstoffset + 1];
+  } else {
+    number_of_tags = data[firstoffset + 1];
+    number_of_tags <<= 8;
+    number_of_tags += data[firstoffset];
+  }
+  if (number_of_tags == 0) return;
+  firstoffset += 2;
+
+  /* Search for ExifSubIFD offset Tag in IFD0 */
+  for (;;) {
+    if (firstoffset > length - 12) return; /* check end of data segment */
+    /* Get Tag number */
+    if (is_motorola) {
+      tagnum = data[firstoffset];
+      tagnum <<= 8;
+      tagnum += data[firstoffset + 1];
+    } else {
+      tagnum = data[firstoffset + 1];
+      tagnum <<= 8;
+      tagnum += data[firstoffset];
+    }
+    if (tagnum == 0x8769) break; /* found ExifSubIFD offset Tag */
+    if (--number_of_tags == 0) return;
+    firstoffset += 12;
+  }
+
+  /* Get the ExifSubIFD offset */
+  if (is_motorola) {
+    if (data[firstoffset + 8] != 0) return;
+    if (data[firstoffset + 9] != 0) return;
+    offset = data[firstoffset + 10];
+    offset <<= 8;
+    offset += data[firstoffset + 11];
+  } else {
+    if (data[firstoffset + 11] != 0) return;
+    if (data[firstoffset + 10] != 0) return;
+    offset = data[firstoffset + 9];
+    offset <<= 8;
+    offset += data[firstoffset + 8];
+  }
+  if (offset > length - 2) return; /* check end of data segment */
+
+  /* Get the number of directory entries contained in this SubIFD */
+  if (is_motorola) {
+    number_of_tags = data[offset];
+    number_of_tags <<= 8;
+    number_of_tags += data[offset + 1];
+  } else {
+    number_of_tags = data[offset + 1];
+    number_of_tags <<= 8;
+    number_of_tags += data[offset];
+  }
+  if (number_of_tags < 2) return;
+  offset += 2;
+
+  /* Search for ExifImageWidth and ExifImageHeight Tags in this SubIFD */
+  do {
+    if (offset > length - 12) return; /* check end of data segment */
+    /* Get Tag number */
+    if (is_motorola) {
+      tagnum = data[offset];
+      tagnum <<= 8;
+      tagnum += data[offset + 1];
+    } else {
+      tagnum = data[offset + 1];
+      tagnum <<= 8;
+      tagnum += data[offset];
+    }
+    if (tagnum == 0xA002 || tagnum == 0xA003) {
+      if (tagnum == 0xA002)
+        new_value = new_width; /* ExifImageWidth Tag */
+      else
+        new_value = new_height; /* ExifImageHeight Tag */
+      if (is_motorola) {
+        data[offset + 2] = 0; /* Format = unsigned long (4 octets) */
+        data[offset + 3] = 4;
+        data[offset + 4] = 0; /* Number Of Components = 1 */
+        data[offset + 5] = 0;
+        data[offset + 6] = 0;
+        data[offset + 7] = 1;
+        data[offset + 8] = 0;
+        data[offset + 9] = 0;
+        data[offset + 10] = (JOCTET)((new_value >> 8) & 0xFF);
+        data[offset + 11] = (JOCTET)(new_value & 0xFF);
+      } else {
+        data[offset + 2] = 4; /* Format = unsigned long (4 octets) */
+        data[offset + 3] = 0;
+        data[offset + 4] = 1; /* Number Of Components = 1 */
+        data[offset + 5] = 0;
+        data[offset + 6] = 0;
+        data[offset + 7] = 0;
+        data[offset + 8] = (JOCTET)(new_value & 0xFF);
+        data[offset + 9] = (JOCTET)((new_value >> 8) & 0xFF);
+        data[offset + 10] = 0;
+        data[offset + 11] = 0;
+      }
+    }
+    offset += 12;
+  } while (--number_of_tags);
+}
+
+
+/* Adjust output image parameters as needed.
+ *
+ * This must be called after jpeg_copy_critical_parameters()
+ * and before jpeg_write_coefficients().
+ *
+ * The return value is the set of virtual coefficient arrays to be written
+ * (either the ones allocated by jtransform_request_workspace, or the
+ * original source data arrays).  The caller will need to pass this value
+ * to jpeg_write_coefficients().
+ */
+
+GLOBAL(jvirt_barray_ptr *)
+jtransform_adjust_parameters(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+                             jvirt_barray_ptr *src_coef_arrays,
+                             jpeg_transform_info *info)
+{
+  /* If force-to-grayscale is requested, adjust destination parameters */
+  if (info->force_grayscale) {
+    /* First, ensure we have YCbCr or grayscale data, and that the source's
+     * Y channel is full resolution.  (No reasonable person would make Y
+     * be less than full resolution, so actually coping with that case
+     * isn't worth extra code space.  But we check it to avoid crashing.)
+     */
+    if (((dstinfo->jpeg_color_space == JCS_YCbCr &&
+          dstinfo->num_components == 3) ||
+         (dstinfo->jpeg_color_space == JCS_GRAYSCALE &&
+          dstinfo->num_components == 1)) &&
+        srcinfo->comp_info[0].h_samp_factor == srcinfo->max_h_samp_factor &&
+        srcinfo->comp_info[0].v_samp_factor == srcinfo->max_v_samp_factor) {
+      /* We use jpeg_set_colorspace to make sure subsidiary settings get fixed
+       * properly.  Among other things, it sets the target h_samp_factor &
+       * v_samp_factor to 1, which typically won't match the source.
+       * We have to preserve the source's quantization table number, however.
+       */
+      int sv_quant_tbl_no = dstinfo->comp_info[0].quant_tbl_no;
+      jpeg_set_colorspace(dstinfo, JCS_GRAYSCALE);
+      dstinfo->comp_info[0].quant_tbl_no = sv_quant_tbl_no;
+    } else {
+      /* Sorry, can't do it */
+      ERREXIT(dstinfo, JERR_CONVERSION_NOTIMPL);
+    }
+  } else if (info->num_components == 1) {
+    /* For a single-component source, we force the destination sampling factors
+     * to 1x1, with or without force_grayscale.  This is useful because some
+     * decoders choke on grayscale images with other sampling factors.
+     */
+    dstinfo->comp_info[0].h_samp_factor = 1;
+    dstinfo->comp_info[0].v_samp_factor = 1;
+  }
+
+  /* Correct the destination's image dimensions as necessary
+   * for rotate/flip, resize, and crop operations.
+   */
+#if JPEG_LIB_VERSION >= 80
+  dstinfo->jpeg_width = info->output_width;
+  dstinfo->jpeg_height = info->output_height;
+#endif
+
+  /* Transpose destination image parameters, adjust quantization */
+  switch (info->transform) {
+  case JXFORM_TRANSPOSE:
+  case JXFORM_TRANSVERSE:
+  case JXFORM_ROT_90:
+  case JXFORM_ROT_270:
+#if JPEG_LIB_VERSION < 80
+    dstinfo->image_width = info->output_height;
+    dstinfo->image_height = info->output_width;
+#endif
+    transpose_critical_parameters(dstinfo);
+    break;
+  case JXFORM_DROP:
+    if (info->drop_width != 0 && info->drop_height != 0)
+      adjust_quant(srcinfo, src_coef_arrays,
+                   info->drop_ptr, info->drop_coef_arrays,
+                   info->trim, dstinfo);
+    break;
+  default:
+#if JPEG_LIB_VERSION < 80
+    dstinfo->image_width = info->output_width;
+    dstinfo->image_height = info->output_height;
+#endif
+    break;
+  }
+
+  /* Adjust Exif properties */
+  if (srcinfo->marker_list != NULL &&
+      srcinfo->marker_list->marker == JPEG_APP0 + 1 &&
+      srcinfo->marker_list->data_length >= 6 &&
+      srcinfo->marker_list->data[0] == 0x45 &&
+      srcinfo->marker_list->data[1] == 0x78 &&
+      srcinfo->marker_list->data[2] == 0x69 &&
+      srcinfo->marker_list->data[3] == 0x66 &&
+      srcinfo->marker_list->data[4] == 0 &&
+      srcinfo->marker_list->data[5] == 0) {
+    /* Suppress output of JFIF marker */
+    dstinfo->write_JFIF_header = FALSE;
+    /* Adjust Exif image parameters */
+#if JPEG_LIB_VERSION >= 80
+    if (dstinfo->jpeg_width != srcinfo->image_width ||
+        dstinfo->jpeg_height != srcinfo->image_height)
+      /* Align data segment to start of TIFF structure for parsing */
+      adjust_exif_parameters(srcinfo->marker_list->data + 6,
+                             srcinfo->marker_list->data_length - 6,
+                             dstinfo->jpeg_width, dstinfo->jpeg_height);
+#else
+    if (dstinfo->image_width != srcinfo->image_width ||
+        dstinfo->image_height != srcinfo->image_height)
+      /* Align data segment to start of TIFF structure for parsing */
+      adjust_exif_parameters(srcinfo->marker_list->data + 6,
+                             srcinfo->marker_list->data_length - 6,
+                             dstinfo->image_width, dstinfo->image_height);
+#endif
+  }
+
+  /* Return the appropriate output data set */
+  if (info->workspace_coef_arrays != NULL)
+    return info->workspace_coef_arrays;
+  return src_coef_arrays;
+}
+
+
+/* Execute the actual transformation, if any.
+ *
+ * This must be called *after* jpeg_write_coefficients, because it depends
+ * on jpeg_write_coefficients to have computed subsidiary values such as
+ * the per-component width and height fields in the destination object.
+ *
+ * Note that some transformations will modify the source data arrays!
+ */
+
+GLOBAL(void)
+jtransform_execute_transform(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+                             jvirt_barray_ptr *src_coef_arrays,
+                             jpeg_transform_info *info)
+{
+  jvirt_barray_ptr *dst_coef_arrays = info->workspace_coef_arrays;
+
+  /* Note: conditions tested here should match those in switch statement
+   * in jtransform_request_workspace()
+   */
+  switch (info->transform) {
+  case JXFORM_NONE:
+    if (info->output_width > srcinfo->output_width ||
+        info->output_height > srcinfo->output_height) {
+      if (info->output_width > srcinfo->output_width &&
+          info->crop_width_set == JCROP_REFLECT)
+        do_crop_ext_reflect(srcinfo, dstinfo,
+                            info->x_crop_offset, info->y_crop_offset,
+                            src_coef_arrays, dst_coef_arrays);
+      else if (info->output_width > srcinfo->output_width &&
+               info->crop_width_set == JCROP_FORCE)
+        do_crop_ext_flat(srcinfo, dstinfo,
+                         info->x_crop_offset, info->y_crop_offset,
+                         src_coef_arrays, dst_coef_arrays);
+      else
+        do_crop_ext_zero(srcinfo, dstinfo,
+                         info->x_crop_offset, info->y_crop_offset,
+                         src_coef_arrays, dst_coef_arrays);
+    } else if (info->x_crop_offset != 0 || info->y_crop_offset != 0)
+      do_crop(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+              src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_FLIP_H:
+    if (info->y_crop_offset != 0 || info->slow_hflip)
+      do_flip_h(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+                src_coef_arrays, dst_coef_arrays);
+    else
+      do_flip_h_no_crop(srcinfo, dstinfo, info->x_crop_offset,
+                        src_coef_arrays);
+    break;
+  case JXFORM_FLIP_V:
+    do_flip_v(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+              src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_TRANSPOSE:
+    do_transpose(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+                 src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_TRANSVERSE:
+    do_transverse(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+                  src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_ROT_90:
+    do_rot_90(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+              src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_ROT_180:
+    do_rot_180(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+               src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_ROT_270:
+    do_rot_270(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+               src_coef_arrays, dst_coef_arrays);
+    break;
+  case JXFORM_WIPE:
+    if (info->crop_width_set == JCROP_REFLECT &&
+        info->y_crop_offset == 0 && info->drop_height ==
+        (JDIMENSION)jdiv_round_up
+          ((long)info->output_height, (long)info->iMCU_sample_height) &&
+        (info->x_crop_offset == 0 ||
+         info->x_crop_offset + info->drop_width ==
+         (JDIMENSION)jdiv_round_up
+           ((long)info->output_width, (long)info->iMCU_sample_width)))
+      do_reflect(srcinfo, dstinfo, info->x_crop_offset,
+                 src_coef_arrays, info->drop_width, info->drop_height);
+    else if (info->crop_width_set == JCROP_FORCE)
+      do_flatten(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+                 src_coef_arrays, info->drop_width, info->drop_height);
+    else
+      do_wipe(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+              src_coef_arrays, info->drop_width, info->drop_height);
+    break;
+  case JXFORM_DROP:
+    if (info->drop_width != 0 && info->drop_height != 0)
+      do_drop(srcinfo, dstinfo, info->x_crop_offset, info->y_crop_offset,
+              src_coef_arrays, info->drop_ptr, info->drop_coef_arrays,
+              info->drop_width, info->drop_height);
+    break;
+  }
+}
+
+/* jtransform_perfect_transform
+ *
+ * Determine whether lossless transformation is perfectly
+ * possible for a specified image and transformation.
+ *
+ * Inputs:
+ *   image_width, image_height: source image dimensions.
+ *   MCU_width, MCU_height: pixel dimensions of MCU.
+ *   transform: transformation identifier.
+ * Parameter sources from initialized jpeg_struct
+ * (after reading source header):
+ *   image_width = cinfo.image_width
+ *   image_height = cinfo.image_height
+ *   MCU_width = cinfo.max_h_samp_factor * cinfo.block_size
+ *   MCU_height = cinfo.max_v_samp_factor * cinfo.block_size
+ * Result:
+ *   TRUE = perfect transformation possible
+ *   FALSE = perfect transformation not possible
+ *           (may use custom action then)
+ */
+
+GLOBAL(boolean)
+jtransform_perfect_transform(JDIMENSION image_width, JDIMENSION image_height,
+                             int MCU_width, int MCU_height,
+                             JXFORM_CODE transform)
+{
+  boolean result = TRUE; /* initialize TRUE */
+
+  switch (transform) {
+  case JXFORM_FLIP_H:
+  case JXFORM_ROT_270:
+    if (image_width % (JDIMENSION)MCU_width)
+      result = FALSE;
+    break;
+  case JXFORM_FLIP_V:
+  case JXFORM_ROT_90:
+    if (image_height % (JDIMENSION)MCU_height)
+      result = FALSE;
+    break;
+  case JXFORM_TRANSVERSE:
+  case JXFORM_ROT_180:
+    if (image_width % (JDIMENSION)MCU_width)
+      result = FALSE;
+    if (image_height % (JDIMENSION)MCU_height)
+      result = FALSE;
+    break;
+  default:
+    break;
+  }
+
+  return result;
+}
+
+#endif /* TRANSFORMS_SUPPORTED */
+
+
+/* Setup decompression object to save desired markers in memory.
+ * This must be called before jpeg_read_header() to have the desired effect.
+ */
+
+GLOBAL(void)
+jcopy_markers_setup(j_decompress_ptr srcinfo, JCOPY_OPTION option)
+{
+#ifdef SAVE_MARKERS_SUPPORTED
+  int m;
+
+  /* Save comments unless JCOPYOPT_NONE or JCOPYOPT_ICC specified */
+  if (option != JCOPYOPT_NONE && option != JCOPYOPT_ICC) {
+    jpeg_save_markers(srcinfo, JPEG_COM, 0xFFFF);
+  }
+  /* Save all APPn markers iff JCOPYOPT_ALL* specified ... */
+  if (option == JCOPYOPT_ALL || option == JCOPYOPT_ALL_EXCEPT_ICC) {
+    for (m = 0; m < 16; m++) {
+      /* ... except APP2 markers if JCOPYOPT_ALL_EXCEPT_ICC specified */
+      if (option == JCOPYOPT_ALL_EXCEPT_ICC && m == 2)
+        continue;
+      jpeg_save_markers(srcinfo, JPEG_APP0 + m, 0xFFFF);
+    }
+  }
+  /* Save only APP2 markers if JCOPYOPT_ICC specified */
+  if (option == JCOPYOPT_ICC) {
+    jpeg_save_markers(srcinfo, JPEG_APP0 + 2, 0xFFFF);
+  }
+#endif /* SAVE_MARKERS_SUPPORTED */
+}
+
+/* Copy markers saved in the given source object to the destination object.
+ * This should be called just after jpeg_start_compress() or
+ * jpeg_write_coefficients().
+ * Note that those routines will have written the SOI, and also the
+ * JFIF APP0 or Adobe APP14 markers if selected.
+ */
+
+GLOBAL(void)
+jcopy_markers_execute(j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+                      JCOPY_OPTION option)
+{
+  jpeg_saved_marker_ptr marker;
+
+  for (marker = srcinfo->marker_list; marker != NULL; marker = marker->next) {
+    if (option == JCOPYOPT_NONE)
+      continue;
+    else if (option == JCOPYOPT_COMMENTS) {
+      if (marker->marker != JPEG_COM)
+        continue;
+    } else if (option == JCOPYOPT_ALL_EXCEPT_ICC) {
+      if (marker->marker == JPEG_APP0 + 2)
+        continue;
+    } else if (option == JCOPYOPT_ICC) {
+      if (marker->marker != JPEG_APP0 + 2)
+        continue;
+    }
+    /* To avoid confusion, we do not output JFIF and Adobe APP14 markers if the
+     * encoder library already wrote one.
+     */
+    if (dstinfo->write_JFIF_header &&
+        marker->marker == JPEG_APP0 &&
+        marker->data_length >= 5 &&
+        marker->data[0] == 0x4A &&
+        marker->data[1] == 0x46 &&
+        marker->data[2] == 0x49 &&
+        marker->data[3] == 0x46 &&
+        marker->data[4] == 0)
+      continue;                 /* reject duplicate JFIF */
+    if (dstinfo->write_Adobe_marker &&
+        marker->marker == JPEG_APP0 + 14 &&
+        marker->data_length >= 5 &&
+        marker->data[0] == 0x41 &&
+        marker->data[1] == 0x64 &&
+        marker->data[2] == 0x6F &&
+        marker->data[3] == 0x62 &&
+        marker->data[4] == 0x65)
+      continue;                 /* reject duplicate Adobe */
+    jpeg_write_marker(dstinfo, marker->marker,
+                      marker->data, marker->data_length);
+  }
+}
diff --git a/thirdparty/libjpeg-turbo/src/transupp.h b/thirdparty/libjpeg-turbo/src/transupp.h
new file mode 100644
index 00000000000..cea1f409214
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/transupp.h
@@ -0,0 +1,231 @@
+/*
+ * transupp.h
+ *
+ * This file was part of the Independent JPEG Group's software:
+ * Copyright (C) 1997-2019, Thomas G. Lane, Guido Vollbeding.
+ * libjpeg-turbo Modifications:
+ * Copyright (C) 2017, 2021, D. R. Commander.
+ * For conditions of distribution and use, see the accompanying README.ijg
+ * file.
+ *
+ * This file contains declarations for image transformation routines and
+ * other utility code used by the jpegtran sample application.  These are
+ * NOT part of the core JPEG library.  But we keep these routines separate
+ * from jpegtran.c to ease the task of maintaining jpegtran-like programs
+ * that have other user interfaces.
+ *
+ * NOTE: all the routines declared here have very specific requirements
+ * about when they are to be executed during the reading and writing of the
+ * source and destination files.  See the comments in transupp.c, or see
+ * jpegtran.c for an example of correct usage.
+ */
+
+/* If you happen not to want the image transform support, disable it here */
+#ifndef TRANSFORMS_SUPPORTED
+#define TRANSFORMS_SUPPORTED  1         /* 0 disables transform code */
+#endif
+
+/*
+ * Although rotating and flipping data expressed as DCT coefficients is not
+ * hard, there is an asymmetry in the JPEG format specification for images
+ * whose dimensions aren't multiples of the iMCU size.  The right and bottom
+ * image edges are padded out to the next iMCU boundary with junk data; but
+ * no padding is possible at the top and left edges.  If we were to flip
+ * the whole image including the pad data, then pad garbage would become
+ * visible at the top and/or left, and real pixels would disappear into the
+ * pad margins --- perhaps permanently, since encoders & decoders may not
+ * bother to preserve DCT blocks that appear to be completely outside the
+ * nominal image area.  So, we have to exclude any partial iMCUs from the
+ * basic transformation.
+ *
+ * Transpose is the only transformation that can handle partial iMCUs at the
+ * right and bottom edges completely cleanly.  flip_h can flip partial iMCUs
+ * at the bottom, but leaves any partial iMCUs at the right edge untouched.
+ * Similarly flip_v leaves any partial iMCUs at the bottom edge untouched.
+ * The other transforms are defined as combinations of these basic transforms
+ * and process edge blocks in a way that preserves the equivalence.
+ *
+ * The "trim" option causes untransformable partial iMCUs to be dropped;
+ * this is not strictly lossless, but it usually gives the best-looking
+ * result for odd-size images.  Note that when this option is active,
+ * the expected mathematical equivalences between the transforms may not hold.
+ * (For example, -rot 270 -trim trims only the bottom edge, but -rot 90 -trim
+ * followed by -rot 180 -trim trims both edges.)
+ *
+ * We also offer a lossless-crop option, which discards data outside a given
+ * image region but losslessly preserves what is inside.  Like the rotate and
+ * flip transforms, lossless crop is restricted by the JPEG format: the upper
+ * left corner of the selected region must fall on an iMCU boundary.  If this
+ * does not hold for the given crop parameters, we silently move the upper left
+ * corner up and/or left to make it so, simultaneously increasing the region
+ * dimensions to keep the lower right crop corner unchanged.  (Thus, the
+ * output image covers at least the requested region, but may cover more.)
+ * The adjustment of the region dimensions may be optionally disabled.
+ *
+ * A complementary lossless wipe option is provided to discard (gray out) data
+ * inside a given image region while losslessly preserving what is outside.
+ * A lossless drop option is also provided, which allows another JPEG image to
+ * be inserted ("dropped") into the source image data at a given position,
+ * replacing the existing image data at that position.  Both the source image
+ * and the drop image must have the same subsampling level.  It is best if they
+ * also have the same quantization (quality.)  Otherwise, the quantization of
+ * the output image will be adapted to accommodate the higher of the source
+ * image quality and the drop image quality.  The trim option can be used with
+ * the drop option to requantize the drop image to match the source image.
+ *
+ * We also provide a lossless-resize option, which is kind of a lossless-crop
+ * operation in the DCT coefficient block domain - it discards higher-order
+ * coefficients and losslessly preserves lower-order coefficients of a
+ * sub-block.
+ *
+ * Rotate/flip transform, resize, and crop can be requested together in a
+ * single invocation.  The crop is applied last --- that is, the crop region
+ * is specified in terms of the destination image after transform/resize.
+ *
+ * We also offer a "force to grayscale" option, which simply discards the
+ * chrominance channels of a YCbCr image.  This is lossless in the sense that
+ * the luminance channel is preserved exactly.  It's not the same kind of
+ * thing as the rotate/flip transformations, but it's convenient to handle it
+ * as part of this package, mainly because the transformation routines have to
+ * be aware of the option to know how many components to work on.
+ */
+
+
+/*
+ * Codes for supported types of image transformations.
+ */
+
+typedef enum {
+  JXFORM_NONE,            /* no transformation */
+  JXFORM_FLIP_H,          /* horizontal flip */
+  JXFORM_FLIP_V,          /* vertical flip */
+  JXFORM_TRANSPOSE,       /* transpose across UL-to-LR axis */
+  JXFORM_TRANSVERSE,      /* transpose across UR-to-LL axis */
+  JXFORM_ROT_90,          /* 90-degree clockwise rotation */
+  JXFORM_ROT_180,         /* 180-degree rotation */
+  JXFORM_ROT_270,         /* 270-degree clockwise (or 90 ccw) */
+  JXFORM_WIPE,            /* wipe */
+  JXFORM_DROP             /* drop */
+} JXFORM_CODE;
+
+/*
+ * Codes for crop parameters, which can individually be unspecified,
+ * positive or negative for xoffset or yoffset,
+ * positive or force or reflect for width or height.
+ */
+
+typedef enum {
+  JCROP_UNSET,
+  JCROP_POS,
+  JCROP_NEG,
+  JCROP_FORCE,
+  JCROP_REFLECT
+} JCROP_CODE;
+
+/*
+ * Transform parameters struct.
+ * NB: application must not change any elements of this struct after
+ * calling jtransform_request_workspace.
+ */
+
+typedef struct {
+  /* Options: set by caller */
+  JXFORM_CODE transform;        /* image transform operator */
+  boolean perfect;              /* if TRUE, fail if partial MCUs are requested */
+  boolean trim;                 /* if TRUE, trim partial MCUs as needed */
+  boolean force_grayscale;      /* if TRUE, convert color image to grayscale */
+  boolean crop;                 /* if TRUE, crop or wipe source image, or drop */
+  boolean slow_hflip;  /* For best performance, the JXFORM_FLIP_H transform
+                          normally modifies the source coefficients in place.
+                          Setting this to TRUE will instead use a slower,
+                          double-buffered algorithm, which leaves the source
+                          coefficients in tact (necessary if other transformed
+                          images must be generated from the same set of
+                          coefficients. */
+
+  /* Crop parameters: application need not set these unless crop is TRUE.
+   * These can be filled in by jtransform_parse_crop_spec().
+   */
+  JDIMENSION crop_width;        /* Width of selected region */
+  JCROP_CODE crop_width_set;    /* (force-disables adjustment) */
+  JDIMENSION crop_height;       /* Height of selected region */
+  JCROP_CODE crop_height_set;   /* (force-disables adjustment) */
+  JDIMENSION crop_xoffset;      /* X offset of selected region */
+  JCROP_CODE crop_xoffset_set;  /* (negative measures from right edge) */
+  JDIMENSION crop_yoffset;      /* Y offset of selected region */
+  JCROP_CODE crop_yoffset_set;  /* (negative measures from bottom edge) */
+
+  /* Drop parameters: set by caller for drop request */
+  j_decompress_ptr drop_ptr;
+  jvirt_barray_ptr *drop_coef_arrays;
+
+  /* Internal workspace: caller should not touch these */
+  int num_components;           /* # of components in workspace */
+  jvirt_barray_ptr *workspace_coef_arrays; /* workspace for transformations */
+  JDIMENSION output_width;      /* cropped destination dimensions */
+  JDIMENSION output_height;
+  JDIMENSION x_crop_offset;     /* destination crop offsets measured in iMCUs */
+  JDIMENSION y_crop_offset;
+  JDIMENSION drop_width;        /* drop/wipe dimensions measured in iMCUs */
+  JDIMENSION drop_height;
+  int iMCU_sample_width;        /* destination iMCU size */
+  int iMCU_sample_height;
+} jpeg_transform_info;
+
+
+#if TRANSFORMS_SUPPORTED
+
+/* Parse a crop specification (written in X11 geometry style) */
+EXTERN(boolean) jtransform_parse_crop_spec(jpeg_transform_info *info,
+                                           const char *spec);
+/* Request any required workspace */
+EXTERN(boolean) jtransform_request_workspace(j_decompress_ptr srcinfo,
+                                             jpeg_transform_info *info);
+/* Adjust output image parameters */
+EXTERN(jvirt_barray_ptr *) jtransform_adjust_parameters
+  (j_decompress_ptr srcinfo, j_compress_ptr dstinfo,
+   jvirt_barray_ptr *src_coef_arrays, jpeg_transform_info *info);
+/* Execute the actual transformation, if any */
+EXTERN(void) jtransform_execute_transform(j_decompress_ptr srcinfo,
+                                          j_compress_ptr dstinfo,
+                                          jvirt_barray_ptr *src_coef_arrays,
+                                          jpeg_transform_info *info);
+/* Determine whether lossless transformation is perfectly
+ * possible for a specified image and transformation.
+ */
+EXTERN(boolean) jtransform_perfect_transform(JDIMENSION image_width,
+                                             JDIMENSION image_height,
+                                             int MCU_width, int MCU_height,
+                                             JXFORM_CODE transform);
+
+/* jtransform_execute_transform used to be called
+ * jtransform_execute_transformation, but some compilers complain about
+ * routine names that long.  This macro is here to avoid breaking any
+ * old source code that uses the original name...
+ */
+#define jtransform_execute_transformation       jtransform_execute_transform
+
+#endif /* TRANSFORMS_SUPPORTED */
+
+
+/*
+ * Support for copying optional markers from source to destination file.
+ */
+
+typedef enum {
+  JCOPYOPT_NONE,           /* copy no optional markers */
+  JCOPYOPT_COMMENTS,       /* copy only comment (COM) markers */
+  JCOPYOPT_ALL,            /* copy all optional markers */
+  JCOPYOPT_ALL_EXCEPT_ICC, /* copy all optional markers except APP2 */
+  JCOPYOPT_ICC             /* copy only ICC profile (APP2) markers */
+} JCOPY_OPTION;
+
+#define JCOPYOPT_DEFAULT  JCOPYOPT_COMMENTS     /* recommended default */
+
+/* Setup decompression object to save desired markers in memory */
+EXTERN(void) jcopy_markers_setup(j_decompress_ptr srcinfo,
+                                 JCOPY_OPTION option);
+/* Copy markers saved in the given source object to the destination object */
+EXTERN(void) jcopy_markers_execute(j_decompress_ptr srcinfo,
+                                   j_compress_ptr dstinfo,
+                                   JCOPY_OPTION option);
diff --git a/thirdparty/libjpeg-turbo/src/turbojpeg-mp.c b/thirdparty/libjpeg-turbo/src/turbojpeg-mp.c
new file mode 100644
index 00000000000..72f99e236ab
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/turbojpeg-mp.c
@@ -0,0 +1,297 @@
+/*
+ * Copyright (C)2009-2024 D. R. Commander.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* TurboJPEG API functions that must be compiled for multiple data
+   precisions */
+
+#if BITS_IN_JSAMPLE == 8
+#define _JSAMPLE  JSAMPLE
+#define _JSAMPROW  JSAMPROW
+#define _buffer  buffer
+#define _jinit_read_ppm  jinit_read_ppm
+#define _jinit_write_ppm  jinit_write_ppm
+#define _jpeg_crop_scanline  jpeg_crop_scanline
+#define _jpeg_read_scanlines  jpeg_read_scanlines
+#define _jpeg_skip_scanlines  jpeg_skip_scanlines
+#define _jpeg_write_scanlines  jpeg_write_scanlines
+#elif BITS_IN_JSAMPLE == 12
+#define _JSAMPLE  J12SAMPLE
+#define _JSAMPROW  J12SAMPROW
+#define _buffer  buffer12
+#define _jinit_read_ppm  j12init_read_ppm
+#define _jinit_write_ppm  j12init_write_ppm
+#define _jpeg_crop_scanline  jpeg12_crop_scanline
+#define _jpeg_read_scanlines  jpeg12_read_scanlines
+#define _jpeg_skip_scanlines  jpeg12_skip_scanlines
+#define _jpeg_write_scanlines  jpeg12_write_scanlines
+#elif BITS_IN_JSAMPLE == 16
+#define _JSAMPLE  J16SAMPLE
+#define _JSAMPROW  J16SAMPROW
+#define _buffer  buffer16
+#define _jinit_read_ppm  j16init_read_ppm
+#define _jinit_write_ppm  j16init_write_ppm
+#define _jpeg_read_scanlines  jpeg16_read_scanlines
+#define _jpeg_write_scanlines  jpeg16_write_scanlines
+#endif
+
+#define _GET_NAME(name, suffix)  name##suffix
+#define GET_NAME(name, suffix)  _GET_NAME(name, suffix)
+#define _GET_STRING(name, suffix)  #name #suffix
+#define GET_STRING(name, suffix)  _GET_STRING(name, suffix)
+
+
+/******************************** Compressor *********************************/
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int GET_NAME(tj3Compress, BITS_IN_JSAMPLE)
+  (tjhandle handle, const _JSAMPLE *srcBuf, int width, int pitch, int height,
+   int pixelFormat, unsigned char **jpegBuf, size_t *jpegSize)
+{
+  static const char FUNCTION_NAME[] = GET_STRING(tj3Compress, BITS_IN_JSAMPLE);
+  int i, retval = 0;
+  boolean alloc = TRUE;
+  _JSAMPROW *row_pointer = NULL;
+
+  GET_CINSTANCE(handle)
+  if ((this->init & COMPRESS) == 0)
+    THROW("Instance has not been initialized for compression");
+
+  if (srcBuf == NULL || width <= 0 || pitch < 0 || height <= 0 ||
+      pixelFormat < 0 || pixelFormat >= TJ_NUMPF || jpegBuf == NULL ||
+      jpegSize == NULL)
+    THROW("Invalid argument");
+
+  if (!this->lossless && this->quality == -1)
+    THROW("TJPARAM_QUALITY must be specified");
+  if (!this->lossless && this->subsamp == TJSAMP_UNKNOWN)
+    THROW("TJPARAM_SUBSAMP must be specified");
+
+  if (pitch == 0) pitch = width * tjPixelSize[pixelFormat];
+
+  if ((row_pointer = (_JSAMPROW *)malloc(sizeof(_JSAMPROW) * height)) == NULL)
+    THROW("Memory allocation failure");
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  cinfo->image_width = width;
+  cinfo->image_height = height;
+  cinfo->data_precision = BITS_IN_JSAMPLE;
+#if BITS_IN_JSAMPLE == 8
+  if (this->lossless && this->precision >= 2 &&
+      this->precision <= BITS_IN_JSAMPLE)
+#else
+  if (this->lossless && this->precision >= BITS_IN_JSAMPLE - 3 &&
+      this->precision <= BITS_IN_JSAMPLE)
+#endif
+    cinfo->data_precision = this->precision;
+
+  setCompDefaults(this, pixelFormat);
+  if (this->noRealloc) alloc = FALSE;
+  jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc);
+
+  jpeg_start_compress(cinfo, TRUE);
+  if (this->iccBuf != NULL && this->iccSize != 0)
+    jpeg_write_icc_profile(cinfo, this->iccBuf, (unsigned int)this->iccSize);
+  for (i = 0; i < height; i++) {
+    if (this->bottomUp)
+      row_pointer[i] = (_JSAMPROW)&srcBuf[(height - i - 1) * (size_t)pitch];
+    else
+      row_pointer[i] = (_JSAMPROW)&srcBuf[i * (size_t)pitch];
+  }
+  while (cinfo->next_scanline < cinfo->image_height)
+    _jpeg_write_scanlines(cinfo, &row_pointer[cinfo->next_scanline],
+                          cinfo->image_height - cinfo->next_scanline);
+  jpeg_finish_compress(cinfo);
+
+bailout:
+  if (cinfo->global_state > CSTATE_START && alloc)
+    (*cinfo->dest->term_destination) (cinfo);
+  if (cinfo->global_state > CSTATE_START || retval == -1)
+    jpeg_abort_compress(cinfo);
+  free(row_pointer);
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+
+/******************************* Decompressor ********************************/
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int GET_NAME(tj3Decompress, BITS_IN_JSAMPLE)
+  (tjhandle handle, const unsigned char *jpegBuf, size_t jpegSize,
+   _JSAMPLE *dstBuf, int pitch, int pixelFormat)
+{
+  static const char FUNCTION_NAME[] =
+    GET_STRING(tj3Decompress, BITS_IN_JSAMPLE);
+  _JSAMPROW *row_pointer = NULL;
+  int croppedHeight, i, retval = 0;
+#if BITS_IN_JSAMPLE != 16
+  int scaledWidth;
+#endif
+  struct my_progress_mgr progress;
+
+  GET_DINSTANCE(handle);
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (jpegBuf == NULL || jpegSize <= 0 || dstBuf == NULL || pitch < 0 ||
+      pixelFormat < 0 || pixelFormat >= TJ_NUMPF)
+    THROW("Invalid argument");
+
+  if (this->scanLimit) {
+    memset(&progress, 0, sizeof(struct my_progress_mgr));
+    progress.pub.progress_monitor = my_progress_monitor;
+    progress.this = this;
+    dinfo->progress = &progress.pub;
+  } else
+    dinfo->progress = NULL;
+
+  dinfo->mem->max_memory_to_use = (long)this->maxMemory * 1048576L;
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  if (dinfo->global_state <= DSTATE_INHEADER) {
+    jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+    jpeg_read_header(dinfo, TRUE);
+  }
+  setDecompParameters(this);
+  if (this->maxPixels &&
+      (unsigned long long)this->jpegWidth * this->jpegHeight >
+      (unsigned long long)this->maxPixels)
+    THROW("Image is too large");
+  this->dinfo.out_color_space = pf2cs[pixelFormat];
+#if BITS_IN_JSAMPLE != 16
+  scaledWidth = TJSCALED(dinfo->image_width, this->scalingFactor);
+#endif
+  dinfo->do_fancy_upsampling = !this->fastUpsample;
+  this->dinfo.dct_method = this->fastDCT ? JDCT_FASTEST : JDCT_ISLOW;
+
+  dinfo->scale_num = this->scalingFactor.num;
+  dinfo->scale_denom = this->scalingFactor.denom;
+
+  jpeg_start_decompress(dinfo);
+
+#if BITS_IN_JSAMPLE != 16
+  if (this->croppingRegion.x != 0 ||
+      (this->croppingRegion.w != 0 && this->croppingRegion.w != scaledWidth)) {
+    JDIMENSION crop_x = this->croppingRegion.x;
+    JDIMENSION crop_w = this->croppingRegion.w;
+
+    _jpeg_crop_scanline(dinfo, &crop_x, &crop_w);
+    if ((int)crop_x != this->croppingRegion.x)
+      THROWI("Unexplained mismatch between specified (%d) and\n"
+             "actual (%d) cropping region left boundary",
+             this->croppingRegion.x, (int)crop_x);
+    if ((int)crop_w != this->croppingRegion.w)
+      THROWI("Unexplained mismatch between specified (%d) and\n"
+             "actual (%d) cropping region width",
+             this->croppingRegion.w, (int)crop_w);
+  }
+#endif
+
+  if (pitch == 0) pitch = dinfo->output_width * tjPixelSize[pixelFormat];
+
+  croppedHeight = dinfo->output_height;
+#if BITS_IN_JSAMPLE != 16
+  if (this->croppingRegion.y != 0 || this->croppingRegion.h != 0)
+    croppedHeight = this->croppingRegion.h;
+#endif
+  if ((row_pointer =
+       (_JSAMPROW *)malloc(sizeof(_JSAMPROW) * croppedHeight)) == NULL)
+    THROW("Memory allocation failure");
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+  for (i = 0; i < (int)croppedHeight; i++) {
+    if (this->bottomUp)
+      row_pointer[i] = &dstBuf[(croppedHeight - i - 1) * (size_t)pitch];
+    else
+      row_pointer[i] = &dstBuf[i * (size_t)pitch];
+  }
+
+#if BITS_IN_JSAMPLE != 16
+  if (this->croppingRegion.y != 0 || this->croppingRegion.h != 0) {
+    if (this->croppingRegion.y != 0) {
+      JDIMENSION lines = _jpeg_skip_scanlines(dinfo, this->croppingRegion.y);
+
+      if ((int)lines != this->croppingRegion.y)
+        THROWI("Unexplained mismatch between specified (%d) and\n"
+               "actual (%d) cropping region upper boundary",
+               this->croppingRegion.y, (int)lines);
+    }
+    while ((int)dinfo->output_scanline <
+           this->croppingRegion.y + this->croppingRegion.h)
+      _jpeg_read_scanlines(dinfo, &row_pointer[dinfo->output_scanline -
+                                               this->croppingRegion.y],
+                           this->croppingRegion.y + this->croppingRegion.h -
+                           dinfo->output_scanline);
+    if (this->croppingRegion.y + this->croppingRegion.h !=
+        (int)dinfo->output_height) {
+      JDIMENSION lines = _jpeg_skip_scanlines(dinfo, dinfo->output_height -
+                                                     this->croppingRegion.y -
+                                                     this->croppingRegion.h);
+
+      if (lines != dinfo->output_height - this->croppingRegion.y -
+                   this->croppingRegion.h)
+        THROWI("Unexplained mismatch between specified (%d) and\n"
+               "actual (%d) cropping region lower boundary",
+               this->croppingRegion.y + this->croppingRegion.h,
+               (int)(dinfo->output_height - lines));
+    }
+  } else
+#endif
+  {
+    while (dinfo->output_scanline < dinfo->output_height)
+      _jpeg_read_scanlines(dinfo, &row_pointer[dinfo->output_scanline],
+                           dinfo->output_height - dinfo->output_scanline);
+  }
+  jpeg_finish_decompress(dinfo);
+
+bailout:
+  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
+  free(row_pointer);
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+#undef _JSAMPLE
+#undef _JSAMPROW
+#undef _buffer
+#undef _jinit_read_ppm
+#undef _jinit_write_ppm
+#undef _jpeg_crop_scanline
+#undef _jpeg_read_scanlines
+#undef _jpeg_skip_scanlines
+#undef _jpeg_write_scanlines
diff --git a/thirdparty/libjpeg-turbo/src/turbojpeg.c b/thirdparty/libjpeg-turbo/src/turbojpeg.c
new file mode 100644
index 00000000000..8ce446148a0
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/turbojpeg.c
@@ -0,0 +1,3097 @@
+/*
+ * Copyright (C)2009-2024 D. R. Commander.  All Rights Reserved.
+ * Copyright (C)2021 Alex Richardson.  All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* TurboJPEG/LJT:  this implements the TurboJPEG API using libjpeg or
+   libjpeg-turbo */
+
+#include <ctype.h>
+#include <limits.h>
+#if !defined(_MSC_VER) || _MSC_VER > 1600
+#include <stdint.h>
+#endif
+#include <jinclude.h>
+#define JPEG_INTERNALS
+#include <jpeglib.h>
+#include <jerror.h>
+#include <setjmp.h>
+#include <errno.h>
+#include "./turbojpeg.h"
+#include "./tjutil.h"
+#include "transupp.h"
+#include "./jpegapicomp.h"
+#include "./cdjpeg.h"
+
+extern void jpeg_mem_dest_tj(j_compress_ptr, unsigned char **, size_t *,
+                             boolean);
+extern void jpeg_mem_src_tj(j_decompress_ptr, const unsigned char *, size_t);
+
+#define PAD(v, p)  ((v + (p) - 1) & (~((p) - 1)))
+#define IS_POW2(x)  (((x) & (x - 1)) == 0)
+
+
+/* Error handling (based on example in example.c) */
+
+static THREAD_LOCAL char errStr[JMSG_LENGTH_MAX] = "No error";
+
+struct my_error_mgr {
+  struct jpeg_error_mgr pub;
+  jmp_buf setjmp_buffer;
+  void (*emit_message) (j_common_ptr, int);
+  boolean warning, stopOnWarning;
+};
+typedef struct my_error_mgr *my_error_ptr;
+
+#define JMESSAGE(code, string)  string,
+static const char *turbojpeg_message_table[] = {
+#include "cderror.h"
+  NULL
+};
+
+static void my_error_exit(j_common_ptr cinfo)
+{
+  my_error_ptr myerr = (my_error_ptr)cinfo->err;
+
+  (*cinfo->err->output_message) (cinfo);
+  longjmp(myerr->setjmp_buffer, 1);
+}
+
+/* Based on output_message() in jerror.c */
+
+static void my_output_message(j_common_ptr cinfo)
+{
+  (*cinfo->err->format_message) (cinfo, errStr);
+}
+
+static void my_emit_message(j_common_ptr cinfo, int msg_level)
+{
+  my_error_ptr myerr = (my_error_ptr)cinfo->err;
+
+  myerr->emit_message(cinfo, msg_level);
+  if (msg_level < 0) {
+    myerr->warning = TRUE;
+    if (myerr->stopOnWarning) longjmp(myerr->setjmp_buffer, 1);
+  }
+}
+
+
+/********************** Global structures, macros, etc. **********************/
+
+enum { COMPRESS = 1, DECOMPRESS = 2 };
+
+typedef struct _tjinstance {
+  struct jpeg_compress_struct cinfo;
+  struct jpeg_decompress_struct dinfo;
+  struct my_error_mgr jerr;
+  int init;
+  char errStr[JMSG_LENGTH_MAX];
+  boolean isInstanceError;
+  /* Parameters */
+  boolean bottomUp;
+  boolean noRealloc;
+  int quality;
+  int subsamp;
+  int jpegWidth;
+  int jpegHeight;
+  int precision;
+  int colorspace;
+  boolean fastUpsample;
+  boolean fastDCT;
+  boolean optimize;
+  boolean progressive;
+  int scanLimit;
+  boolean arithmetic;
+  boolean lossless;
+  int losslessPSV;
+  int losslessPt;
+  int restartIntervalBlocks;
+  int restartIntervalRows;
+  int xDensity;
+  int yDensity;
+  int densityUnits;
+  tjscalingfactor scalingFactor;
+  tjregion croppingRegion;
+  int maxMemory;
+  int maxPixels;
+  int saveMarkers;
+  unsigned char *iccBuf, *tempICCBuf;
+  size_t iccSize, tempICCSize;
+} tjinstance;
+
+static tjhandle _tjInitCompress(tjinstance *this);
+static tjhandle _tjInitDecompress(tjinstance *this);
+
+struct my_progress_mgr {
+  struct jpeg_progress_mgr pub;
+  tjinstance *this;
+};
+typedef struct my_progress_mgr *my_progress_ptr;
+
+static void my_progress_monitor(j_common_ptr dinfo)
+{
+  my_error_ptr myerr = (my_error_ptr)dinfo->err;
+  my_progress_ptr myprog = (my_progress_ptr)dinfo->progress;
+
+  if (dinfo->is_decompressor) {
+    int scan_no = ((j_decompress_ptr)dinfo)->input_scan_number;
+
+    if (scan_no > myprog->this->scanLimit) {
+      SNPRINTF(myprog->this->errStr, JMSG_LENGTH_MAX,
+               "Progressive JPEG image has more than %d scans",
+               myprog->this->scanLimit);
+      SNPRINTF(errStr, JMSG_LENGTH_MAX,
+               "Progressive JPEG image has more than %d scans",
+               myprog->this->scanLimit);
+      myprog->this->isInstanceError = TRUE;
+      myerr->warning = FALSE;
+      longjmp(myerr->setjmp_buffer, 1);
+    }
+  }
+}
+
+static const JXFORM_CODE xformtypes[TJ_NUMXOP] = {
+  JXFORM_NONE, JXFORM_FLIP_H, JXFORM_FLIP_V, JXFORM_TRANSPOSE,
+  JXFORM_TRANSVERSE, JXFORM_ROT_90, JXFORM_ROT_180, JXFORM_ROT_270
+};
+
+#define NUMSF  16
+static const tjscalingfactor sf[NUMSF] = {
+  { 2, 1 },
+  { 15, 8 },
+  { 7, 4 },
+  { 13, 8 },
+  { 3, 2 },
+  { 11, 8 },
+  { 5, 4 },
+  { 9, 8 },
+  { 1, 1 },
+  { 7, 8 },
+  { 3, 4 },
+  { 5, 8 },
+  { 1, 2 },
+  { 3, 8 },
+  { 1, 4 },
+  { 1, 8 }
+};
+
+static J_COLOR_SPACE pf2cs[TJ_NUMPF] = {
+  JCS_EXT_RGB, JCS_EXT_BGR, JCS_EXT_RGBX, JCS_EXT_BGRX, JCS_EXT_XBGR,
+  JCS_EXT_XRGB, JCS_GRAYSCALE, JCS_EXT_RGBA, JCS_EXT_BGRA, JCS_EXT_ABGR,
+  JCS_EXT_ARGB, JCS_CMYK
+};
+
+static int cs2pf[JPEG_NUMCS] = {
+  TJPF_UNKNOWN, TJPF_GRAY,
+#if RGB_RED == 0 && RGB_GREEN == 1 && RGB_BLUE == 2 && RGB_PIXELSIZE == 3
+  TJPF_RGB,
+#elif RGB_RED == 2 && RGB_GREEN == 1 && RGB_BLUE == 0 && RGB_PIXELSIZE == 3
+  TJPF_BGR,
+#elif RGB_RED == 0 && RGB_GREEN == 1 && RGB_BLUE == 2 && RGB_PIXELSIZE == 4
+  TJPF_RGBX,
+#elif RGB_RED == 2 && RGB_GREEN == 1 && RGB_BLUE == 0 && RGB_PIXELSIZE == 4
+  TJPF_BGRX,
+#elif RGB_RED == 3 && RGB_GREEN == 2 && RGB_BLUE == 1 && RGB_PIXELSIZE == 4
+  TJPF_XBGR,
+#elif RGB_RED == 1 && RGB_GREEN == 2 && RGB_BLUE == 3 && RGB_PIXELSIZE == 4
+  TJPF_XRGB,
+#endif
+  TJPF_UNKNOWN, TJPF_CMYK, TJPF_UNKNOWN, TJPF_RGB, TJPF_RGBX, TJPF_BGR,
+  TJPF_BGRX, TJPF_XBGR, TJPF_XRGB, TJPF_RGBA, TJPF_BGRA, TJPF_ABGR, TJPF_ARGB,
+  TJPF_UNKNOWN
+};
+
+#define THROWG(m, rv) { \
+  SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s(): %s", FUNCTION_NAME, m); \
+  retval = rv;  goto bailout; \
+}
+#ifdef _MSC_VER
+#define THROW_UNIX(m) { \
+  char strerrorBuf[80] = { 0 }; \
+  strerror_s(strerrorBuf, 80, errno); \
+  SNPRINTF(this->errStr, JMSG_LENGTH_MAX, "%s(): %s\n%s", FUNCTION_NAME, m, \
+           strerrorBuf); \
+  this->isInstanceError = TRUE; \
+  SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s(): %s\n%s", FUNCTION_NAME, m, \
+           strerrorBuf); \
+  retval = -1;  goto bailout; \
+}
+#else
+#define THROW_UNIX(m) { \
+  SNPRINTF(this->errStr, JMSG_LENGTH_MAX, "%s(): %s\n%s", FUNCTION_NAME, m, \
+           strerror(errno)); \
+  this->isInstanceError = TRUE; \
+  SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s(): %s\n%s", FUNCTION_NAME, m, \
+           strerror(errno)); \
+  retval = -1;  goto bailout; \
+}
+#endif
+#define THROWRV(m, rv) { \
+  SNPRINTF(this->errStr, JMSG_LENGTH_MAX, "%s(): %s", FUNCTION_NAME, m); \
+  this->isInstanceError = TRUE;  THROWG(m, rv) \
+}
+#define THROW(m)  THROWRV(m, -1)
+#define THROWI(format, val1, val2) { \
+  SNPRINTF(this->errStr, JMSG_LENGTH_MAX, "%s(): " format, FUNCTION_NAME, \
+           val1, val2); \
+  this->isInstanceError = TRUE; \
+  SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s(): " format, FUNCTION_NAME, val1, \
+           val2); \
+  retval = -1;  goto bailout; \
+}
+
+#define GET_INSTANCE(handle) \
+  tjinstance *this = (tjinstance *)handle; \
+  j_compress_ptr cinfo = NULL; \
+  j_decompress_ptr dinfo = NULL; \
+  \
+  if (!this) { \
+    SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s(): Invalid handle", FUNCTION_NAME); \
+    return -1; \
+  } \
+  cinfo = &this->cinfo;  dinfo = &this->dinfo; \
+  this->jerr.warning = FALSE; \
+  this->isInstanceError = FALSE;
+
+#define GET_CINSTANCE(handle) \
+  tjinstance *this = (tjinstance *)handle; \
+  j_compress_ptr cinfo = NULL; \
+  \
+  if (!this) { \
+    SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s(): Invalid handle", FUNCTION_NAME); \
+    return -1; \
+  } \
+  cinfo = &this->cinfo; \
+  this->jerr.warning = FALSE; \
+  this->isInstanceError = FALSE;
+
+#define GET_DINSTANCE(handle) \
+  tjinstance *this = (tjinstance *)handle; \
+  j_decompress_ptr dinfo = NULL; \
+  \
+  if (!this) { \
+    SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s(): Invalid handle", FUNCTION_NAME); \
+    return -1; \
+  } \
+  dinfo = &this->dinfo; \
+  this->jerr.warning = FALSE; \
+  this->isInstanceError = FALSE;
+
+#define GET_TJINSTANCE(handle, errorReturn) \
+  tjinstance *this = (tjinstance *)handle; \
+  \
+  if (!this) { \
+    SNPRINTF(errStr, JMSG_LENGTH_MAX, "%s(): Invalid handle", FUNCTION_NAME); \
+    return errorReturn; \
+  } \
+  this->jerr.warning = FALSE; \
+  this->isInstanceError = FALSE;
+
+static int getPixelFormat(int pixelSize, int flags)
+{
+  if (pixelSize == 1) return TJPF_GRAY;
+  if (pixelSize == 3) {
+    if (flags & TJ_BGR) return TJPF_BGR;
+    else return TJPF_RGB;
+  }
+  if (pixelSize == 4) {
+    if (flags & TJ_ALPHAFIRST) {
+      if (flags & TJ_BGR) return TJPF_XBGR;
+      else return TJPF_XRGB;
+    } else {
+      if (flags & TJ_BGR) return TJPF_BGRX;
+      else return TJPF_RGBX;
+    }
+  }
+  return -1;
+}
+
+static void setCompDefaults(tjinstance *this, int pixelFormat)
+{
+  int subsamp = this->subsamp;
+
+  this->cinfo.in_color_space = pf2cs[pixelFormat];
+  this->cinfo.input_components = tjPixelSize[pixelFormat];
+  jpeg_set_defaults(&this->cinfo);
+
+  this->cinfo.restart_interval = this->restartIntervalBlocks;
+  this->cinfo.restart_in_rows = this->restartIntervalRows;
+  this->cinfo.X_density = (UINT16)this->xDensity;
+  this->cinfo.Y_density = (UINT16)this->yDensity;
+  this->cinfo.density_unit = (UINT8)this->densityUnits;
+  this->cinfo.mem->max_memory_to_use = (long)this->maxMemory * 1048576L;
+
+  if (this->lossless) {
+#ifdef C_LOSSLESS_SUPPORTED
+    jpeg_enable_lossless(&this->cinfo, this->losslessPSV, this->losslessPt);
+#endif
+    if (pixelFormat == TJPF_GRAY)
+      subsamp = TJSAMP_GRAY;
+    else if (subsamp != TJSAMP_GRAY)
+      subsamp = TJSAMP_444;
+    return;
+  }
+
+  jpeg_set_quality(&this->cinfo, this->quality, TRUE);
+  this->cinfo.dct_method = this->fastDCT ? JDCT_FASTEST : JDCT_ISLOW;
+
+  switch (this->colorspace) {
+  case TJCS_RGB:
+    jpeg_set_colorspace(&this->cinfo, JCS_RGB);  break;
+  case TJCS_YCbCr:
+    jpeg_set_colorspace(&this->cinfo, JCS_YCbCr);  break;
+  case TJCS_GRAY:
+    jpeg_set_colorspace(&this->cinfo, JCS_GRAYSCALE);  break;
+  case TJCS_CMYK:
+    jpeg_set_colorspace(&this->cinfo, JCS_CMYK);  break;
+  case TJCS_YCCK:
+    jpeg_set_colorspace(&this->cinfo, JCS_YCCK);  break;
+  default:
+    if (subsamp == TJSAMP_GRAY)
+      jpeg_set_colorspace(&this->cinfo, JCS_GRAYSCALE);
+    else if (pixelFormat == TJPF_CMYK)
+      jpeg_set_colorspace(&this->cinfo, JCS_YCCK);
+    else
+      jpeg_set_colorspace(&this->cinfo, JCS_YCbCr);
+  }
+
+  if (this->cinfo.data_precision == 8)
+    this->cinfo.optimize_coding = this->optimize;
+#ifdef C_PROGRESSIVE_SUPPORTED
+  if (this->progressive) jpeg_simple_progression(&this->cinfo);
+#endif
+  this->cinfo.arith_code = this->arithmetic;
+
+  this->cinfo.comp_info[0].h_samp_factor = tjMCUWidth[subsamp] / 8;
+  this->cinfo.comp_info[1].h_samp_factor = 1;
+  this->cinfo.comp_info[2].h_samp_factor = 1;
+  if (this->cinfo.num_components > 3)
+    this->cinfo.comp_info[3].h_samp_factor = tjMCUWidth[subsamp] / 8;
+  this->cinfo.comp_info[0].v_samp_factor = tjMCUHeight[subsamp] / 8;
+  this->cinfo.comp_info[1].v_samp_factor = 1;
+  this->cinfo.comp_info[2].v_samp_factor = 1;
+  if (this->cinfo.num_components > 3)
+    this->cinfo.comp_info[3].v_samp_factor = tjMCUHeight[subsamp] / 8;
+}
+
+
+static int getSubsamp(j_decompress_ptr dinfo)
+{
+  int retval = TJSAMP_UNKNOWN, i, k;
+
+  /* The sampling factors actually have no meaning with grayscale JPEG files,
+     and in fact it's possible to generate grayscale JPEGs with sampling
+     factors > 1 (even though those sampling factors are ignored by the
+     decompressor.)  Thus, we need to treat grayscale as a special case. */
+  if (dinfo->num_components == 1 && dinfo->jpeg_color_space == JCS_GRAYSCALE)
+    return TJSAMP_GRAY;
+
+  for (i = 0; i < TJ_NUMSAMP; i++) {
+    if (i == TJSAMP_GRAY) continue;
+
+    if (dinfo->num_components == 3 ||
+        ((dinfo->jpeg_color_space == JCS_YCCK ||
+          dinfo->jpeg_color_space == JCS_CMYK) &&
+         dinfo->num_components == 4)) {
+      if (dinfo->comp_info[0].h_samp_factor == tjMCUWidth[i] / 8 &&
+          dinfo->comp_info[0].v_samp_factor == tjMCUHeight[i] / 8) {
+        int match = 0;
+
+        for (k = 1; k < dinfo->num_components; k++) {
+          int href = 1, vref = 1;
+
+          if ((dinfo->jpeg_color_space == JCS_YCCK ||
+               dinfo->jpeg_color_space == JCS_CMYK) && k == 3) {
+            href = tjMCUWidth[i] / 8;  vref = tjMCUHeight[i] / 8;
+          }
+          if (dinfo->comp_info[k].h_samp_factor == href &&
+              dinfo->comp_info[k].v_samp_factor == vref)
+            match++;
+        }
+        if (match == dinfo->num_components - 1) {
+          retval = i;  break;
+        }
+      }
+      /* Handle 4:2:2 and 4:4:0 images whose sampling factors are specified
+         in non-standard ways. */
+      if (dinfo->comp_info[0].h_samp_factor == 2 &&
+          dinfo->comp_info[0].v_samp_factor == 2 &&
+          (i == TJSAMP_422 || i == TJSAMP_440)) {
+        int match = 0;
+
+        for (k = 1; k < dinfo->num_components; k++) {
+          int href = tjMCUHeight[i] / 8, vref = tjMCUWidth[i] / 8;
+
+          if ((dinfo->jpeg_color_space == JCS_YCCK ||
+               dinfo->jpeg_color_space == JCS_CMYK) && k == 3) {
+            href = vref = 2;
+          }
+          if (dinfo->comp_info[k].h_samp_factor == href &&
+              dinfo->comp_info[k].v_samp_factor == vref)
+            match++;
+        }
+        if (match == dinfo->num_components - 1) {
+          retval = i;  break;
+        }
+      }
+      /* Handle 4:4:4 images whose sampling factors are specified in
+         non-standard ways. */
+      if (dinfo->comp_info[0].h_samp_factor *
+          dinfo->comp_info[0].v_samp_factor <=
+          D_MAX_BLOCKS_IN_MCU / 3 && i == TJSAMP_444) {
+        int match = 0;
+        for (k = 1; k < dinfo->num_components; k++) {
+          if (dinfo->comp_info[k].h_samp_factor ==
+              dinfo->comp_info[0].h_samp_factor &&
+              dinfo->comp_info[k].v_samp_factor ==
+              dinfo->comp_info[0].v_samp_factor)
+            match++;
+          if (match == dinfo->num_components - 1) {
+            retval = i;  break;
+          }
+        }
+      }
+    }
+  }
+  return retval;
+}
+
+
+static void setDecompParameters(tjinstance *this)
+{
+  this->subsamp = getSubsamp(&this->dinfo);
+  this->jpegWidth = this->dinfo.image_width;
+  this->jpegHeight = this->dinfo.image_height;
+  this->precision = this->dinfo.data_precision;
+  switch (this->dinfo.jpeg_color_space) {
+  case JCS_GRAYSCALE:  this->colorspace = TJCS_GRAY;  break;
+  case JCS_RGB:        this->colorspace = TJCS_RGB;  break;
+  case JCS_YCbCr:      this->colorspace = TJCS_YCbCr;  break;
+  case JCS_CMYK:       this->colorspace = TJCS_CMYK;  break;
+  case JCS_YCCK:       this->colorspace = TJCS_YCCK;  break;
+  default:             this->colorspace = -1;  break;
+  }
+  this->progressive = this->dinfo.progressive_mode;
+  this->arithmetic = this->dinfo.arith_code;
+  this->lossless = this->dinfo.master->lossless;
+  this->losslessPSV = this->dinfo.Ss;
+  this->losslessPt = this->dinfo.Al;
+  this->xDensity = this->dinfo.X_density;
+  this->yDensity = this->dinfo.Y_density;
+  this->densityUnits = this->dinfo.density_unit;
+}
+
+
+static void processFlags(tjhandle handle, int flags, int operation)
+{
+  tjinstance *this = (tjinstance *)handle;
+
+  this->bottomUp = !!(flags & TJFLAG_BOTTOMUP);
+
+#ifndef NO_PUTENV
+  if (flags & TJFLAG_FORCEMMX) PUTENV_S("JSIMD_FORCEMMX", "1");
+  else if (flags & TJFLAG_FORCESSE) PUTENV_S("JSIMD_FORCESSE", "1");
+  else if (flags & TJFLAG_FORCESSE2) PUTENV_S("JSIMD_FORCESSE2", "1");
+#endif
+
+  this->fastUpsample = !!(flags & TJFLAG_FASTUPSAMPLE);
+  this->noRealloc = !!(flags & TJFLAG_NOREALLOC);
+
+  if (operation == COMPRESS) {
+    if (this->quality >= 96 || flags & TJFLAG_ACCURATEDCT)
+      this->fastDCT = FALSE;
+    else
+      this->fastDCT = TRUE;
+  } else
+    this->fastDCT = !!(flags & TJFLAG_FASTDCT);
+
+  this->jerr.stopOnWarning = !!(flags & TJFLAG_STOPONWARNING);
+  this->progressive = !!(flags & TJFLAG_PROGRESSIVE);
+
+  if (flags & TJFLAG_LIMITSCANS) this->scanLimit = 500;
+}
+
+
+/*************************** General API functions ***************************/
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT tjhandle tj3Init(int initType)
+{
+  static const char FUNCTION_NAME[] = "tj3Init";
+  tjinstance *this = NULL;
+  tjhandle retval = NULL;
+
+  if (initType < 0 || initType >= TJ_NUMINIT)
+    THROWG("Invalid argument", NULL);
+
+  if ((this = (tjinstance *)malloc(sizeof(tjinstance))) == NULL)
+    THROWG("Memory allocation failure", NULL);
+  memset(this, 0, sizeof(tjinstance));
+  SNPRINTF(this->errStr, JMSG_LENGTH_MAX, "No error");
+
+  this->quality = -1;
+  this->subsamp = TJSAMP_UNKNOWN;
+  this->jpegWidth = -1;
+  this->jpegHeight = -1;
+  this->precision = 8;
+  this->colorspace = -1;
+  this->losslessPSV = 1;
+  this->xDensity = 1;
+  this->yDensity = 1;
+  this->scalingFactor = TJUNSCALED;
+  this->saveMarkers = 2;
+
+  switch (initType) {
+  case TJINIT_COMPRESS:  return _tjInitCompress(this);
+  case TJINIT_DECOMPRESS:  return _tjInitDecompress(this);
+  case TJINIT_TRANSFORM:
+    retval = _tjInitCompress(this);
+    if (!retval) return NULL;
+    retval = _tjInitDecompress(this);
+    return retval;
+  }
+
+bailout:
+  return retval;
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT void tj3Destroy(tjhandle handle)
+{
+  tjinstance *this = (tjinstance *)handle;
+  j_compress_ptr cinfo = NULL;
+  j_decompress_ptr dinfo = NULL;
+
+  if (!this) return;
+
+  cinfo = &this->cinfo;  dinfo = &this->dinfo;
+  this->jerr.warning = FALSE;
+  this->isInstanceError = FALSE;
+
+  if (setjmp(this->jerr.setjmp_buffer)) return;
+  if (this->init & COMPRESS) jpeg_destroy_compress(cinfo);
+  if (this->init & DECOMPRESS) jpeg_destroy_decompress(dinfo);
+  free(this->iccBuf);
+  free(this->tempICCBuf);
+  free(this);
+}
+
+/* TurboJPEG 1.0+ */
+DLLEXPORT int tjDestroy(tjhandle handle)
+{
+  static const char FUNCTION_NAME[] = "tjDestroy";
+  int retval = 0;
+
+  if (!handle) THROWG("Invalid handle", -1);
+
+  SNPRINTF(errStr, JMSG_LENGTH_MAX, "No error");
+  tj3Destroy(handle);
+  if (strcmp(errStr, "No error")) retval = -1;
+
+bailout:
+  return retval;
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT char *tj3GetErrorStr(tjhandle handle)
+{
+  tjinstance *this = (tjinstance *)handle;
+
+  if (this && this->isInstanceError) {
+    this->isInstanceError = FALSE;
+    return this->errStr;
+  } else
+    return errStr;
+}
+
+/* TurboJPEG 2.0+ */
+DLLEXPORT char *tjGetErrorStr2(tjhandle handle)
+{
+  return tj3GetErrorStr(handle);
+}
+
+/* TurboJPEG 1.0+ */
+DLLEXPORT char *tjGetErrorStr(void)
+{
+  return errStr;
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int tj3GetErrorCode(tjhandle handle)
+{
+  tjinstance *this = (tjinstance *)handle;
+
+  if (this && this->jerr.warning) return TJERR_WARNING;
+  else return TJERR_FATAL;
+}
+
+/* TurboJPEG 2.0+ */
+DLLEXPORT int tjGetErrorCode(tjhandle handle)
+{
+  return tj3GetErrorCode(handle);
+}
+
+
+#define SET_PARAM(field, minValue, maxValue) { \
+  if (value < minValue || (maxValue > 0 && value > maxValue)) \
+    THROW("Parameter value out of range"); \
+  this->field = value; \
+}
+
+#define SET_BOOL_PARAM(field) { \
+  if (value < 0 || value > 1) \
+    THROW("Parameter value out of range"); \
+  this->field = (boolean)value; \
+}
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int tj3Set(tjhandle handle, int param, int value)
+{
+  static const char FUNCTION_NAME[] = "tj3Set";
+  int retval = 0;
+
+  GET_TJINSTANCE(handle, -1);
+
+  switch (param) {
+  case TJPARAM_STOPONWARNING:
+    SET_BOOL_PARAM(jerr.stopOnWarning);
+    break;
+  case TJPARAM_BOTTOMUP:
+    SET_BOOL_PARAM(bottomUp);
+    break;
+  case TJPARAM_NOREALLOC:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_NOREALLOC is not applicable to decompression instances.");
+    SET_BOOL_PARAM(noRealloc);
+    break;
+  case TJPARAM_QUALITY:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_QUALITY is not applicable to decompression instances.");
+    SET_PARAM(quality, 1, 100);
+    break;
+  case TJPARAM_SUBSAMP:
+    SET_PARAM(subsamp, 0, TJ_NUMSAMP - 1);
+    break;
+  case TJPARAM_JPEGWIDTH:
+    if (!(this->init & DECOMPRESS))
+      THROW("TJPARAM_JPEGWIDTH is not applicable to compression instances.");
+    THROW("TJPARAM_JPEGWIDTH is read-only in decompression instances.");
+    break;
+  case TJPARAM_JPEGHEIGHT:
+    if (!(this->init & DECOMPRESS))
+      THROW("TJPARAM_JPEGHEIGHT is not applicable to compression instances.");
+    THROW("TJPARAM_JPEGHEIGHT is read-only in decompression instances.");
+    break;
+  case TJPARAM_PRECISION:
+    SET_PARAM(precision, 2, 16);
+    break;
+  case TJPARAM_COLORSPACE:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_COLORSPACE is read-only in decompression instances.");
+    SET_PARAM(colorspace, 0, TJ_NUMCS - 1);
+    break;
+  case TJPARAM_FASTUPSAMPLE:
+    if (!(this->init & DECOMPRESS))
+      THROW("TJPARAM_FASTUPSAMPLE is not applicable to compression instances.");
+    SET_BOOL_PARAM(fastUpsample);
+    break;
+  case TJPARAM_FASTDCT:
+    SET_BOOL_PARAM(fastDCT);
+    break;
+  case TJPARAM_OPTIMIZE:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_OPTIMIZE is not applicable to decompression instances.");
+    SET_BOOL_PARAM(optimize);
+    break;
+  case TJPARAM_PROGRESSIVE:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_PROGRESSIVE is read-only in decompression instances.");
+    SET_BOOL_PARAM(progressive);
+    break;
+  case TJPARAM_SCANLIMIT:
+    if (!(this->init & DECOMPRESS))
+      THROW("TJPARAM_SCANLIMIT is not applicable to compression instances.");
+    SET_PARAM(scanLimit, 0, -1);
+    break;
+  case TJPARAM_ARITHMETIC:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_ARITHMETIC is read-only in decompression instances.");
+    SET_BOOL_PARAM(arithmetic);
+    break;
+  case TJPARAM_LOSSLESS:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_LOSSLESS is read-only in decompression instances.");
+    SET_BOOL_PARAM(lossless);
+    break;
+  case TJPARAM_LOSSLESSPSV:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_LOSSLESSPSV is read-only in decompression instances.");
+    SET_PARAM(losslessPSV, 1, 7);
+    break;
+  case TJPARAM_LOSSLESSPT:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_LOSSLESSPT is read-only in decompression instances.");
+    SET_PARAM(losslessPt, 0, 15);
+    break;
+  case TJPARAM_RESTARTBLOCKS:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_RESTARTBLOCKS is not applicable to decompression instances.");
+    SET_PARAM(restartIntervalBlocks, 0, 65535);
+    if (value != 0) this->restartIntervalRows = 0;
+    break;
+  case TJPARAM_RESTARTROWS:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_RESTARTROWS is not applicable to decompression instances.");
+    SET_PARAM(restartIntervalRows, 0, 65535);
+    if (value != 0) this->restartIntervalBlocks = 0;
+    break;
+  case TJPARAM_XDENSITY:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_XDENSITY is read-only in decompression instances.");
+    SET_PARAM(xDensity, 1, 65535);
+    break;
+  case TJPARAM_YDENSITY:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_YDENSITY is read-only in decompression instances.");
+    SET_PARAM(yDensity, 1, 65535);
+    break;
+  case TJPARAM_DENSITYUNITS:
+    if (!(this->init & COMPRESS))
+      THROW("TJPARAM_DENSITYUNITS is read-only in decompression instances.");
+    SET_PARAM(densityUnits, 0, 2);
+    break;
+  case TJPARAM_MAXMEMORY:
+    SET_PARAM(maxMemory, 0, (int)(min(LONG_MAX / 1048576L, (long)INT_MAX)));
+    break;
+  case TJPARAM_MAXPIXELS:
+    SET_PARAM(maxPixels, 0, -1);
+    break;
+  case TJPARAM_SAVEMARKERS:
+    if (!(this->init & DECOMPRESS))
+      THROW("TJPARAM_SAVEMARKERS is not applicable to compression instances.");
+    SET_PARAM(saveMarkers, 0, 4);
+    break;
+  default:
+    THROW("Invalid parameter");
+  }
+
+bailout:
+  return retval;
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int tj3Get(tjhandle handle, int param)
+{
+  tjinstance *this = (tjinstance *)handle;
+  if (!this) return -1;
+
+  switch (param) {
+  case TJPARAM_STOPONWARNING:
+    return this->jerr.stopOnWarning;
+  case TJPARAM_BOTTOMUP:
+    return this->bottomUp;
+  case TJPARAM_NOREALLOC:
+    return this->noRealloc;
+  case TJPARAM_QUALITY:
+    return this->quality;
+  case TJPARAM_SUBSAMP:
+    return this->subsamp;
+  case TJPARAM_JPEGWIDTH:
+    return this->jpegWidth;
+  case TJPARAM_JPEGHEIGHT:
+    return this->jpegHeight;
+  case TJPARAM_PRECISION:
+    return this->precision;
+  case TJPARAM_COLORSPACE:
+    return this->colorspace;
+  case TJPARAM_FASTUPSAMPLE:
+    return this->fastUpsample;
+  case TJPARAM_FASTDCT:
+    return this->fastDCT;
+  case TJPARAM_OPTIMIZE:
+    return this->optimize;
+  case TJPARAM_PROGRESSIVE:
+    return this->progressive;
+  case TJPARAM_SCANLIMIT:
+    return this->scanLimit;
+  case TJPARAM_ARITHMETIC:
+    return this->arithmetic;
+  case TJPARAM_LOSSLESS:
+    return this->lossless;
+  case TJPARAM_LOSSLESSPSV:
+    return this->losslessPSV;
+  case TJPARAM_LOSSLESSPT:
+    return this->losslessPt;
+  case TJPARAM_RESTARTBLOCKS:
+    return this->restartIntervalBlocks;
+  case TJPARAM_RESTARTROWS:
+    return this->restartIntervalRows;
+  case TJPARAM_XDENSITY:
+    return this->xDensity;
+  case TJPARAM_YDENSITY:
+    return this->yDensity;
+  case TJPARAM_DENSITYUNITS:
+    return this->densityUnits;
+  case TJPARAM_MAXMEMORY:
+    return this->maxMemory;
+  case TJPARAM_MAXPIXELS:
+    return this->maxPixels;
+  case TJPARAM_SAVEMARKERS:
+    return this->saveMarkers;
+  }
+
+  return -1;
+}
+
+
+/* These are exposed mainly because Windows can't malloc() and free() across
+   DLL boundaries except when the CRT DLL is used, and we don't use the CRT DLL
+   with turbojpeg.dll for compatibility reasons.  However, these functions
+   can potentially be used for other purposes by different implementations. */
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT void *tj3Alloc(size_t bytes)
+{
+  return MALLOC(bytes);
+}
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT unsigned char *tjAlloc(int bytes)
+{
+  return (unsigned char *)tj3Alloc((size_t)bytes);
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT void tj3Free(void *buf)
+{
+  free(buf);
+}
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT void tjFree(unsigned char *buf)
+{
+  tj3Free(buf);
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT size_t tj3JPEGBufSize(int width, int height, int jpegSubsamp)
+{
+  static const char FUNCTION_NAME[] = "tj3JPEGBufSize";
+  unsigned long long retval = 0;
+  int mcuw, mcuh, chromasf;
+
+  if (width < 1 || height < 1 || jpegSubsamp < TJSAMP_UNKNOWN ||
+      jpegSubsamp >= TJ_NUMSAMP)
+    THROWG("Invalid argument", 0);
+
+  if (jpegSubsamp == TJSAMP_UNKNOWN)
+    jpegSubsamp = TJSAMP_444;
+
+  /* This allows for rare corner cases in which a JPEG image can actually be
+     larger than the uncompressed input (we wouldn't mention it if it hadn't
+     happened before.) */
+  mcuw = tjMCUWidth[jpegSubsamp];
+  mcuh = tjMCUHeight[jpegSubsamp];
+  chromasf = jpegSubsamp == TJSAMP_GRAY ? 0 : 4 * 64 / (mcuw * mcuh);
+  retval = PAD(width, mcuw) * PAD(height, mcuh) * (2ULL + chromasf) + 2048ULL;
+#if ULLONG_MAX > ULONG_MAX
+  if (retval > (unsigned long long)((unsigned long)-1))
+    THROWG("Image is too large", 0);
+#endif
+
+bailout:
+  return (size_t)retval;
+}
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT unsigned long tjBufSize(int width, int height, int jpegSubsamp)
+{
+  static const char FUNCTION_NAME[] = "tjBufSize";
+  size_t retval;
+
+  if (jpegSubsamp < 0)
+    THROWG("Invalid argument", 0);
+
+  retval = tj3JPEGBufSize(width, height, jpegSubsamp);
+
+bailout:
+  return (retval == 0) ? (unsigned long)-1 : (unsigned long)retval;
+}
+
+/* TurboJPEG 1.0+ */
+DLLEXPORT unsigned long TJBUFSIZE(int width, int height)
+{
+  static const char FUNCTION_NAME[] = "TJBUFSIZE";
+  unsigned long long retval = 0;
+
+  if (width < 1 || height < 1)
+    THROWG("Invalid argument", (unsigned long)-1);
+
+  /* This allows for rare corner cases in which a JPEG image can actually be
+     larger than the uncompressed input (we wouldn't mention it if it hadn't
+     happened before.) */
+  retval = PAD(width, 16) * PAD(height, 16) * 6ULL + 2048ULL;
+#if ULLONG_MAX > ULONG_MAX
+  if (retval > (unsigned long long)((unsigned long)-1))
+    THROWG("Image is too large", (unsigned long)-1);
+#endif
+
+bailout:
+  return (unsigned long)retval;
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT size_t tj3YUVBufSize(int width, int align, int height, int subsamp)
+{
+  static const char FUNCTION_NAME[] = "tj3YUVBufSize";
+  unsigned long long retval = 0;
+  int nc, i;
+
+  if (align < 1 || !IS_POW2(align) || subsamp < 0 || subsamp >= TJ_NUMSAMP)
+    THROWG("Invalid argument", 0);
+
+  nc = (subsamp == TJSAMP_GRAY ? 1 : 3);
+  for (i = 0; i < nc; i++) {
+    int pw = tj3YUVPlaneWidth(i, width, subsamp);
+    int stride = PAD(pw, align);
+    int ph = tj3YUVPlaneHeight(i, height, subsamp);
+
+    if (pw == 0 || ph == 0) return 0;
+    else retval += (unsigned long long)stride * ph;
+  }
+#if ULLONG_MAX > ULONG_MAX
+  if (retval > (unsigned long long)((unsigned long)-1))
+    THROWG("Image is too large", 0);
+#endif
+
+bailout:
+  return (size_t)retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT unsigned long tjBufSizeYUV2(int width, int align, int height,
+                                      int subsamp)
+{
+  size_t retval = tj3YUVBufSize(width, align, height, subsamp);
+  return (retval == 0) ? (unsigned long)-1 : (unsigned long)retval;
+}
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT unsigned long tjBufSizeYUV(int width, int height, int subsamp)
+{
+  return tjBufSizeYUV2(width, 4, height, subsamp);
+}
+
+/* TurboJPEG 1.1+ */
+DLLEXPORT unsigned long TJBUFSIZEYUV(int width, int height, int subsamp)
+{
+  return tjBufSizeYUV(width, height, subsamp);
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT size_t tj3YUVPlaneSize(int componentID, int width, int stride,
+                                 int height, int subsamp)
+{
+  static const char FUNCTION_NAME[] = "tj3YUVPlaneSize";
+  unsigned long long retval = 0;
+  int pw, ph;
+
+  if (width < 1 || height < 1 || subsamp < 0 || subsamp >= TJ_NUMSAMP)
+    THROWG("Invalid argument", 0);
+
+  pw = tj3YUVPlaneWidth(componentID, width, subsamp);
+  ph = tj3YUVPlaneHeight(componentID, height, subsamp);
+  if (pw == 0 || ph == 0) return 0;
+
+  if (stride == 0) stride = pw;
+  else stride = abs(stride);
+
+  retval = (unsigned long long)stride * (ph - 1) + pw;
+#if ULLONG_MAX > ULONG_MAX
+  if (retval > (unsigned long long)((unsigned long)-1))
+    THROWG("Image is too large", 0);
+#endif
+
+bailout:
+  return (size_t)retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT unsigned long tjPlaneSizeYUV(int componentID, int width, int stride,
+                                       int height, int subsamp)
+{
+  size_t retval = tj3YUVPlaneSize(componentID, width, stride, height, subsamp);
+  return (retval == 0) ? -1 : (unsigned long)retval;
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int tj3YUVPlaneWidth(int componentID, int width, int subsamp)
+{
+  static const char FUNCTION_NAME[] = "tj3YUVPlaneWidth";
+  unsigned long long pw, retval = 0;
+  int nc;
+
+  if (width < 1 || subsamp < 0 || subsamp >= TJ_NUMSAMP)
+    THROWG("Invalid argument", 0);
+  nc = (subsamp == TJSAMP_GRAY ? 1 : 3);
+  if (componentID < 0 || componentID >= nc)
+    THROWG("Invalid argument", 0);
+
+  pw = PAD((unsigned long long)width, tjMCUWidth[subsamp] / 8);
+  if (componentID == 0)
+    retval = pw;
+  else
+    retval = pw * 8 / tjMCUWidth[subsamp];
+
+  if (retval > (unsigned long long)INT_MAX)
+    THROWG("Width is too large", 0);
+
+bailout:
+  return (int)retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjPlaneWidth(int componentID, int width, int subsamp)
+{
+  int retval = tj3YUVPlaneWidth(componentID, width, subsamp);
+  return (retval == 0) ? -1 : retval;
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int tj3YUVPlaneHeight(int componentID, int height, int subsamp)
+{
+  static const char FUNCTION_NAME[] = "tj3YUVPlaneHeight";
+  unsigned long long ph, retval = 0;
+  int nc;
+
+  if (height < 1 || subsamp < 0 || subsamp >= TJ_NUMSAMP)
+    THROWG("Invalid argument", 0);
+  nc = (subsamp == TJSAMP_GRAY ? 1 : 3);
+  if (componentID < 0 || componentID >= nc)
+    THROWG("Invalid argument", 0);
+
+  ph = PAD((unsigned long long)height, tjMCUHeight[subsamp] / 8);
+  if (componentID == 0)
+    retval = ph;
+  else
+    retval = ph * 8 / tjMCUHeight[subsamp];
+
+  if (retval > (unsigned long long)INT_MAX)
+    THROWG("Height is too large", 0);
+
+bailout:
+  return (int)retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjPlaneHeight(int componentID, int height, int subsamp)
+{
+  int retval = tj3YUVPlaneHeight(componentID, height, subsamp);
+  return (retval == 0) ? -1 : retval;
+}
+
+
+/******************************** Compressor *********************************/
+
+static tjhandle _tjInitCompress(tjinstance *this)
+{
+  static unsigned char buffer[1];
+  unsigned char *buf = buffer;
+  size_t size = 1;
+
+  /* This is also straight out of example.c */
+  this->cinfo.err = jpeg_std_error(&this->jerr.pub);
+  this->jerr.pub.error_exit = my_error_exit;
+  this->jerr.pub.output_message = my_output_message;
+  this->jerr.emit_message = this->jerr.pub.emit_message;
+  this->jerr.pub.emit_message = my_emit_message;
+  this->jerr.pub.addon_message_table = turbojpeg_message_table;
+  this->jerr.pub.first_addon_message = JMSG_FIRSTADDONCODE;
+  this->jerr.pub.last_addon_message = JMSG_LASTADDONCODE;
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    free(this);
+    return NULL;
+  }
+
+  jpeg_create_compress(&this->cinfo);
+  /* Make an initial call so it will create the destination manager */
+  jpeg_mem_dest_tj(&this->cinfo, &buf, &size, 0);
+
+  this->init |= COMPRESS;
+  return (tjhandle)this;
+}
+
+/* TurboJPEG 1.0+ */
+DLLEXPORT tjhandle tjInitCompress(void)
+{
+  return tj3Init(TJINIT_COMPRESS);
+}
+
+
+/* TurboJPEG 3.1+ */
+DLLEXPORT int tj3SetICCProfile(tjhandle handle, unsigned char *iccBuf,
+                               size_t iccSize)
+{
+  static const char FUNCTION_NAME[] = "tj3SetICCProfile";
+  int retval = 0;
+
+  GET_TJINSTANCE(handle, -1)
+  if ((this->init & COMPRESS) == 0)
+    THROW("Instance has not been initialized for compression");
+
+  if (iccBuf == this->iccBuf && iccSize == this->iccSize)
+    return 0;
+
+  free(this->iccBuf);
+  this->iccBuf = NULL;
+  this->iccSize = 0;
+  if (iccBuf && iccSize) {
+    if ((this->iccBuf = (unsigned char *)malloc(iccSize)) == NULL)
+      THROW("Memory allocation failure");
+    memcpy(this->iccBuf, iccBuf, iccSize);
+    this->iccSize = iccSize;
+  }
+
+bailout:
+  return retval;
+}
+
+
+/* tj3Compress*() is implemented in turbojpeg-mp.c */
+#define BITS_IN_JSAMPLE  8
+#include "turbojpeg-mp.c"
+#undef BITS_IN_JSAMPLE
+#define BITS_IN_JSAMPLE  12
+#include "turbojpeg-mp.c"
+#undef BITS_IN_JSAMPLE
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT int tjCompress2(tjhandle handle, const unsigned char *srcBuf,
+                          int width, int pitch, int height, int pixelFormat,
+                          unsigned char **jpegBuf, unsigned long *jpegSize,
+                          int jpegSubsamp, int jpegQual, int flags)
+{
+  static const char FUNCTION_NAME[] = "tjCompress2";
+  int retval = 0;
+  size_t size;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (jpegSize == NULL || jpegSubsamp < 0 || jpegSubsamp >= TJ_NUMSAMP ||
+      jpegQual < 0 || jpegQual > 100)
+    THROW("Invalid argument");
+
+  this->quality = jpegQual;
+  this->subsamp = jpegSubsamp;
+  processFlags(handle, flags, COMPRESS);
+
+  size = (size_t)(*jpegSize);
+  if (this->noRealloc)
+    size = tj3JPEGBufSize(width, height, this->subsamp);
+  retval = tj3Compress8(handle, srcBuf, width, pitch, height, pixelFormat,
+                        jpegBuf, &size);
+  *jpegSize = (unsigned long)size;
+
+bailout:
+  return retval;
+}
+
+/* TurboJPEG 1.0+ */
+DLLEXPORT int tjCompress(tjhandle handle, unsigned char *srcBuf, int width,
+                         int pitch, int height, int pixelSize,
+                         unsigned char *jpegBuf, unsigned long *jpegSize,
+                         int jpegSubsamp, int jpegQual, int flags)
+{
+  int retval = 0;
+  unsigned long size = jpegSize ? *jpegSize : 0;
+
+  if (flags & TJ_YUV) {
+    size = tjBufSizeYUV(width, height, jpegSubsamp);
+    retval = tjEncodeYUV2(handle, srcBuf, width, pitch, height,
+                          getPixelFormat(pixelSize, flags), jpegBuf,
+                          jpegSubsamp, flags);
+  } else {
+    retval = tjCompress2(handle, srcBuf, width, pitch, height,
+                         getPixelFormat(pixelSize, flags), &jpegBuf, &size,
+                         jpegSubsamp, jpegQual, flags | TJFLAG_NOREALLOC);
+  }
+  *jpegSize = size;
+  return retval;
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int tj3CompressFromYUVPlanes8(tjhandle handle,
+                                        const unsigned char * const *srcPlanes,
+                                        int width, const int *strides,
+                                        int height, unsigned char **jpegBuf,
+                                        size_t *jpegSize)
+{
+  static const char FUNCTION_NAME[] = "tj3CompressFromYUVPlanes8";
+  int i, row, retval = 0;
+  boolean alloc = TRUE;
+  int pw[MAX_COMPONENTS], ph[MAX_COMPONENTS], iw[MAX_COMPONENTS],
+    tmpbufsize = 0, usetmpbuf = 0, th[MAX_COMPONENTS];
+  JSAMPLE *_tmpbuf = NULL, *ptr;
+  JSAMPROW *inbuf[MAX_COMPONENTS], *tmpbuf[MAX_COMPONENTS];
+
+  GET_CINSTANCE(handle)
+
+  for (i = 0; i < MAX_COMPONENTS; i++) {
+    tmpbuf[i] = NULL;  inbuf[i] = NULL;
+  }
+
+  if ((this->init & COMPRESS) == 0)
+    THROW("Instance has not been initialized for compression");
+
+  if (!srcPlanes || !srcPlanes[0] || width <= 0 || height <= 0 ||
+      jpegBuf == NULL || jpegSize == NULL)
+    THROW("Invalid argument");
+  if (this->subsamp != TJSAMP_GRAY && (!srcPlanes[1] || !srcPlanes[2]))
+    THROW("Invalid argument");
+
+  if (this->quality == -1)
+    THROW("TJPARAM_QUALITY must be specified");
+  if (this->subsamp == TJSAMP_UNKNOWN)
+    THROW("TJPARAM_SUBSAMP must be specified");
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  cinfo->image_width = width;
+  cinfo->image_height = height;
+  cinfo->data_precision = 8;
+
+  if (this->noRealloc) alloc = FALSE;
+  jpeg_mem_dest_tj(cinfo, jpegBuf, jpegSize, alloc);
+  setCompDefaults(this, TJPF_RGB);
+  cinfo->raw_data_in = TRUE;
+
+  jpeg_start_compress(cinfo, TRUE);
+  if (this->iccBuf != NULL && this->iccSize != 0)
+    jpeg_write_icc_profile(cinfo, this->iccBuf, (unsigned int)this->iccSize);
+  for (i = 0; i < cinfo->num_components; i++) {
+    jpeg_component_info *compptr = &cinfo->comp_info[i];
+    int ih;
+
+    iw[i] = compptr->width_in_blocks * DCTSIZE;
+    ih = compptr->height_in_blocks * DCTSIZE;
+    pw[i] = PAD(cinfo->image_width, cinfo->max_h_samp_factor) *
+            compptr->h_samp_factor / cinfo->max_h_samp_factor;
+    ph[i] = PAD(cinfo->image_height, cinfo->max_v_samp_factor) *
+            compptr->v_samp_factor / cinfo->max_v_samp_factor;
+    if (iw[i] != pw[i] || ih != ph[i]) usetmpbuf = 1;
+    th[i] = compptr->v_samp_factor * DCTSIZE;
+    tmpbufsize += iw[i] * th[i];
+    if ((inbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph[i])) == NULL)
+      THROW("Memory allocation failure");
+    ptr = (JSAMPLE *)srcPlanes[i];
+    for (row = 0; row < ph[i]; row++) {
+      inbuf[i][row] = ptr;
+      ptr += (strides && strides[i] != 0) ? strides[i] : pw[i];
+    }
+  }
+  if (usetmpbuf) {
+    if ((_tmpbuf = (JSAMPLE *)malloc(sizeof(JSAMPLE) * tmpbufsize)) == NULL)
+      THROW("Memory allocation failure");
+    ptr = _tmpbuf;
+    for (i = 0; i < cinfo->num_components; i++) {
+      if ((tmpbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * th[i])) == NULL)
+        THROW("Memory allocation failure");
+      for (row = 0; row < th[i]; row++) {
+        tmpbuf[i][row] = ptr;
+        ptr += iw[i];
+      }
+    }
+  }
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  for (row = 0; row < (int)cinfo->image_height;
+       row += cinfo->max_v_samp_factor * DCTSIZE) {
+    JSAMPARRAY yuvptr[MAX_COMPONENTS];
+    int crow[MAX_COMPONENTS];
+
+    for (i = 0; i < cinfo->num_components; i++) {
+      jpeg_component_info *compptr = &cinfo->comp_info[i];
+
+      crow[i] = row * compptr->v_samp_factor / cinfo->max_v_samp_factor;
+      if (usetmpbuf) {
+        int j, k;
+
+        for (j = 0; j < MIN(th[i], ph[i] - crow[i]); j++) {
+          memcpy(tmpbuf[i][j], inbuf[i][crow[i] + j], pw[i]);
+          /* Duplicate last sample in row to fill out MCU */
+          for (k = pw[i]; k < iw[i]; k++)
+            tmpbuf[i][j][k] = tmpbuf[i][j][pw[i] - 1];
+        }
+        /* Duplicate last row to fill out MCU */
+        for (j = ph[i] - crow[i]; j < th[i]; j++)
+          memcpy(tmpbuf[i][j], tmpbuf[i][ph[i] - crow[i] - 1], iw[i]);
+        yuvptr[i] = tmpbuf[i];
+      } else
+        yuvptr[i] = &inbuf[i][crow[i]];
+    }
+    jpeg_write_raw_data(cinfo, yuvptr, cinfo->max_v_samp_factor * DCTSIZE);
+  }
+  jpeg_finish_compress(cinfo);
+
+bailout:
+  if (cinfo->global_state > CSTATE_START && alloc)
+    (*cinfo->dest->term_destination) (cinfo);
+  if (cinfo->global_state > CSTATE_START || retval == -1)
+    jpeg_abort_compress(cinfo);
+  for (i = 0; i < MAX_COMPONENTS; i++) {
+    free(tmpbuf[i]);
+    free(inbuf[i]);
+  }
+  free(_tmpbuf);
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjCompressFromYUVPlanes(tjhandle handle,
+                                      const unsigned char **srcPlanes,
+                                      int width, const int *strides,
+                                      int height, int subsamp,
+                                      unsigned char **jpegBuf,
+                                      unsigned long *jpegSize, int jpegQual,
+                                      int flags)
+{
+  static const char FUNCTION_NAME[] = "tjCompressFromYUVPlanes";
+  int retval = 0;
+  size_t size;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (subsamp < 0 || subsamp >= TJ_NUMSAMP || jpegSize == NULL ||
+      jpegQual < 0 || jpegQual > 100)
+    THROW("Invalid argument");
+
+  this->quality = jpegQual;
+  this->subsamp = subsamp;
+  processFlags(handle, flags, COMPRESS);
+
+  size = (size_t)(*jpegSize);
+  if (this->noRealloc)
+    size = tj3JPEGBufSize(width, height, this->subsamp);
+  retval = tj3CompressFromYUVPlanes8(handle, srcPlanes, width, strides, height,
+                                     jpegBuf, &size);
+  *jpegSize = (unsigned long)size;
+
+bailout:
+  return retval;
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int tj3CompressFromYUV8(tjhandle handle,
+                                  const unsigned char *srcBuf, int width,
+                                  int align, int height,
+                                  unsigned char **jpegBuf, size_t *jpegSize)
+{
+  static const char FUNCTION_NAME[] = "tj3CompressFromYUV8";
+  const unsigned char *srcPlanes[3];
+  int pw0, ph0, strides[3], retval = -1;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (srcBuf == NULL || width <= 0 || align < 1 || !IS_POW2(align) ||
+      height <= 0)
+    THROW("Invalid argument");
+
+  if (this->subsamp == TJSAMP_UNKNOWN)
+    THROW("TJPARAM_SUBSAMP must be specified");
+
+  pw0 = tj3YUVPlaneWidth(0, width, this->subsamp);
+  ph0 = tj3YUVPlaneHeight(0, height, this->subsamp);
+  srcPlanes[0] = srcBuf;
+  strides[0] = PAD(pw0, align);
+  if (this->subsamp == TJSAMP_GRAY) {
+    strides[1] = strides[2] = 0;
+    srcPlanes[1] = srcPlanes[2] = NULL;
+  } else {
+    int pw1 = tjPlaneWidth(1, width, this->subsamp);
+    int ph1 = tjPlaneHeight(1, height, this->subsamp);
+
+    strides[1] = strides[2] = PAD(pw1, align);
+    if ((unsigned long long)strides[0] * (unsigned long long)ph0 >
+        (unsigned long long)INT_MAX ||
+        (unsigned long long)strides[1] * (unsigned long long)ph1 >
+        (unsigned long long)INT_MAX)
+      THROW("Image or row alignment is too large");
+    srcPlanes[1] = srcPlanes[0] + strides[0] * ph0;
+    srcPlanes[2] = srcPlanes[1] + strides[1] * ph1;
+  }
+
+  return tj3CompressFromYUVPlanes8(handle, srcPlanes, width, strides, height,
+                                   jpegBuf, jpegSize);
+
+bailout:
+  return retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjCompressFromYUV(tjhandle handle, const unsigned char *srcBuf,
+                                int width, int align, int height, int subsamp,
+                                unsigned char **jpegBuf,
+                                unsigned long *jpegSize, int jpegQual,
+                                int flags)
+{
+  static const char FUNCTION_NAME[] = "tjCompressFromYUV";
+  int retval = -1;
+  size_t size;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (subsamp < 0 || subsamp >= TJ_NUMSAMP)
+    THROW("Invalid argument");
+
+  this->quality = jpegQual;
+  this->subsamp = subsamp;
+  processFlags(handle, flags, COMPRESS);
+
+  size = (size_t)(*jpegSize);
+  if (this->noRealloc)
+    size = tj3JPEGBufSize(width, height, this->subsamp);
+  retval = tj3CompressFromYUV8(handle, srcBuf, width, align, height, jpegBuf,
+                               &size);
+  *jpegSize = (unsigned long)size;
+
+bailout:
+  return retval;
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int tj3EncodeYUVPlanes8(tjhandle handle, const unsigned char *srcBuf,
+                                  int width, int pitch, int height,
+                                  int pixelFormat, unsigned char **dstPlanes,
+                                  int *strides)
+{
+  static const char FUNCTION_NAME[] = "tj3EncodeYUVPlanes8";
+  JSAMPROW *row_pointer = NULL;
+  JSAMPLE *_tmpbuf[MAX_COMPONENTS], *_tmpbuf2[MAX_COMPONENTS];
+  JSAMPROW *tmpbuf[MAX_COMPONENTS], *tmpbuf2[MAX_COMPONENTS];
+  JSAMPROW *outbuf[MAX_COMPONENTS];
+  int i, retval = 0, row, pw0, ph0, pw[MAX_COMPONENTS], ph[MAX_COMPONENTS];
+  JSAMPLE *ptr;
+  jpeg_component_info *compptr;
+
+  GET_CINSTANCE(handle)
+
+  for (i = 0; i < MAX_COMPONENTS; i++) {
+    tmpbuf[i] = NULL;  _tmpbuf[i] = NULL;
+    tmpbuf2[i] = NULL;  _tmpbuf2[i] = NULL;  outbuf[i] = NULL;
+  }
+
+  if ((this->init & COMPRESS) == 0)
+    THROW("Instance has not been initialized for compression");
+
+  if (srcBuf == NULL || width <= 0 || pitch < 0 || height <= 0 ||
+      pixelFormat < 0 || pixelFormat >= TJ_NUMPF || !dstPlanes ||
+      !dstPlanes[0])
+    THROW("Invalid argument");
+  if (this->subsamp != TJSAMP_GRAY && (!dstPlanes[1] || !dstPlanes[2]))
+    THROW("Invalid argument");
+
+  if (this->subsamp == TJSAMP_UNKNOWN)
+    THROW("TJPARAM_SUBSAMP must be specified");
+  if (pixelFormat == TJPF_CMYK)
+    THROW("Cannot generate YUV images from packed-pixel CMYK images");
+
+  if (pitch == 0) pitch = width * tjPixelSize[pixelFormat];
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  cinfo->image_width = width;
+  cinfo->image_height = height;
+  cinfo->data_precision = 8;
+
+  setCompDefaults(this, pixelFormat);
+
+  /* Execute only the parts of jpeg_start_compress() that we need.  If we
+     were to call the whole jpeg_start_compress() function, then it would try
+     to write the file headers, which could overflow the output buffer if the
+     YUV image were very small. */
+  if (cinfo->global_state != CSTATE_START)
+    THROW("libjpeg API is in the wrong state");
+  (*cinfo->err->reset_error_mgr) ((j_common_ptr)cinfo);
+  jinit_c_master_control(cinfo, FALSE);
+  jinit_color_converter(cinfo);
+  jinit_downsampler(cinfo);
+  (*cinfo->cconvert->start_pass) (cinfo);
+
+  pw0 = PAD(width, cinfo->max_h_samp_factor);
+  ph0 = PAD(height, cinfo->max_v_samp_factor);
+
+  if ((row_pointer = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph0)) == NULL)
+    THROW("Memory allocation failure");
+  for (i = 0; i < height; i++) {
+    if (this->bottomUp)
+      row_pointer[i] = (JSAMPROW)&srcBuf[(height - i - 1) * (size_t)pitch];
+    else
+      row_pointer[i] = (JSAMPROW)&srcBuf[i * (size_t)pitch];
+  }
+  if (height < ph0)
+    for (i = height; i < ph0; i++) row_pointer[i] = row_pointer[height - 1];
+
+  for (i = 0; i < cinfo->num_components; i++) {
+    compptr = &cinfo->comp_info[i];
+    _tmpbuf[i] = (JSAMPLE *)MALLOC(
+      PAD((compptr->width_in_blocks * cinfo->max_h_samp_factor * DCTSIZE) /
+          compptr->h_samp_factor, 32) *
+      cinfo->max_v_samp_factor + 32);
+    if (!_tmpbuf[i])
+      THROW("Memory allocation failure");
+    tmpbuf[i] =
+      (JSAMPROW *)malloc(sizeof(JSAMPROW) * cinfo->max_v_samp_factor);
+    if (!tmpbuf[i])
+      THROW("Memory allocation failure");
+    for (row = 0; row < cinfo->max_v_samp_factor; row++) {
+      unsigned char *_tmpbuf_aligned =
+        (unsigned char *)PAD((JUINTPTR)_tmpbuf[i], 32);
+
+      tmpbuf[i][row] = &_tmpbuf_aligned[
+        PAD((compptr->width_in_blocks * cinfo->max_h_samp_factor * DCTSIZE) /
+            compptr->h_samp_factor, 32) * row];
+    }
+    _tmpbuf2[i] =
+      (JSAMPLE *)MALLOC(PAD(compptr->width_in_blocks * DCTSIZE, 32) *
+                        compptr->v_samp_factor + 32);
+    if (!_tmpbuf2[i])
+      THROW("Memory allocation failure");
+    tmpbuf2[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * compptr->v_samp_factor);
+    if (!tmpbuf2[i])
+      THROW("Memory allocation failure");
+    for (row = 0; row < compptr->v_samp_factor; row++) {
+      unsigned char *_tmpbuf2_aligned =
+        (unsigned char *)PAD((JUINTPTR)_tmpbuf2[i], 32);
+
+      tmpbuf2[i][row] =
+        &_tmpbuf2_aligned[PAD(compptr->width_in_blocks * DCTSIZE, 32) * row];
+    }
+    pw[i] = pw0 * compptr->h_samp_factor / cinfo->max_h_samp_factor;
+    ph[i] = ph0 * compptr->v_samp_factor / cinfo->max_v_samp_factor;
+    outbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph[i]);
+    if (!outbuf[i])
+      THROW("Memory allocation failure");
+    ptr = dstPlanes[i];
+    for (row = 0; row < ph[i]; row++) {
+      outbuf[i][row] = ptr;
+      ptr += (strides && strides[i] != 0) ? strides[i] : pw[i];
+    }
+  }
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  for (row = 0; row < ph0; row += cinfo->max_v_samp_factor) {
+    (*cinfo->cconvert->color_convert) (cinfo, &row_pointer[row], tmpbuf, 0,
+                                       cinfo->max_v_samp_factor);
+    (cinfo->downsample->downsample) (cinfo, tmpbuf, 0, tmpbuf2, 0);
+    for (i = 0, compptr = cinfo->comp_info; i < cinfo->num_components;
+         i++, compptr++)
+      jcopy_sample_rows(tmpbuf2[i], 0, outbuf[i],
+        row * compptr->v_samp_factor / cinfo->max_v_samp_factor,
+        compptr->v_samp_factor, pw[i]);
+  }
+  cinfo->next_scanline += height;
+  jpeg_abort_compress(cinfo);
+
+bailout:
+  if (cinfo->global_state > CSTATE_START) jpeg_abort_compress(cinfo);
+  free(row_pointer);
+  for (i = 0; i < MAX_COMPONENTS; i++) {
+    free(tmpbuf[i]);
+    free(_tmpbuf[i]);
+    free(tmpbuf2[i]);
+    free(_tmpbuf2[i]);
+    free(outbuf[i]);
+  }
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjEncodeYUVPlanes(tjhandle handle, const unsigned char *srcBuf,
+                                int width, int pitch, int height,
+                                int pixelFormat, unsigned char **dstPlanes,
+                                int *strides, int subsamp, int flags)
+{
+  static const char FUNCTION_NAME[] = "tjEncodeYUVPlanes";
+  int retval = 0;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (subsamp < 0 || subsamp >= TJ_NUMSAMP)
+    THROW("Invalid argument");
+
+  this->subsamp = subsamp;
+  processFlags(handle, flags, COMPRESS);
+
+  return tj3EncodeYUVPlanes8(handle, srcBuf, width, pitch, height, pixelFormat,
+                             dstPlanes, strides);
+
+bailout:
+  return retval;
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int tj3EncodeYUV8(tjhandle handle, const unsigned char *srcBuf,
+                            int width, int pitch, int height, int pixelFormat,
+                            unsigned char *dstBuf, int align)
+{
+  static const char FUNCTION_NAME[] = "tj3EncodeYUV8";
+  unsigned char *dstPlanes[3];
+  int pw0, ph0, strides[3], retval = -1;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (width <= 0 || height <= 0 || dstBuf == NULL || align < 1 ||
+      !IS_POW2(align))
+    THROW("Invalid argument");
+
+  if (this->subsamp == TJSAMP_UNKNOWN)
+    THROW("TJPARAM_SUBSAMP must be specified");
+
+  pw0 = tj3YUVPlaneWidth(0, width, this->subsamp);
+  ph0 = tj3YUVPlaneHeight(0, height, this->subsamp);
+  dstPlanes[0] = dstBuf;
+  strides[0] = PAD(pw0, align);
+  if (this->subsamp == TJSAMP_GRAY) {
+    strides[1] = strides[2] = 0;
+    dstPlanes[1] = dstPlanes[2] = NULL;
+  } else {
+    int pw1 = tj3YUVPlaneWidth(1, width, this->subsamp);
+    int ph1 = tj3YUVPlaneHeight(1, height, this->subsamp);
+
+    strides[1] = strides[2] = PAD(pw1, align);
+    if ((unsigned long long)strides[0] * (unsigned long long)ph0 >
+        (unsigned long long)INT_MAX ||
+        (unsigned long long)strides[1] * (unsigned long long)ph1 >
+        (unsigned long long)INT_MAX)
+      THROW("Image or row alignment is too large");
+    dstPlanes[1] = dstPlanes[0] + strides[0] * ph0;
+    dstPlanes[2] = dstPlanes[1] + strides[1] * ph1;
+  }
+
+  return tj3EncodeYUVPlanes8(handle, srcBuf, width, pitch, height, pixelFormat,
+                             dstPlanes, strides);
+
+bailout:
+  return retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjEncodeYUV3(tjhandle handle, const unsigned char *srcBuf,
+                           int width, int pitch, int height, int pixelFormat,
+                           unsigned char *dstBuf, int align, int subsamp,
+                           int flags)
+{
+  static const char FUNCTION_NAME[] = "tjEncodeYUV3";
+  int retval = 0;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (subsamp < 0 || subsamp >= TJ_NUMSAMP)
+    THROW("Invalid argument");
+
+  this->subsamp = subsamp;
+  processFlags(handle, flags, COMPRESS);
+
+  return tj3EncodeYUV8(handle, srcBuf, width, pitch, height, pixelFormat,
+                       dstBuf, align);
+
+bailout:
+  return retval;
+}
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT int tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, int width,
+                           int pitch, int height, int pixelFormat,
+                           unsigned char *dstBuf, int subsamp, int flags)
+{
+  return tjEncodeYUV3(handle, srcBuf, width, pitch, height, pixelFormat,
+                      dstBuf, 4, subsamp, flags);
+}
+
+/* TurboJPEG 1.1+ */
+DLLEXPORT int tjEncodeYUV(tjhandle handle, unsigned char *srcBuf, int width,
+                          int pitch, int height, int pixelSize,
+                          unsigned char *dstBuf, int subsamp, int flags)
+{
+  return tjEncodeYUV2(handle, srcBuf, width, pitch, height,
+                      getPixelFormat(pixelSize, flags), dstBuf, subsamp,
+                      flags);
+}
+
+
+/******************************* Decompressor ********************************/
+
+static tjhandle _tjInitDecompress(tjinstance *this)
+{
+  static unsigned char buffer[1];
+
+  /* This is also straight out of example.c */
+  this->dinfo.err = jpeg_std_error(&this->jerr.pub);
+  this->jerr.pub.error_exit = my_error_exit;
+  this->jerr.pub.output_message = my_output_message;
+  this->jerr.emit_message = this->jerr.pub.emit_message;
+  this->jerr.pub.emit_message = my_emit_message;
+  this->jerr.pub.addon_message_table = turbojpeg_message_table;
+  this->jerr.pub.first_addon_message = JMSG_FIRSTADDONCODE;
+  this->jerr.pub.last_addon_message = JMSG_LASTADDONCODE;
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    free(this);
+    return NULL;
+  }
+
+  jpeg_create_decompress(&this->dinfo);
+  /* Make an initial call so it will create the source manager */
+  jpeg_mem_src_tj(&this->dinfo, buffer, 1);
+
+  this->init |= DECOMPRESS;
+  return (tjhandle)this;
+}
+
+/* TurboJPEG 1.0+ */
+DLLEXPORT tjhandle tjInitDecompress(void)
+{
+  return tj3Init(TJINIT_DECOMPRESS);
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int tj3DecompressHeader(tjhandle handle,
+                                  const unsigned char *jpegBuf,
+                                  size_t jpegSize)
+{
+  static const char FUNCTION_NAME[] = "tj3DecompressHeader";
+  int retval = 0;
+  unsigned char *iccPtr = NULL;
+  unsigned int iccLen = 0;
+
+  GET_DINSTANCE(handle);
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (jpegBuf == NULL || jpegSize <= 0)
+    THROW("Invalid argument");
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    return -1;
+  }
+
+  jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+
+  /* Extract ICC profile if TJPARAM_SAVEMARKERS is 2 or 4.  (We could
+     eventually reuse this mechanism to save other markers, if needed.)
+     Because ICC profiles can be large, we extract them by default but allow
+     the user to override that behavior. */
+  if (this->saveMarkers == 2 || this->saveMarkers == 4)
+    jpeg_save_markers(dinfo, JPEG_APP0 + 2, 0xFFFF);
+  /* jpeg_read_header() calls jpeg_abort() and returns JPEG_HEADER_TABLES_ONLY
+     if the datastream is a tables-only datastream.  Since we aren't using a
+     suspending data source, the only other value it can return is
+     JPEG_HEADER_OK. */
+  if (jpeg_read_header(dinfo, FALSE) == JPEG_HEADER_TABLES_ONLY)
+    return 0;
+
+  setDecompParameters(this);
+
+  if (this->saveMarkers == 2 || this->saveMarkers == 4) {
+    if (jpeg_read_icc_profile(dinfo, &iccPtr, &iccLen)) {
+      free(this->tempICCBuf);
+      this->tempICCBuf = iccPtr;
+      this->tempICCSize = (size_t)iccLen;
+    }
+  }
+
+  jpeg_abort_decompress(dinfo);
+
+  if (this->colorspace < 0)
+    THROW("Could not determine colorspace of JPEG image");
+  if (this->jpegWidth < 1 || this->jpegHeight < 1)
+    THROW("Invalid data returned in header");
+
+bailout:
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjDecompressHeader3(tjhandle handle,
+                                  const unsigned char *jpegBuf,
+                                  unsigned long jpegSize, int *width,
+                                  int *height, int *jpegSubsamp,
+                                  int *jpegColorspace)
+{
+  static const char FUNCTION_NAME[] = "tjDecompressHeader3";
+  int retval = 0;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (width == NULL || height == NULL || jpegSubsamp == NULL ||
+      jpegColorspace == NULL)
+    THROW("Invalid argument");
+
+  retval = tj3DecompressHeader(handle, jpegBuf, jpegSize);
+
+  *width = tj3Get(handle, TJPARAM_JPEGWIDTH);
+  *height = tj3Get(handle, TJPARAM_JPEGHEIGHT);
+  *jpegSubsamp = tj3Get(handle, TJPARAM_SUBSAMP);
+  if (*jpegSubsamp == TJSAMP_UNKNOWN)
+    THROW("Could not determine subsampling level of JPEG image");
+  *jpegColorspace = tj3Get(handle, TJPARAM_COLORSPACE);
+
+bailout:
+  return retval;
+}
+
+/* TurboJPEG 1.1+ */
+DLLEXPORT int tjDecompressHeader2(tjhandle handle, unsigned char *jpegBuf,
+                                  unsigned long jpegSize, int *width,
+                                  int *height, int *jpegSubsamp)
+{
+  int jpegColorspace;
+
+  return tjDecompressHeader3(handle, jpegBuf, jpegSize, width, height,
+                             jpegSubsamp, &jpegColorspace);
+}
+
+/* TurboJPEG 1.0+ */
+DLLEXPORT int tjDecompressHeader(tjhandle handle, unsigned char *jpegBuf,
+                                 unsigned long jpegSize, int *width,
+                                 int *height)
+{
+  int jpegSubsamp;
+
+  return tjDecompressHeader2(handle, jpegBuf, jpegSize, width, height,
+                             &jpegSubsamp);
+}
+
+
+/* TurboJPEG 3.1+ */
+DLLEXPORT int tj3GetICCProfile(tjhandle handle, unsigned char **iccBuf,
+                               size_t *iccSize)
+{
+  static const char FUNCTION_NAME[] = "tj3GetICCProfile";
+  int retval = 0;
+
+  GET_TJINSTANCE(handle, -1);
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (iccSize == NULL)
+    THROW("Invalid argument");
+
+  if (!this->tempICCBuf || !this->tempICCSize) {
+    if (iccBuf) *iccBuf = NULL;
+    *iccSize = 0;
+    this->jerr.warning = TRUE;
+    THROW("No ICC profile data has been extracted");
+  }
+
+  *iccSize = this->tempICCSize;
+  if (iccBuf == NULL)
+    return 0;
+  *iccBuf = this->tempICCBuf;
+  this->tempICCBuf = NULL;
+  this->tempICCSize = 0;
+
+bailout:
+  return retval;
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT tjscalingfactor *tj3GetScalingFactors(int *numScalingFactors)
+{
+  static const char FUNCTION_NAME[] = "tj3GetScalingFactors";
+  tjscalingfactor *retval = (tjscalingfactor *)sf;
+
+  if (numScalingFactors == NULL)
+    THROWG("Invalid argument", NULL);
+
+  *numScalingFactors = NUMSF;
+
+bailout:
+  return retval;
+}
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT tjscalingfactor *tjGetScalingFactors(int *numScalingFactors)
+{
+  return tj3GetScalingFactors(numScalingFactors);
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int tj3SetScalingFactor(tjhandle handle,
+                                  tjscalingfactor scalingFactor)
+{
+  static const char FUNCTION_NAME[] = "tj3SetScalingFactor";
+  int i, retval = 0;
+
+  GET_TJINSTANCE(handle, -1);
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  for (i = 0; i < NUMSF; i++) {
+    if (scalingFactor.num == sf[i].num && scalingFactor.denom == sf[i].denom)
+      break;
+  }
+  if (i >= NUMSF)
+    THROW("Unsupported scaling factor");
+
+  this->scalingFactor = scalingFactor;
+
+bailout:
+  return retval;
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int tj3SetCroppingRegion(tjhandle handle, tjregion croppingRegion)
+{
+  static const char FUNCTION_NAME[] = "tj3SetCroppingRegion";
+  int retval = 0, scaledWidth, scaledHeight;
+
+  GET_TJINSTANCE(handle, -1);
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (croppingRegion.x == 0 && croppingRegion.y == 0 &&
+      croppingRegion.w == 0 && croppingRegion.h == 0) {
+    this->croppingRegion = croppingRegion;
+    return 0;
+  }
+
+  if (croppingRegion.x < 0 || croppingRegion.y < 0 || croppingRegion.w < 0 ||
+      croppingRegion.h < 0)
+    THROW("Invalid cropping region");
+  if (this->jpegWidth < 0 || this->jpegHeight < 0)
+    THROW("JPEG header has not yet been read");
+  if ((this->precision != 8 && this->precision != 12) || this->lossless)
+    THROW("Cannot partially decompress lossless JPEG images");
+  if (this->subsamp == TJSAMP_UNKNOWN)
+    THROW("Could not determine subsampling level of JPEG image");
+
+  scaledWidth = TJSCALED(this->jpegWidth, this->scalingFactor);
+  scaledHeight = TJSCALED(this->jpegHeight, this->scalingFactor);
+
+  if (croppingRegion.x %
+      TJSCALED(tjMCUWidth[this->subsamp], this->scalingFactor) != 0)
+    THROWI("The left boundary of the cropping region (%d) is not\n"
+           "divisible by the scaled iMCU width (%d)",
+           croppingRegion.x,
+           TJSCALED(tjMCUWidth[this->subsamp], this->scalingFactor));
+  if (croppingRegion.w == 0)
+    croppingRegion.w = scaledWidth - croppingRegion.x;
+  if (croppingRegion.h == 0)
+    croppingRegion.h = scaledHeight - croppingRegion.y;
+  if (croppingRegion.w <= 0 || croppingRegion.h <= 0 ||
+      croppingRegion.x + croppingRegion.w > scaledWidth ||
+      croppingRegion.y + croppingRegion.h > scaledHeight)
+    THROW("The cropping region exceeds the scaled image dimensions");
+
+  this->croppingRegion = croppingRegion;
+
+bailout:
+  return retval;
+}
+
+
+/* tj3Decompress*() is implemented in turbojpeg-mp.c */
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT int tjDecompress2(tjhandle handle, const unsigned char *jpegBuf,
+                            unsigned long jpegSize, unsigned char *dstBuf,
+                            int width, int pitch, int height, int pixelFormat,
+                            int flags)
+{
+  static const char FUNCTION_NAME[] = "tjDecompress2";
+  int i, retval = 0, jpegwidth, jpegheight, scaledw, scaledh;
+
+  GET_DINSTANCE(handle);
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (jpegBuf == NULL || jpegSize <= 0 || width < 0 || height < 0)
+    THROW("Invalid argument");
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+  jpeg_read_header(dinfo, TRUE);
+  jpegwidth = dinfo->image_width;  jpegheight = dinfo->image_height;
+  if (width == 0) width = jpegwidth;
+  if (height == 0) height = jpegheight;
+  for (i = 0; i < NUMSF; i++) {
+    scaledw = TJSCALED(jpegwidth, sf[i]);
+    scaledh = TJSCALED(jpegheight, sf[i]);
+    if (scaledw <= width && scaledh <= height)
+      break;
+  }
+  if (i >= NUMSF)
+    THROW("Could not scale down to desired image dimensions");
+
+  processFlags(handle, flags, DECOMPRESS);
+
+  if (tj3SetScalingFactor(handle, sf[i]) == -1)
+    return -1;
+  if (tj3SetCroppingRegion(handle, TJUNCROPPED) == -1)
+    return -1;
+  return tj3Decompress8(handle, jpegBuf, jpegSize, dstBuf, pitch, pixelFormat);
+
+bailout:
+  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+/* TurboJPEG 1.0+ */
+DLLEXPORT int tjDecompress(tjhandle handle, unsigned char *jpegBuf,
+                           unsigned long jpegSize, unsigned char *dstBuf,
+                           int width, int pitch, int height, int pixelSize,
+                           int flags)
+{
+  if (flags & TJ_YUV)
+    return tjDecompressToYUV(handle, jpegBuf, jpegSize, dstBuf, flags);
+  else
+    return tjDecompress2(handle, jpegBuf, jpegSize, dstBuf, width, pitch,
+                         height, getPixelFormat(pixelSize, flags), flags);
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int tj3DecompressToYUVPlanes8(tjhandle handle,
+                                        const unsigned char *jpegBuf,
+                                        size_t jpegSize,
+                                        unsigned char **dstPlanes,
+                                        int *strides)
+{
+  static const char FUNCTION_NAME[] = "tj3DecompressToYUVPlanes8";
+  int i, row, retval = 0;
+  int pw[MAX_COMPONENTS], ph[MAX_COMPONENTS], iw[MAX_COMPONENTS],
+    tmpbufsize = 0, usetmpbuf = 0, th[MAX_COMPONENTS];
+  JSAMPLE *_tmpbuf = NULL, *ptr;
+  JSAMPROW *outbuf[MAX_COMPONENTS], *tmpbuf[MAX_COMPONENTS];
+  int dctsize;
+  struct my_progress_mgr progress;
+
+  GET_DINSTANCE(handle);
+
+  for (i = 0; i < MAX_COMPONENTS; i++) {
+    tmpbuf[i] = NULL;  outbuf[i] = NULL;
+  }
+
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (jpegBuf == NULL || jpegSize <= 0 || !dstPlanes || !dstPlanes[0])
+    THROW("Invalid argument");
+
+  if (this->scanLimit) {
+    memset(&progress, 0, sizeof(struct my_progress_mgr));
+    progress.pub.progress_monitor = my_progress_monitor;
+    progress.this = this;
+    dinfo->progress = &progress.pub;
+  } else
+    dinfo->progress = NULL;
+
+  dinfo->mem->max_memory_to_use = (long)this->maxMemory * 1048576L;
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  if (dinfo->global_state <= DSTATE_INHEADER) {
+    jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+    jpeg_read_header(dinfo, TRUE);
+  }
+  setDecompParameters(this);
+  if (this->maxPixels &&
+      (unsigned long long)this->jpegWidth * this->jpegHeight >
+      (unsigned long long)this->maxPixels)
+    THROW("Image is too large");
+  if (this->subsamp == TJSAMP_UNKNOWN)
+    THROW("Could not determine subsampling level of JPEG image");
+
+  if (this->subsamp != TJSAMP_GRAY && (!dstPlanes[1] || !dstPlanes[2]))
+    THROW("Invalid argument");
+
+  if (dinfo->num_components > 3)
+    THROW("JPEG image must have 3 or fewer components");
+
+  dinfo->scale_num = this->scalingFactor.num;
+  dinfo->scale_denom = this->scalingFactor.denom;
+  jpeg_calc_output_dimensions(dinfo);
+
+  dctsize = DCTSIZE * this->scalingFactor.num / this->scalingFactor.denom;
+
+  for (i = 0; i < dinfo->num_components; i++) {
+    jpeg_component_info *compptr = &dinfo->comp_info[i];
+    int ih;
+
+    iw[i] = compptr->width_in_blocks * dctsize;
+    ih = compptr->height_in_blocks * dctsize;
+    pw[i] = tj3YUVPlaneWidth(i, dinfo->output_width, this->subsamp);
+    ph[i] = tj3YUVPlaneHeight(i, dinfo->output_height, this->subsamp);
+    if (iw[i] != pw[i] || ih != ph[i]) usetmpbuf = 1;
+    th[i] = compptr->v_samp_factor * dctsize;
+    tmpbufsize += iw[i] * th[i];
+    if ((outbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph[i])) == NULL)
+      THROW("Memory allocation failure");
+    ptr = dstPlanes[i];
+    for (row = 0; row < ph[i]; row++) {
+      outbuf[i][row] = ptr;
+      ptr += (strides && strides[i] != 0) ? strides[i] : pw[i];
+    }
+  }
+  if (usetmpbuf) {
+    if ((_tmpbuf = (JSAMPLE *)MALLOC(sizeof(JSAMPLE) * tmpbufsize)) == NULL)
+      THROW("Memory allocation failure");
+    ptr = _tmpbuf;
+    for (i = 0; i < dinfo->num_components; i++) {
+      if ((tmpbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * th[i])) == NULL)
+        THROW("Memory allocation failure");
+      for (row = 0; row < th[i]; row++) {
+        tmpbuf[i][row] = ptr;
+        ptr += iw[i];
+      }
+    }
+  }
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  dinfo->do_fancy_upsampling = !this->fastUpsample;
+  dinfo->dct_method = this->fastDCT ? JDCT_FASTEST : JDCT_ISLOW;
+  dinfo->raw_data_out = TRUE;
+
+  dinfo->mem->max_memory_to_use = (long)this->maxMemory * 1048576L;
+
+  jpeg_start_decompress(dinfo);
+  for (row = 0; row < (int)dinfo->output_height;
+       row += dinfo->max_v_samp_factor * dinfo->_min_DCT_scaled_size) {
+    JSAMPARRAY yuvptr[MAX_COMPONENTS];
+    int crow[MAX_COMPONENTS];
+
+    for (i = 0; i < dinfo->num_components; i++) {
+      jpeg_component_info *compptr = &dinfo->comp_info[i];
+
+      if (this->subsamp == TJSAMP_420) {
+        /* When 4:2:0 subsampling is used with IDCT scaling, libjpeg will try
+           to be clever and use the IDCT to perform upsampling on the U and V
+           planes.  For instance, if the output image is to be scaled by 1/2
+           relative to the JPEG image, then the scaling factor and upsampling
+           effectively cancel each other, so a normal 8x8 IDCT can be used.
+           However, this is not desirable when using the decompress-to-YUV
+           functionality in TurboJPEG, since we want to output the U and V
+           planes in their subsampled form.  Thus, we have to override some
+           internal libjpeg parameters to force it to use the "scaled" IDCT
+           functions on the U and V planes. */
+        compptr->_DCT_scaled_size = dctsize;
+        compptr->MCU_sample_width = tjMCUWidth[this->subsamp] *
+          this->scalingFactor.num / this->scalingFactor.denom *
+          compptr->v_samp_factor / dinfo->max_v_samp_factor;
+        dinfo->idct->inverse_DCT[i] = dinfo->idct->inverse_DCT[0];
+      }
+      crow[i] = row * compptr->v_samp_factor / dinfo->max_v_samp_factor;
+      if (usetmpbuf) yuvptr[i] = tmpbuf[i];
+      else yuvptr[i] = &outbuf[i][crow[i]];
+    }
+    jpeg_read_raw_data(dinfo, yuvptr,
+                       dinfo->max_v_samp_factor * dinfo->_min_DCT_scaled_size);
+    if (usetmpbuf) {
+      int j;
+
+      for (i = 0; i < dinfo->num_components; i++) {
+        for (j = 0; j < MIN(th[i], ph[i] - crow[i]); j++) {
+          memcpy(outbuf[i][crow[i] + j], tmpbuf[i][j], pw[i]);
+        }
+      }
+    }
+  }
+  jpeg_finish_decompress(dinfo);
+
+bailout:
+  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
+  for (i = 0; i < MAX_COMPONENTS; i++) {
+    free(tmpbuf[i]);
+    free(outbuf[i]);
+  }
+  free(_tmpbuf);
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjDecompressToYUVPlanes(tjhandle handle,
+                                      const unsigned char *jpegBuf,
+                                      unsigned long jpegSize,
+                                      unsigned char **dstPlanes, int width,
+                                      int *strides, int height, int flags)
+{
+  static const char FUNCTION_NAME[] = "tjDecompressToYUVPlanes";
+  int i, retval = 0, jpegwidth, jpegheight, scaledw, scaledh;
+
+  GET_DINSTANCE(handle);
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (jpegBuf == NULL || jpegSize <= 0 || width < 0 || height < 0)
+    THROW("Invalid argument");
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+  jpeg_read_header(dinfo, TRUE);
+  jpegwidth = dinfo->image_width;  jpegheight = dinfo->image_height;
+  if (width == 0) width = jpegwidth;
+  if (height == 0) height = jpegheight;
+  for (i = 0; i < NUMSF; i++) {
+    scaledw = TJSCALED(jpegwidth, sf[i]);
+    scaledh = TJSCALED(jpegheight, sf[i]);
+    if (scaledw <= width && scaledh <= height)
+      break;
+  }
+  if (i >= NUMSF)
+    THROW("Could not scale down to desired image dimensions");
+
+  processFlags(handle, flags, DECOMPRESS);
+
+  if (tj3SetScalingFactor(handle, sf[i]) == -1)
+    return -1;
+  return tj3DecompressToYUVPlanes8(handle, jpegBuf, jpegSize, dstPlanes,
+                                   strides);
+
+bailout:
+  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int tj3DecompressToYUV8(tjhandle handle,
+                                  const unsigned char *jpegBuf,
+                                  size_t jpegSize,
+                                  unsigned char *dstBuf, int align)
+{
+  static const char FUNCTION_NAME[] = "tj3DecompressToYUV8";
+  unsigned char *dstPlanes[3];
+  int pw0, ph0, strides[3], retval = -1;
+  int width, height;
+
+  GET_DINSTANCE(handle);
+
+  if (jpegBuf == NULL || jpegSize <= 0 || dstBuf == NULL || align < 1 ||
+      !IS_POW2(align))
+    THROW("Invalid argument");
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  if (dinfo->global_state <= DSTATE_INHEADER) {
+    jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+    jpeg_read_header(dinfo, TRUE);
+  }
+  setDecompParameters(this);
+  if (this->subsamp == TJSAMP_UNKNOWN)
+    THROW("Could not determine subsampling level of JPEG image");
+
+  width = TJSCALED(dinfo->image_width, this->scalingFactor);
+  height = TJSCALED(dinfo->image_height, this->scalingFactor);
+
+  pw0 = tj3YUVPlaneWidth(0, width, this->subsamp);
+  ph0 = tj3YUVPlaneHeight(0, height, this->subsamp);
+  dstPlanes[0] = dstBuf;
+  strides[0] = PAD(pw0, align);
+  if (this->subsamp == TJSAMP_GRAY) {
+    strides[1] = strides[2] = 0;
+    dstPlanes[1] = dstPlanes[2] = NULL;
+  } else {
+    int pw1 = tj3YUVPlaneWidth(1, width, this->subsamp);
+    int ph1 = tj3YUVPlaneHeight(1, height, this->subsamp);
+
+    strides[1] = strides[2] = PAD(pw1, align);
+    if ((unsigned long long)strides[0] * (unsigned long long)ph0 >
+        (unsigned long long)INT_MAX ||
+        (unsigned long long)strides[1] * (unsigned long long)ph1 >
+        (unsigned long long)INT_MAX)
+      THROW("Image or row alignment is too large");
+    dstPlanes[1] = dstPlanes[0] + strides[0] * ph0;
+    dstPlanes[2] = dstPlanes[1] + strides[1] * ph1;
+  }
+
+  return tj3DecompressToYUVPlanes8(handle, jpegBuf, jpegSize, dstPlanes,
+                                   strides);
+
+bailout:
+  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjDecompressToYUV2(tjhandle handle, const unsigned char *jpegBuf,
+                                 unsigned long jpegSize, unsigned char *dstBuf,
+                                 int width, int align, int height, int flags)
+{
+  static const char FUNCTION_NAME[] = "tjDecompressToYUV2";
+  int i, retval = 0, jpegwidth, jpegheight, scaledw, scaledh;
+
+  GET_DINSTANCE(handle);
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (jpegBuf == NULL || jpegSize <= 0 || width < 0 || height < 0)
+    THROW("Invalid argument");
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+  jpeg_read_header(dinfo, TRUE);
+  jpegwidth = dinfo->image_width;  jpegheight = dinfo->image_height;
+  if (width == 0) width = jpegwidth;
+  if (height == 0) height = jpegheight;
+  for (i = 0; i < NUMSF; i++) {
+    scaledw = TJSCALED(jpegwidth, sf[i]);
+    scaledh = TJSCALED(jpegheight, sf[i]);
+    if (scaledw <= width && scaledh <= height)
+      break;
+  }
+  if (i >= NUMSF)
+    THROW("Could not scale down to desired image dimensions");
+
+  width = scaledw;  height = scaledh;
+
+  processFlags(handle, flags, DECOMPRESS);
+
+  if (tj3SetScalingFactor(handle, sf[i]) == -1)
+    return -1;
+  return tj3DecompressToYUV8(handle, jpegBuf, (size_t)jpegSize, dstBuf, align);
+
+bailout:
+  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+/* TurboJPEG 1.1+ */
+DLLEXPORT int tjDecompressToYUV(tjhandle handle, unsigned char *jpegBuf,
+                                unsigned long jpegSize, unsigned char *dstBuf,
+                                int flags)
+{
+  return tjDecompressToYUV2(handle, jpegBuf, jpegSize, dstBuf, 0, 4, 0, flags);
+}
+
+
+static void setDecodeDefaults(tjinstance *this, int pixelFormat)
+{
+  int i;
+
+  this->dinfo.scale_num = this->dinfo.scale_denom = 1;
+
+  if (this->subsamp == TJSAMP_GRAY) {
+    this->dinfo.num_components = this->dinfo.comps_in_scan = 1;
+    this->dinfo.jpeg_color_space = JCS_GRAYSCALE;
+  } else {
+    this->dinfo.num_components = this->dinfo.comps_in_scan = 3;
+    this->dinfo.jpeg_color_space = JCS_YCbCr;
+  }
+
+  this->dinfo.comp_info = (jpeg_component_info *)
+    (*this->dinfo.mem->alloc_small) ((j_common_ptr)&this->dinfo, JPOOL_IMAGE,
+                                     this->dinfo.num_components *
+                                     sizeof(jpeg_component_info));
+
+  for (i = 0; i < this->dinfo.num_components; i++) {
+    jpeg_component_info *compptr = &this->dinfo.comp_info[i];
+
+    compptr->h_samp_factor = (i == 0) ? tjMCUWidth[this->subsamp] / 8 : 1;
+    compptr->v_samp_factor = (i == 0) ? tjMCUHeight[this->subsamp] / 8 : 1;
+    compptr->component_index = i;
+    compptr->component_id = i + 1;
+    compptr->quant_tbl_no = compptr->dc_tbl_no =
+      compptr->ac_tbl_no = (i == 0) ? 0 : 1;
+    this->dinfo.cur_comp_info[i] = compptr;
+  }
+  this->dinfo.data_precision = 8;
+  for (i = 0; i < 2; i++) {
+    if (this->dinfo.quant_tbl_ptrs[i] == NULL)
+      this->dinfo.quant_tbl_ptrs[i] =
+        jpeg_alloc_quant_table((j_common_ptr)&this->dinfo);
+  }
+
+  this->dinfo.mem->max_memory_to_use = (long)this->maxMemory * 1048576L;
+}
+
+
+static int my_read_markers(j_decompress_ptr dinfo)
+{
+  return JPEG_REACHED_SOS;
+}
+
+static void my_reset_marker_reader(j_decompress_ptr dinfo)
+{
+}
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int tj3DecodeYUVPlanes8(tjhandle handle,
+                                  const unsigned char * const *srcPlanes,
+                                  const int *strides, unsigned char *dstBuf,
+                                  int width, int pitch, int height,
+                                  int pixelFormat)
+{
+  static const char FUNCTION_NAME[] = "tj3DecodeYUVPlanes8";
+  JSAMPROW *row_pointer = NULL;
+  JSAMPLE *_tmpbuf[MAX_COMPONENTS];
+  JSAMPROW *tmpbuf[MAX_COMPONENTS], *inbuf[MAX_COMPONENTS];
+  int i, retval = 0, row, pw0, ph0, pw[MAX_COMPONENTS], ph[MAX_COMPONENTS];
+  JSAMPLE *ptr;
+  jpeg_component_info *compptr;
+  int (*old_read_markers) (j_decompress_ptr);
+  void (*old_reset_marker_reader) (j_decompress_ptr);
+
+  GET_DINSTANCE(handle);
+
+  for (i = 0; i < MAX_COMPONENTS; i++) {
+    tmpbuf[i] = NULL;  _tmpbuf[i] = NULL;  inbuf[i] = NULL;
+  }
+
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (!srcPlanes || !srcPlanes[0] || dstBuf == NULL || width <= 0 ||
+      pitch < 0 || height <= 0 || pixelFormat < 0 || pixelFormat >= TJ_NUMPF)
+    THROW("Invalid argument");
+  if (this->subsamp != TJSAMP_GRAY && (!srcPlanes[1] || !srcPlanes[2]))
+    THROW("Invalid argument");
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  if (this->subsamp == TJSAMP_UNKNOWN)
+    THROW("TJPARAM_SUBSAMP must be specified");
+  if (pixelFormat == TJPF_CMYK)
+    THROW("Cannot decode YUV images into packed-pixel CMYK images.");
+
+  if (pitch == 0) pitch = width * tjPixelSize[pixelFormat];
+  dinfo->image_width = width;
+  dinfo->image_height = height;
+
+  dinfo->progressive_mode = dinfo->inputctl->has_multiple_scans = FALSE;
+  dinfo->Ss = dinfo->Ah = dinfo->Al = 0;
+  dinfo->Se = DCTSIZE2 - 1;
+  setDecodeDefaults(this, pixelFormat);
+  old_read_markers = dinfo->marker->read_markers;
+  dinfo->marker->read_markers = my_read_markers;
+  old_reset_marker_reader = dinfo->marker->reset_marker_reader;
+  dinfo->marker->reset_marker_reader = my_reset_marker_reader;
+  jpeg_read_header(dinfo, TRUE);
+  dinfo->marker->read_markers = old_read_markers;
+  dinfo->marker->reset_marker_reader = old_reset_marker_reader;
+
+  this->dinfo.out_color_space = pf2cs[pixelFormat];
+  this->dinfo.dct_method = this->fastDCT ? JDCT_FASTEST : JDCT_ISLOW;
+  dinfo->do_fancy_upsampling = FALSE;
+  dinfo->Se = DCTSIZE2 - 1;
+  jinit_master_decompress(dinfo);
+  (*dinfo->upsample->start_pass) (dinfo);
+
+  pw0 = PAD(width, dinfo->max_h_samp_factor);
+  ph0 = PAD(height, dinfo->max_v_samp_factor);
+
+  if (pitch == 0) pitch = dinfo->output_width * tjPixelSize[pixelFormat];
+
+  if ((row_pointer = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph0)) == NULL)
+    THROW("Memory allocation failure");
+  for (i = 0; i < height; i++) {
+    if (this->bottomUp)
+      row_pointer[i] = &dstBuf[(height - i - 1) * (size_t)pitch];
+    else
+      row_pointer[i] = &dstBuf[i * (size_t)pitch];
+  }
+  if (height < ph0)
+    for (i = height; i < ph0; i++) row_pointer[i] = row_pointer[height - 1];
+
+  for (i = 0; i < dinfo->num_components; i++) {
+    compptr = &dinfo->comp_info[i];
+    _tmpbuf[i] =
+      (JSAMPLE *)malloc(PAD(compptr->width_in_blocks * DCTSIZE, 32) *
+                        compptr->v_samp_factor + 32);
+    if (!_tmpbuf[i])
+      THROW("Memory allocation failure");
+    tmpbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * compptr->v_samp_factor);
+    if (!tmpbuf[i])
+      THROW("Memory allocation failure");
+    for (row = 0; row < compptr->v_samp_factor; row++) {
+      unsigned char *_tmpbuf_aligned =
+        (unsigned char *)PAD((JUINTPTR)_tmpbuf[i], 32);
+
+      tmpbuf[i][row] =
+        &_tmpbuf_aligned[PAD(compptr->width_in_blocks * DCTSIZE, 32) * row];
+    }
+    pw[i] = pw0 * compptr->h_samp_factor / dinfo->max_h_samp_factor;
+    ph[i] = ph0 * compptr->v_samp_factor / dinfo->max_v_samp_factor;
+    inbuf[i] = (JSAMPROW *)malloc(sizeof(JSAMPROW) * ph[i]);
+    if (!inbuf[i])
+      THROW("Memory allocation failure");
+    ptr = (JSAMPLE *)srcPlanes[i];
+    for (row = 0; row < ph[i]; row++) {
+      inbuf[i][row] = ptr;
+      ptr += (strides && strides[i] != 0) ? strides[i] : pw[i];
+    }
+  }
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  for (row = 0; row < ph0; row += dinfo->max_v_samp_factor) {
+    JDIMENSION inrow = 0, outrow = 0;
+
+    for (i = 0, compptr = dinfo->comp_info; i < dinfo->num_components;
+         i++, compptr++)
+      jcopy_sample_rows(inbuf[i],
+        row * compptr->v_samp_factor / dinfo->max_v_samp_factor, tmpbuf[i], 0,
+        compptr->v_samp_factor, pw[i]);
+    (dinfo->upsample->upsample) (dinfo, tmpbuf, &inrow,
+                                 dinfo->max_v_samp_factor, &row_pointer[row],
+                                 &outrow, dinfo->max_v_samp_factor);
+  }
+  jpeg_abort_decompress(dinfo);
+
+bailout:
+  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
+  free(row_pointer);
+  for (i = 0; i < MAX_COMPONENTS; i++) {
+    free(tmpbuf[i]);
+    free(_tmpbuf[i]);
+    free(inbuf[i]);
+  }
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjDecodeYUVPlanes(tjhandle handle,
+                                const unsigned char **srcPlanes,
+                                const int *strides, int subsamp,
+                                unsigned char *dstBuf, int width, int pitch,
+                                int height, int pixelFormat, int flags)
+{
+  static const char FUNCTION_NAME[] = "tjDecodeYUVPlanes";
+  int retval = 0;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (subsamp < 0 || subsamp >= TJ_NUMSAMP)
+    THROW("Invalid argument");
+
+  this->subsamp = subsamp;
+  processFlags(handle, flags, DECOMPRESS);
+
+  return tj3DecodeYUVPlanes8(handle, srcPlanes, strides, dstBuf, width, pitch,
+                             height, pixelFormat);
+
+bailout:
+  return retval;
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int tj3DecodeYUV8(tjhandle handle, const unsigned char *srcBuf,
+                            int align, unsigned char *dstBuf, int width,
+                            int pitch, int height, int pixelFormat)
+{
+  static const char FUNCTION_NAME[] = "tj3DecodeYUV8";
+  const unsigned char *srcPlanes[3];
+  int pw0, ph0, strides[3], retval = -1;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (srcBuf == NULL || align < 1 || !IS_POW2(align) || width <= 0 ||
+      height <= 0)
+    THROW("Invalid argument");
+
+  if (this->subsamp == TJSAMP_UNKNOWN)
+    THROW("TJPARAM_SUBSAMP must be specified");
+
+  pw0 = tj3YUVPlaneWidth(0, width, this->subsamp);
+  ph0 = tj3YUVPlaneHeight(0, height, this->subsamp);
+  srcPlanes[0] = srcBuf;
+  strides[0] = PAD(pw0, align);
+  if (this->subsamp == TJSAMP_GRAY) {
+    strides[1] = strides[2] = 0;
+    srcPlanes[1] = srcPlanes[2] = NULL;
+  } else {
+    int pw1 = tj3YUVPlaneWidth(1, width, this->subsamp);
+    int ph1 = tj3YUVPlaneHeight(1, height, this->subsamp);
+
+    strides[1] = strides[2] = PAD(pw1, align);
+    if ((unsigned long long)strides[0] * (unsigned long long)ph0 >
+        (unsigned long long)INT_MAX ||
+        (unsigned long long)strides[1] * (unsigned long long)ph1 >
+        (unsigned long long)INT_MAX)
+      THROW("Image or row alignment is too large");
+    srcPlanes[1] = srcPlanes[0] + strides[0] * ph0;
+    srcPlanes[2] = srcPlanes[1] + strides[1] * ph1;
+  }
+
+  return tj3DecodeYUVPlanes8(handle, srcPlanes, strides, dstBuf, width, pitch,
+                             height, pixelFormat);
+
+bailout:
+  return retval;
+}
+
+/* TurboJPEG 1.4+ */
+DLLEXPORT int tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf,
+                          int align, int subsamp, unsigned char *dstBuf,
+                          int width, int pitch, int height, int pixelFormat,
+                          int flags)
+{
+  static const char FUNCTION_NAME[] = "tjDecodeYUV";
+  int retval = -1;
+
+  GET_TJINSTANCE(handle, -1);
+
+  if (subsamp < 0 || subsamp >= TJ_NUMSAMP)
+    THROW("Invalid argument");
+
+  this->subsamp = subsamp;
+  processFlags(handle, flags, DECOMPRESS);
+
+  return tj3DecodeYUV8(handle, srcBuf, align, dstBuf, width, pitch, height,
+                       pixelFormat);
+
+bailout:
+  return retval;
+}
+
+
+/******************************** Transformer ********************************/
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT tjhandle tjInitTransform(void)
+{
+  return tj3Init(TJINIT_TRANSFORM);
+}
+
+
+static int getDstSubsamp(int srcSubsamp, const tjtransform *transform)
+{
+  int dstSubsamp;
+
+  if (!transform)
+    return srcSubsamp;
+
+  dstSubsamp = (transform->options & TJXOPT_GRAY) ? TJSAMP_GRAY : srcSubsamp;
+
+  if (transform->op == TJXOP_TRANSPOSE || transform->op == TJXOP_TRANSVERSE ||
+      transform->op == TJXOP_ROT90 || transform->op == TJXOP_ROT270) {
+    if (dstSubsamp == TJSAMP_422) dstSubsamp = TJSAMP_440;
+    else if (dstSubsamp == TJSAMP_440) dstSubsamp = TJSAMP_422;
+    else if (dstSubsamp == TJSAMP_411) dstSubsamp = TJSAMP_441;
+    else if (dstSubsamp == TJSAMP_441) dstSubsamp = TJSAMP_411;
+  }
+
+  return dstSubsamp;
+}
+
+static int getTransformedSpecs(tjhandle handle, int *width, int *height,
+                               int *subsamp, const tjtransform *transform,
+                               const char *FUNCTION_NAME)
+{
+  int retval = 0, dstWidth, dstHeight, dstSubsamp;
+
+  GET_TJINSTANCE(handle, -1);
+  if ((this->init & COMPRESS) == 0 || (this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for transformation");
+
+  if (!width || !height || !subsamp || !transform || *width < 1 ||
+      *height < 1 || *subsamp < TJSAMP_UNKNOWN || *subsamp >= TJ_NUMSAMP)
+    THROW("Invalid argument");
+
+  dstWidth = *width;  dstHeight = *height;
+  if (transform->op == TJXOP_TRANSPOSE || transform->op == TJXOP_TRANSVERSE ||
+      transform->op == TJXOP_ROT90 || transform->op == TJXOP_ROT270) {
+    dstWidth = *height;  dstHeight = *width;
+  }
+  dstSubsamp = getDstSubsamp(*subsamp, transform);
+
+  if (transform->options & TJXOPT_CROP) {
+    int croppedWidth, croppedHeight;
+
+    if (transform->r.x < 0 || transform->r.y < 0 || transform->r.w < 0 ||
+        transform->r.h < 0)
+      THROW("Invalid cropping region");
+    if (dstSubsamp == TJSAMP_UNKNOWN)
+      THROW("Could not determine subsampling level of JPEG image");
+    if ((transform->r.x % tjMCUWidth[dstSubsamp]) != 0 ||
+        (transform->r.y % tjMCUHeight[dstSubsamp]) != 0)
+      THROWI("To crop this JPEG image, x must be a multiple of %d\n"
+             "and y must be a multiple of %d.", tjMCUWidth[dstSubsamp],
+             tjMCUHeight[dstSubsamp]);
+    if (transform->r.x >= dstWidth || transform->r.y >= dstHeight)
+      THROW("The cropping region exceeds the destination image dimensions");
+    croppedWidth = transform->r.w == 0 ? dstWidth - transform->r.x :
+                                         transform->r.w;
+    croppedHeight = transform->r.h == 0 ? dstHeight - transform->r.y :
+                                          transform->r.h;
+    if (transform->r.x + croppedWidth > dstWidth ||
+        transform->r.y + croppedHeight > dstHeight)
+      THROW("The cropping region exceeds the destination image dimensions");
+    dstWidth = croppedWidth;  dstHeight = croppedHeight;
+  }
+
+  *width = dstWidth;  *height = dstHeight;  *subsamp = dstSubsamp;
+
+bailout:
+  return retval;
+}
+
+
+/* TurboJPEG 3.1+ */
+DLLEXPORT size_t tj3TransformBufSize(tjhandle handle,
+                                     const tjtransform *transform)
+{
+  static const char FUNCTION_NAME[] = "tj3TransformBufSize";
+  size_t retval = 0;
+  int dstWidth, dstHeight, dstSubsamp;
+
+  GET_TJINSTANCE(handle, 0);
+  if ((this->init & COMPRESS) == 0 || (this->init & DECOMPRESS) == 0)
+    THROWRV("Instance has not been initialized for transformation", 0);
+
+  if (transform == NULL)
+    THROWRV("Invalid argument", 0)
+
+  if (this->jpegWidth < 0 || this->jpegHeight < 0)
+    THROWRV("JPEG header has not yet been read", 0);
+
+  dstWidth = this->jpegWidth;
+  dstHeight = this->jpegHeight;
+  dstSubsamp = this->subsamp;
+  if (getTransformedSpecs(handle, &dstWidth, &dstHeight, &dstSubsamp,
+                          transform, FUNCTION_NAME) == -1) {
+    retval = 0;
+    goto bailout;
+  }
+
+  retval = tj3JPEGBufSize(dstWidth, dstHeight, dstSubsamp);
+  if ((this->saveMarkers == 2 || this->saveMarkers == 4) &&
+      !(transform->options & TJXOPT_COPYNONE))
+    retval += this->tempICCSize;
+  else
+    retval += this->iccSize;
+
+bailout:
+  return retval;
+}
+
+
+/* TurboJPEG 3.0+ */
+DLLEXPORT int tj3Transform(tjhandle handle, const unsigned char *jpegBuf,
+                           size_t jpegSize, int n, unsigned char **dstBufs,
+                           size_t *dstSizes, const tjtransform *t)
+{
+  static const char FUNCTION_NAME[] = "tj3Transform";
+  jpeg_transform_info *xinfo = NULL;
+  jvirt_barray_ptr *srccoefs, *dstcoefs;
+  int retval = 0, i, saveMarkers = 0, srcSubsamp;
+  boolean alloc = TRUE;
+  struct my_progress_mgr progress;
+
+  GET_INSTANCE(handle);
+  if ((this->init & COMPRESS) == 0 || (this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for transformation");
+
+  if (jpegBuf == NULL || jpegSize <= 0 || n < 1 || dstBufs == NULL ||
+      dstSizes == NULL || t == NULL)
+    THROW("Invalid argument");
+
+  if (this->scanLimit) {
+    memset(&progress, 0, sizeof(struct my_progress_mgr));
+    progress.pub.progress_monitor = my_progress_monitor;
+    progress.this = this;
+    dinfo->progress = &progress.pub;
+  } else
+    dinfo->progress = NULL;
+
+  dinfo->mem->max_memory_to_use = (long)this->maxMemory * 1048576L;
+
+  if ((xinfo =
+       (jpeg_transform_info *)malloc(sizeof(jpeg_transform_info) * n)) == NULL)
+    THROW("Memory allocation failure");
+  memset(xinfo, 0, sizeof(jpeg_transform_info) * n);
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  if (dinfo->global_state <= DSTATE_INHEADER)
+    jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+
+  for (i = 0; i < n; i++) {
+    if (t[i].op < 0 || t[i].op >= TJ_NUMXOP)
+      THROW("Invalid transform operation");
+    xinfo[i].transform = xformtypes[t[i].op];
+    xinfo[i].perfect = (t[i].options & TJXOPT_PERFECT) ? 1 : 0;
+    xinfo[i].trim = (t[i].options & TJXOPT_TRIM) ? 1 : 0;
+    xinfo[i].force_grayscale = (t[i].options & TJXOPT_GRAY) ? 1 : 0;
+    xinfo[i].crop = (t[i].options & TJXOPT_CROP) ? 1 : 0;
+    if (n != 1 && t[i].op == TJXOP_HFLIP) xinfo[i].slow_hflip = 1;
+    else xinfo[i].slow_hflip = 0;
+
+    if (xinfo[i].crop) {
+      if (t[i].r.x < 0 || t[i].r.y < 0 || t[i].r.w < 0 || t[i].r.h < 0)
+        THROW("Invalid cropping region");
+      xinfo[i].crop_xoffset = t[i].r.x;  xinfo[i].crop_xoffset_set = JCROP_POS;
+      xinfo[i].crop_yoffset = t[i].r.y;  xinfo[i].crop_yoffset_set = JCROP_POS;
+      if (t[i].r.w != 0) {
+        xinfo[i].crop_width = t[i].r.w;  xinfo[i].crop_width_set = JCROP_POS;
+      } else
+        xinfo[i].crop_width = JCROP_UNSET;
+      if (t[i].r.h != 0) {
+        xinfo[i].crop_height = t[i].r.h;  xinfo[i].crop_height_set = JCROP_POS;
+      } else
+        xinfo[i].crop_height = JCROP_UNSET;
+    }
+    if (!(t[i].options & TJXOPT_COPYNONE)) saveMarkers = 1;
+  }
+
+  jcopy_markers_setup(dinfo, saveMarkers ?
+                             (JCOPY_OPTION)this->saveMarkers : JCOPYOPT_NONE);
+  if (dinfo->global_state <= DSTATE_INHEADER)
+    jpeg_read_header(dinfo, TRUE);
+  if (this->maxPixels &&
+      (unsigned long long)dinfo->image_width * dinfo->image_height >
+      (unsigned long long)this->maxPixels)
+    THROW("Image is too large");
+  srcSubsamp = getSubsamp(&this->dinfo);
+
+  for (i = 0; i < n; i++) {
+    if (!jtransform_request_workspace(dinfo, &xinfo[i]))
+      THROW("Transform is not perfect");
+
+    if (xinfo[i].crop) {
+      int dstSubsamp = getDstSubsamp(srcSubsamp, &t[i]);
+
+      if (dstSubsamp == TJSAMP_UNKNOWN)
+        THROW("Could not determine subsampling level of destination image");
+      if ((t[i].r.x % tjMCUWidth[dstSubsamp]) != 0 ||
+          (t[i].r.y % tjMCUHeight[dstSubsamp]) != 0)
+        THROWI("To crop this JPEG image, x must be a multiple of %d\n"
+               "and y must be a multiple of %d.", tjMCUWidth[dstSubsamp],
+               tjMCUHeight[dstSubsamp]);
+    }
+  }
+
+  srccoefs = jpeg_read_coefficients(dinfo);
+
+  for (i = 0; i < n; i++) {
+    if (this->noRealloc) alloc = FALSE;
+    if (!(t[i].options & TJXOPT_NOOUTPUT))
+      jpeg_mem_dest_tj(cinfo, &dstBufs[i], &dstSizes[i], alloc);
+    jpeg_copy_critical_parameters(dinfo, cinfo);
+    dstcoefs = jtransform_adjust_parameters(dinfo, cinfo, srccoefs, &xinfo[i]);
+    if (this->optimize || t[i].options & TJXOPT_OPTIMIZE)
+      cinfo->optimize_coding = TRUE;
+#ifdef C_PROGRESSIVE_SUPPORTED
+    if (this->progressive || t[i].options & TJXOPT_PROGRESSIVE)
+      jpeg_simple_progression(cinfo);
+#endif
+    if (this->arithmetic || t[i].options & TJXOPT_ARITHMETIC) {
+      cinfo->arith_code = TRUE;
+      cinfo->optimize_coding = FALSE;
+    }
+    cinfo->restart_interval = this->restartIntervalBlocks;
+    cinfo->restart_in_rows = this->restartIntervalRows;
+    if (!(t[i].options & TJXOPT_NOOUTPUT)) {
+      jpeg_write_coefficients(cinfo, dstcoefs);
+      jcopy_markers_execute(dinfo, cinfo, t[i].options & TJXOPT_COPYNONE ?
+                                          JCOPYOPT_NONE :
+                                          (JCOPY_OPTION)this->saveMarkers);
+      if (this->iccBuf != NULL && this->iccSize != 0)
+        jpeg_write_icc_profile(cinfo, this->iccBuf,
+                               (unsigned int)this->iccSize);
+    } else
+      jinit_c_master_control(cinfo, TRUE);
+    jtransform_execute_transformation(dinfo, cinfo, srccoefs, &xinfo[i]);
+    if (t[i].customFilter) {
+      int ci, y;
+      JDIMENSION by;
+
+      for (ci = 0; ci < cinfo->num_components; ci++) {
+        jpeg_component_info *compptr = &cinfo->comp_info[ci];
+        tjregion arrayRegion = { 0, 0, 0, 0 };
+        tjregion planeRegion = { 0, 0, 0, 0 };
+
+        arrayRegion.w = compptr->width_in_blocks * DCTSIZE;
+        arrayRegion.h = DCTSIZE;
+        planeRegion.w = compptr->width_in_blocks * DCTSIZE;
+        planeRegion.h = compptr->height_in_blocks * DCTSIZE;
+
+        for (by = 0; by < compptr->height_in_blocks;
+             by += compptr->v_samp_factor) {
+          JBLOCKARRAY barray = (dinfo->mem->access_virt_barray)
+            ((j_common_ptr)dinfo, dstcoefs[ci], by, compptr->v_samp_factor,
+             TRUE);
+
+          for (y = 0; y < compptr->v_samp_factor; y++) {
+            if (t[i].customFilter(barray[y][0], arrayRegion, planeRegion, ci,
+                                  i, (tjtransform *)&t[i]) == -1)
+              THROW("Error in custom filter");
+            arrayRegion.y += DCTSIZE;
+          }
+        }
+      }
+    }
+    if (!(t[i].options & TJXOPT_NOOUTPUT)) jpeg_finish_compress(cinfo);
+  }
+
+  jpeg_finish_decompress(dinfo);
+
+bailout:
+  if (cinfo->global_state > CSTATE_START) {
+    if (alloc) (*cinfo->dest->term_destination) (cinfo);
+    jpeg_abort_compress(cinfo);
+  }
+  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
+  free(xinfo);
+  if (this->jerr.warning) retval = -1;
+  return retval;
+}
+
+/* TurboJPEG 1.2+ */
+DLLEXPORT int tjTransform(tjhandle handle, const unsigned char *jpegBuf,
+                          unsigned long jpegSize, int n,
+                          unsigned char **dstBufs, unsigned long *dstSizes,
+                          tjtransform *t, int flags)
+{
+  static const char FUNCTION_NAME[] = "tjTransform";
+  int i, retval = 0, srcSubsamp = -1;
+  size_t *sizes = NULL;
+
+  GET_DINSTANCE(handle);
+  if ((this->init & DECOMPRESS) == 0)
+    THROW("Instance has not been initialized for decompression");
+
+  if (n < 1 || dstSizes == NULL)
+    THROW("Invalid argument");
+
+  if (setjmp(this->jerr.setjmp_buffer)) {
+    /* If we get here, the JPEG code has signaled an error. */
+    retval = -1;  goto bailout;
+  }
+
+  processFlags(handle, flags, COMPRESS);
+
+  if (this->noRealloc) {
+    jpeg_mem_src_tj(dinfo, jpegBuf, jpegSize);
+    jpeg_read_header(dinfo, TRUE);
+    srcSubsamp = getSubsamp(dinfo);
+  }
+
+  if ((sizes = (size_t *)malloc(n * sizeof(size_t))) == NULL)
+    THROW("Memory allocation failure");
+  for (i = 0; i < n; i++) {
+    sizes[i] = (size_t)dstSizes[i];
+    if (this->noRealloc) {
+      int dstWidth = dinfo->image_width, dstHeight = dinfo->image_height;
+      int dstSubsamp = srcSubsamp;
+
+      if (getTransformedSpecs(handle, &dstWidth, &dstHeight, &dstSubsamp,
+                              &t[i], FUNCTION_NAME) == -1) {
+        retval = -1;
+        goto bailout;
+      }
+      sizes[i] = tj3JPEGBufSize(dstWidth, dstHeight, dstSubsamp);
+    }
+  }
+  retval = tj3Transform(handle, jpegBuf, (size_t)jpegSize, n, dstBufs, sizes,
+                        t);
+  for (i = 0; i < n; i++)
+    dstSizes[i] = (unsigned long)sizes[i];
+
+bailout:
+  if (dinfo->global_state > DSTATE_START) jpeg_abort_decompress(dinfo);
+  if (this->jerr.warning) retval = -1;
+  free(sizes);
+  return retval;
+}
diff --git a/thirdparty/libjpeg-turbo/src/turbojpeg.h b/thirdparty/libjpeg-turbo/src/turbojpeg.h
new file mode 100644
index 00000000000..274eb7a3644
--- /dev/null
+++ b/thirdparty/libjpeg-turbo/src/turbojpeg.h
@@ -0,0 +1,2809 @@
+/*
+ * Copyright (C)2009-2015, 2017, 2020-2024 D. R. Commander.
+ *                                         All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright notice,
+ *   this list of conditions and the following disclaimer.
+ * - Redistributions in binary form must reproduce the above copyright notice,
+ *   this list of conditions and the following disclaimer in the documentation
+ *   and/or other materials provided with the distribution.
+ * - Neither the name of the libjpeg-turbo Project nor the names of its
+ *   contributors may be used to endorse or promote products derived from this
+ *   software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS",
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __TURBOJPEG_H__
+#define __TURBOJPEG_H__
+
+#include <stddef.h>
+
+#if defined(_WIN32) && defined(DLLDEFINE)
+#define DLLEXPORT  __declspec(dllexport)
+#else
+#define DLLEXPORT
+#endif
+#define DLLCALL
+
+
+/**
+ * @addtogroup TurboJPEG
+ * TurboJPEG API.  This API provides an interface for generating, decoding, and
+ * transforming planar YUV and JPEG images in memory.
+ *
+ * @anchor YUVnotes
+ * YUV Image Format Notes
+ * ----------------------
+ * Technically, the JPEG format uses the YCbCr colorspace (which is technically
+ * not a colorspace but a color transform), but per the convention of the
+ * digital video community, the TurboJPEG API uses "YUV" to refer to an image
+ * format consisting of Y, Cb, and Cr image planes.
+ *
+ * Each plane is simply a 2D array of bytes, each byte representing the value
+ * of one of the components (Y, Cb, or Cr) at a particular location in the
+ * image.  The width and height of each plane are determined by the image
+ * width, height, and level of chrominance subsampling.  The luminance plane
+ * width is the image width padded to the nearest multiple of the horizontal
+ * subsampling factor (1 in the case of 4:4:4, grayscale, 4:4:0, or 4:4:1; 2 in
+ * the case of 4:2:2 or 4:2:0; 4 in the case of 4:1:1.)  Similarly, the
+ * luminance plane height is the image height padded to the nearest multiple of
+ * the vertical subsampling factor (1 in the case of 4:4:4, 4:2:2, grayscale,
+ * or 4:1:1; 2 in the case of 4:2:0 or 4:4:0; 4 in the case of 4:4:1.)  This is
+ * irrespective of any additional padding that may be specified as an argument
+ * to the various YUV functions.  The chrominance plane width is equal to the
+ * luminance plane width divided by the horizontal subsampling factor, and the
+ * chrominance plane height is equal to the luminance plane height divided by
+ * the vertical subsampling factor.
+ *
+ * For example, if the source image is 35 x 35 pixels and 4:2:2 subsampling is
+ * used, then the luminance plane would be 36 x 35 bytes, and each of the
+ * chrominance planes would be 18 x 35 bytes.  If you specify a row alignment
+ * of 4 bytes on top of this, then the luminance plane would be 36 x 35 bytes,
+ * and each of the chrominance planes would be 20 x 35 bytes.
+ *
+ * @{
+ */
+
+
+/**
+ * The number of initialization options
+ */
+#define TJ_NUMINIT  3
+
+/**
+ * Initialization options
+ */
+enum TJINIT {
+  /**
+   * Initialize the TurboJPEG instance for compression.
+   */
+  TJINIT_COMPRESS,
+  /**
+   * Initialize the TurboJPEG instance for decompression.
+   */
+  TJINIT_DECOMPRESS,
+  /**
+   * Initialize the TurboJPEG instance for lossless transformation (both
+   * compression and decompression.)
+   */
+  TJINIT_TRANSFORM
+};
+
+
+/**
+ * The number of chrominance subsampling options
+ */
+#define TJ_NUMSAMP  7
+
+/**
+ * Chrominance subsampling options
+ *
+ * When pixels are converted from RGB to YCbCr (see #TJCS_YCbCr) or from CMYK
+ * to YCCK (see #TJCS_YCCK) as part of the JPEG compression process, some of
+ * the Cb and Cr (chrominance) components can be discarded or averaged together
+ * to produce a smaller image with little perceptible loss of image quality.
+ * (The human eye is more sensitive to small changes in brightness than to
+ * small changes in color.)  This is called "chrominance subsampling".
+ */
+enum TJSAMP {
+  /**
+   * 4:4:4 chrominance subsampling (no chrominance subsampling)
+   *
+   * The JPEG or YUV image will contain one chrominance component for every
+   * pixel in the source image.
+   */
+  TJSAMP_444,
+  /**
+   * 4:2:2 chrominance subsampling
+   *
+   * The JPEG or YUV image will contain one chrominance component for every 2x1
+   * block of pixels in the source image.
+   */
+  TJSAMP_422,
+  /**
+   * 4:2:0 chrominance subsampling
+   *
+   * The JPEG or YUV image will contain one chrominance component for every 2x2
+   * block of pixels in the source image.
+   */
+  TJSAMP_420,
+  /**
+   * Grayscale
+   *
+   * The JPEG or YUV image will contain no chrominance components.
+   */
+  TJSAMP_GRAY,
+  /**
+   * 4:4:0 chrominance subsampling
+   *
+   * The JPEG or YUV image will contain one chrominance component for every 1x2
+   * block of pixels in the source image.
+   *
+   * @note 4:4:0 subsampling is not fully accelerated in libjpeg-turbo.
+   */
+  TJSAMP_440,
+  /**
+   * 4:1:1 chrominance subsampling
+   *
+   * The JPEG or YUV image will contain one chrominance component for every 4x1
+   * block of pixels in the source image.  All else being equal, a JPEG image
+   * with 4:1:1 subsampling is almost exactly the same size as a JPEG image
+   * with 4:2:0 subsampling, and in the aggregate, both subsampling methods
+   * produce approximately the same perceptual quality.  However, 4:1:1 is
+   * better able to reproduce sharp horizontal features.
+   *
+   * @note 4:1:1 subsampling is not fully accelerated in libjpeg-turbo.
+   */
+  TJSAMP_411,
+  /**
+   * 4:4:1 chrominance subsampling
+   *
+   * The JPEG or YUV image will contain one chrominance component for every 1x4
+   * block of pixels in the source image.  All else being equal, a JPEG image
+   * with 4:4:1 subsampling is almost exactly the same size as a JPEG image
+   * with 4:2:0 subsampling, and in the aggregate, both subsampling methods
+   * produce approximately the same perceptual quality.  However, 4:4:1 is
+   * better able to reproduce sharp vertical features.
+   *
+   * @note 4:4:1 subsampling is not fully accelerated in libjpeg-turbo.
+   */
+  TJSAMP_441,
+  /**
+   * Unknown subsampling
+   *
+   * The JPEG image uses an unusual type of chrominance subsampling.  Such
+   * images can be decompressed into packed-pixel images, but they cannot be
+   * - decompressed into planar YUV images,
+   * - losslessly transformed if #TJXOPT_CROP is specified and #TJXOPT_GRAY is
+   * not specified, or
+   * - partially decompressed using a cropping region.
+   */
+  TJSAMP_UNKNOWN = -1
+};
+
+/**
+ * iMCU width (in pixels) for a given level of chrominance subsampling
+ *
+ * In a typical lossy JPEG image, 8x8 blocks of DCT coefficients for each
+ * component are interleaved in a single scan.  If the image uses chrominance
+ * subsampling, then multiple luminance blocks are stored together, followed by
+ * a single block for each chrominance component.  The minimum set of
+ * full-resolution luminance block(s) and corresponding (possibly subsampled)
+ * chrominance blocks necessary to represent at least one DCT block per
+ * component is called a "Minimum Coded Unit" or "MCU".  (For example, an MCU
+ * in an interleaved lossy JPEG image that uses 4:2:2 subsampling consists of
+ * two luminance blocks followed by one block for each chrominance component.)
+ * In a non-interleaved lossy JPEG image, each component is stored in a
+ * separate scan, and an MCU is a single DCT block, so we use the term "iMCU"
+ * (interleaved MCU) to refer to the equivalent of an MCU in an interleaved
+ * JPEG image.  For the common case of interleaved JPEG images, an iMCU is the
+ * same as an MCU.
+ *
+ * iMCU sizes:
+ * - 8x8 for no subsampling or grayscale
+ * - 16x8 for 4:2:2
+ * - 8x16 for 4:4:0
+ * - 16x16 for 4:2:0
+ * - 32x8 for 4:1:1
+ * - 8x32 for 4:4:1
+ */
+static const int tjMCUWidth[TJ_NUMSAMP]  = { 8, 16, 16, 8, 8, 32, 8 };
+
+/**
+ * iMCU height (in pixels) for a given level of chrominance subsampling
+ *
+ * In a typical lossy JPEG image, 8x8 blocks of DCT coefficients for each
+ * component are interleaved in a single scan.  If the image uses chrominance
+ * subsampling, then multiple luminance blocks are stored together, followed by
+ * a single block for each chrominance component.  The minimum set of
+ * full-resolution luminance block(s) and corresponding (possibly subsampled)
+ * chrominance blocks necessary to represent at least one DCT block per
+ * component is called a "Minimum Coded Unit" or "MCU".  (For example, an MCU
+ * in an interleaved lossy JPEG image that uses 4:2:2 subsampling consists of
+ * two luminance blocks followed by one block for each chrominance component.)
+ * In a non-interleaved lossy JPEG image, each component is stored in a
+ * separate scan, and an MCU is a single DCT block, so we use the term "iMCU"
+ * (interleaved MCU) to refer to the equivalent of an MCU in an interleaved
+ * JPEG image.  For the common case of interleaved JPEG images, an iMCU is the
+ * same as an MCU.
+ *
+ * iMCU sizes:
+ * - 8x8 for no subsampling or grayscale
+ * - 16x8 for 4:2:2
+ * - 8x16 for 4:4:0
+ * - 16x16 for 4:2:0
+ * - 32x8 for 4:1:1
+ * - 8x32 for 4:4:1
+ */
+static const int tjMCUHeight[TJ_NUMSAMP] = { 8, 8, 16, 8, 16, 8, 32 };
+
+
+/**
+ * The number of pixel formats
+ */
+#define TJ_NUMPF  12
+
+/**
+ * Pixel formats
+ */
+enum TJPF {
+  /**
+   * RGB pixel format
+   *
+   * The red, green, and blue components in the image are stored in 3-sample
+   * pixels in the order R, G, B from lowest to highest memory address within
+   * each pixel.
+   */
+  TJPF_RGB,
+  /**
+   * BGR pixel format
+   *
+   * The red, green, and blue components in the image are stored in 3-sample
+   * pixels in the order B, G, R from lowest to highest memory address within
+   * each pixel.
+   */
+  TJPF_BGR,
+  /**
+   * RGBX pixel format
+   *
+   * The red, green, and blue components in the image are stored in 4-sample
+   * pixels in the order R, G, B from lowest to highest memory address within
+   * each pixel.  The X component is ignored when compressing/encoding and
+   * undefined when decompressing/decoding.
+   */
+  TJPF_RGBX,
+  /**
+   * BGRX pixel format
+   *
+   * The red, green, and blue components in the image are stored in 4-sample
+   * pixels in the order B, G, R from lowest to highest memory address within
+   * each pixel.  The X component is ignored when compressing/encoding and
+   * undefined when decompressing/decoding.
+   */
+  TJPF_BGRX,
+  /**
+   * XBGR pixel format
+   *
+   * The red, green, and blue components in the image are stored in 4-sample
+   * pixels in the order R, G, B from highest to lowest memory address within
+   * each pixel.  The X component is ignored when compressing/encoding and
+   * undefined when decompressing/decoding.
+   */
+  TJPF_XBGR,
+  /**
+   * XRGB pixel format
+   *
+   * The red, green, and blue components in the image are stored in 4-sample
+   * pixels in the order B, G, R from highest to lowest memory address within
+   * each pixel.  The X component is ignored when compressing/encoding and
+   * undefined when decompressing/decoding.
+   */
+  TJPF_XRGB,
+  /**
+   * Grayscale pixel format
+   *
+   * Each 1-sample pixel represents a luminance (brightness) level from 0 to
+   * the maximum sample value (which is, for instance, 255 for 8-bit samples or
+   * 4095 for 12-bit samples or 65535 for 16-bit samples.)
+   */
+  TJPF_GRAY,
+  /**
+   * RGBA pixel format
+   *
+   * This is the same as @ref TJPF_RGBX, except that when
+   * decompressing/decoding, the X component is guaranteed to be equal to the
+   * maximum sample value, which can be interpreted as an opaque alpha channel.
+   */
+  TJPF_RGBA,
+  /**
+   * BGRA pixel format
+   *
+   * This is the same as @ref TJPF_BGRX, except that when
+   * decompressing/decoding, the X component is guaranteed to be equal to the
+   * maximum sample value, which can be interpreted as an opaque alpha channel.
+   */
+  TJPF_BGRA,
+  /**
+   * ABGR pixel format
+   *
+   * This is the same as @ref TJPF_XBGR, except that when
+   * decompressing/decoding, the X component is guaranteed to be equal to the
+   * maximum sample value, which can be interpreted as an opaque alpha channel.
+   */
+  TJPF_ABGR,
+  /**
+   * ARGB pixel format
+   *
+   * This is the same as @ref TJPF_XRGB, except that when
+   * decompressing/decoding, the X component is guaranteed to be equal to the
+   * maximum sample value, which can be interpreted as an opaque alpha channel.
+   */
+  TJPF_ARGB,
+  /**
+   * CMYK pixel format
+   *
+   * Unlike RGB, which is an additive color model used primarily for display,
+   * CMYK (Cyan/Magenta/Yellow/Key) is a subtractive color model used primarily
+   * for printing.  In the CMYK color model, the value of each color component
+   * typically corresponds to an amount of cyan, magenta, yellow, or black ink
+   * that is applied to a white background.  In order to convert between CMYK
+   * and RGB, it is necessary to use a color management system (CMS.)  A CMS
+   * will attempt to map colors within the printer's gamut to perceptually
+   * similar colors in the display's gamut and vice versa, but the mapping is
+   * typically not 1:1 or reversible, nor can it be defined with a simple
+   * formula.  Thus, such a conversion is out of scope for a codec library.
+   * However, the TurboJPEG API allows for compressing packed-pixel CMYK images
+   * into YCCK JPEG images (see #TJCS_YCCK) and decompressing YCCK JPEG images
+   * into packed-pixel CMYK images.
+   */
+  TJPF_CMYK,
+  /**
+   * Unknown pixel format
+   *
+   * Currently this is only used by #tj3LoadImage8(), #tj3LoadImage12(), and
+   * #tj3LoadImage16().
+   */
+  TJPF_UNKNOWN = -1
+};
+
+/**
+ * Red offset (in samples) for a given pixel format
+ *
+ * This specifies the number of samples that the red component is offset from
+ * the start of the pixel.  For instance, if an 8-bit-per-component pixel of
+ * format TJPF_BGRX is stored in `unsigned char pixel[]`, then the red
+ * component is `pixel[tjRedOffset[TJPF_BGRX]]`.  The offset is -1 if the pixel
+ * format does not have a red component.
+ */
+static const int tjRedOffset[TJ_NUMPF] = {
+  0, 2, 0, 2, 3, 1, -1, 0, 2, 3, 1, -1
+};
+/**
+ * Green offset (in samples) for a given pixel format
+ *
+ * This specifies the number of samples that the green component is offset from
+ * the start of the pixel.  For instance, if an 8-bit-per-component pixel of
+ * format TJPF_BGRX is stored in `unsigned char pixel[]`, then the green
+ * component is `pixel[tjGreenOffset[TJPF_BGRX]]`.  The offset is -1 if the
+ * pixel format does not have a green component.
+ */
+static const int tjGreenOffset[TJ_NUMPF] = {
+  1, 1, 1, 1, 2, 2, -1, 1, 1, 2, 2, -1
+};
+/**
+ * Blue offset (in samples) for a given pixel format
+ *
+ * This specifies the number of samples that the blue component is offset from
+ * the start of the pixel.  For instance, if an 8-bit-per-component pixel of
+ * format TJPF_BGRX is stored in `unsigned char pixel[]`, then the blue
+ * component is `pixel[tjBlueOffset[TJPF_BGRX]]`.  The offset is -1 if the
+ * pixel format does not have a blue component.
+ */
+static const int tjBlueOffset[TJ_NUMPF] = {
+  2, 0, 2, 0, 1, 3, -1, 2, 0, 1, 3, -1
+};
+/**
+ * Alpha offset (in samples) for a given pixel format
+ *
+ * This specifies the number of samples that the alpha component is offset from
+ * the start of the pixel.  For instance, if an 8-bit-per-component pixel of
+ * format TJPF_BGRA is stored in `unsigned char pixel[]`, then the alpha
+ * component is `pixel[tjAlphaOffset[TJPF_BGRA]]`.  The offset is -1 if the
+ * pixel format does not have an alpha component.
+ */
+static const int tjAlphaOffset[TJ_NUMPF] = {
+  -1, -1, -1, -1, -1, -1, -1, 3, 3, 0, 0, -1
+};
+/**
+ * Pixel size (in samples) for a given pixel format
+ */
+static const int tjPixelSize[TJ_NUMPF] = {
+  3, 3, 4, 4, 4, 4, 1, 4, 4, 4, 4, 4
+};
+
+
+/**
+ * The number of JPEG colorspaces
+ */
+#define TJ_NUMCS  5
+
+/**
+ * JPEG colorspaces
+ */
+enum TJCS {
+  /**
+   * RGB colorspace
+   *
+   * When generating the JPEG image, the R, G, and B components in the source
+   * image are reordered into image planes, but no colorspace conversion or
+   * subsampling is performed.  RGB JPEG images can be generated from and
+   * decompressed to packed-pixel images with any of the extended RGB or
+   * grayscale pixel formats, but they cannot be generated from or
+   * decompressed to planar YUV images.
+   */
+  TJCS_RGB,
+  /**
+   * YCbCr colorspace
+   *
+   * YCbCr is not an absolute colorspace but rather a mathematical
+   * transformation of RGB designed solely for storage and transmission.  YCbCr
+   * images must be converted to RGB before they can be displayed.  In the
+   * YCbCr colorspace, the Y (luminance) component represents the black & white
+   * portion of the original image, and the Cb and Cr (chrominance) components
+   * represent the color portion of the original image.  Historically, the
+   * analog equivalent of this transformation allowed the same signal to be
+   * displayed to both black & white and color televisions, but JPEG images use
+   * YCbCr primarily because it allows the color data to be optionally
+   * subsampled in order to reduce network and disk usage.  YCbCr is the most
+   * common JPEG colorspace, and YCbCr JPEG images can be generated from and
+   * decompressed to packed-pixel images with any of the extended RGB or
+   * grayscale pixel formats.  YCbCr JPEG images can also be generated from
+   * and decompressed to planar YUV images.
+   */
+  TJCS_YCbCr,
+  /**
+   * Grayscale colorspace
+   *
+   * The JPEG image retains only the luminance data (Y component), and any
+   * color data from the source image is discarded.  Grayscale JPEG images can
+   * be generated from and decompressed to packed-pixel images with any of the
+   * extended RGB or grayscale pixel formats, or they can be generated from
+   * and decompressed to planar YUV images.
+   */
+  TJCS_GRAY,
+  /**
+   * CMYK colorspace
+   *
+   * When generating the JPEG image, the C, M, Y, and K components in the
+   * source image are reordered into image planes, but no colorspace conversion
+   * or subsampling is performed.  CMYK JPEG images can only be generated from
+   * and decompressed to packed-pixel images with the CMYK pixel format.
+   */
+  TJCS_CMYK,
+  /**
+   * YCCK colorspace
+   *
+   * YCCK (AKA "YCbCrK") is not an absolute colorspace but rather a
+   * mathematical transformation of CMYK designed solely for storage and
+   * transmission.  It is to CMYK as YCbCr is to RGB.  CMYK pixels can be
+   * reversibly transformed into YCCK, and as with YCbCr, the chrominance
+   * components in the YCCK pixels can be subsampled without incurring major
+   * perceptual loss.  YCCK JPEG images can only be generated from and
+   * decompressed to packed-pixel images with the CMYK pixel format.
+   */
+  TJCS_YCCK
+};
+
+
+/**
+ * Parameters
+ */
+enum TJPARAM {
+  /**
+   * Error handling behavior
+   *
+   * **Value**
+   * - `0` *[default]* Allow the current compression/decompression/transform
+   * operation to complete unless a fatal error is encountered.
+   * - `1` Immediately discontinue the current
+   * compression/decompression/transform operation if a warning (non-fatal
+   * error) occurs.
+   */
+  TJPARAM_STOPONWARNING,
+  /**
+   * Row order in packed-pixel source/destination images
+   *
+   * **Value**
+   * - `0` *[default]* top-down (X11) order
+   * - `1` bottom-up (Windows, OpenGL) order
+   */
+  TJPARAM_BOTTOMUP,
+  /**
+   * JPEG destination buffer (re)allocation [compression, lossless
+   * transformation]
+   *
+   * **Value**
+   * - `0` *[default]* Attempt to allocate or reallocate the JPEG destination
+   * buffer as needed.
+   * - `1` Generate an error if the JPEG destination buffer is invalid or too
+   * small.
+   */
+  TJPARAM_NOREALLOC,
+  /**
+   * Perceptual quality of lossy JPEG images [compression only]
+   *
+   * **Value**
+   * - `1`-`100` (`1` = worst quality but best compression, `100` = best
+   * quality but worst compression) *[no default; must be explicitly
+   * specified]*
+   */
+  TJPARAM_QUALITY,
+  /**
+   * Chrominance subsampling level
+   *
+   * The JPEG or YUV image uses (decompression, decoding) or will use (lossy
+   * compression, encoding) the specified level of chrominance subsampling.
+   *
+   * **Value**
+   * - One of the @ref TJSAMP "chrominance subsampling options" *[no default;
+   * must be explicitly specified for lossy compression, encoding, and
+   * decoding]*
+   */
+  TJPARAM_SUBSAMP,
+  /**
+   * JPEG width (in pixels) [decompression only, read-only]
+   */
+  TJPARAM_JPEGWIDTH,
+  /**
+   * JPEG height (in pixels) [decompression only, read-only]
+   */
+  TJPARAM_JPEGHEIGHT,
+  /**
+   * Data precision (bits per sample)
+   *
+   * The JPEG image uses (decompression) or will use (lossless compression) the
+   * specified number of bits per sample.  This parameter also specifies the
+   * target data precision when loading a PBMPLUS file with #tj3LoadImage8(),
+   * #tj3LoadImage12(), or #tj3LoadImage16() and the source data precision when
+   * saving a PBMPLUS file with #tj3SaveImage8(), #tj3SaveImage12(), or
+   * #tj3SaveImage16().
+   *
+   * The data precision is the number of bits in the maximum sample value,
+   * which may not be the same as the width of the data type used to store the
+   * sample.
+   *
+   * **Value**
+   * - `8` or `12` for lossy JPEG images; `2` to `16` for lossless JPEG and
+   * PBMPLUS images
+   *
+   * 12-bit JPEG data precision implies #TJPARAM_OPTIMIZE unless
+   * #TJPARAM_ARITHMETIC is set.
+   */
+  TJPARAM_PRECISION,
+  /**
+   * JPEG colorspace
+   *
+   * The JPEG image uses (decompression) or will use (lossy compression) the
+   * specified colorspace.
+   *
+   * **Value**
+   * - One of the @ref TJCS "JPEG colorspaces" *[default for lossy compression:
+   * automatically selected based on the subsampling level and pixel format]*
+   */
+  TJPARAM_COLORSPACE,
+  /**
+   * Chrominance upsampling algorithm [lossy decompression only]
+   *
+   * **Value**
+   * - `0` *[default]* Use smooth upsampling when decompressing a JPEG image
+   * that was generated using chrominance subsampling.  This creates a smooth
+   * transition between neighboring chrominance components in order to reduce
+   * upsampling artifacts in the decompressed image.
+   * - `1` Use the fastest chrominance upsampling algorithm available, which
+   * may combine upsampling with color conversion.
+   */
+  TJPARAM_FASTUPSAMPLE,
+  /**
+   * DCT/IDCT algorithm [lossy compression and decompression]
+   *
+   * **Value**
+   * - `0` *[default]* Use the most accurate DCT/IDCT algorithm available.
+   * - `1` Use the fastest DCT/IDCT algorithm available.
+   *
+   * This parameter is provided mainly for backward compatibility with libjpeg,
+   * which historically implemented several different DCT/IDCT algorithms
+   * because of performance limitations with 1990s CPUs.  In the libjpeg-turbo
+   * implementation of the TurboJPEG API:
+   * - The "fast" and "accurate" DCT/IDCT algorithms perform similarly on
+   * modern x86/x86-64 CPUs that support AVX2 instructions.
+   * - The "fast" algorithm is generally only about 5-15% faster than the
+   * "accurate" algorithm on other types of CPUs.
+   * - The difference in accuracy between the "fast" and "accurate" algorithms
+   * is the most pronounced at JPEG quality levels above 90 and tends to be
+   * more pronounced with decompression than with compression.
+   * - For JPEG quality levels above 97, the "fast" algorithm degrades and is
+   * not fully accelerated, so it is slower than the "accurate" algorithm.
+   */
+  TJPARAM_FASTDCT,
+  /**
+   * Huffman table optimization [lossy compression, lossless transformation]
+   *
+   * **Value**
+   * - `0` *[default]* The JPEG image will use the default Huffman tables.
+   * - `1` Optimal Huffman tables will be computed for the JPEG image.  For
+   * lossless transformation, this can also be specified using
+   * #TJXOPT_OPTIMIZE.
+   *
+   * Huffman table optimization improves compression slightly (generally 5% or
+   * less), but it reduces compression performance considerably.
+   */
+  TJPARAM_OPTIMIZE,
+  /**
+   * Progressive JPEG
+   *
+   * In a progressive JPEG image, the DCT coefficients are split across
+   * multiple "scans" of increasing quality.  Thus, a low-quality scan
+   * containing the lowest-frequency DCT coefficients can be transmitted first
+   * and refined with subsequent higher-quality scans containing
+   * higher-frequency DCT coefficients.  When using Huffman entropy coding, the
+   * progressive JPEG format also provides an "end-of-bands (EOB) run" feature
+   * that allows large groups of zeroes, potentially spanning multiple MCUs,
+   * to be represented using only a few bytes.
+   *
+   * **Value**
+   * - `0` *[default for compression, lossless transformation]* The lossy JPEG
+   * image is (decompression) or will be (compression, lossless transformation)
+   * single-scan.
+   * - `1` The lossy JPEG image is (decompression) or will be (compression,
+   * lossless transformation) progressive.  For lossless transformation, this
+   * can also be specified using #TJXOPT_PROGRESSIVE.
+   *
+   * Progressive JPEG images generally have better compression ratios than
+   * single-scan JPEG images (much better if the image has large areas of solid
+   * color), but progressive JPEG compression and decompression is considerably
+   * slower than single-scan JPEG compression and decompression.  Can be
+   * combined with #TJPARAM_ARITHMETIC.  Implies #TJPARAM_OPTIMIZE unless
+   * #TJPARAM_ARITHMETIC is also set.
+   */
+  TJPARAM_PROGRESSIVE,
+  /**
+   * Progressive JPEG scan limit for lossy JPEG images [decompression, lossless
+   * transformation]
+   *
+   * Setting this parameter causes the decompression and transform functions to
+   * return an error if the number of scans in a progressive JPEG image exceeds
+   * the specified limit.  The primary purpose of this is to allow
+   * security-critical applications to guard against an exploit of the
+   * progressive JPEG format described in
+   * <a href="https://libjpeg-turbo.org/pmwiki/uploads/About/TwoIssueswiththeJPEGStandard.pdf" target="_blank">this report</a>.
+   *
+   * **Value**
+   * - maximum number of progressive JPEG scans that the decompression and
+   * transform functions will process *[default: `0` (no limit)]*
+   *
+   * @see #TJPARAM_PROGRESSIVE
+   */
+  TJPARAM_SCANLIMIT,
+  /**
+   * Arithmetic entropy coding
+   *
+   * **Value**
+   * - `0` *[default for compression, lossless transformation]* The lossy JPEG
+   * image uses (decompression) or will use (compression, lossless
+   * transformation) Huffman entropy coding.
+   * - `1` The lossy JPEG image uses (decompression) or will use (compression,
+   * lossless transformation) arithmetic entropy coding.  For lossless
+   * transformation, this can also be specified using #TJXOPT_ARITHMETIC.
+   *
+   * Arithmetic entropy coding generally improves compression relative to
+   * Huffman entropy coding, but it reduces compression and decompression
+   * performance considerably.  Can be combined with #TJPARAM_PROGRESSIVE.
+   */
+  TJPARAM_ARITHMETIC,
+  /**
+   * Lossless JPEG
+   *
+   * **Value**
+   * - `0` *[default for compression]* The JPEG image is (decompression) or
+   * will be (compression) lossy/DCT-based.
+   * - `1` The JPEG image is (decompression) or will be (compression)
+   * lossless/predictive.
+   *
+   * In most cases, lossless JPEG compression and decompression is considerably
+   * slower than lossy JPEG compression and decompression, and lossless JPEG
+   * images are much larger than lossy JPEG images.  Thus, lossless JPEG images
+   * are typically used only for applications that require mathematically
+   * lossless compression.  Also note that the following features are not
+   * available with lossless JPEG images:
+   * - Colorspace conversion (lossless JPEG images always use #TJCS_RGB,
+   * #TJCS_GRAY, or #TJCS_CMYK, depending on the pixel format of the source
+   * image)
+   * - Chrominance subsampling (lossless JPEG images always use #TJSAMP_444)
+   * - JPEG quality selection
+   * - DCT/IDCT algorithm selection
+   * - Progressive JPEG
+   * - Arithmetic entropy coding
+   * - Compression from/decompression to planar YUV images
+   * - Decompression scaling
+   * - Lossless transformation
+   *
+   * @see #TJPARAM_LOSSLESSPSV, #TJPARAM_LOSSLESSPT
+   */
+  TJPARAM_LOSSLESS,
+  /**
+   * Lossless JPEG predictor selection value (PSV)
+   *
+   * **Value**
+   * - `1`-`7` *[default for compression: `1`]*
+   *
+   * Lossless JPEG compression shares no algorithms with lossy JPEG
+   * compression.  Instead, it uses differential pulse-code modulation (DPCM),
+   * an algorithm whereby each sample is encoded as the difference between the
+   * sample's value and a "predictor", which is based on the values of
+   * neighboring samples.  If Ra is the sample immediately to the left of the
+   * current sample, Rb is the sample immediately above the current sample, and
+   * Rc is the sample diagonally to the left and above the current sample, then
+   * the relationship between the predictor selection value and the predictor
+   * is as follows:
+   *
+   * PSV | Predictor
+   * ----|----------
+   * 1   | Ra
+   * 2   | Rb
+   * 3   | Rc
+   * 4   | Ra + Rb – Rc
+   * 5   | Ra + (Rb – Rc) / 2
+   * 6   | Rb + (Ra – Rc) / 2
+   * 7   | (Ra + Rb) / 2
+   *
+   * Predictors 1-3 are 1-dimensional predictors, whereas Predictors 4-7 are
+   * 2-dimensional predictors.  The best predictor for a particular image
+   * depends on the image.
+   *
+   * @see #TJPARAM_LOSSLESS
+   */
+  TJPARAM_LOSSLESSPSV,
+  /**
+   * Lossless JPEG point transform (Pt)
+   *
+   * **Value**
+   * - `0` through ***precision*** *- 1*, where ***precision*** is the JPEG
+   * data precision in bits *[default for compression: `0`]*
+   *
+   * A point transform value of `0` is necessary in order to generate a fully
+   * lossless JPEG image.  (A non-zero point transform value right-shifts the
+   * input samples by the specified number of bits, which is effectively a form
+   * of lossy color quantization.)
+   *
+   * @see #TJPARAM_LOSSLESS, #TJPARAM_PRECISION
+   */
+  TJPARAM_LOSSLESSPT,
+  /**
+   * JPEG restart marker interval in MCUs [lossy compression,
+   * lossless transformation]
+   *
+   * The nature of entropy coding is such that a corrupt JPEG image cannot
+   * be decompressed beyond the point of corruption unless it contains restart
+   * markers.  A restart marker stops and restarts the entropy coding algorithm
+   * so that, if a JPEG image is corrupted, decompression can resume at the
+   * next marker.  Thus, adding more restart markers improves the fault
+   * tolerance of the JPEG image, but adding too many restart markers can
+   * adversely affect the compression ratio and performance.
+   *
+   * In typical JPEG images, an MCU (Minimum Coded Unit) is the minimum set of
+   * interleaved "data units" (8x8 DCT blocks if the image is lossy or samples
+   * if the image is lossless) necessary to represent at least one data unit
+   * per component.  (For example, an MCU in an interleaved lossy JPEG image
+   * that uses 4:2:2 subsampling consists of two luminance blocks followed by
+   * one block for each chrominance component.)  In single-component or
+   * non-interleaved JPEG images, an MCU is the same as a data unit.
+   *
+   * **Value**
+   * - the number of MCUs between each restart marker *[default: `0` (no
+   * restart markers)]*
+   *
+   * Setting this parameter to a non-zero value sets #TJPARAM_RESTARTROWS to 0.
+   */
+  TJPARAM_RESTARTBLOCKS,
+  /**
+   * JPEG restart marker interval in MCU rows [compression,
+   * lossless transformation]
+   *
+   * See #TJPARAM_RESTARTBLOCKS for a description of restart markers and MCUs.
+   * An MCU row is a row of MCUs spanning the entire width of the image.
+   *
+   * **Value**
+   * - the number of MCU rows between each restart marker *[default: `0` (no
+   * restart markers)]*
+   *
+   * Setting this parameter to a non-zero value sets #TJPARAM_RESTARTBLOCKS to
+   * 0.
+   */
+  TJPARAM_RESTARTROWS,
+  /**
+   * JPEG horizontal pixel density
+   *
+   * **Value**
+   * - The JPEG image has (decompression) or will have (compression) the
+   * specified horizontal pixel density *[default for compression: `1`]*.
+   *
+   * This value is stored in or read from the JPEG header.  It does not affect
+   * the contents of the JPEG image.  Note that this parameter is set by
+   * #tj3LoadImage8() when loading a Windows BMP file that contains pixel
+   * density information, and the value of this parameter is stored to a
+   * Windows BMP file by #tj3SaveImage8() if the value of #TJPARAM_DENSITYUNITS
+   * is `2`.
+   *
+   * This parameter has no effect unless the JPEG colorspace (see
+   * #TJPARAM_COLORSPACE) is #TJCS_YCbCr or #TJCS_GRAY.
+   *
+   * @see TJPARAM_DENSITYUNITS
+   */
+  TJPARAM_XDENSITY,
+  /**
+   * JPEG vertical pixel density
+   *
+   * **Value**
+   * - The JPEG image has (decompression) or will have (compression) the
+   * specified vertical pixel density *[default for compression: `1`]*.
+   *
+   * This value is stored in or read from the JPEG header.  It does not affect
+   * the contents of the JPEG image.  Note that this parameter is set by
+   * #tj3LoadImage8() when loading a Windows BMP file that contains pixel
+   * density information, and the value of this parameter is stored to a
+   * Windows BMP file by #tj3SaveImage8() if the value of #TJPARAM_DENSITYUNITS
+   * is `2`.
+   *
+   * This parameter has no effect unless the JPEG colorspace (see
+   * #TJPARAM_COLORSPACE) is #TJCS_YCbCr or #TJCS_GRAY.
+   *
+   * @see TJPARAM_DENSITYUNITS
+   */
+  TJPARAM_YDENSITY,
+  /**
+   * JPEG pixel density units
+   *
+   * **Value**
+   * - `0` *[default for compression]* The pixel density of the JPEG image is
+   * expressed (decompression) or will be expressed (compression) in unknown
+   * units.
+   * - `1` The pixel density of the JPEG image is expressed (decompression) or
+   * will be expressed (compression) in units of pixels/inch.
+   * - `2` The pixel density of the JPEG image is expressed (decompression) or
+   * will be expressed (compression) in units of pixels/cm.
+   *
+   * This value is stored in or read from the JPEG header.  It does not affect
+   * the contents of the JPEG image.  Note that this parameter is set by
+   * #tj3LoadImage8() when loading a Windows BMP file that contains pixel
+   * density information, and the value of this parameter is stored to a
+   * Windows BMP file by #tj3SaveImage8() if the value is `2`.
+   *
+   * This parameter has no effect unless the JPEG colorspace (see
+   * #TJPARAM_COLORSPACE) is #TJCS_YCbCr or #TJCS_GRAY.
+   *
+   * @see TJPARAM_XDENSITY, TJPARAM_YDENSITY
+   */
+  TJPARAM_DENSITYUNITS,
+  /**
+   * Memory limit for intermediate buffers
+   *
+   * **Value**
+   * - the maximum amount of memory (in megabytes) that will be allocated for
+   * intermediate buffers, which are used with progressive JPEG compression and
+   * decompression, Huffman table optimization, lossless JPEG compression, and
+   * lossless transformation *[default: `0` (no limit)]*
+   */
+  TJPARAM_MAXMEMORY,
+  /**
+   * Image size limit [decompression, lossless transformation, packed-pixel
+   * image loading]
+   *
+   * Setting this parameter causes the decompression, transform, and image
+   * loading functions to return an error if the number of pixels in the source
+   * image exceeds the specified limit.  This allows security-critical
+   * applications to guard against excessive memory consumption.
+   *
+   * **Value**
+   * - maximum number of pixels that the decompression, transform, and image
+   * loading functions will process *[default: `0` (no limit)]*
+   */
+  TJPARAM_MAXPIXELS,
+  /**
+   * Marker copying behavior [decompression, lossless transformation]
+   *
+   * **Value [lossless transformation]**
+   * - `0` Do not copy any extra markers (including comments, JFIF thumbnails,
+   * Exif data, and ICC profile data) from the source image to the destination
+   * image.
+   * - `1` Do not copy any extra markers, except comment (COM) markers, from
+   * the source image to the destination image.
+   * - `2` *[default]* Copy all extra markers from the source image to the
+   * destination image.
+   * - `3` Copy all extra markers, except ICC profile data (APP2 markers), from
+   * the source image to the destination image.
+   * - `4` Do not copy any extra markers, except ICC profile data (APP2
+   * markers), from the source image to the destination image.
+   *
+   * #TJXOPT_COPYNONE overrides this parameter for a particular transform.
+   * This parameter overrides any ICC profile that was previously associated
+   * with the TurboJPEG instance using #tj3SetICCProfile().
+   *
+   * When decompressing, #tj3DecompressHeader() extracts the ICC profile from a
+   * JPEG image if this parameter is set to `2` or `4`.  #tj3GetICCProfile()
+   * can then be used to retrieve the profile.
+   */
+  TJPARAM_SAVEMARKERS
+};
+
+
+/**
+ * The number of error codes
+ */
+#define TJ_NUMERR  2
+
+/**
+ * Error codes
+ */
+enum TJERR {
+  /**
+   * The error was non-fatal and recoverable, but the destination image may
+   * still be corrupt.
+   */
+  TJERR_WARNING,
+  /**
+   * The error was fatal and non-recoverable.
+   */
+  TJERR_FATAL
+};
+
+
+/**
+ * The number of transform operations
+ */
+#define TJ_NUMXOP  8
+
+/**
+ * Transform operations for #tj3Transform()
+ */
+enum TJXOP {
+  /**
+   * Do not transform the position of the image pixels.
+   */
+  TJXOP_NONE,
+  /**
+   * Flip (mirror) image horizontally.  This transform is imperfect if there
+   * are any partial iMCUs on the right edge (see #TJXOPT_PERFECT.)
+   */
+  TJXOP_HFLIP,
+  /**
+   * Flip (mirror) image vertically.  This transform is imperfect if there are
+   * any partial iMCUs on the bottom edge (see #TJXOPT_PERFECT.)
+   */
+  TJXOP_VFLIP,
+  /**
+   * Transpose image (flip/mirror along upper left to lower right axis.)  This
+   * transform is always perfect.
+   */
+  TJXOP_TRANSPOSE,
+  /**
+   * Transverse transpose image (flip/mirror along upper right to lower left
+   * axis.)  This transform is imperfect if there are any partial iMCUs in the
+   * image (see #TJXOPT_PERFECT.)
+   */
+  TJXOP_TRANSVERSE,
+  /**
+   * Rotate image clockwise by 90 degrees.  This transform is imperfect if
+   * there are any partial iMCUs on the bottom edge (see #TJXOPT_PERFECT.)
+   */
+  TJXOP_ROT90,
+  /**
+   * Rotate image 180 degrees.  This transform is imperfect if there are any
+   * partial iMCUs in the image (see #TJXOPT_PERFECT.)
+   */
+  TJXOP_ROT180,
+  /**
+   * Rotate image counter-clockwise by 90 degrees.  This transform is imperfect
+   * if there are any partial iMCUs on the right edge (see #TJXOPT_PERFECT.)
+   */
+  TJXOP_ROT270
+};
+
+
+/**
+ * This option causes #tj3Transform() to return an error if the transform is
+ * not perfect.  Lossless transforms operate on iMCUs, the size of which
+ * depends on the level of chrominance subsampling used (see #tjMCUWidth and
+ * #tjMCUHeight.)  If the image's width or height is not evenly divisible by
+ * the iMCU size, then there will be partial iMCUs on the right and/or bottom
+ * edges.  It is not possible to move these partial iMCUs to the top or left of
+ * the image, so any transform that would require that is "imperfect."  If this
+ * option is not specified, then any partial iMCUs that cannot be transformed
+ * will be left in place, which will create odd-looking strips on the right or
+ * bottom edge of the image.
+ */
+#define TJXOPT_PERFECT  (1 << 0)
+/**
+ * Discard any partial iMCUs that cannot be transformed.
+ */
+#define TJXOPT_TRIM  (1 << 1)
+/**
+ * Enable lossless cropping.  See #tj3Transform() for more information.
+ */
+#define TJXOPT_CROP  (1 << 2)
+/**
+ * Discard the color data in the source image, and generate a grayscale
+ * destination image.
+ */
+#define TJXOPT_GRAY  (1 << 3)
+/**
+ * Do not generate a destination image.  (This can be used in conjunction with
+ * a custom filter to capture the transformed DCT coefficients without
+ * transcoding them.)
+ */
+#define TJXOPT_NOOUTPUT  (1 << 4)
+/**
+ * Generate a progressive destination image instead of a single-scan
+ * destination image.  Progressive JPEG images generally have better
+ * compression ratios than single-scan JPEG images (much better if the image
+ * has large areas of solid color), but progressive JPEG decompression is
+ * considerably slower than single-scan JPEG decompression.  Can be combined
+ * with #TJXOPT_ARITHMETIC.  Implies #TJXOPT_OPTIMIZE unless #TJXOPT_ARITHMETIC
+ * is also specified.
+ */
+#define TJXOPT_PROGRESSIVE  (1 << 5)
+/**
+ * Do not copy any extra markers (including Exif and ICC profile data) from the
+ * source image to the destination image.
+ */
+#define TJXOPT_COPYNONE  (1 << 6)
+/**
+ * Enable arithmetic entropy coding in the destination image.  Arithmetic
+ * entropy coding generally improves compression relative to Huffman entropy
+ * coding (the default), but it reduces decompression performance considerably.
+ * Can be combined with #TJXOPT_PROGRESSIVE.
+ */
+#define TJXOPT_ARITHMETIC  (1 << 7)
+/**
+ * Enable Huffman table optimization for the destination image.  Huffman table
+ * optimization improves compression slightly (generally 5% or less.)
+ */
+#define TJXOPT_OPTIMIZE  (1 << 8)
+
+
+/**
+ * Scaling factor
+ */
+typedef struct {
+  /**
+   * Numerator
+   */
+  int num;
+  /**
+   * Denominator
+   */
+  int denom;
+} tjscalingfactor;
+
+/**
+ * Cropping region
+ */
+typedef struct {
+  /**
+   * The left boundary of the cropping region.  For lossless transformation,
+   * this must be evenly divisible by the iMCU width (see #tjMCUWidth) of the
+   * destination image.  For decompression, this must be evenly divisible by
+   * the scaled iMCU width of the source image.
+   */
+  int x;
+  /**
+   * The upper boundary of the cropping region.  For lossless transformation,
+   * this must be evenly divisible by the iMCU height (see #tjMCUHeight) of the
+   * destination image.
+   */
+  int y;
+  /**
+   * The width of the cropping region.  Setting this to 0 is the equivalent of
+   * setting it to the width of the source JPEG image - x.
+   */
+  int w;
+  /**
+   * The height of the cropping region.  Setting this to 0 is the equivalent of
+   * setting it to the height of the source JPEG image - y.
+   */
+  int h;
+} tjregion;
+
+/**
+ * A #tjregion structure that specifies no cropping
+ */
+static const tjregion TJUNCROPPED = { 0, 0, 0, 0 };
+
+/**
+ * Lossless transform
+ */
+typedef struct tjtransform {
+  /**
+   * Cropping region
+   */
+  tjregion r;
+  /**
+   * One of the @ref TJXOP "transform operations"
+   */
+  int op;
+  /**
+   * The bitwise OR of one of more of the @ref TJXOPT_ARITHMETIC
+   * "transform options"
+   */
+  int options;
+  /**
+   * Arbitrary data that can be accessed within the body of the callback
+   * function
+   */
+  void *data;
+  /**
+   * A callback function that can be used to modify the DCT coefficients after
+   * they are losslessly transformed but before they are transcoded to a new
+   * JPEG image.  This allows for custom filters or other transformations to be
+   * applied in the frequency domain.
+   *
+   * @param coeffs pointer to an array of transformed DCT coefficients.  (NOTE:
+   * This pointer is not guaranteed to be valid once the callback returns, so
+   * applications wishing to hand off the DCT coefficients to another function
+   * or library should make a copy of them within the body of the callback.)
+   *
+   * @param arrayRegion #tjregion structure containing the width and height of
+   * the array pointed to by `coeffs` as well as its offset relative to the
+   * component plane.  TurboJPEG implementations may choose to split each
+   * component plane into multiple DCT coefficient arrays and call the callback
+   * function once for each array.
+   *
+   * @param planeRegion #tjregion structure containing the width and height of
+   * the component plane to which `coeffs` belongs
+   *
+   * @param componentID ID number of the component plane to which `coeffs`
+   * belongs.  (Y, Cb, and Cr have, respectively, ID's of 0, 1, and 2 in
+   * typical JPEG images.)
+   *
+   * @param transformID ID number of the transformed image to which `coeffs`
+   * belongs.  This is the same as the index of the transform in the
+   * `transforms` array that was passed to #tj3Transform().
+   *
+   * @param transform a pointer to a #tjtransform structure that specifies the
+   * parameters and/or cropping region for this transform
+   *
+   * @return 0 if the callback was successful, or -1 if an error occurred.
+   */
+  int (*customFilter) (short *coeffs, tjregion arrayRegion,
+                       tjregion planeRegion, int componentID, int transformID,
+                       struct tjtransform *transform);
+} tjtransform;
+
+/**
+ * TurboJPEG instance handle
+ */
+typedef void *tjhandle;
+
+
+/**
+ * Compute the scaled value of `dimension` using the given scaling factor.
+ * This macro performs the integer equivalent of `ceil(dimension *
+ * scalingFactor)`.
+ */
+#define TJSCALED(dimension, scalingFactor) \
+  (((dimension) * scalingFactor.num + scalingFactor.denom - 1) / \
+   scalingFactor.denom)
+
+/**
+ * A #tjscalingfactor structure that specifies a scaling factor of 1/1 (no
+ * scaling)
+ */
+static const tjscalingfactor TJUNSCALED = { 1, 1 };
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/**
+ * Create a new TurboJPEG instance.
+ *
+ * @param initType one of the @ref TJINIT "initialization options"
+ *
+ * @return a handle to the newly-created instance, or NULL if an error occurred
+ * (see #tj3GetErrorStr().)
+ */
+DLLEXPORT tjhandle tj3Init(int initType);
+
+
+/**
+ * Destroy a TurboJPEG instance.
+ *
+ * @param handle handle to a TurboJPEG instance.  If the handle is NULL, then
+ * this function has no effect.
+ */
+DLLEXPORT void tj3Destroy(tjhandle handle);
+
+
+/**
+ * Returns a descriptive error message explaining why the last command failed.
+ *
+ * @param handle handle to a TurboJPEG instance, or NULL if the error was
+ * generated by a global function (but note that retrieving the error message
+ * for a global function is thread-safe only on platforms that support
+ * thread-local storage.)
+ *
+ * @return a descriptive error message explaining why the last command failed.
+ */
+DLLEXPORT char *tj3GetErrorStr(tjhandle handle);
+
+
+/**
+ * Returns a code indicating the severity of the last error.  See
+ * @ref TJERR "Error codes".
+ *
+ * @param handle handle to a TurboJPEG instance
+ *
+ * @return a code indicating the severity of the last error.  See
+ * @ref TJERR "Error codes".
+ */
+DLLEXPORT int tj3GetErrorCode(tjhandle handle);
+
+
+/**
+ * Set the value of a parameter.
+ *
+ * @param handle handle to a TurboJPEG instance
+ *
+ * @param param one of the @ref TJPARAM "parameters"
+ *
+ * @param value value of the parameter (refer to @ref TJPARAM
+ * "parameter documentation")
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr().)
+ */
+DLLEXPORT int tj3Set(tjhandle handle, int param, int value);
+
+
+/**
+ * Get the value of a parameter.
+ *
+ * @param handle handle to a TurboJPEG instance
+ *
+ * @param param one of the @ref TJPARAM "parameters"
+ *
+ * @return the value of the specified parameter, or -1 if the value is unknown.
+ */
+DLLEXPORT int tj3Get(tjhandle handle, int param);
+
+
+/**
+ * Allocate a byte buffer for use with TurboJPEG.  You should always use this
+ * function to allocate the JPEG destination buffer(s) for the compression and
+ * transform functions unless you are disabling automatic buffer (re)allocation
+ * (by setting #TJPARAM_NOREALLOC.)
+ *
+ * @param bytes the number of bytes to allocate
+ *
+ * @return a pointer to a newly-allocated buffer with the specified number of
+ * bytes.
+ *
+ * @see tj3Free()
+ */
+DLLEXPORT void *tj3Alloc(size_t bytes);
+
+
+/**
+ * Free a byte buffer previously allocated by TurboJPEG.  You should always use
+ * this function to free JPEG destination buffer(s) that were automatically
+ * (re)allocated by the compression and transform functions or that were
+ * manually allocated using #tj3Alloc().
+ *
+ * @param buffer address of the buffer to free.  If the address is NULL, then
+ * this function has no effect.
+ *
+ * @see tj3Alloc()
+ */
+DLLEXPORT void tj3Free(void *buffer);
+
+
+/**
+ * The maximum size of the buffer (in bytes) required to hold a JPEG image with
+ * the given parameters.  The number of bytes returned by this function is
+ * larger than the size of the uncompressed source image.  The reason for this
+ * is that the JPEG format uses 16-bit coefficients, so it is possible for a
+ * very high-quality source image with very high-frequency content to expand
+ * rather than compress when converted to the JPEG format.  Such images
+ * represent very rare corner cases, but since there is no way to predict the
+ * size of a JPEG image prior to compression, the corner cases have to be
+ * handled.
+ *
+ * @param width width (in pixels) of the image
+ *
+ * @param height height (in pixels) of the image
+ *
+ * @param jpegSubsamp the level of chrominance subsampling to be used when
+ * generating the JPEG image (see @ref TJSAMP
+ * "Chrominance subsampling options".)  #TJSAMP_UNKNOWN is treated like
+ * #TJSAMP_444, since a buffer large enough to hold a JPEG image with no
+ * subsampling should also be large enough to hold a JPEG image with an
+ * arbitrary level of subsampling.  Note that lossless JPEG images always
+ * use #TJSAMP_444.
+ *
+ * @return the maximum size of the buffer (in bytes) required to hold the
+ * image, or 0 if the arguments are out of bounds.
+ */
+DLLEXPORT size_t tj3JPEGBufSize(int width, int height, int jpegSubsamp);
+
+
+/**
+ * The size of the buffer (in bytes) required to hold a unified planar YUV
+ * image with the given parameters.
+ *
+ * @param width width (in pixels) of the image
+ *
+ * @param align row alignment (in bytes) of the image (must be a power of 2.)
+ * Setting this parameter to n specifies that each row in each plane of the
+ * image will be padded to the nearest multiple of n bytes (1 = unpadded.)
+ *
+ * @param height height (in pixels) of the image
+ *
+ * @param subsamp level of chrominance subsampling in the image (see
+ * @ref TJSAMP "Chrominance subsampling options".)
+ *
+ * @return the size of the buffer (in bytes) required to hold the image, or 0
+ * if the arguments are out of bounds.
+ */
+DLLEXPORT size_t tj3YUVBufSize(int width, int align, int height, int subsamp);
+
+
+/**
+ * The size of the buffer (in bytes) required to hold a YUV image plane with
+ * the given parameters.
+ *
+ * @param componentID ID number of the image plane (0 = Y, 1 = U/Cb, 2 = V/Cr)
+ *
+ * @param width width (in pixels) of the YUV image.  NOTE: This is the width of
+ * the whole image, not the plane width.
+ *
+ * @param stride bytes per row in the image plane.  Setting this to 0 is the
+ * equivalent of setting it to the plane width.
+ *
+ * @param height height (in pixels) of the YUV image.  NOTE: This is the height
+ * of the whole image, not the plane height.
+ *
+ * @param subsamp level of chrominance subsampling in the image (see
+ * @ref TJSAMP "Chrominance subsampling options".)
+ *
+ * @return the size of the buffer (in bytes) required to hold the YUV image
+ * plane, or 0 if the arguments are out of bounds.
+ */
+DLLEXPORT size_t tj3YUVPlaneSize(int componentID, int width, int stride,
+                                 int height, int subsamp);
+
+
+/**
+ * The plane width of a YUV image plane with the given parameters.  Refer to
+ * @ref YUVnotes "YUV Image Format Notes" for a description of plane width.
+ *
+ * @param componentID ID number of the image plane (0 = Y, 1 = U/Cb, 2 = V/Cr)
+ *
+ * @param width width (in pixels) of the YUV image
+ *
+ * @param subsamp level of chrominance subsampling in the image (see
+ * @ref TJSAMP "Chrominance subsampling options".)
+ *
+ * @return the plane width of a YUV image plane with the given parameters, or 0
+ * if the arguments are out of bounds.
+ */
+DLLEXPORT int tj3YUVPlaneWidth(int componentID, int width, int subsamp);
+
+
+/**
+ * The plane height of a YUV image plane with the given parameters.  Refer to
+ * @ref YUVnotes "YUV Image Format Notes" for a description of plane height.
+ *
+ * @param componentID ID number of the image plane (0 = Y, 1 = U/Cb, 2 = V/Cr)
+ *
+ * @param height height (in pixels) of the YUV image
+ *
+ * @param subsamp level of chrominance subsampling in the image (see
+ * @ref TJSAMP "Chrominance subsampling options".)
+ *
+ * @return the plane height of a YUV image plane with the given parameters, or
+ * 0 if the arguments are out of bounds.
+ */
+DLLEXPORT int tj3YUVPlaneHeight(int componentID, int height, int subsamp);
+
+
+/**
+ * Embed an ICC (International Color Consortium) color management profile in
+ * JPEG images generated by subsequent compression and lossless transformation
+ * operations.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * compression
+ *
+ * @param iccBuf pointer to a byte buffer containing an ICC profile.  A copy is
+ * made of the ICC profile, so this buffer can be freed or reused as soon as
+ * this function returns.  Setting this parameter to NULL or setting `iccSize`
+ * to 0 removes any ICC profile that was previously associated with the
+ * TurboJPEG instance.
+ *
+ * @param iccSize size of the ICC profile (in bytes.)  Setting this parameter
+ * to 0 or setting `iccBuf` to NULL removes any ICC profile that was previously
+ * associated with the TurboJPEG instance.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr().)
+ */
+DLLEXPORT int tj3SetICCProfile(tjhandle handle, unsigned char *iccBuf,
+                               size_t iccSize);
+
+
+/**
+ * Compress a packed-pixel RGB, grayscale, or CMYK image with 2 to 8 bits of
+ * data precision per sample into a JPEG image with the same data precision.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * compression
+ *
+ * @param srcBuf pointer to a buffer containing a packed-pixel RGB, grayscale,
+ * or CMYK source image to be compressed.  This buffer should normally be
+ * `pitch * height` samples in size.  However, you can also use this parameter
+ * to compress from a specific region of a larger buffer.  The data precision
+ * of the source image (from 2 to 8 bits per sample) can be specified using
+ * #TJPARAM_PRECISION and defaults to 8 if #TJPARAM_PRECISION is unset or out
+ * of range.
+ *
+ * @param width width (in pixels) of the source image
+ *
+ * @param pitch samples per row in the source image.  Normally this should be
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>, if the image is unpadded.
+ * (Setting this parameter to 0 is the equivalent of setting it to
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>.)  However, you can also use this
+ * parameter to specify the row alignment/padding of the source image, to skip
+ * rows, or to compress from a specific region of a larger buffer.
+ *
+ * @param height height (in pixels) of the source image
+ *
+ * @param pixelFormat pixel format of the source image (see @ref TJPF
+ * "Pixel formats".)
+ *
+ * @param jpegBuf address of a pointer to a byte buffer that will receive the
+ * JPEG image.  TurboJPEG has the ability to reallocate the JPEG buffer to
+ * accommodate the size of the JPEG image.  Thus, you can choose to:
+ * -# pre-allocate the JPEG buffer with an arbitrary size using #tj3Alloc() and
+ * let TurboJPEG grow the buffer as needed,
+ * -# set `*jpegBuf` to NULL to tell TurboJPEG to allocate the buffer for you,
+ * or
+ * -# pre-allocate the buffer to a "worst case" size determined by calling
+ * #tj3JPEGBufSize() and adding the return value to the size of the ICC profile
+ * (if any) that was previously associated with the TurboJPEG instance (see
+ * #tj3SetICCProfile().)  This should ensure that the buffer never has to be
+ * re-allocated.  (Setting #TJPARAM_NOREALLOC guarantees that it won't be.)
+ * .
+ * If you choose option 1 or 3, then `*jpegSize` should be set to the size of
+ * your pre-allocated buffer.  In any case, unless you have set
+ * #TJPARAM_NOREALLOC, you should always check `*jpegBuf` upon return from this
+ * function, as it may have changed.
+ *
+ * @param jpegSize pointer to a size_t variable that holds the size of the JPEG
+ * buffer.  If `*jpegBuf` points to a pre-allocated buffer, then `*jpegSize`
+ * should be set to the size of the buffer.  Upon return, `*jpegSize` will
+ * contain the size of the JPEG image (in bytes.)  If `*jpegBuf` points to a
+ * JPEG buffer that is being reused from a previous call to one of the JPEG
+ * compression functions, then `*jpegSize` is ignored.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3Compress8(tjhandle handle, const unsigned char *srcBuf,
+                           int width, int pitch, int height, int pixelFormat,
+                           unsigned char **jpegBuf, size_t *jpegSize);
+
+/**
+ * Compress a packed-pixel RGB, grayscale, or CMYK image with 9 to 12 bits of
+ * data precision per sample into a JPEG image with the same data precision.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * compression
+ *
+ * @param srcBuf pointer to a buffer containing a packed-pixel RGB, grayscale,
+ * or CMYK source image to be compressed.  This buffer should normally be
+ * `pitch * height` samples in size.  However, you can also use this parameter
+ * to compress from a specific region of a larger buffer.  The data precision
+ * of the source image (from 9 to 12 bits per sample) can be specified using
+ * #TJPARAM_PRECISION and defaults to 12 if #TJPARAM_PRECISION is unset or out
+ * of range.
+ *
+ * @param width width (in pixels) of the source image
+ *
+ * @param pitch samples per row in the source image.  Normally this should be
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>, if the image is unpadded.
+ * (Setting this parameter to 0 is the equivalent of setting it to
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>.)  However, you can also use this
+ * parameter to specify the row alignment/padding of the source image, to skip
+ * rows, or to compress from a specific region of a larger buffer.
+ *
+ * @param height height (in pixels) of the source image
+ *
+ * @param pixelFormat pixel format of the source image (see @ref TJPF
+ * "Pixel formats".)
+ *
+ * @param jpegBuf address of a pointer to a byte buffer that will receive the
+ * JPEG image.  TurboJPEG has the ability to reallocate the JPEG buffer to
+ * accommodate the size of the JPEG image.  Thus, you can choose to:
+ * -# pre-allocate the JPEG buffer with an arbitrary size using #tj3Alloc() and
+ * let TurboJPEG grow the buffer as needed,
+ * -# set `*jpegBuf` to NULL to tell TurboJPEG to allocate the buffer for you,
+ * or
+ * -# pre-allocate the buffer to a "worst case" size determined by calling
+ * #tj3JPEGBufSize() and adding the return value to the size of the ICC profile
+ * (if any) that was previously associated with the TurboJPEG instance (see
+ * #tj3SetICCProfile().)  This should ensure that the buffer never has to be
+ * re-allocated.  (Setting #TJPARAM_NOREALLOC guarantees that it won't be.)
+ * .
+ * If you choose option 1 or 3, then `*jpegSize` should be set to the size of
+ * your pre-allocated buffer.  In any case, unless you have set
+ * #TJPARAM_NOREALLOC, you should always check `*jpegBuf` upon return from this
+ * function, as it may have changed.
+ *
+ * @param jpegSize pointer to a size_t variable that holds the size of the JPEG
+ * buffer.  If `*jpegBuf` points to a pre-allocated buffer, then `*jpegSize`
+ * should be set to the size of the buffer.  Upon return, `*jpegSize` will
+ * contain the size of the JPEG image (in bytes.)  If `*jpegBuf` points to a
+ * JPEG buffer that is being reused from a previous call to one of the JPEG
+ * compression functions, then `*jpegSize` is ignored.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3Compress12(tjhandle handle, const short *srcBuf, int width,
+                            int pitch, int height, int pixelFormat,
+                            unsigned char **jpegBuf, size_t *jpegSize);
+
+/**
+ * Compress a packed-pixel RGB, grayscale, or CMYK image with 13 to 16 bits of
+ * data precision per sample into a lossless JPEG image with the same data
+ * precision.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * compression
+ *
+ * @param srcBuf pointer to a buffer containing a packed-pixel RGB, grayscale,
+ * or CMYK source image to be compressed.  This buffer should normally be
+ * `pitch * height` samples in size.  However, you can also use this parameter
+ * to compress from a specific region of a larger buffer.  The data precision
+ * of the source image (from 13 to 16 bits per sample) can be specified using
+ * #TJPARAM_PRECISION and defaults to 16 if #TJPARAM_PRECISION is unset or out
+ * of range.
+ *
+ * @param width width (in pixels) of the source image
+ *
+ * @param pitch samples per row in the source image.  Normally this should be
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>, if the image is unpadded.
+ * (Setting this parameter to 0 is the equivalent of setting it to
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>.)  However, you can also use this
+ * parameter to specify the row alignment/padding of the source image, to skip
+ * rows, or to compress from a specific region of a larger buffer.
+ *
+ * @param height height (in pixels) of the source image
+ *
+ * @param pixelFormat pixel format of the source image (see @ref TJPF
+ * "Pixel formats".)
+ *
+ * @param jpegBuf address of a pointer to a byte buffer that will receive the
+ * JPEG image.  TurboJPEG has the ability to reallocate the JPEG buffer to
+ * accommodate the size of the JPEG image.  Thus, you can choose to:
+ * -# pre-allocate the JPEG buffer with an arbitrary size using #tj3Alloc() and
+ * let TurboJPEG grow the buffer as needed,
+ * -# set `*jpegBuf` to NULL to tell TurboJPEG to allocate the buffer for you,
+ * or
+ * -# pre-allocate the buffer to a "worst case" size determined by calling
+ * #tj3JPEGBufSize() and adding the return value to the size of the ICC profile
+ * (if any) that was previously associated with the TurboJPEG instance (see
+ * #tj3SetICCProfile().)  This should ensure that the buffer never has to be
+ * re-allocated.  (Setting #TJPARAM_NOREALLOC guarantees that it won't be.)
+ * .
+ * If you choose option 1 or 3, then `*jpegSize` should be set to the size of
+ * your pre-allocated buffer.  In any case, unless you have set
+ * #TJPARAM_NOREALLOC, you should always check `*jpegBuf` upon return from this
+ * function, as it may have changed.
+ *
+ * @param jpegSize pointer to a size_t variable that holds the size of the JPEG
+ * buffer.  If `*jpegBuf` points to a pre-allocated buffer, then `*jpegSize`
+ * should be set to the size of the buffer.  Upon return, `*jpegSize` will
+ * contain the size of the JPEG image (in bytes.)  If `*jpegBuf` points to a
+ * JPEG buffer that is being reused from a previous call to one of the JPEG
+ * compression functions, then `*jpegSize` is ignored.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3Compress16(tjhandle handle, const unsigned short *srcBuf,
+                            int width, int pitch, int height, int pixelFormat,
+                            unsigned char **jpegBuf, size_t *jpegSize);
+
+
+/**
+ * Compress a set of 8-bit-per-sample Y, U (Cb), and V (Cr) image planes into
+ * an 8-bit-per-sample JPEG image.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * compression
+ *
+ * @param srcPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes
+ * (or just a Y plane, if compressing a grayscale image) that contain a YUV
+ * source image to be compressed.  These planes can be contiguous or
+ * non-contiguous in memory.  The size of each plane should match the value
+ * returned by #tj3YUVPlaneSize() for the given image width, height, strides,
+ * and level of chrominance subsampling (see #TJPARAM_SUBSAMP.)  Refer to
+ * @ref YUVnotes "YUV Image Format Notes" for more details.
+ *
+ * @param width width (in pixels) of the source image.  If the width is not an
+ * even multiple of the iMCU width (see #tjMCUWidth), then an intermediate
+ * buffer copy will be performed.
+ *
+ * @param strides an array of integers, each specifying the number of bytes per
+ * row in the corresponding plane of the YUV source image.  Setting the stride
+ * for any plane to 0 is the same as setting it to the plane width (see
+ * @ref YUVnotes "YUV Image Format Notes".)  If `strides` is NULL, then the
+ * strides for all planes will be set to their respective plane widths.  You
+ * can adjust the strides in order to specify an arbitrary amount of row
+ * padding in each plane or to create a JPEG image from a subregion of a larger
+ * planar YUV image.
+ *
+ * @param height height (in pixels) of the source image.  If the height is not
+ * an even multiple of the iMCU height (see #tjMCUHeight), then an intermediate
+ * buffer copy will be performed.
+ *
+ * @param jpegBuf address of a pointer to a byte buffer that will receive the
+ * JPEG image.  TurboJPEG has the ability to reallocate the JPEG buffer to
+ * accommodate the size of the JPEG image.  Thus, you can choose to:
+ * -# pre-allocate the JPEG buffer with an arbitrary size using #tj3Alloc() and
+ * let TurboJPEG grow the buffer as needed,
+ * -# set `*jpegBuf` to NULL to tell TurboJPEG to allocate the buffer for you,
+ * or
+ * -# pre-allocate the buffer to a "worst case" size determined by calling
+ * #tj3JPEGBufSize() and adding the return value to the size of the ICC profile
+ * (if any) that was previously associated with the TurboJPEG instance (see
+ * #tj3SetICCProfile().)  This should ensure that the buffer never has to be
+ * re-allocated.  (Setting #TJPARAM_NOREALLOC guarantees that it won't be.)
+ * .
+ * If you choose option 1 or 3, then `*jpegSize` should be set to the size of
+ * your pre-allocated buffer.  In any case, unless you have set
+ * #TJPARAM_NOREALLOC, you should always check `*jpegBuf` upon return from this
+ * function, as it may have changed.
+ *
+ * @param jpegSize pointer to a size_t variable that holds the size of the JPEG
+ * buffer.  If `*jpegBuf` points to a pre-allocated buffer, then `*jpegSize`
+ * should be set to the size of the buffer.  Upon return, `*jpegSize` will
+ * contain the size of the JPEG image (in bytes.)  If `*jpegBuf` points to a
+ * JPEG buffer that is being reused from a previous call to one of the JPEG
+ * compression functions, then `*jpegSize` is ignored.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3CompressFromYUVPlanes8(tjhandle handle,
+                                        const unsigned char * const *srcPlanes,
+                                        int width, const int *strides,
+                                        int height, unsigned char **jpegBuf,
+                                        size_t *jpegSize);
+
+
+/**
+ * Compress an 8-bit-per-sample unified planar YUV image into an
+ * 8-bit-per-sample JPEG image.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * compression
+ *
+ * @param srcBuf pointer to a buffer containing a unified planar YUV source
+ * image to be compressed.  The size of this buffer should match the value
+ * returned by #tj3YUVBufSize() for the given image width, height, row
+ * alignment, and level of chrominance subsampling (see #TJPARAM_SUBSAMP.)  The
+ * Y, U (Cb), and V (Cr) image planes should be stored sequentially in the
+ * buffer.  (Refer to @ref YUVnotes "YUV Image Format Notes".)
+ *
+ * @param width width (in pixels) of the source image.  If the width is not an
+ * even multiple of the iMCU width (see #tjMCUWidth), then an intermediate
+ * buffer copy will be performed.
+ *
+ * @param align row alignment (in bytes) of the source image (must be a power
+ * of 2.)  Setting this parameter to n indicates that each row in each plane of
+ * the source image is padded to the nearest multiple of n bytes
+ * (1 = unpadded.)
+ *
+ * @param height height (in pixels) of the source image.  If the height is not
+ * an even multiple of the iMCU height (see #tjMCUHeight), then an intermediate
+ * buffer copy will be performed.
+ *
+ * @param jpegBuf address of a pointer to a byte buffer that will receive the
+ * JPEG image.  TurboJPEG has the ability to reallocate the JPEG buffer to
+ * accommodate the size of the JPEG image.  Thus, you can choose to:
+ * -# pre-allocate the JPEG buffer with an arbitrary size using #tj3Alloc() and
+ * let TurboJPEG grow the buffer as needed,
+ * -# set `*jpegBuf` to NULL to tell TurboJPEG to allocate the buffer for you,
+ * or
+ * -# pre-allocate the buffer to a "worst case" size determined by calling
+ * #tj3JPEGBufSize() and adding the return value to the size of the ICC profile
+ * (if any) that was previously associated with the TurboJPEG instance (see
+ * #tj3SetICCProfile().)  This should ensure that the buffer never has to be
+ * re-allocated.  (Setting #TJPARAM_NOREALLOC guarantees that it won't be.)
+ * .
+ * If you choose option 1 or 3, then `*jpegSize` should be set to the size of
+ * your pre-allocated buffer.  In any case, unless you have set
+ * #TJPARAM_NOREALLOC, you should always check `*jpegBuf` upon return from this
+ * function, as it may have changed.
+ *
+ * @param jpegSize pointer to a size_t variable that holds the size of the JPEG
+ * buffer.  If `*jpegBuf` points to a pre-allocated buffer, then `*jpegSize`
+ * should be set to the size of the buffer.  Upon return, `*jpegSize` will
+ * contain the size of the JPEG image (in bytes.)  If `*jpegBuf` points to a
+ * JPEG buffer that is being reused from a previous call to one of the JPEG
+ * compression functions, then `*jpegSize` is ignored.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3CompressFromYUV8(tjhandle handle,
+                                  const unsigned char *srcBuf, int width,
+                                  int align, int height,
+                                  unsigned char **jpegBuf, size_t *jpegSize);
+
+
+/**
+ * Encode an 8-bit-per-sample packed-pixel RGB or grayscale image into separate
+ * 8-bit-per-sample Y, U (Cb), and V (Cr) image planes.  This function performs
+ * color conversion (which is accelerated in the libjpeg-turbo implementation)
+ * but does not execute any of the other steps in the JPEG compression process.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * compression
+ *
+ * @param srcBuf pointer to a buffer containing a packed-pixel RGB or grayscale
+ * source image to be encoded.  This buffer should normally be `pitch * height`
+ * bytes in size.  However, you can also use this parameter to encode from a
+ * specific region of a larger buffer.
+ *
+ *
+ * @param width width (in pixels) of the source image
+ *
+ * @param pitch bytes per row in the source image.  Normally this should be
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>, if the image is unpadded.
+ * (Setting this parameter to 0 is the equivalent of setting it to
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>.)  However, you can also use this
+ * parameter to specify the row alignment/padding of the source image, to skip
+ * rows, or to encode from a specific region of a larger packed-pixel image.
+ *
+ * @param height height (in pixels) of the source image
+ *
+ * @param pixelFormat pixel format of the source image (see @ref TJPF
+ * "Pixel formats".)
+ *
+ * @param dstPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes
+ * (or just a Y plane, if generating a grayscale image) that will receive the
+ * encoded image.  These planes can be contiguous or non-contiguous in memory.
+ * Use #tj3YUVPlaneSize() to determine the appropriate size for each plane
+ * based on the image width, height, strides, and level of chrominance
+ * subsampling (see #TJPARAM_SUBSAMP.)  Refer to @ref YUVnotes
+ * "YUV Image Format Notes" for more details.
+ *
+ * @param strides an array of integers, each specifying the number of bytes per
+ * row in the corresponding plane of the YUV image.  Setting the stride for any
+ * plane to 0 is the same as setting it to the plane width (see @ref YUVnotes
+ * "YUV Image Format Notes".)  If `strides` is NULL, then the strides for all
+ * planes will be set to their respective plane widths.  You can adjust the
+ * strides in order to add an arbitrary amount of row padding to each plane or
+ * to encode an RGB or grayscale image into a subregion of a larger planar YUV
+ * image.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3EncodeYUVPlanes8(tjhandle handle, const unsigned char *srcBuf,
+                                  int width, int pitch, int height,
+                                  int pixelFormat, unsigned char **dstPlanes,
+                                  int *strides);
+
+
+/**
+ * Encode an 8-bit-per-sample packed-pixel RGB or grayscale image into an
+ * 8-bit-per-sample unified planar YUV image.  This function performs color
+ * conversion (which is accelerated in the libjpeg-turbo implementation) but
+ * does not execute any of the other steps in the JPEG compression process.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * compression
+ *
+ * @param srcBuf pointer to a buffer containing a packed-pixel RGB or grayscale
+ * source image to be encoded.  This buffer should normally be `pitch * height`
+ * bytes in size.  However, you can also use this parameter to encode from a
+ * specific region of a larger buffer.
+ *
+ * @param width width (in pixels) of the source image
+ *
+ * @param pitch bytes per row in the source image.  Normally this should be
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>, if the image is unpadded.
+ * (Setting this parameter to 0 is the equivalent of setting it to
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>.)  However, you can also use this
+ * parameter to specify the row alignment/padding of the source image, to skip
+ * rows, or to encode from a specific region of a larger packed-pixel image.
+ *
+ * @param height height (in pixels) of the source image
+ *
+ * @param pixelFormat pixel format of the source image (see @ref TJPF
+ * "Pixel formats".)
+ *
+ * @param dstBuf pointer to a buffer that will receive the unified planar YUV
+ * image.  Use #tj3YUVBufSize() to determine the appropriate size for this
+ * buffer based on the image width, height, row alignment, and level of
+ * chrominance subsampling (see #TJPARAM_SUBSAMP.)  The Y, U (Cb), and V (Cr)
+ * image planes will be stored sequentially in the buffer.  (Refer to
+ * @ref YUVnotes "YUV Image Format Notes".)
+ *
+ * @param align row alignment (in bytes) of the YUV image (must be a power of
+ * 2.)  Setting this parameter to n will cause each row in each plane of the
+ * YUV image to be padded to the nearest multiple of n bytes (1 = unpadded.)
+ * To generate images suitable for X Video, `align` should be set to 4.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3EncodeYUV8(tjhandle handle, const unsigned char *srcBuf,
+                            int width, int pitch, int height, int pixelFormat,
+                            unsigned char *dstBuf, int align);
+
+
+/**
+ * Retrieve information about a JPEG image without decompressing it, or prime
+ * the decompressor with quantization and Huffman tables.  If a JPEG image is
+ * passed to this function, then the @ref TJPARAM "parameters" that describe
+ * the JPEG image will be set when the function returns.  If a JPEG image is
+ * passed to this function and #TJPARAM_SAVEMARKERS is set to `2` or `4`, then
+ * the ICC profile (if any) will be extracted from the JPEG image.
+ * (#tj3GetICCProfile() can then be used to retrieve the profile.)
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * decompression
+ *
+ * @param jpegBuf pointer to a byte buffer containing a JPEG image or an
+ * "abbreviated table specification" (AKA "tables-only") datastream.  Passing a
+ * tables-only datastream to this function primes the decompressor with
+ * quantization and Huffman tables that can be used when decompressing
+ * subsequent "abbreviated image" datastreams.  This is useful, for instance,
+ * when decompressing video streams in which all frames share the same
+ * quantization and Huffman tables.
+ *
+ * @param jpegSize size of the JPEG image or tables-only datastream (in bytes)
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3DecompressHeader(tjhandle handle,
+                                  const unsigned char *jpegBuf,
+                                  size_t jpegSize);
+
+
+/**
+ * Retrieve the ICC (International Color Consortium) color management profile
+ * (if any) that was previously extracted from a JPEG image.
+ *
+ * @note To extract the ICC profile from a JPEG image, call
+ * #tj3DecompressHeader() with #TJPARAM_SAVEMARKERS set to `2` or `4`.  Once
+ * the ICC profile is retrieved, it must be re-extracted before it can be
+ * retrieved again.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * decompression
+ *
+ * @param iccBuf address of a pointer to a byte buffer.  Upon return:
+ * - If `iccBuf` is not NULL and there is an ICC profile to retrieve, then
+ * `*iccBuf` will point to a byte buffer containing the ICC profile.  This
+ * buffer should be freed using #tj3Free().
+ * - If `iccBuf` is not NULL and there is no ICC profile to retrieve, then
+ * `*iccBuf` will be NULL.
+ * - If `iccBuf` is NULL, then only the ICC profile size will be retrieved, and
+ * the ICC profile can be retrieved later.
+ *
+ * @param iccSize address of a size_t variable.  Upon return, the variable will
+ * contain the ICC profile size (or 0 if there is no ICC profile to retrieve.)
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3GetICCProfile(tjhandle handle, unsigned char **iccBuf,
+                               size_t *iccSize);
+
+
+/**
+ * Returns a list of fractional scaling factors that the JPEG decompressor
+ * supports.
+ *
+ * @param numScalingFactors pointer to an integer variable that will receive
+ * the number of elements in the list
+ *
+ * @return a pointer to a list of fractional scaling factors, or NULL if an
+ * error is encountered (see #tj3GetErrorStr().)
+ */
+DLLEXPORT tjscalingfactor *tj3GetScalingFactors(int *numScalingFactors);
+
+
+/**
+ * Set the scaling factor for subsequent lossy decompression operations.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * decompression
+ *
+ * @param scalingFactor #tjscalingfactor structure that specifies a fractional
+ * scaling factor that the decompressor supports (see #tj3GetScalingFactors()),
+ * or <tt>#TJUNSCALED</tt> for no scaling.  Decompression scaling is a function
+ * of the IDCT algorithm, so scaling factors are generally limited to multiples
+ * of 1/8.  If the entire JPEG image will be decompressed, then the width and
+ * height of the scaled destination image can be determined by calling
+ * #TJSCALED() with the JPEG width and height (see #TJPARAM_JPEGWIDTH and
+ * #TJPARAM_JPEGHEIGHT) and the specified scaling factor.  When decompressing
+ * into a planar YUV image, an intermediate buffer copy will be performed if
+ * the width or height of the scaled destination image is not an even multiple
+ * of the iMCU size (see #tjMCUWidth and #tjMCUHeight.)  Note that
+ * decompression scaling is not available (and the specified scaling factor is
+ * ignored) when decompressing lossless JPEG images (see #TJPARAM_LOSSLESS),
+ * since the IDCT algorithm is not used with those images.  Note also that
+ * #TJPARAM_FASTDCT is ignored when decompression scaling is enabled.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr().)
+ */
+DLLEXPORT int tj3SetScalingFactor(tjhandle handle,
+                                  tjscalingfactor scalingFactor);
+
+
+/**
+ * Set the cropping region for partially decompressing a lossy JPEG image into
+ * a packed-pixel image
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * decompression
+ *
+ * @param croppingRegion #tjregion structure that specifies a subregion of the
+ * JPEG image to decompress, or <tt>#TJUNCROPPED</tt> for no cropping.  The
+ * left boundary of the cropping region must be evenly divisible by the scaled
+ * iMCU width-- <tt>#TJSCALED(#tjMCUWidth[subsamp], scalingFactor)</tt>, where
+ * `subsamp` is the level of chrominance subsampling in the JPEG image (see
+ * #TJPARAM_SUBSAMP) and `scalingFactor` is the decompression scaling factor
+ * (see #tj3SetScalingFactor().)  The cropping region should be specified
+ * relative to the scaled image dimensions.  Unless `croppingRegion` is
+ * <tt>#TJUNCROPPED</tt>, the JPEG header must be read (see
+ * #tj3DecompressHeader()) prior to calling this function.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr().)
+ */
+DLLEXPORT int tj3SetCroppingRegion(tjhandle handle, tjregion croppingRegion);
+
+
+/**
+ * Decompress a JPEG image with 2 to 8 bits of data precision per sample into a
+ * packed-pixel RGB, grayscale, or CMYK image with the same data precision.
+ * The @ref TJPARAM "parameters" that describe the JPEG image will be set when
+ * this function returns.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * decompression
+ *
+ * @param jpegBuf pointer to a byte buffer containing the JPEG image to
+ * decompress
+ *
+ * @param jpegSize size of the JPEG image (in bytes)
+ *
+ * @param dstBuf pointer to a buffer that will receive the packed-pixel
+ * decompressed image.  This buffer should normally be
+ * `pitch * destinationHeight` samples in size.  However, you can also use this
+ * parameter to decompress into a specific region of a larger buffer.  NOTE:
+ * If the JPEG image is lossy, then `destinationHeight` is either the scaled
+ * JPEG height (see #TJSCALED(), #TJPARAM_JPEGHEIGHT, and
+ * #tj3SetScalingFactor()) or the height of the cropping region (see
+ * #tj3SetCroppingRegion().)  If the JPEG image is lossless, then
+ * `destinationHeight` is the JPEG height.
+ *
+ * @param pitch samples per row in the destination image.  Normally this should
+ * be set to <tt>destinationWidth * #tjPixelSize[pixelFormat]</tt>, if the
+ * destination image should be unpadded.  (Setting this parameter to 0 is the
+ * equivalent of setting it to
+ * <tt>destinationWidth * #tjPixelSize[pixelFormat]</tt>.)  However, you can
+ * also use this parameter to specify the row alignment/padding of the
+ * destination image, to skip rows, or to decompress into a specific region of
+ * a larger buffer.  NOTE: If the JPEG image is lossy, then `destinationWidth`
+ * is either the scaled JPEG width (see #TJSCALED(), #TJPARAM_JPEGWIDTH, and
+ * #tj3SetScalingFactor()) or the width of the cropping region (see
+ * #tj3SetCroppingRegion().)  If the JPEG image is lossless, then
+ * `destinationWidth` is the JPEG width.
+ *
+ * @param pixelFormat pixel format of the destination image (see @ref
+ * TJPF "Pixel formats".)
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3Decompress8(tjhandle handle, const unsigned char *jpegBuf,
+                             size_t jpegSize, unsigned char *dstBuf, int pitch,
+                             int pixelFormat);
+
+/**
+ * Decompress a JPEG image with 9 to 12 bits of data precision per sample into
+ * a packed-pixel RGB, grayscale, or CMYK image with the same data precision.
+ *
+ * \details \copydetails tj3Decompress8()
+ */
+DLLEXPORT int tj3Decompress12(tjhandle handle, const unsigned char *jpegBuf,
+                              size_t jpegSize, short *dstBuf, int pitch,
+                              int pixelFormat);
+
+/**
+ * Decompress a lossless JPEG image with 13 to 16 bits of data precision per
+ * sample into a packed-pixel RGB, grayscale, or CMYK image with the same
+ * data precision.
+ *
+ * \details \copydetails tj3Decompress8()
+ */
+DLLEXPORT int tj3Decompress16(tjhandle handle, const unsigned char *jpegBuf,
+                              size_t jpegSize, unsigned short *dstBuf,
+                              int pitch, int pixelFormat);
+
+
+/**
+ * Decompress an 8-bit-per-sample JPEG image into separate 8-bit-per-sample Y,
+ * U (Cb), and V (Cr) image planes.  This function performs JPEG decompression
+ * but leaves out the color conversion step, so a planar YUV image is generated
+ * instead of a packed-pixel image.  The @ref TJPARAM "parameters" that
+ * describe the JPEG image will be set when this function returns.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * decompression
+ *
+ * @param jpegBuf pointer to a byte buffer containing the JPEG image to
+ * decompress
+ *
+ * @param jpegSize size of the JPEG image (in bytes)
+ *
+ * @param dstPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes
+ * (or just a Y plane, if decompressing a grayscale image) that will receive
+ * the decompressed image.  These planes can be contiguous or non-contiguous in
+ * memory.  Use #tj3YUVPlaneSize() to determine the appropriate size for each
+ * plane based on the scaled JPEG width and height (see #TJSCALED(),
+ * #TJPARAM_JPEGWIDTH, #TJPARAM_JPEGHEIGHT, and #tj3SetScalingFactor()),
+ * strides, and level of chrominance subsampling (see #TJPARAM_SUBSAMP.)  Refer
+ * to @ref YUVnotes "YUV Image Format Notes" for more details.
+ *
+ * @param strides an array of integers, each specifying the number of bytes per
+ * row in the corresponding plane of the YUV image.  Setting the stride for any
+ * plane to 0 is the same as setting it to the scaled plane width (see
+ * @ref YUVnotes "YUV Image Format Notes".)  If `strides` is NULL, then the
+ * strides for all planes will be set to their respective scaled plane widths.
+ * You can adjust the strides in order to add an arbitrary amount of row
+ * padding to each plane or to decompress the JPEG image into a subregion of a
+ * larger planar YUV image.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3DecompressToYUVPlanes8(tjhandle handle,
+                                        const unsigned char *jpegBuf,
+                                        size_t jpegSize,
+                                        unsigned char **dstPlanes,
+                                        int *strides);
+
+
+/**
+ * Decompress an 8-bit-per-sample JPEG image into an 8-bit-per-sample unified
+ * planar YUV image.  This function performs JPEG decompression but leaves out
+ * the color conversion step, so a planar YUV image is generated instead of a
+ * packed-pixel image.  The @ref TJPARAM "parameters" that describe the JPEG
+ * image will be set when this function returns.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * decompression
+ *
+ * @param jpegBuf pointer to a byte buffer containing the JPEG image to
+ * decompress
+ *
+ * @param jpegSize size of the JPEG image (in bytes)
+ *
+ * @param dstBuf pointer to a buffer that will receive the unified planar YUV
+ * decompressed image.  Use #tj3YUVBufSize() to determine the appropriate size
+ * for this buffer based on the scaled JPEG width and height (see #TJSCALED(),
+ * #TJPARAM_JPEGWIDTH, #TJPARAM_JPEGHEIGHT, and #tj3SetScalingFactor()), row
+ * alignment, and level of chrominance subsampling (see #TJPARAM_SUBSAMP.)  The
+ * Y, U (Cb), and V (Cr) image planes will be stored sequentially in the
+ * buffer.  (Refer to @ref YUVnotes "YUV Image Format Notes".)
+ *
+ * @param align row alignment (in bytes) of the YUV image (must be a power of
+ * 2.)  Setting this parameter to n will cause each row in each plane of the
+ * YUV image to be padded to the nearest multiple of n bytes (1 = unpadded.)
+ * To generate images suitable for X Video, `align` should be set to 4.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3DecompressToYUV8(tjhandle handle,
+                                  const unsigned char *jpegBuf,
+                                  size_t jpegSize,
+                                  unsigned char *dstBuf, int align);
+
+
+/**
+ * Decode a set of 8-bit-per-sample Y, U (Cb), and V (Cr) image planes into an
+ * 8-bit-per-sample packed-pixel RGB or grayscale image.  This function
+ * performs color conversion (which is accelerated in the libjpeg-turbo
+ * implementation) but does not execute any of the other steps in the JPEG
+ * decompression process.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * decompression
+ *
+ * @param srcPlanes an array of pointers to Y, U (Cb), and V (Cr) image planes
+ * (or just a Y plane, if decoding a grayscale image) that contain a YUV image
+ * to be decoded.  These planes can be contiguous or non-contiguous in memory.
+ * The size of each plane should match the value returned by #tj3YUVPlaneSize()
+ * for the given image width, height, strides, and level of chrominance
+ * subsampling (see #TJPARAM_SUBSAMP.)  Refer to @ref YUVnotes
+ * "YUV Image Format Notes" for more details.
+ *
+ * @param strides an array of integers, each specifying the number of bytes per
+ * row in the corresponding plane of the YUV source image.  Setting the stride
+ * for any plane to 0 is the same as setting it to the plane width (see
+ * @ref YUVnotes "YUV Image Format Notes".)  If `strides` is NULL, then the
+ * strides for all planes will be set to their respective plane widths.  You
+ * can adjust the strides in order to specify an arbitrary amount of row
+ * padding in each plane or to decode a subregion of a larger planar YUV image.
+ *
+ * @param dstBuf pointer to a buffer that will receive the packed-pixel decoded
+ * image.  This buffer should normally be `pitch * height` bytes in size.
+ * However, you can also use this parameter to decode into a specific region of
+ * a larger buffer.
+ *
+ * @param width width (in pixels) of the source and destination images
+ *
+ * @param pitch bytes per row in the destination image.  Normally this should
+ * be set to <tt>width * #tjPixelSize[pixelFormat]</tt>, if the destination
+ * image should be unpadded.  (Setting this parameter to 0 is the equivalent of
+ * setting it to <tt>width * #tjPixelSize[pixelFormat]</tt>.)  However, you can
+ * also use this parameter to specify the row alignment/padding of the
+ * destination image, to skip rows, or to decode into a specific region of a
+ * larger buffer.
+ *
+ * @param height height (in pixels) of the source and destination images
+ *
+ * @param pixelFormat pixel format of the destination image (see @ref TJPF
+ * "Pixel formats".)
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3DecodeYUVPlanes8(tjhandle handle,
+                                  const unsigned char * const *srcPlanes,
+                                  const int *strides, unsigned char *dstBuf,
+                                  int width, int pitch, int height,
+                                  int pixelFormat);
+
+
+/**
+ * Decode an 8-bit-per-sample unified planar YUV image into an 8-bit-per-sample
+ * packed-pixel RGB or grayscale image.  This function performs color
+ * conversion (which is accelerated in the libjpeg-turbo implementation) but
+ * does not execute any of the other steps in the JPEG decompression process.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * decompression
+ *
+ * @param srcBuf pointer to a buffer containing a unified planar YUV source
+ * image to be decoded.  The size of this buffer should match the value
+ * returned by #tj3YUVBufSize() for the given image width, height, row
+ * alignment, and level of chrominance subsampling (see #TJPARAM_SUBSAMP.)  The
+ * Y, U (Cb), and V (Cr) image planes should be stored sequentially in the
+ * source buffer.  (Refer to @ref YUVnotes "YUV Image Format Notes".)
+ *
+ * @param align row alignment (in bytes) of the YUV source image (must be a
+ * power of 2.)  Setting this parameter to n indicates that each row in each
+ * plane of the YUV source image is padded to the nearest multiple of n bytes
+ * (1 = unpadded.)
+ *
+ * @param dstBuf pointer to a buffer that will receive the packed-pixel decoded
+ * image.  This buffer should normally be `pitch * height` bytes in size.
+ * However, you can also use this parameter to decode into a specific region of
+ * a larger buffer.
+ *
+ * @param width width (in pixels) of the source and destination images
+ *
+ * @param pitch bytes per row in the destination image.  Normally this should
+ * be set to <tt>width * #tjPixelSize[pixelFormat]</tt>, if the destination
+ * image should be unpadded.  (Setting this parameter to 0 is the equivalent of
+ * setting it to <tt>width * #tjPixelSize[pixelFormat]</tt>.)  However, you can
+ * also use this parameter to specify the row alignment/padding of the
+ * destination image, to skip rows, or to decode into a specific region of a
+ * larger buffer.
+ *
+ * @param height height (in pixels) of the source and destination images
+ *
+ * @param pixelFormat pixel format of the destination image (see @ref TJPF
+ * "Pixel formats".)
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3DecodeYUV8(tjhandle handle, const unsigned char *srcBuf,
+                            int align, unsigned char *dstBuf, int width,
+                            int pitch, int height, int pixelFormat);
+
+
+/**
+ * The maximum size of the buffer (in bytes) required to hold a JPEG image
+ * transformed with the given transform parameters and/or cropping region.
+ * This function is a wrapper for #tj3JPEGBufSize() that takes into account
+ * cropping, transposition of the width and height (which affects the
+ * destination image dimensions and level of chrominance subsampling),
+ * grayscale conversion, and the ICC profile (if any) that was previously
+ * associated with the TurboJPEG instance (see #tj3SetICCProfile()) or
+ * extracted from the source image (see #tj3GetICCProfile() and
+ * #TJPARAM_SAVEMARKERS.)  The JPEG header must be read (see
+ * tj3DecompressHeader()) prior to calling this function.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * lossless transformation
+ *
+ * @param transform pointer to a #tjtransform structure that specifies the
+ * transform parameters and/or cropping region for the JPEG image.
+ *
+ * @return the maximum size of the buffer (in bytes) required to hold the
+ * transformed image, or 0 if an error occurred (see #tj3GetErrorStr() and
+ * #tj3GetErrorCode().)
+ */
+DLLEXPORT size_t tj3TransformBufSize(tjhandle handle,
+                                     const tjtransform *transform);
+
+
+/**
+ * Losslessly transform a JPEG image into another JPEG image.  Lossless
+ * transforms work by moving the raw DCT coefficients from one JPEG image
+ * structure to another without altering the values of the coefficients.  While
+ * this is typically faster than decompressing the image, transforming it, and
+ * re-compressing it, lossless transforms are not free.  Each lossless
+ * transform requires reading and performing entropy decoding on all of the
+ * coefficients in the source image, regardless of the size of the destination
+ * image.  Thus, this function provides a means of generating multiple
+ * transformed images from the same source or applying multiple transformations
+ * simultaneously, in order to eliminate the need to read the source
+ * coefficients multiple times.
+ *
+ * @param handle handle to a TurboJPEG instance that has been initialized for
+ * lossless transformation
+ *
+ * @param jpegBuf pointer to a byte buffer containing the JPEG source image to
+ * transform
+ *
+ * @param jpegSize size of the JPEG source image (in bytes)
+ *
+ * @param n the number of transformed JPEG images to generate
+ *
+ * @param dstBufs pointer to an array of n byte buffers.  `dstBufs[i]` will
+ * receive a JPEG image that has been transformed using the parameters in
+ * `transforms[i]`.  TurboJPEG has the ability to reallocate the JPEG
+ * destination buffer to accommodate the size of the transformed JPEG image.
+ * Thus, you can choose to:
+ * -# pre-allocate the JPEG destination buffer with an arbitrary size using
+ * #tj3Alloc() and let TurboJPEG grow the buffer as needed,
+ * -# set `dstBufs[i]` to NULL to tell TurboJPEG to allocate the buffer for
+ * you, or
+ * -# pre-allocate the buffer to a "worst case" size determined by calling
+ * #tj3TransformBufSize().  Under normal circumstances, this should ensure that
+ * the buffer never has to be re-allocated.  (Setting #TJPARAM_NOREALLOC
+ * guarantees that it won't be.)  Note, however, that there are some rare cases
+ * (such as transforming images with a large amount of embedded Exif data) in
+ * which the transformed JPEG image will be larger than the worst-case size,
+ * and #TJPARAM_NOREALLOC cannot be used in those cases unless the embedded
+ * data is discarded using #TJXOPT_COPYNONE or #TJPARAM_SAVEMARKERS.
+ * .
+ * If you choose option 1 or 3, then `dstSizes[i]` should be set to the size of
+ * your pre-allocated buffer.  In any case, unless you have set
+ * #TJPARAM_NOREALLOC, you should always check `dstBufs[i]` upon return from
+ * this function, as it may have changed.
+ *
+ * @param dstSizes pointer to an array of n size_t variables that will receive
+ * the actual sizes (in bytes) of each transformed JPEG image.  If `dstBufs[i]`
+ * points to a pre-allocated buffer, then `dstSizes[i]` should be set to the
+ * size of the buffer.  Upon return, `dstSizes[i]` will contain the size of the
+ * transformed JPEG image (in bytes.)
+ *
+ * @param transforms pointer to an array of n #tjtransform structures, each of
+ * which specifies the transform parameters and/or cropping region for the
+ * corresponding transformed JPEG image.
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr()
+ * and #tj3GetErrorCode().)
+ */
+DLLEXPORT int tj3Transform(tjhandle handle, const unsigned char *jpegBuf,
+                           size_t jpegSize, int n, unsigned char **dstBufs,
+                           size_t *dstSizes, const tjtransform *transforms);
+
+
+/**
+ * Load a packed-pixel image with 2 to 8 bits of data precision per sample from
+ * disk into memory.
+ *
+ * @param handle handle to a TurboJPEG instance
+ *
+ * @param filename name of a file containing a packed-pixel image in Windows
+ * BMP or PBMPLUS (PPM/PGM) format.  Windows BMP files require 8-bit-per-sample
+ * data precision.  When loading a PBMPLUS file, the target data precision
+ * (from 2 to 8 bits per sample) can be specified using #TJPARAM_PRECISION and
+ * defaults to 8 if #TJPARAM_PRECISION is unset or out of range.  If the data
+ * precision of the PBMPLUS file does not match the target data precision, then
+ * upconverting or downconverting will be performed.
+ *
+ * @param width pointer to an integer variable that will receive the width (in
+ * pixels) of the packed-pixel image
+ *
+ * @param align row alignment (in samples) of the packed-pixel buffer to be
+ * returned (must be a power of 2.)  Setting this parameter to n will cause all
+ * rows in the buffer to be padded to the nearest multiple of n samples
+ * (1 = unpadded.)
+ *
+ * @param height pointer to an integer variable that will receive the height
+ * (in pixels) of the packed-pixel image
+ *
+ * @param pixelFormat pointer to an integer variable that specifies or will
+ * receive the pixel format of the packed-pixel buffer.  The behavior of this
+ * function varies depending on the value of `*pixelFormat` passed to the
+ * function:
+ * - @ref TJPF_UNKNOWN : The packed-pixel buffer returned by this function will
+ * use the most optimal pixel format for the file type, and `*pixelFormat` will
+ * contain the ID of that pixel format upon successful return from this
+ * function.
+ * - @ref TJPF_GRAY : Only PGM files and 8-bit-per-pixel BMP files with a
+ * grayscale colormap can be loaded.
+ * - @ref TJPF_CMYK : The RGB or grayscale pixels stored in the file will be
+ * converted using a quick & dirty algorithm that is suitable only for testing
+ * purposes.  (Proper conversion between CMYK and other formats requires a
+ * color management system.)
+ * - Other @ref TJPF "pixel formats" : The packed-pixel buffer will use the
+ * specified pixel format, and pixel format conversion will be performed if
+ * necessary.
+ *
+ * @return a pointer to a newly-allocated buffer containing the packed-pixel
+ * image, converted to the chosen pixel format and with the chosen row
+ * alignment, or NULL if an error occurred (see #tj3GetErrorStr().)  This
+ * buffer should be freed using #tj3Free().
+ */
+DLLEXPORT unsigned char *tj3LoadImage8(tjhandle handle, const char *filename,
+                                       int *width, int align, int *height,
+                                       int *pixelFormat);
+
+/**
+ * Load a packed-pixel image with 9 to 12 bits of data precision per sample
+ * from disk into memory.
+ *
+ * @param handle handle to a TurboJPEG instance
+ *
+ * @param filename name of a file containing a packed-pixel image in PBMPLUS
+ * (PPM/PGM) format.  The target data precision (from 9 to 12 bits per sample)
+ * can be specified using #TJPARAM_PRECISION and defaults to 12 if
+ * #TJPARAM_PRECISION is unset or out of range.  If the data precision of the
+ * PBMPLUS file does not match the target data precision, then upconverting or
+ * downconverting will be performed.
+ *
+ * @param width pointer to an integer variable that will receive the width (in
+ * pixels) of the packed-pixel image
+ *
+ * @param align row alignment (in samples) of the packed-pixel buffer to be
+ * returned (must be a power of 2.)  Setting this parameter to n will cause all
+ * rows in the buffer to be padded to the nearest multiple of n samples
+ * (1 = unpadded.)
+ *
+ * @param height pointer to an integer variable that will receive the height
+ * (in pixels) of the packed-pixel image
+ *
+ * @param pixelFormat pointer to an integer variable that specifies or will
+ * receive the pixel format of the packed-pixel buffer.  The behavior of this
+ * function will vary depending on the value of `*pixelFormat` passed to the
+ * function:
+ * - @ref TJPF_UNKNOWN : The packed-pixel buffer returned by this function will
+ * use the most optimal pixel format for the file type, and `*pixelFormat` will
+ * contain the ID of that pixel format upon successful return from this
+ * function.
+ * - @ref TJPF_GRAY : Only PGM files can be loaded.
+ * - @ref TJPF_CMYK : The RGB or grayscale pixels stored in the file will be
+ * converted using a quick & dirty algorithm that is suitable only for testing
+ * purposes.  (Proper conversion between CMYK and other formats requires a
+ * color management system.)
+ * - Other @ref TJPF "pixel formats" : The packed-pixel buffer will use the
+ * specified pixel format, and pixel format conversion will be performed if
+ * necessary.
+ *
+ * @return a pointer to a newly-allocated buffer containing the packed-pixel
+ * image, converted to the chosen pixel format and with the chosen row
+ * alignment, or NULL if an error occurred (see #tj3GetErrorStr().)  This
+ * buffer should be freed using #tj3Free().
+ */
+DLLEXPORT short *tj3LoadImage12(tjhandle handle, const char *filename,
+                                int *width, int align, int *height,
+                                int *pixelFormat);
+
+/**
+ * Load a packed-pixel image with 13 to 16 bits of data precision per sample
+ * from disk into memory.
+ *
+ * @param handle handle to a TurboJPEG instance
+ *
+ * @param filename name of a file containing a packed-pixel image in PBMPLUS
+ * (PPM/PGM) format.  The target data precision (from 13 to 16 bits per sample)
+ * can be specified using #TJPARAM_PRECISION and defaults to 16 if
+ * #TJPARAM_PRECISION is unset or out of range.  If the data precision of the
+ * PBMPLUS file does not match the target data precision, then upconverting or
+ * downconverting will be performed.
+ *
+ * @param width pointer to an integer variable that will receive the width (in
+ * pixels) of the packed-pixel image
+ *
+ * @param align row alignment (in samples) of the packed-pixel buffer to be
+ * returned (must be a power of 2.)  Setting this parameter to n will cause all
+ * rows in the buffer to be padded to the nearest multiple of n samples
+ * (1 = unpadded.)
+ *
+ * @param height pointer to an integer variable that will receive the height
+ * (in pixels) of the packed-pixel image
+ *
+ * @param pixelFormat pointer to an integer variable that specifies or will
+ * receive the pixel format of the packed-pixel buffer.  The behavior of this
+ * function will vary depending on the value of `*pixelFormat` passed to the
+ * function:
+ * - @ref TJPF_UNKNOWN : The packed-pixel buffer returned by this function will
+ * use the most optimal pixel format for the file type, and `*pixelFormat` will
+ * contain the ID of that pixel format upon successful return from this
+ * function.
+ * - @ref TJPF_GRAY : Only PGM files can be loaded.
+ * - @ref TJPF_CMYK : The RGB or grayscale pixels stored in the file will be
+ * converted using a quick & dirty algorithm that is suitable only for testing
+ * purposes.  (Proper conversion between CMYK and other formats requires a
+ * color management system.)
+ * - Other @ref TJPF "pixel formats" : The packed-pixel buffer will use the
+ * specified pixel format, and pixel format conversion will be performed if
+ * necessary.
+ *
+ * @return a pointer to a newly-allocated buffer containing the packed-pixel
+ * image, converted to the chosen pixel format and with the chosen row
+ * alignment, or NULL if an error occurred (see #tj3GetErrorStr().)  This
+ * buffer should be freed using #tj3Free().
+ */
+DLLEXPORT unsigned short *tj3LoadImage16(tjhandle handle, const char *filename,
+                                         int *width, int align, int *height,
+                                         int *pixelFormat);
+
+
+/**
+ * Save a packed-pixel image with 2 to 8 bits of data precision per sample from
+ * memory to disk.
+ *
+ * @param handle handle to a TurboJPEG instance
+ *
+ * @param filename name of a file to which to save the packed-pixel image.  The
+ * image will be stored in Windows BMP or PBMPLUS (PPM/PGM) format, depending
+ * on the file extension.  Windows BMP files require 8-bit-per-sample data
+ * precision.  When saving a PBMPLUS file, the source data precision (from 2 to
+ * 8 bits per sample) can be specified using #TJPARAM_PRECISION and defaults to
+ * 8 if #TJPARAM_PRECISION is unset or out of range.
+ *
+ * @param buffer pointer to a buffer containing a packed-pixel RGB, grayscale,
+ * or CMYK image to be saved
+ *
+ * @param width width (in pixels) of the packed-pixel image
+ *
+ * @param pitch samples per row in the packed-pixel image.  Setting this
+ * parameter to 0 is the equivalent of setting it to
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>.
+ *
+ * @param height height (in pixels) of the packed-pixel image
+ *
+ * @param pixelFormat pixel format of the packed-pixel image (see @ref TJPF
+ * "Pixel formats".)  If this parameter is set to @ref TJPF_GRAY, then the
+ * image will be stored in PGM or 8-bit-per-pixel (indexed color) BMP format.
+ * Otherwise, the image will be stored in PPM or 24-bit-per-pixel BMP format.
+ * If this parameter is set to @ref TJPF_CMYK, then the CMYK pixels will be
+ * converted to RGB using a quick & dirty algorithm that is suitable only for
+ * testing purposes.  (Proper conversion between CMYK and other formats
+ * requires a color management system.)
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr().)
+ */
+DLLEXPORT int tj3SaveImage8(tjhandle handle, const char *filename,
+                            const unsigned char *buffer, int width, int pitch,
+                            int height, int pixelFormat);
+
+/**
+ * Save a packed-pixel image with 9 to 12 bits of data precision per sample
+ * from memory to disk.
+ *
+ * @param handle handle to a TurboJPEG instance
+ *
+ * @param filename name of a file to which to save the packed-pixel image,
+ * which will be stored in PBMPLUS (PPM/PGM) format.  The source data precision
+ * (from 9 to 12 bits per sample) can be specified using #TJPARAM_PRECISION and
+ * defaults to 12 if #TJPARAM_PRECISION is unset or out of range.
+ *
+ * @param buffer pointer to a buffer containing a packed-pixel RGB, grayscale,
+ * or CMYK image to be saved
+ *
+ * @param width width (in pixels) of the packed-pixel image
+ *
+ * @param pitch samples per row in the packed-pixel image.  Setting this
+ * parameter to 0 is the equivalent of setting it to
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>.
+ *
+ * @param height height (in pixels) of the packed-pixel image
+ *
+ * @param pixelFormat pixel format of the packed-pixel image (see @ref TJPF
+ * "Pixel formats".)  If this parameter is set to @ref TJPF_GRAY, then the
+ * image will be stored in PGM format.  Otherwise, the image will be stored in
+ * PPM format.  If this parameter is set to @ref TJPF_CMYK, then the CMYK
+ * pixels will be converted to RGB using a quick & dirty algorithm that is
+ * suitable only for testing purposes.  (Proper conversion between CMYK and
+ * other formats requires a color management system.)
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr().)
+ */
+DLLEXPORT int tj3SaveImage12(tjhandle handle, const char *filename,
+                             const short *buffer, int width, int pitch,
+                             int height, int pixelFormat);
+
+/**
+ * Save a packed-pixel image with 13 to 16 bits of data precision per sample
+ * from memory to disk.
+ *
+ * @param handle handle to a TurboJPEG instance
+ *
+ * @param filename name of a file to which to save the packed-pixel image,
+ * which will be stored in PBMPLUS (PPM/PGM) format.  The source data precision
+ * (from 13 to 16 bits per sample) can be specified using #TJPARAM_PRECISION
+ * and defaults to 16 if #TJPARAM_PRECISION is unset or out of range.
+ *
+ * @param buffer pointer to a buffer containing a packed-pixel RGB, grayscale,
+ * or CMYK image to be saved
+ *
+ * @param width width (in pixels) of the packed-pixel image
+ *
+ * @param pitch samples per row in the packed-pixel image.  Setting this
+ * parameter to 0 is the equivalent of setting it to
+ * <tt>width * #tjPixelSize[pixelFormat]</tt>.
+ *
+ * @param height height (in pixels) of the packed-pixel image
+ *
+ * @param pixelFormat pixel format of the packed-pixel image (see @ref TJPF
+ * "Pixel formats".)  If this parameter is set to @ref TJPF_GRAY, then the
+ * image will be stored in PGM format.  Otherwise, the image will be stored in
+ * PPM format.  If this parameter is set to @ref TJPF_CMYK, then the CMYK
+ * pixels will be converted to RGB using a quick & dirty algorithm that is
+ * suitable only for testing purposes.  (Proper conversion between CMYK and
+ * other formats requires a color management system.)
+ *
+ * @return 0 if successful, or -1 if an error occurred (see #tj3GetErrorStr().)
+ */
+DLLEXPORT int tj3SaveImage16(tjhandle handle, const char *filename,
+                             const unsigned short *buffer, int width,
+                             int pitch, int height, int pixelFormat);
+
+
+/* Backward compatibility functions and macros (nothing to see here) */
+
+/* TurboJPEG 1.0+ */
+
+#define NUMSUBOPT  TJ_NUMSAMP
+#define TJ_444  TJSAMP_444
+#define TJ_422  TJSAMP_422
+#define TJ_420  TJSAMP_420
+#define TJ_411  TJSAMP_420
+#define TJ_GRAYSCALE  TJSAMP_GRAY
+
+#define TJ_BGR  1
+#define TJ_BOTTOMUP  TJFLAG_BOTTOMUP
+#define TJ_FORCEMMX  TJFLAG_FORCEMMX
+#define TJ_FORCESSE  TJFLAG_FORCESSE
+#define TJ_FORCESSE2  TJFLAG_FORCESSE2
+#define TJ_ALPHAFIRST  64
+#define TJ_FORCESSE3  TJFLAG_FORCESSE3
+#define TJ_FASTUPSAMPLE  TJFLAG_FASTUPSAMPLE
+
+#define TJPAD(width)  (((width) + 3) & (~3))
+
+DLLEXPORT unsigned long TJBUFSIZE(int width, int height);
+
+DLLEXPORT int tjCompress(tjhandle handle, unsigned char *srcBuf, int width,
+                         int pitch, int height, int pixelSize,
+                         unsigned char *dstBuf, unsigned long *compressedSize,
+                         int jpegSubsamp, int jpegQual, int flags);
+
+DLLEXPORT int tjDecompress(tjhandle handle, unsigned char *jpegBuf,
+                           unsigned long jpegSize, unsigned char *dstBuf,
+                           int width, int pitch, int height, int pixelSize,
+                           int flags);
+
+DLLEXPORT int tjDecompressHeader(tjhandle handle, unsigned char *jpegBuf,
+                                 unsigned long jpegSize, int *width,
+                                 int *height);
+
+DLLEXPORT int tjDestroy(tjhandle handle);
+
+DLLEXPORT char *tjGetErrorStr(void);
+
+DLLEXPORT tjhandle tjInitCompress(void);
+
+DLLEXPORT tjhandle tjInitDecompress(void);
+
+/* TurboJPEG 1.1+ */
+
+#define TJ_YUV  512
+
+DLLEXPORT unsigned long TJBUFSIZEYUV(int width, int height, int jpegSubsamp);
+
+DLLEXPORT int tjDecompressHeader2(tjhandle handle, unsigned char *jpegBuf,
+                                  unsigned long jpegSize, int *width,
+                                  int *height, int *jpegSubsamp);
+
+DLLEXPORT int tjDecompressToYUV(tjhandle handle, unsigned char *jpegBuf,
+                                unsigned long jpegSize, unsigned char *dstBuf,
+                                int flags);
+
+DLLEXPORT int tjEncodeYUV(tjhandle handle, unsigned char *srcBuf, int width,
+                          int pitch, int height, int pixelSize,
+                          unsigned char *dstBuf, int subsamp, int flags);
+
+/* TurboJPEG 1.2+ */
+
+#define TJFLAG_BOTTOMUP  2
+#define TJFLAG_FORCEMMX  8
+#define TJFLAG_FORCESSE  16
+#define TJFLAG_FORCESSE2  32
+#define TJFLAG_FORCESSE3  128
+#define TJFLAG_FASTUPSAMPLE  256
+#define TJFLAG_NOREALLOC  1024
+
+DLLEXPORT unsigned char *tjAlloc(int bytes);
+
+DLLEXPORT unsigned long tjBufSize(int width, int height, int jpegSubsamp);
+
+DLLEXPORT unsigned long tjBufSizeYUV(int width, int height, int subsamp);
+
+DLLEXPORT int tjCompress2(tjhandle handle, const unsigned char *srcBuf,
+                          int width, int pitch, int height, int pixelFormat,
+                          unsigned char **jpegBuf, unsigned long *jpegSize,
+                          int jpegSubsamp, int jpegQual, int flags);
+
+DLLEXPORT int tjDecompress2(tjhandle handle, const unsigned char *jpegBuf,
+                            unsigned long jpegSize, unsigned char *dstBuf,
+                            int width, int pitch, int height, int pixelFormat,
+                            int flags);
+
+DLLEXPORT int tjEncodeYUV2(tjhandle handle, unsigned char *srcBuf, int width,
+                           int pitch, int height, int pixelFormat,
+                           unsigned char *dstBuf, int subsamp, int flags);
+
+DLLEXPORT void tjFree(unsigned char *buffer);
+
+DLLEXPORT tjscalingfactor *tjGetScalingFactors(int *numscalingfactors);
+
+DLLEXPORT tjhandle tjInitTransform(void);
+
+DLLEXPORT int tjTransform(tjhandle handle, const unsigned char *jpegBuf,
+                            unsigned long jpegSize, int n,
+                            unsigned char **dstBufs, unsigned long *dstSizes,
+                            tjtransform *transforms, int flags);
+
+/* TurboJPEG 1.2.1+ */
+
+#define TJFLAG_FASTDCT  2048
+#define TJFLAG_ACCURATEDCT  4096
+
+/* TurboJPEG 1.4+ */
+
+DLLEXPORT unsigned long tjBufSizeYUV2(int width, int align, int height,
+                                      int subsamp);
+
+DLLEXPORT int tjCompressFromYUV(tjhandle handle, const unsigned char *srcBuf,
+                                int width, int align, int height, int subsamp,
+                                unsigned char **jpegBuf,
+                                unsigned long *jpegSize, int jpegQual,
+                                int flags);
+
+DLLEXPORT int tjCompressFromYUVPlanes(tjhandle handle,
+                                      const unsigned char **srcPlanes,
+                                      int width, const int *strides,
+                                      int height, int subsamp,
+                                      unsigned char **jpegBuf,
+                                      unsigned long *jpegSize, int jpegQual,
+                                      int flags);
+
+DLLEXPORT int tjDecodeYUV(tjhandle handle, const unsigned char *srcBuf,
+                          int align, int subsamp, unsigned char *dstBuf,
+                          int width, int pitch, int height, int pixelFormat,
+                          int flags);
+
+DLLEXPORT int tjDecodeYUVPlanes(tjhandle handle,
+                                const unsigned char **srcPlanes,
+                                const int *strides, int subsamp,
+                                unsigned char *dstBuf, int width, int pitch,
+                                int height, int pixelFormat, int flags);
+
+DLLEXPORT int tjDecompressHeader3(tjhandle handle,
+                                  const unsigned char *jpegBuf,
+                                  unsigned long jpegSize, int *width,
+                                  int *height, int *jpegSubsamp,
+                                  int *jpegColorspace);
+
+DLLEXPORT int tjDecompressToYUV2(tjhandle handle, const unsigned char *jpegBuf,
+                                 unsigned long jpegSize, unsigned char *dstBuf,
+                                 int width, int align, int height, int flags);
+
+DLLEXPORT int tjDecompressToYUVPlanes(tjhandle handle,
+                                      const unsigned char *jpegBuf,
+                                      unsigned long jpegSize,
+                                      unsigned char **dstPlanes, int width,
+                                      int *strides, int height, int flags);
+
+DLLEXPORT int tjEncodeYUV3(tjhandle handle, const unsigned char *srcBuf,
+                           int width, int pitch, int height, int pixelFormat,
+                           unsigned char *dstBuf, int align, int subsamp,
+                           int flags);
+
+DLLEXPORT int tjEncodeYUVPlanes(tjhandle handle, const unsigned char *srcBuf,
+                                int width, int pitch, int height,
+                                int pixelFormat, unsigned char **dstPlanes,
+                                int *strides, int subsamp, int flags);
+
+DLLEXPORT int tjPlaneHeight(int componentID, int height, int subsamp);
+
+DLLEXPORT unsigned long tjPlaneSizeYUV(int componentID, int width, int stride,
+                                       int height, int subsamp);
+
+DLLEXPORT int tjPlaneWidth(int componentID, int width, int subsamp);
+
+/* TurboJPEG 2.0+ */
+
+#define TJFLAG_STOPONWARNING  8192
+#define TJFLAG_PROGRESSIVE  16384
+
+DLLEXPORT int tjGetErrorCode(tjhandle handle);
+
+DLLEXPORT char *tjGetErrorStr2(tjhandle handle);
+
+DLLEXPORT unsigned char *tjLoadImage(const char *filename, int *width,
+                                     int align, int *height, int *pixelFormat,
+                                     int flags);
+
+DLLEXPORT int tjSaveImage(const char *filename, unsigned char *buffer,
+                          int width, int pitch, int height, int pixelFormat,
+                          int flags);
+
+/* TurboJPEG 2.1+ */
+
+#define TJFLAG_LIMITSCANS  32768
+
+/**
+ * @}
+ */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/thirdparty/thorvg/inc/config.h b/thirdparty/thorvg/inc/config.h
index 6a54f8053b3..e7362a986a4 100644
--- a/thirdparty/thorvg/inc/config.h
+++ b/thirdparty/thorvg/inc/config.h
@@ -4,13 +4,13 @@
 #define THORVG_SW_RASTER_SUPPORT
 #define THORVG_SVG_LOADER_SUPPORT
 #define THORVG_PNG_LOADER_SUPPORT
-#define THORVG_JPG_LOADER_SUPPORT
 #ifndef WEB_ENABLED
 #define THORVG_THREAD_SUPPORT
 #endif
 
-// Added conditionally if webp module is enabled.
+// Added conditionally if respective modules are enabled.
 //#define THORVG_WEBP_LOADER_SUPPORT
+//#define THORVG_JPG_LOADER_SUPPORT
 
 // For internal debugging:
 //#define THORVG_LOG_ENABLED
diff --git a/thirdparty/thorvg/src/loaders/jpg/tvgJpgLoader.cpp b/thirdparty/thorvg/src/loaders/external_jpg/tvgJpgLoader.cpp
similarity index 53%
rename from thirdparty/thorvg/src/loaders/jpg/tvgJpgLoader.cpp
rename to thirdparty/thorvg/src/loaders/external_jpg/tvgJpgLoader.cpp
index 4ae4f1202b1..f00993ab14c 100644
--- a/thirdparty/thorvg/src/loaders/jpg/tvgJpgLoader.cpp
+++ b/thirdparty/thorvg/src/loaders/external_jpg/tvgJpgLoader.cpp
@@ -21,6 +21,7 @@
  */
 
 #include <memory.h>
+#include <turbojpeg.h>
 #include "tvgJpgLoader.h"
 
 /************************************************************************/
@@ -29,57 +30,71 @@
 
 void JpgLoader::clear()
 {
-    jpgdDelete(decoder);
     if (freeData) free(data);
-    decoder = nullptr;
     data = nullptr;
+    size = 0;
     freeData = false;
 }
 
 
-void JpgLoader::run(unsigned tid)
-{
-    surface.buf8 = jpgdDecompress(decoder);
-    surface.stride = static_cast<uint32_t>(w);
-    surface.w = static_cast<uint32_t>(w);
-    surface.h = static_cast<uint32_t>(h);
-    surface.cs = ColorSpace::ARGB8888;
-    surface.channelSize = sizeof(uint32_t);
-    surface.premultiplied = true;
-
-    clear();
-}
-
-
 /************************************************************************/
 /* External Class Implementation                                        */
 /************************************************************************/
 
 JpgLoader::JpgLoader() : ImageLoader(FileType::Jpg)
 {
-
+    jpegDecompressor = tjInitDecompress();
 }
 
 
 JpgLoader::~JpgLoader()
 {
-    done();
     clear();
-    free(surface.buf8);
+    tjDestroy(jpegDecompressor);
+
+    //This image is shared with raster engine.
+    tjFree(surface.buf8);
 }
 
 
 bool JpgLoader::open(const string& path)
 {
 #ifdef THORVG_FILE_IO_SUPPORT
-    int width, height;
-    decoder = jpgdHeader(path.c_str(), &width, &height);
-    if (!decoder) return false;
+    auto jpegFile = fopen(path.c_str(), "rb");
+    if (!jpegFile) return false;
+
+    auto ret = false;
+
+    //determine size
+    if (fseek(jpegFile, 0, SEEK_END) < 0) goto finalize;
+    if (((size = ftell(jpegFile)) < 1)) goto finalize;
+    if (fseek(jpegFile, 0, SEEK_SET)) goto finalize;
+
+    data = (unsigned char *) malloc(size);
+    if (!data) goto finalize;
+
+    freeData = true;
+
+    if (fread(data, size, 1, jpegFile) < 1) goto failure;
+
+    int width, height, subSample, colorSpace;
+    if (tjDecompressHeader3(jpegDecompressor, data, size, &width, &height, &subSample, &colorSpace) < 0) {
+        TVGERR("JPG LOADER", "%s", tjGetErrorStr());
+        goto failure;
+    }
 
     w = static_cast<float>(width);
     h = static_cast<float>(height);
+    ret = true;
 
-    return true;
+    goto finalize;
+
+failure:
+    clear();
+
+finalize:
+    fclose(jpegFile);
+    return ret;
 #else
     return false;
 #endif
@@ -88,50 +103,62 @@ bool JpgLoader::open(const string& path)
 
 bool JpgLoader::open(const char* data, uint32_t size, bool copy)
 {
+    int width, height, subSample, colorSpace;
+    if (tjDecompressHeader3(jpegDecompressor, (unsigned char *) data, size, &width, &height, &subSample, &colorSpace) < 0) return false;
+
     if (copy) {
-        this->data = (char *) malloc(size);
+        this->data = (unsigned char *) malloc(size);
         if (!this->data) return false;
-        memcpy((char *)this->data, data, size);
+        memcpy((unsigned char *)this->data, data, size);
         freeData = true;
     } else {
-        this->data = (char *) data;
+        this->data = (unsigned char *) data;
         freeData = false;
     }
 
-    int width, height;
-    decoder = jpgdHeader(this->data, size, &width, &height);
-    if (!decoder) return false;
-
     w = static_cast<float>(width);
     h = static_cast<float>(height);
+    this->size = size;
 
     return true;
 }
 
 
-
 bool JpgLoader::read()
 {
     if (!LoadModule::read()) return true;
 
-    if (!decoder || w == 0 || h == 0) return false;
+    if (w == 0 || h == 0) return false;
 
-    TaskScheduler::request(this);
+    //determine the image format
+    TJPF format;
+    if (cs == ColorSpace::ARGB8888 || cs == ColorSpace::ARGB8888S) {
+        format = TJPF_BGRX;
+        surface.cs = ColorSpace::ARGB8888;
+    } else {
+        format = TJPF_RGBX;
+        surface.cs = ColorSpace::ABGR8888;
+    }
 
+    auto image = (unsigned char *)tjAlloc(static_cast<int>(w) * static_cast<int>(h) * tjPixelSize[format]);
+    if (!image) return false;
+
+    //decompress jpg image
+    if (tjDecompress2(jpegDecompressor, data, size, image, static_cast<int>(w), 0, static_cast<int>(h), format, 0) < 0) {
+        TVGERR("JPG LOADER", "%s", tjGetErrorStr());
+        tjFree(image);
+        image = nullptr;
+        return false;
+    }
+
+    //setup the surface
+    surface.buf8 = image;
+    surface.stride = w;
+    surface.w = w;
+    surface.h = h;
+    surface.channelSize = sizeof(uint32_t);
+    surface.premultiplied = true;
+
+    clear();
     return true;
 }
-
-
-bool JpgLoader::close()
-{
-    if (!LoadModule::close()) return false;
-    this->done();
-    return true;
-}
-
-
-RenderSurface* JpgLoader::bitmap()
-{
-    this->done();
-    return ImageLoader::bitmap();
-}
diff --git a/thirdparty/thorvg/src/loaders/jpg/tvgJpgLoader.h b/thirdparty/thorvg/src/loaders/external_jpg/tvgJpgLoader.h
similarity index 84%
rename from thirdparty/thorvg/src/loaders/jpg/tvgJpgLoader.h
rename to thirdparty/thorvg/src/loaders/external_jpg/tvgJpgLoader.h
index 85f6d25dc39..348dbbf8e5f 100644
--- a/thirdparty/thorvg/src/loaders/jpg/tvgJpgLoader.h
+++ b/thirdparty/thorvg/src/loaders/external_jpg/tvgJpgLoader.h
@@ -24,19 +24,12 @@
 #define _TVG_JPG_LOADER_H_
 
 #include "tvgLoader.h"
-#include "tvgTaskScheduler.h"
-#include "tvgJpgd.h"
 
-class JpgLoader : public ImageLoader, public Task
+using tjhandle = void*;
+
+//TODO: Use Task?
+class JpgLoader : public ImageLoader
 {
-private:
-    jpeg_decoder* decoder = nullptr;
-    char* data = nullptr;
-    bool freeData = false;
-
-    void clear();
-    void run(unsigned tid) override;
-
 public:
     JpgLoader();
     ~JpgLoader();
@@ -44,9 +37,14 @@ public:
     bool open(const string& path) override;
     bool open(const char* data, uint32_t size, bool copy) override;
     bool read() override;
-    bool close() override;
 
-    RenderSurface* bitmap() override;
+private:
+    void clear();
+
+    tjhandle jpegDecompressor;
+    unsigned char* data = nullptr;
+    unsigned long size = 0;
+    bool freeData = false;
 };
 
 #endif //_TVG_JPG_LOADER_H_
diff --git a/thirdparty/thorvg/src/loaders/jpg/tvgJpgd.cpp b/thirdparty/thorvg/src/loaders/jpg/tvgJpgd.cpp
deleted file mode 100644
index 92e23698af2..00000000000
--- a/thirdparty/thorvg/src/loaders/jpg/tvgJpgd.cpp
+++ /dev/null
@@ -1,3033 +0,0 @@
-/*
- * Copyright (c) 2021 - 2024 the ThorVG project. All rights reserved.
-
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
-
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
-
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// jpgd.cpp - C++ class for JPEG decompression.
-// Public domain, Rich Geldreich <richgel99@gmail.com>
-// Alex Evans: Linear memory allocator (taken from jpge.h).
-// v1.04, May. 19, 2012: Code tweaks to fix VS2008 static code analysis warnings (all looked harmless)
-//
-// Supports progressive and baseline sequential JPEG image files, and the most common chroma subsampling factors: Y, H1V1, H2V1, H1V2, and H2V2.
-//
-// Chroma upsampling quality: H2V2 is upsampled in the frequency domain, H2V1 and H1V2 are upsampled using point sampling.
-// Chroma upsampling reference: "Fast Scheme for Image Size Change in the Compressed Domain"
-// http://vision.ai.uiuc.edu/~dugad/research/dct/index.html
-
-#include <memory.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <setjmp.h>
-#include <stdint.h>
-
-#include "tvgCommon.h"
-#include "tvgJpgd.h"
-
-#ifdef _MSC_VER
-  #pragma warning (disable : 4611) // warning C4611: interaction between '_setjmp' and C++ object destruction is non-portable
-  #define JPGD_NORETURN __declspec(noreturn)
-#elif defined(__GNUC__)
-  #define JPGD_NORETURN __attribute__ ((noreturn))
-#else
-  #define JPGD_NORETURN
-#endif
-
-/************************************************************************/
-/* Internal Class Implementation                                        */
-/************************************************************************/
-
-
-// Set to 1 to enable freq. domain chroma upsampling on images using H2V2 subsampling (0=faster nearest neighbor sampling).
-// This is slower, but results in higher quality on images with highly saturated colors.
-#define JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING 1
-
-#define JPGD_ASSERT(x)
-#define JPGD_MAX(a,b) (((a)>(b)) ? (a) : (b))
-#define JPGD_MIN(a,b) (((a)<(b)) ? (a) : (b))
-
-typedef int16_t jpgd_quant_t;
-typedef int16_t jpgd_block_t;
-
-// Success/failure error codes.
-enum jpgd_status
-{
-    JPGD_SUCCESS = 0, JPGD_FAILED = -1, JPGD_DONE = 1,
-    JPGD_BAD_DHT_COUNTS = -256, JPGD_BAD_DHT_INDEX, JPGD_BAD_DHT_MARKER, JPGD_BAD_DQT_MARKER, JPGD_BAD_DQT_TABLE,
-    JPGD_BAD_PRECISION, JPGD_BAD_HEIGHT, JPGD_BAD_WIDTH, JPGD_TOO_MANY_COMPONENTS,
-    JPGD_BAD_SOF_LENGTH, JPGD_BAD_VARIABLE_MARKER, JPGD_BAD_DRI_LENGTH, JPGD_BAD_SOS_LENGTH,
-    JPGD_BAD_SOS_COMP_ID, JPGD_W_EXTRA_BYTES_BEFORE_MARKER, JPGD_NO_ARITHMETIC_SUPPORT, JPGD_UNEXPECTED_MARKER,
-    JPGD_NOT_JPEG, JPGD_UNSUPPORTED_MARKER, JPGD_BAD_DQT_LENGTH, JPGD_TOO_MANY_BLOCKS,
-    JPGD_UNDEFINED_QUANT_TABLE, JPGD_UNDEFINED_HUFF_TABLE, JPGD_NOT_SINGLE_SCAN, JPGD_UNSUPPORTED_COLORSPACE,
-    JPGD_UNSUPPORTED_SAMP_FACTORS, JPGD_DECODE_ERROR, JPGD_BAD_RESTART_MARKER, JPGD_ASSERTION_ERROR,
-    JPGD_BAD_SOS_SPECTRAL, JPGD_BAD_SOS_SUCCESSIVE, JPGD_STREAM_READ, JPGD_NOTENOUGHMEM
-};
-
-enum
-{
-    JPGD_IN_BUF_SIZE = 8192, JPGD_MAX_BLOCKS_PER_MCU = 10, JPGD_MAX_HUFF_TABLES = 8, JPGD_MAX_QUANT_TABLES = 4,
-    JPGD_MAX_COMPONENTS = 4, JPGD_MAX_COMPS_IN_SCAN = 4, JPGD_MAX_BLOCKS_PER_ROW = 8192, JPGD_MAX_HEIGHT = 16384, JPGD_MAX_WIDTH = 16384
-};
-
-// Input stream interface.
-// Derive from this class to read input data from sources other than files or memory. Set m_eof_flag to true when no more data is available.
-// The decoder is rather greedy: it will keep on calling this method until its internal input buffer is full, or until the EOF flag is set.
-// It the input stream contains data after the JPEG stream's EOI (end of image) marker it will probably be pulled into the internal buffer.
-// Call the get_total_bytes_read() method to determine the actual size of the JPEG stream after successful decoding.
-struct jpeg_decoder_stream
-{
-    jpeg_decoder_stream() { }
-    virtual ~jpeg_decoder_stream() { }
-
-    // The read() method is called when the internal input buffer is empty.
-    // Parameters:
-    // pBuf - input buffer
-    // max_bytes_to_read - maximum bytes that can be written to pBuf
-    // pEOF_flag - set this to true if at end of stream (no more bytes remaining)
-    // Returns -1 on error, otherwise return the number of bytes actually written to the buffer (which may be 0).
-    // Notes: This method will be called in a loop until you set *pEOF_flag to true or the internal buffer is full.
-    virtual int read(uint8_t *pBuf, int max_bytes_to_read, bool *pEOF_flag) = 0;
-};
-
-
-// stdio FILE stream class.
-class jpeg_decoder_file_stream : public jpeg_decoder_stream
-{
-    jpeg_decoder_file_stream(const jpeg_decoder_file_stream &);
-    jpeg_decoder_file_stream &operator =(const jpeg_decoder_file_stream &);
-
-    FILE *m_pFile = nullptr;
-    bool m_eof_flag = false;
-    bool m_error_flag = false;
-
-public:
-    jpeg_decoder_file_stream() {}
-    virtual ~jpeg_decoder_file_stream();
-    bool open(const char *Pfilename);
-    void close();
-    virtual int read(uint8_t *pBuf, int max_bytes_to_read, bool *pEOF_flag);
-  };
-
-
-// Memory stream class.
-class jpeg_decoder_mem_stream : public jpeg_decoder_stream
-{
-    const uint8_t *m_pSrc_data;
-    uint32_t m_ofs, m_size;
-
-public:
-    jpeg_decoder_mem_stream() : m_pSrc_data(nullptr), m_ofs(0), m_size(0) {}
-    jpeg_decoder_mem_stream(const uint8_t *pSrc_data, uint32_t size) : m_pSrc_data(pSrc_data), m_ofs(0), m_size(size) {}
-    virtual ~jpeg_decoder_mem_stream() {}
-    bool open(const uint8_t *pSrc_data, uint32_t size);
-    void close() { m_pSrc_data = nullptr; m_ofs = 0; m_size = 0; }
-    virtual int read(uint8_t *pBuf, int max_bytes_to_read, bool *pEOF_flag);
-};
-
-
-class jpeg_decoder
-{
-public:
-    // Call get_error_code() after constructing to determine if the stream is valid or not. You may call the get_width(), get_height(), etc.
-    // methods after the constructor is called. You may then either destruct the object, or begin decoding the image by calling begin_decoding(), then decode() on each scanline.
-    jpeg_decoder(jpeg_decoder_stream *pStream);
-    ~jpeg_decoder();
-
-    // Call this method after constructing the object to begin decompression.
-    // If JPGD_SUCCESS is returned you may then call decode() on each scanline.
-    int begin_decoding();
-    // Returns the next scan line.
-    // For grayscale images, pScan_line will point to a buffer containing 8-bit pixels (get_bytes_per_pixel() will return 1).
-    // Otherwise, it will always point to a buffer containing 32-bit RGBA pixels (A will always be 255, and get_bytes_per_pixel() will return 4).
-    // Returns JPGD_SUCCESS if a scan line has been returned.
-    // Returns JPGD_DONE if all scan lines have been returned.
-    // Returns JPGD_FAILED if an error occurred. Call get_error_code() for a more info.
-    int decode(const void** pScan_line, uint32_t* pScan_line_len);
-    inline jpgd_status get_error_code() const { return m_error_code; }
-    inline int get_width() const { return m_image_x_size; }
-    inline int get_height() const { return m_image_y_size; }
-    inline int get_num_components() const { return m_comps_in_frame; }
-    inline int get_bytes_per_pixel() const { return m_dest_bytes_per_pixel; }
-    inline int get_bytes_per_scan_line() const { return m_image_x_size * get_bytes_per_pixel(); }
-    // Returns the total number of bytes actually consumed by the decoder (which should equal the actual size of the JPEG file).
-    inline int get_total_bytes_read() const { return m_total_bytes_read; }
-
-private:
-    jpeg_decoder(const jpeg_decoder &);
-    jpeg_decoder &operator =(const jpeg_decoder &);
-
-    typedef void (*pDecode_block_func)(jpeg_decoder *, int, int, int);
-
-    struct huff_tables
-    {
-      bool ac_table;
-      uint32_t  look_up[256];
-      uint32_t  look_up2[256];
-      uint8_t code_size[256];
-      uint32_t  tree[512];
-    };
-
-    struct coeff_buf
-    {
-      uint8_t *pData;
-      int block_num_x, block_num_y;
-      int block_len_x, block_len_y;
-      int block_size;
-    };
-
-    struct mem_block
-    {
-      mem_block *m_pNext;
-      size_t m_used_count;
-      size_t m_size;
-      char m_data[1];
-    };
-
-    jmp_buf m_jmp_state;
-    mem_block *m_pMem_blocks;
-    int m_image_x_size;
-    int m_image_y_size;
-    jpeg_decoder_stream *m_pStream;
-    int m_progressive_flag;
-    uint8_t m_huff_ac[JPGD_MAX_HUFF_TABLES];
-    uint8_t* m_huff_num[JPGD_MAX_HUFF_TABLES];      // pointer to number of Huffman codes per bit size
-    uint8_t* m_huff_val[JPGD_MAX_HUFF_TABLES];      // pointer to Huffman codes per bit size
-    jpgd_quant_t* m_quant[JPGD_MAX_QUANT_TABLES]; // pointer to quantization tables
-    int m_scan_type;                              // Gray, Yh1v1, Yh1v2, Yh2v1, Yh2v2 (CMYK111, CMYK4114 no longer supported)
-    int m_comps_in_frame;                         // # of components in frame
-    int m_comp_h_samp[JPGD_MAX_COMPONENTS];       // component's horizontal sampling factor
-    int m_comp_v_samp[JPGD_MAX_COMPONENTS];       // component's vertical sampling factor
-    int m_comp_quant[JPGD_MAX_COMPONENTS];        // component's quantization table selector
-    int m_comp_ident[JPGD_MAX_COMPONENTS];        // component's ID
-    int m_comp_h_blocks[JPGD_MAX_COMPONENTS];
-    int m_comp_v_blocks[JPGD_MAX_COMPONENTS];
-    int m_comps_in_scan;                          // # of components in scan
-    int m_comp_list[JPGD_MAX_COMPS_IN_SCAN];      // components in this scan
-    int m_comp_dc_tab[JPGD_MAX_COMPONENTS];       // component's DC Huffman coding table selector
-    int m_comp_ac_tab[JPGD_MAX_COMPONENTS];       // component's AC Huffman coding table selector
-    int m_spectral_start;                         // spectral selection start
-    int m_spectral_end;                           // spectral selection end
-    int m_successive_low;                         // successive approximation low
-    int m_successive_high;                        // successive approximation high
-    int m_max_mcu_x_size;                         // MCU's max. X size in pixels
-    int m_max_mcu_y_size;                         // MCU's max. Y size in pixels
-    int m_blocks_per_mcu;
-    int m_max_blocks_per_row;
-    int m_mcus_per_row, m_mcus_per_col;
-    int m_mcu_org[JPGD_MAX_BLOCKS_PER_MCU];
-    int m_total_lines_left;                       // total # lines left in image
-    int m_mcu_lines_left;                         // total # lines left in this MCU
-    int m_real_dest_bytes_per_scan_line;
-    int m_dest_bytes_per_scan_line;               // rounded up
-    int m_dest_bytes_per_pixel;                   // 4 (RGB) or 1 (Y)
-    huff_tables* m_pHuff_tabs[JPGD_MAX_HUFF_TABLES];
-    coeff_buf* m_dc_coeffs[JPGD_MAX_COMPONENTS];
-    coeff_buf* m_ac_coeffs[JPGD_MAX_COMPONENTS];
-    int m_eob_run;
-    int m_block_y_mcu[JPGD_MAX_COMPONENTS];
-    uint8_t* m_pIn_buf_ofs;
-    int m_in_buf_left;
-    int m_tem_flag;
-    bool m_eof_flag;
-    uint8_t m_in_buf_pad_start[128];
-    uint8_t m_in_buf[JPGD_IN_BUF_SIZE + 128];
-    uint8_t m_in_buf_pad_end[128];
-    int m_bits_left;
-    uint32_t m_bit_buf;
-    int m_restart_interval;
-    int m_restarts_left;
-    int m_next_restart_num;
-    int m_max_mcus_per_row;
-    int m_max_blocks_per_mcu;
-    int m_expanded_blocks_per_mcu;
-    int m_expanded_blocks_per_row;
-    int m_expanded_blocks_per_component;
-    bool  m_freq_domain_chroma_upsample;
-    int m_max_mcus_per_col;
-    uint32_t m_last_dc_val[JPGD_MAX_COMPONENTS];
-    jpgd_block_t* m_pMCU_coefficients;
-    int m_mcu_block_max_zag[JPGD_MAX_BLOCKS_PER_MCU];
-    uint8_t* m_pSample_buf;
-    int m_crr[256];
-    int m_cbb[256];
-    int m_crg[256];
-    int m_cbg[256];
-    uint8_t* m_pScan_line_0;
-    uint8_t* m_pScan_line_1;
-    jpgd_status m_error_code;
-    bool m_ready_flag;
-    int m_total_bytes_read;
-
-    void free_all_blocks();
-    JPGD_NORETURN void stop_decoding(jpgd_status status);
-    void *alloc(size_t n, bool zero = false);
-    void word_clear(void *p, uint16_t c, uint32_t n);
-    void prep_in_buffer();
-    void read_dht_marker();
-    void read_dqt_marker();
-    void read_sof_marker();
-    void skip_variable_marker();
-    void read_dri_marker();
-    void read_sos_marker();
-    int next_marker();
-    int process_markers();
-    void locate_soi_marker();
-    void locate_sof_marker();
-    int locate_sos_marker();
-    void init(jpeg_decoder_stream * pStream);
-    void create_look_ups();
-    void fix_in_buffer();
-    void transform_mcu(int mcu_row);
-    void transform_mcu_expand(int mcu_row);
-    coeff_buf* coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y);
-    inline jpgd_block_t *coeff_buf_getp(coeff_buf *cb, int block_x, int block_y);
-    void load_next_row();
-    void decode_next_row();
-    void make_huff_table(int index, huff_tables *pH);
-    void check_quant_tables();
-    void check_huff_tables();
-    void calc_mcu_block_order();
-    int init_scan();
-    void init_frame();
-    void process_restart();
-    void decode_scan(pDecode_block_func decode_block_func);
-    void init_progressive();
-    void init_sequential();
-    void decode_start();
-    void decode_init(jpeg_decoder_stream * pStream);
-    void H2V2Convert();
-    void H2V1Convert();
-    void H1V2Convert();
-    void H1V1Convert();
-    void gray_convert();
-    void expanded_convert();
-    void find_eoi();
-    inline uint32_t get_char();
-    inline uint32_t get_char(bool *pPadding_flag);
-    inline void stuff_char(uint8_t q);
-    inline uint8_t get_octet();
-    inline uint32_t get_bits(int num_bits);
-    inline uint32_t get_bits_no_markers(int numbits);
-    inline int huff_decode(huff_tables *pH);
-    inline int huff_decode(huff_tables *pH, int& extrabits);
-    static inline uint8_t clamp(int i);
-    static void decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y);
-    static void decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y);
-    static void decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y);
-    static void decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y);
-};
-
-
-// DCT coefficients are stored in this sequence.
-static int g_ZAG[64] = {  0,1,8,16,9,2,3,10,17,24,32,25,18,11,4,5,12,19,26,33,40,48,41,34,27,20,13,6,7,14,21,28,35,42,49,56,57,50,43,36,29,22,15,23,30,37,44,51,58,59,52,45,38,31,39,46,53,60,61,54,47,55,62,63 };
-
-enum JPEG_MARKER
-{
-  M_SOF0  = 0xC0, M_SOF1  = 0xC1, M_SOF2  = 0xC2, M_SOF3  = 0xC3, M_SOF5  = 0xC5, M_SOF6  = 0xC6, M_SOF7  = 0xC7, M_JPG   = 0xC8,
-  M_SOF9  = 0xC9, M_SOF10 = 0xCA, M_SOF11 = 0xCB, M_SOF13 = 0xCD, M_SOF14 = 0xCE, M_SOF15 = 0xCF, M_DHT   = 0xC4, M_DAC   = 0xCC,
-  M_RST0  = 0xD0, M_RST1  = 0xD1, M_RST2  = 0xD2, M_RST3  = 0xD3, M_RST4  = 0xD4, M_RST5  = 0xD5, M_RST6  = 0xD6, M_RST7  = 0xD7,
-  M_SOI   = 0xD8, M_EOI   = 0xD9, M_SOS   = 0xDA, M_DQT   = 0xDB, M_DNL   = 0xDC, M_DRI   = 0xDD, M_DHP   = 0xDE, M_EXP   = 0xDF,
-  M_APP0  = 0xE0, M_APP15 = 0xEF, M_JPG0  = 0xF0, M_JPG13 = 0xFD, M_COM   = 0xFE, M_TEM   = 0x01, M_ERROR = 0x100, RST0   = 0xD0
-};
-
-enum JPEG_SUBSAMPLING { JPGD_GRAYSCALE = 0, JPGD_YH1V1, JPGD_YH2V1, JPGD_YH1V2, JPGD_YH2V2 };
-
-#define CONST_BITS  13
-#define PASS1_BITS  2
-#define SCALEDONE ((int32_t)1)
-#define DESCALE(x,n)  (((x) + (SCALEDONE << ((n)-1))) >> (n))
-#define DESCALE_ZEROSHIFT(x,n)  (((x) + (128 << (n)) + (SCALEDONE << ((n)-1))) >> (n))
-#define MULTIPLY(var, cnst)  ((var) * (cnst))
-#define CLAMP(i) ((static_cast<uint32_t>(i) > 255) ? (((~i) >> 31) & 0xFF) : (i))
-
-#define FIX_0_298631336  ((int32_t)2446)        /* FIX(0.298631336) */
-#define FIX_0_390180644  ((int32_t)3196)        /* FIX(0.390180644) */
-#define FIX_0_541196100  ((int32_t)4433)        /* FIX(0.541196100) */
-#define FIX_0_765366865  ((int32_t)6270)        /* FIX(0.765366865) */
-#define FIX_0_899976223  ((int32_t)7373)        /* FIX(0.899976223) */
-#define FIX_1_175875602  ((int32_t)9633)        /* FIX(1.175875602) */
-#define FIX_1_501321110  ((int32_t)12299)       /* FIX(1.501321110) */
-#define FIX_1_847759065  ((int32_t)15137)       /* FIX(1.847759065) */
-#define FIX_1_961570560  ((int32_t)16069)       /* FIX(1.961570560) */
-#define FIX_2_053119869  ((int32_t)16819)       /* FIX(2.053119869) */
-#define FIX_2_562915447  ((int32_t)20995)       /* FIX(2.562915447) */
-#define FIX_3_072711026  ((int32_t)25172)       /* FIX(3.072711026) */
-
-
-// Compiler creates a fast path 1D IDCT for X non-zero columns
-template <int NONZERO_COLS>
-struct Row
-{
-    static void idct(int* pTemp, const jpgd_block_t* pSrc)
-    {
-        // ACCESS_COL() will be optimized at compile time to either an array access, or 0.
-        #define ACCESS_COL(x) (((x) < NONZERO_COLS) ? (int)pSrc[x] : 0)
-
-        const int z2 = ACCESS_COL(2), z3 = ACCESS_COL(6);
-        const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-        const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
-        const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
-
-        const int tmp0 = static_cast<unsigned int>(ACCESS_COL(0) + ACCESS_COL(4)) << CONST_BITS;
-        const int tmp1 = static_cast<unsigned int>(ACCESS_COL(0) - ACCESS_COL(4)) << CONST_BITS;
-
-        const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
-
-        const int atmp0 = ACCESS_COL(7), atmp1 = ACCESS_COL(5), atmp2 = ACCESS_COL(3), atmp3 = ACCESS_COL(1);
-
-        const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
-        const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
-
-        const int az1 = MULTIPLY(bz1, - FIX_0_899976223);
-        const int az2 = MULTIPLY(bz2, - FIX_2_562915447);
-        const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5;
-        const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5;
-
-        const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
-        const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
-        const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
-        const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
-
-        pTemp[0] = DESCALE(tmp10 + btmp3, CONST_BITS-PASS1_BITS);
-        pTemp[7] = DESCALE(tmp10 - btmp3, CONST_BITS-PASS1_BITS);
-        pTemp[1] = DESCALE(tmp11 + btmp2, CONST_BITS-PASS1_BITS);
-        pTemp[6] = DESCALE(tmp11 - btmp2, CONST_BITS-PASS1_BITS);
-        pTemp[2] = DESCALE(tmp12 + btmp1, CONST_BITS-PASS1_BITS);
-        pTemp[5] = DESCALE(tmp12 - btmp1, CONST_BITS-PASS1_BITS);
-        pTemp[3] = DESCALE(tmp13 + btmp0, CONST_BITS-PASS1_BITS);
-        pTemp[4] = DESCALE(tmp13 - btmp0, CONST_BITS-PASS1_BITS);
-    }
-};
-
-
-template <>
-struct Row<0>
-{
-    static void idct(int* pTemp, const jpgd_block_t* pSrc)
-    {
-#ifdef _MSC_VER
-      pTemp; pSrc;
-#endif
-    }
-};
-
-
-template <>
-struct Row<1>
-{
-    static void idct(int* pTemp, const jpgd_block_t* pSrc)
-    {
-        const int dcval = pSrc[0] * PASS1_BITS * 2;
-
-        pTemp[0] = dcval;
-        pTemp[1] = dcval;
-        pTemp[2] = dcval;
-        pTemp[3] = dcval;
-        pTemp[4] = dcval;
-        pTemp[5] = dcval;
-        pTemp[6] = dcval;
-        pTemp[7] = dcval;
-    }
-};
-
-
-// Compiler creates a fast path 1D IDCT for X non-zero rows
-template <int NONZERO_ROWS>
-struct Col
-{
-    static void idct(uint8_t* pDst_ptr, const int* pTemp)
-    {
-        // ACCESS_ROW() will be optimized at compile time to either an array access, or 0.
-        #define ACCESS_ROW(x) (((x) < NONZERO_ROWS) ? pTemp[x * 8] : 0)
-
-        const int z2 = ACCESS_ROW(2);
-        const int z3 = ACCESS_ROW(6);
-
-        const int z1 = MULTIPLY(z2 + z3, FIX_0_541196100);
-        const int tmp2 = z1 + MULTIPLY(z3, - FIX_1_847759065);
-        const int tmp3 = z1 + MULTIPLY(z2, FIX_0_765366865);
-
-        const int tmp0 = static_cast<unsigned int>(ACCESS_ROW(0) + ACCESS_ROW(4)) << CONST_BITS;
-        const int tmp1 = static_cast<unsigned int>(ACCESS_ROW(0) - ACCESS_ROW(4)) << CONST_BITS;
-
-        const int tmp10 = tmp0 + tmp3, tmp13 = tmp0 - tmp3, tmp11 = tmp1 + tmp2, tmp12 = tmp1 - tmp2;
-
-        const int atmp0 = ACCESS_ROW(7), atmp1 = ACCESS_ROW(5), atmp2 = ACCESS_ROW(3), atmp3 = ACCESS_ROW(1);
-
-        const int bz1 = atmp0 + atmp3, bz2 = atmp1 + atmp2, bz3 = atmp0 + atmp2, bz4 = atmp1 + atmp3;
-        const int bz5 = MULTIPLY(bz3 + bz4, FIX_1_175875602);
-
-        const int az1 = MULTIPLY(bz1, - FIX_0_899976223);
-        const int az2 = MULTIPLY(bz2, - FIX_2_562915447);
-        const int az3 = MULTIPLY(bz3, - FIX_1_961570560) + bz5;
-        const int az4 = MULTIPLY(bz4, - FIX_0_390180644) + bz5;
-
-        const int btmp0 = MULTIPLY(atmp0, FIX_0_298631336) + az1 + az3;
-        const int btmp1 = MULTIPLY(atmp1, FIX_2_053119869) + az2 + az4;
-        const int btmp2 = MULTIPLY(atmp2, FIX_3_072711026) + az2 + az3;
-        const int btmp3 = MULTIPLY(atmp3, FIX_1_501321110) + az1 + az4;
-
-        int i = DESCALE_ZEROSHIFT(tmp10 + btmp3, CONST_BITS+PASS1_BITS+3);
-        pDst_ptr[8*0] = (uint8_t)CLAMP(i);
-
-        i = DESCALE_ZEROSHIFT(tmp10 - btmp3, CONST_BITS+PASS1_BITS+3);
-        pDst_ptr[8*7] = (uint8_t)CLAMP(i);
-
-        i = DESCALE_ZEROSHIFT(tmp11 + btmp2, CONST_BITS+PASS1_BITS+3);
-        pDst_ptr[8*1] = (uint8_t)CLAMP(i);
-
-        i = DESCALE_ZEROSHIFT(tmp11 - btmp2, CONST_BITS+PASS1_BITS+3);
-        pDst_ptr[8*6] = (uint8_t)CLAMP(i);
-
-        i = DESCALE_ZEROSHIFT(tmp12 + btmp1, CONST_BITS+PASS1_BITS+3);
-        pDst_ptr[8*2] = (uint8_t)CLAMP(i);
-
-        i = DESCALE_ZEROSHIFT(tmp12 - btmp1, CONST_BITS+PASS1_BITS+3);
-        pDst_ptr[8*5] = (uint8_t)CLAMP(i);
-
-        i = DESCALE_ZEROSHIFT(tmp13 + btmp0, CONST_BITS+PASS1_BITS+3);
-        pDst_ptr[8*3] = (uint8_t)CLAMP(i);
-
-        i = DESCALE_ZEROSHIFT(tmp13 - btmp0, CONST_BITS+PASS1_BITS+3);
-        pDst_ptr[8*4] = (uint8_t)CLAMP(i);
-    }
-};
-
-
-template <>
-struct Col<1>
-{
-    static void idct(uint8_t* pDst_ptr, const int* pTemp)
-    {
-        int dcval = DESCALE_ZEROSHIFT(pTemp[0], PASS1_BITS+3);
-        const uint8_t dcval_clamped = (uint8_t)CLAMP(dcval);
-        pDst_ptr[0*8] = dcval_clamped;
-        pDst_ptr[1*8] = dcval_clamped;
-        pDst_ptr[2*8] = dcval_clamped;
-        pDst_ptr[3*8] = dcval_clamped;
-        pDst_ptr[4*8] = dcval_clamped;
-        pDst_ptr[5*8] = dcval_clamped;
-        pDst_ptr[6*8] = dcval_clamped;
-        pDst_ptr[7*8] = dcval_clamped;
-    }
-};
-
-
-static const uint8_t s_idct_row_table[] = {
-    1,0,0,0,0,0,0,0, 2,0,0,0,0,0,0,0, 2,1,0,0,0,0,0,0, 2,1,1,0,0,0,0,0, 2,2,1,0,0,0,0,0, 3,2,1,0,0,0,0,0, 4,2,1,0,0,0,0,0, 4,3,1,0,0,0,0,0,
-    4,3,2,0,0,0,0,0, 4,3,2,1,0,0,0,0, 4,3,2,1,1,0,0,0, 4,3,2,2,1,0,0,0, 4,3,3,2,1,0,0,0, 4,4,3,2,1,0,0,0, 5,4,3,2,1,0,0,0, 6,4,3,2,1,0,0,0,
-    6,5,3,2,1,0,0,0, 6,5,4,2,1,0,0,0, 6,5,4,3,1,0,0,0, 6,5,4,3,2,0,0,0, 6,5,4,3,2,1,0,0, 6,5,4,3,2,1,1,0, 6,5,4,3,2,2,1,0, 6,5,4,3,3,2,1,0,
-    6,5,4,4,3,2,1,0, 6,5,5,4,3,2,1,0, 6,6,5,4,3,2,1,0, 7,6,5,4,3,2,1,0, 8,6,5,4,3,2,1,0, 8,7,5,4,3,2,1,0, 8,7,6,4,3,2,1,0, 8,7,6,5,3,2,1,0,
-    8,7,6,5,4,2,1,0, 8,7,6,5,4,3,1,0, 8,7,6,5,4,3,2,0, 8,7,6,5,4,3,2,1, 8,7,6,5,4,3,2,2, 8,7,6,5,4,3,3,2, 8,7,6,5,4,4,3,2, 8,7,6,5,5,4,3,2,
-    8,7,6,6,5,4,3,2, 8,7,7,6,5,4,3,2, 8,8,7,6,5,4,3,2, 8,8,8,6,5,4,3,2, 8,8,8,7,5,4,3,2, 8,8,8,7,6,4,3,2, 8,8,8,7,6,5,3,2, 8,8,8,7,6,5,4,2,
-    8,8,8,7,6,5,4,3, 8,8,8,7,6,5,4,4, 8,8,8,7,6,5,5,4, 8,8,8,7,6,6,5,4, 8,8,8,7,7,6,5,4, 8,8,8,8,7,6,5,4, 8,8,8,8,8,6,5,4, 8,8,8,8,8,7,5,4,
-    8,8,8,8,8,7,6,4, 8,8,8,8,8,7,6,5, 8,8,8,8,8,7,6,6, 8,8,8,8,8,7,7,6, 8,8,8,8,8,8,7,6, 8,8,8,8,8,8,8,6, 8,8,8,8,8,8,8,7, 8,8,8,8,8,8,8,8,
-};
-
-
-static const uint8_t s_idct_col_table[] = { 1, 1, 2, 3, 3, 3, 3, 3, 3, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8 };
-
-
-void idct(const jpgd_block_t* pSrc_ptr, uint8_t* pDst_ptr, int block_max_zag)
-{
-    JPGD_ASSERT(block_max_zag >= 1);
-    JPGD_ASSERT(block_max_zag <= 64);
-
-    if (block_max_zag <= 1) {
-        int k = ((pSrc_ptr[0] + 4) >> 3) + 128;
-        k = CLAMP(k);
-        k = k | (k<<8);
-        k = k | (k<<16);
-        for (int i = 8; i > 0; i--) {
-            *(int*)&pDst_ptr[0] = k;
-            *(int*)&pDst_ptr[4] = k;
-            pDst_ptr += 8;
-        }
-      return;
-    }
-
-    int temp[64];
-    const jpgd_block_t* pSrc = pSrc_ptr;
-    int* pTemp = temp;
-    const uint8_t* pRow_tab = &s_idct_row_table[(block_max_zag - 1) * 8];
-    int i;
-    for (i = 8; i > 0; i--, pRow_tab++) {
-        switch (*pRow_tab) {
-            case 0: Row<0>::idct(pTemp, pSrc); break;
-            case 1: Row<1>::idct(pTemp, pSrc); break;
-            case 2: Row<2>::idct(pTemp, pSrc); break;
-            case 3: Row<3>::idct(pTemp, pSrc); break;
-            case 4: Row<4>::idct(pTemp, pSrc); break;
-            case 5: Row<5>::idct(pTemp, pSrc); break;
-            case 6: Row<6>::idct(pTemp, pSrc); break;
-            case 7: Row<7>::idct(pTemp, pSrc); break;
-            case 8: Row<8>::idct(pTemp, pSrc); break;
-        }
-        pSrc += 8;
-        pTemp += 8;
-    }
-
-    pTemp = temp;
-
-    const int nonzero_rows = s_idct_col_table[block_max_zag - 1];
-    for (i = 8; i > 0; i--) {
-        switch (nonzero_rows) {
-            case 1: Col<1>::idct(pDst_ptr, pTemp); break;
-            case 2: Col<2>::idct(pDst_ptr, pTemp); break;
-            case 3: Col<3>::idct(pDst_ptr, pTemp); break;
-            case 4: Col<4>::idct(pDst_ptr, pTemp); break;
-            case 5: Col<5>::idct(pDst_ptr, pTemp); break;
-            case 6: Col<6>::idct(pDst_ptr, pTemp); break;
-            case 7: Col<7>::idct(pDst_ptr, pTemp); break;
-            case 8: Col<8>::idct(pDst_ptr, pTemp); break;
-        }
-        pTemp++;
-        pDst_ptr++;
-    }
-}
-
-
-void idct_4x4(const jpgd_block_t* pSrc_ptr, uint8_t* pDst_ptr)
-{
-    int temp[64];
-    int* pTemp = temp;
-    const jpgd_block_t* pSrc = pSrc_ptr;
-
-    for (int i = 4; i > 0; i--) {
-        Row<4>::idct(pTemp, pSrc);
-        pSrc += 8;
-        pTemp += 8;
-    }
-
-    pTemp = temp;
-
-    for (int i = 8; i > 0; i--) {
-        Col<4>::idct(pDst_ptr, pTemp);
-        pTemp++;
-        pDst_ptr++;
-    }
-}
-
-
-// Retrieve one character from the input stream.
-inline uint32_t jpeg_decoder::get_char()
-{
-    // Any bytes remaining in buffer?
-    if (!m_in_buf_left) {
-        // Try to get more bytes.
-        prep_in_buffer();
-        // Still nothing to get?
-        if (!m_in_buf_left) {
-            // Pad the end of the stream with 0xFF 0xD9 (EOI marker)
-            int t = m_tem_flag;
-            m_tem_flag ^= 1;
-            if (t) return 0xD9;
-            else return 0xFF;
-        }
-    }
-    uint32_t c = *m_pIn_buf_ofs++;
-    m_in_buf_left--;
-    return c;
-}
-
-
-// Same as previous method, except can indicate if the character is a pad character or not.
-inline uint32_t jpeg_decoder::get_char(bool *pPadding_flag)
-{
-    if (!m_in_buf_left) {
-        prep_in_buffer();
-        if (!m_in_buf_left) {
-            *pPadding_flag = true;
-            int t = m_tem_flag;
-            m_tem_flag ^= 1;
-            if (t) return 0xD9;
-            else return 0xFF;
-        }
-    }
-    *pPadding_flag = false;
-    uint32_t c = *m_pIn_buf_ofs++;
-    m_in_buf_left--;
-
-    return c;
-}
-
-
-// Inserts a previously retrieved character back into the input buffer.
-inline void jpeg_decoder::stuff_char(uint8_t q)
-{
-    *(--m_pIn_buf_ofs) = q;
-    m_in_buf_left++;
-}
-
-
-// Retrieves one character from the input stream, but does not read past markers. Will continue to return 0xFF when a marker is encountered.
-inline uint8_t jpeg_decoder::get_octet()
-{
-    bool padding_flag;
-    int c = get_char(&padding_flag);
-
-    if (c == 0xFF) {
-        if (padding_flag) return 0xFF;
-
-        c = get_char(&padding_flag);
-        if (padding_flag) {
-            stuff_char(0xFF);
-            return 0xFF;
-        }
-        if (c == 0x00) return 0xFF;
-        else {
-            stuff_char(static_cast<uint8_t>(c));
-            stuff_char(0xFF);
-            return 0xFF;
-        }
-    }
-    return static_cast<uint8_t>(c);
-}
-
-
-// Retrieves a variable number of bits from the input stream. Does not recognize markers.
-inline uint32_t jpeg_decoder::get_bits(int num_bits)
-{
-    if (!num_bits) return 0;
-
-    uint32_t i = m_bit_buf >> (32 - num_bits);
-
-    if ((m_bits_left -= num_bits) <= 0) {
-        m_bit_buf <<= (num_bits += m_bits_left);
-        uint32_t c1 = get_char();
-        uint32_t c2 = get_char();
-        m_bit_buf = (m_bit_buf & 0xFFFF0000) | (c1 << 8) | c2;
-        m_bit_buf <<= -m_bits_left;
-        m_bits_left += 16;
-        JPGD_ASSERT(m_bits_left >= 0);
-    }
-    else m_bit_buf <<= num_bits;
-
-    return i;
-}
-
-
-// Retrieves a variable number of bits from the input stream. Markers will not be read into the input bit buffer. Instead, an infinite number of all 1's will be returned when a marker is encountered.
-inline uint32_t jpeg_decoder::get_bits_no_markers(int num_bits)
-{
-    if (!num_bits)return 0;
-
-    uint32_t i = m_bit_buf >> (32 - num_bits);
-
-    if ((m_bits_left -= num_bits) <= 0) {
-        m_bit_buf <<= (num_bits += m_bits_left);
-        if ((m_in_buf_left < 2) || (m_pIn_buf_ofs[0] == 0xFF) || (m_pIn_buf_ofs[1] == 0xFF)) {
-            uint32_t c1 = get_octet();
-            uint32_t c2 = get_octet();
-            m_bit_buf |= (c1 << 8) | c2;
-        } else {
-            m_bit_buf |= ((uint32_t)m_pIn_buf_ofs[0] << 8) | m_pIn_buf_ofs[1];
-            m_in_buf_left -= 2;
-            m_pIn_buf_ofs += 2;
-        }
-        m_bit_buf <<= -m_bits_left;
-        m_bits_left += 16;
-        JPGD_ASSERT(m_bits_left >= 0);
-    } else m_bit_buf <<= num_bits;
-
-    return i;
-}
-
-
-// Decodes a Huffman encoded symbol.
-inline int jpeg_decoder::huff_decode(huff_tables *pH)
-{
-    int symbol;
-
-    // Check first 8-bits: do we have a complete symbol?
-    if ((symbol = pH->look_up[m_bit_buf >> 24]) < 0) {
-        // Decode more bits, use a tree traversal to find symbol.
-        int ofs = 23;
-        do {
-            symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))];
-            ofs--;
-        } while (symbol < 0);
-        get_bits_no_markers(8 + (23 - ofs));
-    } else get_bits_no_markers(pH->code_size[symbol]);
-
-  return symbol;
-}
-
-
-// Decodes a Huffman encoded symbol.
-inline int jpeg_decoder::huff_decode(huff_tables *pH, int& extra_bits)
-{
-    int symbol;
-
-    // Check first 8-bits: do we have a complete symbol?
-    if ((symbol = pH->look_up2[m_bit_buf >> 24]) < 0) {
-        // Use a tree traversal to find symbol.
-        int ofs = 23;
-        do {
-            symbol = pH->tree[-(int)(symbol + ((m_bit_buf >> ofs) & 1))];
-            ofs--;
-        } while (symbol < 0);
-
-        get_bits_no_markers(8 + (23 - ofs));
-        extra_bits = get_bits_no_markers(symbol & 0xF);
-    } else {
-        JPGD_ASSERT(((symbol >> 8) & 31) == pH->code_size[symbol & 255] + ((symbol & 0x8000) ? (symbol & 15) : 0));
-
-        if (symbol & 0x8000) {
-            get_bits_no_markers((symbol >> 8) & 31);
-            extra_bits = symbol >> 16;
-        } else  {
-            int code_size = (symbol >> 8) & 31;
-            int num_extra_bits = symbol & 0xF;
-            int bits = code_size + num_extra_bits;
-            if (bits <= (m_bits_left + 16)) extra_bits = get_bits_no_markers(bits) & ((1 << num_extra_bits) - 1);
-            else {
-                get_bits_no_markers(code_size);
-                extra_bits = get_bits_no_markers(num_extra_bits);
-            }
-        }
-        symbol &= 0xFF;
-    }
-    return symbol;
-}
-
-
-// Tables and macro used to fully decode the DPCM differences.
-static const int s_extend_test[16] = { 0, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100, 0x0200, 0x0400, 0x0800, 0x1000, 0x2000, 0x4000 };
-static const unsigned int s_extend_offset[16] = { 0, ((~0u)<<1) + 1, ((~0u)<<2) + 1, ((~0u)<<3) + 1, ((~0u)<<4) + 1, ((~0u)<<5) + 1, ((~0u)<<6) + 1, ((~0u)<<7) + 1, ((~0u)<<8) + 1, ((~0u)<<9) + 1, ((~0u)<<10) + 1, ((~0u)<<11) + 1, ((~0u)<<12) + 1, ((~0u)<<13) + 1, ((~0u)<<14) + 1, ((~0u)<<15) + 1 };
-
-// The logical AND's in this macro are to shut up static code analysis (aren't really necessary - couldn't find another way to do this)
-#define JPGD_HUFF_EXTEND(x, s) (((x) < s_extend_test[s & 15]) ? ((x) + s_extend_offset[s & 15]) : (x))
-
-
-// Clamps a value between 0-255.
-inline uint8_t jpeg_decoder::clamp(int i)
-{
-    if (static_cast<uint32_t>(i) > 255) i = (((~i) >> 31) & 0xFF);
-    return static_cast<uint8_t>(i);
-}
-
-
-namespace DCT_Upsample
-{
-    struct Matrix44
-    {
-        typedef int Element_Type;
-        enum { NUM_ROWS = 4, NUM_COLS = 4 };
-
-        Element_Type v[NUM_ROWS][NUM_COLS];
-
-        inline int rows() const { return NUM_ROWS; }
-        inline int cols() const { return NUM_COLS; }
-        inline const Element_Type & at(int r, int c) const { return v[r][c]; }
-        inline       Element_Type & at(int r, int c)       { return v[r][c]; }
-
-        inline Matrix44() {}
-
-        inline Matrix44& operator += (const Matrix44& a)
-        {
-            for (int r = 0; r < NUM_ROWS; r++) {
-                at(r, 0) += a.at(r, 0);
-                at(r, 1) += a.at(r, 1);
-                at(r, 2) += a.at(r, 2);
-                at(r, 3) += a.at(r, 3);
-            }
-            return *this;
-        }
-
-        inline Matrix44& operator -= (const Matrix44& a)
-        {
-            for (int r = 0; r < NUM_ROWS; r++) {
-                at(r, 0) -= a.at(r, 0);
-                at(r, 1) -= a.at(r, 1);
-                at(r, 2) -= a.at(r, 2);
-                at(r, 3) -= a.at(r, 3);
-            }
-            return *this;
-        }
-
-        friend inline Matrix44 operator + (const Matrix44& a, const Matrix44& b)
-        {
-            Matrix44 ret;
-            for (int r = 0; r < NUM_ROWS; r++) {
-                ret.at(r, 0) = a.at(r, 0) + b.at(r, 0);
-                ret.at(r, 1) = a.at(r, 1) + b.at(r, 1);
-                ret.at(r, 2) = a.at(r, 2) + b.at(r, 2);
-                ret.at(r, 3) = a.at(r, 3) + b.at(r, 3);
-            }
-            return ret;
-        }
-
-        friend inline Matrix44 operator - (const Matrix44& a, const Matrix44& b)
-        {
-            Matrix44 ret;
-            for (int r = 0; r < NUM_ROWS; r++) {
-                ret.at(r, 0) = a.at(r, 0) - b.at(r, 0);
-                ret.at(r, 1) = a.at(r, 1) - b.at(r, 1);
-                ret.at(r, 2) = a.at(r, 2) - b.at(r, 2);
-                ret.at(r, 3) = a.at(r, 3) - b.at(r, 3);
-            }
-            return ret;
-        }
-
-        static inline void add_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
-        {
-            for (int r = 0; r < 4; r++) {
-                pDst[0*8 + r] = static_cast<jpgd_block_t>(a.at(r, 0) + b.at(r, 0));
-                pDst[1*8 + r] = static_cast<jpgd_block_t>(a.at(r, 1) + b.at(r, 1));
-                pDst[2*8 + r] = static_cast<jpgd_block_t>(a.at(r, 2) + b.at(r, 2));
-                pDst[3*8 + r] = static_cast<jpgd_block_t>(a.at(r, 3) + b.at(r, 3));
-            }
-        }
-
-        static inline void sub_and_store(jpgd_block_t* pDst, const Matrix44& a, const Matrix44& b)
-        {
-            for (int r = 0; r < 4; r++) {
-                pDst[0*8 + r] = static_cast<jpgd_block_t>(a.at(r, 0) - b.at(r, 0));
-                pDst[1*8 + r] = static_cast<jpgd_block_t>(a.at(r, 1) - b.at(r, 1));
-                pDst[2*8 + r] = static_cast<jpgd_block_t>(a.at(r, 2) - b.at(r, 2));
-                pDst[3*8 + r] = static_cast<jpgd_block_t>(a.at(r, 3) - b.at(r, 3));
-            }
-        }
-    };
-
-    const int FRACT_BITS = 10;
-    const int SCALE = 1 << FRACT_BITS;
-
-    typedef int Temp_Type;
-    #define D(i) (((i) + (SCALE >> 1)) >> FRACT_BITS)
-    #define F(i) ((int)((i) * SCALE + .5f))
-
-    // Any decent C++ compiler will optimize this at compile time to a 0, or an array access.
-    #define AT(c, r) ((((c)>=NUM_COLS)||((r)>=NUM_ROWS)) ? 0 : pSrc[(c)+(r)*8])
-
-    // NUM_ROWS/NUM_COLS = # of non-zero rows/cols in input matrix
-    template<int NUM_ROWS, int NUM_COLS>
-    struct P_Q
-    {
-        static void calc(Matrix44& P, Matrix44& Q, const jpgd_block_t* pSrc)
-        {
-            // 4x8 = 4x8 times 8x8, matrix 0 is constant
-            const Temp_Type X000 = AT(0, 0);
-            const Temp_Type X001 = AT(0, 1);
-            const Temp_Type X002 = AT(0, 2);
-            const Temp_Type X003 = AT(0, 3);
-            const Temp_Type X004 = AT(0, 4);
-            const Temp_Type X005 = AT(0, 5);
-            const Temp_Type X006 = AT(0, 6);
-            const Temp_Type X007 = AT(0, 7);
-            const Temp_Type X010 = D(F(0.415735f) * AT(1, 0) + F(0.791065f) * AT(3, 0) + F(-0.352443f) * AT(5, 0) + F(0.277785f) * AT(7, 0));
-            const Temp_Type X011 = D(F(0.415735f) * AT(1, 1) + F(0.791065f) * AT(3, 1) + F(-0.352443f) * AT(5, 1) + F(0.277785f) * AT(7, 1));
-            const Temp_Type X012 = D(F(0.415735f) * AT(1, 2) + F(0.791065f) * AT(3, 2) + F(-0.352443f) * AT(5, 2) + F(0.277785f) * AT(7, 2));
-            const Temp_Type X013 = D(F(0.415735f) * AT(1, 3) + F(0.791065f) * AT(3, 3) + F(-0.352443f) * AT(5, 3) + F(0.277785f) * AT(7, 3));
-            const Temp_Type X014 = D(F(0.415735f) * AT(1, 4) + F(0.791065f) * AT(3, 4) + F(-0.352443f) * AT(5, 4) + F(0.277785f) * AT(7, 4));
-            const Temp_Type X015 = D(F(0.415735f) * AT(1, 5) + F(0.791065f) * AT(3, 5) + F(-0.352443f) * AT(5, 5) + F(0.277785f) * AT(7, 5));
-            const Temp_Type X016 = D(F(0.415735f) * AT(1, 6) + F(0.791065f) * AT(3, 6) + F(-0.352443f) * AT(5, 6) + F(0.277785f) * AT(7, 6));
-            const Temp_Type X017 = D(F(0.415735f) * AT(1, 7) + F(0.791065f) * AT(3, 7) + F(-0.352443f) * AT(5, 7) + F(0.277785f) * AT(7, 7));
-            const Temp_Type X020 = AT(4, 0);
-            const Temp_Type X021 = AT(4, 1);
-            const Temp_Type X022 = AT(4, 2);
-            const Temp_Type X023 = AT(4, 3);
-            const Temp_Type X024 = AT(4, 4);
-            const Temp_Type X025 = AT(4, 5);
-            const Temp_Type X026 = AT(4, 6);
-            const Temp_Type X027 = AT(4, 7);
-            const Temp_Type X030 = D(F(0.022887f) * AT(1, 0) + F(-0.097545f) * AT(3, 0) + F(0.490393f) * AT(5, 0) + F(0.865723f) * AT(7, 0));
-            const Temp_Type X031 = D(F(0.022887f) * AT(1, 1) + F(-0.097545f) * AT(3, 1) + F(0.490393f) * AT(5, 1) + F(0.865723f) * AT(7, 1));
-            const Temp_Type X032 = D(F(0.022887f) * AT(1, 2) + F(-0.097545f) * AT(3, 2) + F(0.490393f) * AT(5, 2) + F(0.865723f) * AT(7, 2));
-            const Temp_Type X033 = D(F(0.022887f) * AT(1, 3) + F(-0.097545f) * AT(3, 3) + F(0.490393f) * AT(5, 3) + F(0.865723f) * AT(7, 3));
-            const Temp_Type X034 = D(F(0.022887f) * AT(1, 4) + F(-0.097545f) * AT(3, 4) + F(0.490393f) * AT(5, 4) + F(0.865723f) * AT(7, 4));
-            const Temp_Type X035 = D(F(0.022887f) * AT(1, 5) + F(-0.097545f) * AT(3, 5) + F(0.490393f) * AT(5, 5) + F(0.865723f) * AT(7, 5));
-            const Temp_Type X036 = D(F(0.022887f) * AT(1, 6) + F(-0.097545f) * AT(3, 6) + F(0.490393f) * AT(5, 6) + F(0.865723f) * AT(7, 6));
-            const Temp_Type X037 = D(F(0.022887f) * AT(1, 7) + F(-0.097545f) * AT(3, 7) + F(0.490393f) * AT(5, 7) + F(0.865723f) * AT(7, 7));
-
-            // 4x4 = 4x8 times 8x4, matrix 1 is constant
-            P.at(0, 0) = X000;
-            P.at(0, 1) = D(X001 * F(0.415735f) + X003 * F(0.791065f) + X005 * F(-0.352443f) + X007 * F(0.277785f));
-            P.at(0, 2) = X004;
-            P.at(0, 3) = D(X001 * F(0.022887f) + X003 * F(-0.097545f) + X005 * F(0.490393f) + X007 * F(0.865723f));
-            P.at(1, 0) = X010;
-            P.at(1, 1) = D(X011 * F(0.415735f) + X013 * F(0.791065f) + X015 * F(-0.352443f) + X017 * F(0.277785f));
-            P.at(1, 2) = X014;
-            P.at(1, 3) = D(X011 * F(0.022887f) + X013 * F(-0.097545f) + X015 * F(0.490393f) + X017 * F(0.865723f));
-            P.at(2, 0) = X020;
-            P.at(2, 1) = D(X021 * F(0.415735f) + X023 * F(0.791065f) + X025 * F(-0.352443f) + X027 * F(0.277785f));
-            P.at(2, 2) = X024;
-            P.at(2, 3) = D(X021 * F(0.022887f) + X023 * F(-0.097545f) + X025 * F(0.490393f) + X027 * F(0.865723f));
-            P.at(3, 0) = X030;
-            P.at(3, 1) = D(X031 * F(0.415735f) + X033 * F(0.791065f) + X035 * F(-0.352443f) + X037 * F(0.277785f));
-            P.at(3, 2) = X034;
-            P.at(3, 3) = D(X031 * F(0.022887f) + X033 * F(-0.097545f) + X035 * F(0.490393f) + X037 * F(0.865723f));
-            // 40 muls 24 adds
-
-            // 4x4 = 4x8 times 8x4, matrix 1 is constant
-            Q.at(0, 0) = D(X001 * F(0.906127f) + X003 * F(-0.318190f) + X005 * F(0.212608f) + X007 * F(-0.180240f));
-            Q.at(0, 1) = X002;
-            Q.at(0, 2) = D(X001 * F(-0.074658f) + X003 * F(0.513280f) + X005 * F(0.768178f) + X007 * F(-0.375330f));
-            Q.at(0, 3) = X006;
-            Q.at(1, 0) = D(X011 * F(0.906127f) + X013 * F(-0.318190f) + X015 * F(0.212608f) + X017 * F(-0.180240f));
-            Q.at(1, 1) = X012;
-            Q.at(1, 2) = D(X011 * F(-0.074658f) + X013 * F(0.513280f) + X015 * F(0.768178f) + X017 * F(-0.375330f));
-            Q.at(1, 3) = X016;
-            Q.at(2, 0) = D(X021 * F(0.906127f) + X023 * F(-0.318190f) + X025 * F(0.212608f) + X027 * F(-0.180240f));
-            Q.at(2, 1) = X022;
-            Q.at(2, 2) = D(X021 * F(-0.074658f) + X023 * F(0.513280f) + X025 * F(0.768178f) + X027 * F(-0.375330f));
-            Q.at(2, 3) = X026;
-            Q.at(3, 0) = D(X031 * F(0.906127f) + X033 * F(-0.318190f) + X035 * F(0.212608f) + X037 * F(-0.180240f));
-            Q.at(3, 1) = X032;
-            Q.at(3, 2) = D(X031 * F(-0.074658f) + X033 * F(0.513280f) + X035 * F(0.768178f) + X037 * F(-0.375330f));
-            Q.at(3, 3) = X036;
-            // 40 muls 24 adds
-        }
-    };
-
-
-    template<int NUM_ROWS, int NUM_COLS>
-    struct R_S
-    {
-        static void calc(Matrix44& R, Matrix44& S, const jpgd_block_t* pSrc)
-        {
-            // 4x8 = 4x8 times 8x8, matrix 0 is constant
-            const Temp_Type X100 = D(F(0.906127f) * AT(1, 0) + F(-0.318190f) * AT(3, 0) + F(0.212608f) * AT(5, 0) + F(-0.180240f) * AT(7, 0));
-            const Temp_Type X101 = D(F(0.906127f) * AT(1, 1) + F(-0.318190f) * AT(3, 1) + F(0.212608f) * AT(5, 1) + F(-0.180240f) * AT(7, 1));
-            const Temp_Type X102 = D(F(0.906127f) * AT(1, 2) + F(-0.318190f) * AT(3, 2) + F(0.212608f) * AT(5, 2) + F(-0.180240f) * AT(7, 2));
-            const Temp_Type X103 = D(F(0.906127f) * AT(1, 3) + F(-0.318190f) * AT(3, 3) + F(0.212608f) * AT(5, 3) + F(-0.180240f) * AT(7, 3));
-            const Temp_Type X104 = D(F(0.906127f) * AT(1, 4) + F(-0.318190f) * AT(3, 4) + F(0.212608f) * AT(5, 4) + F(-0.180240f) * AT(7, 4));
-            const Temp_Type X105 = D(F(0.906127f) * AT(1, 5) + F(-0.318190f) * AT(3, 5) + F(0.212608f) * AT(5, 5) + F(-0.180240f) * AT(7, 5));
-            const Temp_Type X106 = D(F(0.906127f) * AT(1, 6) + F(-0.318190f) * AT(3, 6) + F(0.212608f) * AT(5, 6) + F(-0.180240f) * AT(7, 6));
-            const Temp_Type X107 = D(F(0.906127f) * AT(1, 7) + F(-0.318190f) * AT(3, 7) + F(0.212608f) * AT(5, 7) + F(-0.180240f) * AT(7, 7));
-            const Temp_Type X110 = AT(2, 0);
-            const Temp_Type X111 = AT(2, 1);
-            const Temp_Type X112 = AT(2, 2);
-            const Temp_Type X113 = AT(2, 3);
-            const Temp_Type X114 = AT(2, 4);
-            const Temp_Type X115 = AT(2, 5);
-            const Temp_Type X116 = AT(2, 6);
-            const Temp_Type X117 = AT(2, 7);
-            const Temp_Type X120 = D(F(-0.074658f) * AT(1, 0) + F(0.513280f) * AT(3, 0) + F(0.768178f) * AT(5, 0) + F(-0.375330f) * AT(7, 0));
-            const Temp_Type X121 = D(F(-0.074658f) * AT(1, 1) + F(0.513280f) * AT(3, 1) + F(0.768178f) * AT(5, 1) + F(-0.375330f) * AT(7, 1));
-            const Temp_Type X122 = D(F(-0.074658f) * AT(1, 2) + F(0.513280f) * AT(3, 2) + F(0.768178f) * AT(5, 2) + F(-0.375330f) * AT(7, 2));
-            const Temp_Type X123 = D(F(-0.074658f) * AT(1, 3) + F(0.513280f) * AT(3, 3) + F(0.768178f) * AT(5, 3) + F(-0.375330f) * AT(7, 3));
-            const Temp_Type X124 = D(F(-0.074658f) * AT(1, 4) + F(0.513280f) * AT(3, 4) + F(0.768178f) * AT(5, 4) + F(-0.375330f) * AT(7, 4));
-            const Temp_Type X125 = D(F(-0.074658f) * AT(1, 5) + F(0.513280f) * AT(3, 5) + F(0.768178f) * AT(5, 5) + F(-0.375330f) * AT(7, 5));
-            const Temp_Type X126 = D(F(-0.074658f) * AT(1, 6) + F(0.513280f) * AT(3, 6) + F(0.768178f) * AT(5, 6) + F(-0.375330f) * AT(7, 6));
-            const Temp_Type X127 = D(F(-0.074658f) * AT(1, 7) + F(0.513280f) * AT(3, 7) + F(0.768178f) * AT(5, 7) + F(-0.375330f) * AT(7, 7));
-            const Temp_Type X130 = AT(6, 0);
-            const Temp_Type X131 = AT(6, 1);
-            const Temp_Type X132 = AT(6, 2);
-            const Temp_Type X133 = AT(6, 3);
-            const Temp_Type X134 = AT(6, 4);
-            const Temp_Type X135 = AT(6, 5);
-            const Temp_Type X136 = AT(6, 6);
-            const Temp_Type X137 = AT(6, 7);
-            // 80 muls 48 adds
-
-            // 4x4 = 4x8 times 8x4, matrix 1 is constant
-            R.at(0, 0) = X100;
-            R.at(0, 1) = D(X101 * F(0.415735f) + X103 * F(0.791065f) + X105 * F(-0.352443f) + X107 * F(0.277785f));
-            R.at(0, 2) = X104;
-            R.at(0, 3) = D(X101 * F(0.022887f) + X103 * F(-0.097545f) + X105 * F(0.490393f) + X107 * F(0.865723f));
-            R.at(1, 0) = X110;
-            R.at(1, 1) = D(X111 * F(0.415735f) + X113 * F(0.791065f) + X115 * F(-0.352443f) + X117 * F(0.277785f));
-            R.at(1, 2) = X114;
-            R.at(1, 3) = D(X111 * F(0.022887f) + X113 * F(-0.097545f) + X115 * F(0.490393f) + X117 * F(0.865723f));
-            R.at(2, 0) = X120;
-            R.at(2, 1) = D(X121 * F(0.415735f) + X123 * F(0.791065f) + X125 * F(-0.352443f) + X127 * F(0.277785f));
-            R.at(2, 2) = X124;
-            R.at(2, 3) = D(X121 * F(0.022887f) + X123 * F(-0.097545f) + X125 * F(0.490393f) + X127 * F(0.865723f));
-            R.at(3, 0) = X130;
-            R.at(3, 1) = D(X131 * F(0.415735f) + X133 * F(0.791065f) + X135 * F(-0.352443f) + X137 * F(0.277785f));
-            R.at(3, 2) = X134;
-            R.at(3, 3) = D(X131 * F(0.022887f) + X133 * F(-0.097545f) + X135 * F(0.490393f) + X137 * F(0.865723f));
-            // 40 muls 24 adds
-            // 4x4 = 4x8 times 8x4, matrix 1 is constant
-            S.at(0, 0) = D(X101 * F(0.906127f) + X103 * F(-0.318190f) + X105 * F(0.212608f) + X107 * F(-0.180240f));
-            S.at(0, 1) = X102;
-            S.at(0, 2) = D(X101 * F(-0.074658f) + X103 * F(0.513280f) + X105 * F(0.768178f) + X107 * F(-0.375330f));
-            S.at(0, 3) = X106;
-            S.at(1, 0) = D(X111 * F(0.906127f) + X113 * F(-0.318190f) + X115 * F(0.212608f) + X117 * F(-0.180240f));
-            S.at(1, 1) = X112;
-            S.at(1, 2) = D(X111 * F(-0.074658f) + X113 * F(0.513280f) + X115 * F(0.768178f) + X117 * F(-0.375330f));
-            S.at(1, 3) = X116;
-            S.at(2, 0) = D(X121 * F(0.906127f) + X123 * F(-0.318190f) + X125 * F(0.212608f) + X127 * F(-0.180240f));
-            S.at(2, 1) = X122;
-            S.at(2, 2) = D(X121 * F(-0.074658f) + X123 * F(0.513280f) + X125 * F(0.768178f) + X127 * F(-0.375330f));
-            S.at(2, 3) = X126;
-            S.at(3, 0) = D(X131 * F(0.906127f) + X133 * F(-0.318190f) + X135 * F(0.212608f) + X137 * F(-0.180240f));
-            S.at(3, 1) = X132;
-            S.at(3, 2) = D(X131 * F(-0.074658f) + X133 * F(0.513280f) + X135 * F(0.768178f) + X137 * F(-0.375330f));
-            S.at(3, 3) = X136;
-            // 40 muls 24 adds
-        }
-    };
-} // end namespace DCT_Upsample
-
-
-// Unconditionally frees all allocated m_blocks.
-void jpeg_decoder::free_all_blocks()
-{
-    delete(m_pStream);
-    m_pStream = nullptr;
-
-    for (mem_block *b = m_pMem_blocks; b; ) {
-        mem_block *n = b->m_pNext;
-        free(b);
-        b = n;
-    }
-    m_pMem_blocks = nullptr;
-}
-
-
-// This method handles all errors. It will never return.
-// It could easily be changed to use C++ exceptions.
-JPGD_NORETURN void jpeg_decoder::stop_decoding(jpgd_status status)
-{
-    m_error_code = status;
-    free_all_blocks();
-    longjmp(m_jmp_state, status);
-}
-
-
-void *jpeg_decoder::alloc(size_t nSize, bool zero)
-{
-    nSize = (JPGD_MAX(nSize, 1) + 3) & ~3;
-    char *rv = nullptr;
-    for (mem_block *b = m_pMem_blocks; b; b = b->m_pNext) {
-        if ((b->m_used_count + nSize) <= b->m_size) {
-            rv = b->m_data + b->m_used_count;
-            b->m_used_count += nSize;
-            break;
-        }
-    }
-    if (!rv) {
-        int capacity = JPGD_MAX(32768 - 256, (nSize + 2047) & ~2047);
-        mem_block *b = (mem_block*)malloc(sizeof(mem_block) + capacity);
-        if (!b) stop_decoding(JPGD_NOTENOUGHMEM);
-        b->m_pNext = m_pMem_blocks; m_pMem_blocks = b;
-        b->m_used_count = nSize;
-        b->m_size = capacity;
-        rv = b->m_data;
-    }
-    if (zero) memset(rv, 0, nSize);
-    return rv;
-}
-
-
-void jpeg_decoder::word_clear(void *p, uint16_t c, uint32_t n)
-{
-    uint8_t *pD = (uint8_t*)p;
-    const uint8_t l = c & 0xFF, h = (c >> 8) & 0xFF;
-    while (n) {
-        pD[0] = l; pD[1] = h; pD += 2;
-        n--;
-    }
-}
-
-
-// Refill the input buffer.
-// This method will sit in a loop until (A) the buffer is full or (B)
-// the stream's read() method reports and end of file condition.
-void jpeg_decoder::prep_in_buffer()
-{
-    m_in_buf_left = 0;
-    m_pIn_buf_ofs = m_in_buf;
-
-    if (m_eof_flag) return;
-
-    do {
-        int bytes_read = m_pStream->read(m_in_buf + m_in_buf_left, JPGD_IN_BUF_SIZE - m_in_buf_left, &m_eof_flag);
-        if (bytes_read == -1) stop_decoding(JPGD_STREAM_READ);
-        m_in_buf_left += bytes_read;
-    } while ((m_in_buf_left < JPGD_IN_BUF_SIZE) && (!m_eof_flag));
-
-    m_total_bytes_read += m_in_buf_left;
-
-    // Pad the end of the block with M_EOI (prevents the decompressor from going off the rails if the stream is invalid).
-    // (This dates way back to when this decompressor was written in C/asm, and the all-asm Huffman decoder did some fancy things to increase perf.)
-    word_clear(m_pIn_buf_ofs + m_in_buf_left, 0xD9FF, 64);
-}
-
-
-// Read a Huffman code table.
-void jpeg_decoder::read_dht_marker()
-{
-    int i, index, count;
-    uint8_t huff_num[17];
-    uint8_t huff_val[256];
-    uint32_t num_left = get_bits(16);
-
-    if (num_left < 2) stop_decoding(JPGD_BAD_DHT_MARKER);
-    num_left -= 2;
-
-    while (num_left) {
-        index = get_bits(8);
-        huff_num[0] = 0;
-        count = 0;
-
-        for (i = 1; i <= 16; i++) {
-            huff_num[i] = static_cast<uint8_t>(get_bits(8));
-            count += huff_num[i];
-        }
-
-        if (count > 255) stop_decoding(JPGD_BAD_DHT_COUNTS);
-
-        for (i = 0; i < count; i++)
-            huff_val[i] = static_cast<uint8_t>(get_bits(8));
-
-        i = 1 + 16 + count;
-
-        if (num_left < (uint32_t)i) stop_decoding(JPGD_BAD_DHT_MARKER);
-        num_left -= i;
-
-        if ((index & 0x10) > 0x10) stop_decoding(JPGD_BAD_DHT_INDEX);
-        index = (index & 0x0F) + ((index & 0x10) >> 4) * (JPGD_MAX_HUFF_TABLES >> 1);
-        if (index >= JPGD_MAX_HUFF_TABLES) stop_decoding(JPGD_BAD_DHT_INDEX);
-
-        if (!m_huff_num[index]) m_huff_num[index] = (uint8_t *)alloc(17);
-        if (!m_huff_val[index]) m_huff_val[index] = (uint8_t *)alloc(256);
-
-        m_huff_ac[index] = (index & 0x10) != 0;
-        memcpy(m_huff_num[index], huff_num, 17);
-        memcpy(m_huff_val[index], huff_val, 256);
-    }
-}
-
-
-// Read a quantization table.
-void jpeg_decoder::read_dqt_marker()
-{
-    int n, i, prec;
-    uint32_t temp;
-    uint32_t num_left = get_bits(16);
-    if (num_left < 2) stop_decoding(JPGD_BAD_DQT_MARKER);
-    num_left -= 2;
-
-    while (num_left) {
-        n = get_bits(8);
-        prec = n >> 4;
-        n &= 0x0F;
-
-        if (n >= JPGD_MAX_QUANT_TABLES) stop_decoding(JPGD_BAD_DQT_TABLE);
-
-        if (!m_quant[n]) m_quant[n] = (jpgd_quant_t *)alloc(64 * sizeof(jpgd_quant_t));
-
-        // read quantization entries, in zag order
-        for (i = 0; i < 64; i++) {
-            temp = get_bits(8);
-            if (prec) temp = (temp << 8) + get_bits(8);
-            m_quant[n][i] = static_cast<jpgd_quant_t>(temp);
-        }
-        i = 64 + 1;
-        if (prec) i += 64;
-        if (num_left < (uint32_t)i) stop_decoding(JPGD_BAD_DQT_LENGTH);
-        num_left -= i;
-    }
-}
-
-
-// Read the start of frame (SOF) marker.
-void jpeg_decoder::read_sof_marker()
-{
-    int i;
-    uint32_t num_left = get_bits(16);
-
-    if (get_bits(8) != 8) stop_decoding(JPGD_BAD_PRECISION);   /* precision: sorry, only 8-bit precision is supported right now */
-
-    m_image_y_size = get_bits(16);
-    if ((m_image_y_size < 1) || (m_image_y_size > JPGD_MAX_HEIGHT)) stop_decoding(JPGD_BAD_HEIGHT);
-
-    m_image_x_size = get_bits(16);
-    if ((m_image_x_size < 1) || (m_image_x_size > JPGD_MAX_WIDTH)) stop_decoding(JPGD_BAD_WIDTH);
-
-    m_comps_in_frame = get_bits(8);
-    if (m_comps_in_frame > JPGD_MAX_COMPONENTS) stop_decoding(JPGD_TOO_MANY_COMPONENTS);
-
-    if (num_left != (uint32_t)(m_comps_in_frame * 3 + 8)) stop_decoding(JPGD_BAD_SOF_LENGTH);
-
-    for (i = 0; i < m_comps_in_frame; i++) {
-        m_comp_ident[i]  = get_bits(8);
-        m_comp_h_samp[i] = get_bits(4);
-        m_comp_v_samp[i] = get_bits(4);
-        m_comp_quant[i]  = get_bits(8);
-    }
-}
-
-
-// Used to skip unrecognized markers.
-void jpeg_decoder::skip_variable_marker()
-{
-    uint32_t num_left = get_bits(16);
-    if (num_left < 2) stop_decoding(JPGD_BAD_VARIABLE_MARKER);
-    num_left -= 2;
-
-    while (num_left) {
-        get_bits(8);
-        num_left--;
-    }
-}
-
-
-// Read a define restart interval (DRI) marker.
-void jpeg_decoder::read_dri_marker()
-{
-    if (get_bits(16) != 4) stop_decoding(JPGD_BAD_DRI_LENGTH);
-    m_restart_interval = get_bits(16);
-}
-
-
-// Read a start of scan (SOS) marker.
-void jpeg_decoder::read_sos_marker()
-{
-    int i, ci, c, cc;
-    uint32_t num_left = get_bits(16);
-    int n = get_bits(8);
-
-    m_comps_in_scan = n;
-    num_left -= 3;
-
-    if ( (num_left != (uint32_t)(n * 2 + 3)) || (n < 1) || (n > JPGD_MAX_COMPS_IN_SCAN) ) stop_decoding(JPGD_BAD_SOS_LENGTH);
-
-    for (i = 0; i < n; i++) {
-        cc = get_bits(8);
-        c = get_bits(8);
-        num_left -= 2;
-
-        for (ci = 0; ci < m_comps_in_frame; ci++)
-          if (cc == m_comp_ident[ci]) break;
-
-        if (ci >= m_comps_in_frame) stop_decoding(JPGD_BAD_SOS_COMP_ID);
-
-        m_comp_list[i]    = ci;
-        m_comp_dc_tab[ci] = (c >> 4) & 15;
-        m_comp_ac_tab[ci] = (c & 15) + (JPGD_MAX_HUFF_TABLES >> 1);
-    }
-    m_spectral_start  = get_bits(8);
-    m_spectral_end    = get_bits(8);
-    m_successive_high = get_bits(4);
-    m_successive_low  = get_bits(4);
-
-    if (!m_progressive_flag) {
-        m_spectral_start = 0;
-        m_spectral_end = 63;
-    }
-    num_left -= 3;
-
-    while (num_left) {    /* read past whatever is num_left */
-        get_bits(8);
-        num_left--;
-    }
-}
-
-
-// Finds the next marker.
-int jpeg_decoder::next_marker()
-{
-    uint32_t c;
-
-    do {
-        do {
-            c = get_bits(8);
-        } while (c != 0xFF);
-
-        do {
-            c = get_bits(8);
-        } while (c == 0xFF);
-    } while (c == 0);
-
-    return c;
-}
-
-
-// Process markers. Returns when an SOFx, SOI, EOI, or SOS marker is
-// encountered.
-int jpeg_decoder::process_markers()
-{
-    int c;
-
-    for ( ; ; ) {
-        c = next_marker();
-        switch (c) {
-            case M_SOF0:
-            case M_SOF1:
-            case M_SOF2:
-            case M_SOF3:
-            case M_SOF5:
-            case M_SOF6:
-            case M_SOF7:
-      //      case M_JPG:
-            case M_SOF9:
-            case M_SOF10:
-            case M_SOF11:
-            case M_SOF13:
-            case M_SOF14:
-            case M_SOF15:
-            case M_SOI:
-            case M_EOI:
-            case M_SOS: return c;
-            case M_DHT: {
-                read_dht_marker();
-                break;
-            }
-            // No arithmetic support - dumb patents!
-            case M_DAC: {
-                stop_decoding(JPGD_NO_ARITHMETIC_SUPPORT);
-                break;
-            }
-            case M_DQT: {
-                read_dqt_marker();
-                break;
-            }
-            case M_DRI: {
-                read_dri_marker();
-                break;
-            }
-            //case M_APP0:  /* no need to read the JFIF marker */
-            case M_JPG:
-            case M_RST0:    /* no parameters */
-            case M_RST1:
-            case M_RST2:
-            case M_RST3:
-            case M_RST4:
-            case M_RST5:
-            case M_RST6:
-            case M_RST7:
-            case M_TEM: {
-                stop_decoding(JPGD_UNEXPECTED_MARKER);
-                break;
-            }
-            default: {   /* must be DNL, DHP, EXP, APPn, JPGn, COM, or RESn or APP0 */
-                skip_variable_marker();
-                break;
-            }
-        }
-    }
-}
-
-
-// Finds the start of image (SOI) marker.
-// This code is rather defensive: it only checks the first 512 bytes to avoid
-// false positives.
-void jpeg_decoder::locate_soi_marker()
-{
-    uint32_t lastchar = get_bits(8);
-    uint32_t thischar = get_bits(8);
-
-    /* ok if it's a normal JPEG file without a special header */
-    if ((lastchar == 0xFF) && (thischar == M_SOI)) return;
-
-    uint32_t bytesleft = 4096; //512;
-
-    while (true) {
-        if (--bytesleft == 0) stop_decoding(JPGD_NOT_JPEG);
-
-        lastchar = thischar;
-        thischar = get_bits(8);
-
-        if (lastchar == 0xFF) {
-          if (thischar == M_SOI) break;
-          else if (thischar == M_EOI) stop_decoding(JPGD_NOT_JPEG); // get_bits will keep returning M_EOI if we read past the end
-        }
-    }
-
-    // Check the next character after marker: if it's not 0xFF, it can't be the start of the next marker, so the file is bad.
-    thischar = (m_bit_buf >> 24) & 0xFF;
-    if (thischar != 0xFF) stop_decoding(JPGD_NOT_JPEG);
-}
-
-
-// Find a start of frame (SOF) marker.
-void jpeg_decoder::locate_sof_marker()
-{
-    locate_soi_marker();
-    int c = process_markers();
-
-    switch (c) {
-        case M_SOF2: {
-            m_progressive_flag = true;
-            read_sof_marker();
-            break;
-        }
-        case M_SOF0:  /* baseline DCT */
-        case M_SOF1: { /* extended sequential DCT */
-          read_sof_marker();
-          break;
-        }
-        case M_SOF9: {  /* Arithmetic coding */
-          stop_decoding(JPGD_NO_ARITHMETIC_SUPPORT);
-          break;
-        }
-        default: {
-          stop_decoding(JPGD_UNSUPPORTED_MARKER);
-          break;
-        }
-    }
-}
-
-
-// Find a start of scan (SOS) marker.
-int jpeg_decoder::locate_sos_marker()
-{
-    int c = process_markers();
-    if (c == M_EOI) return false;
-    else if (c != M_SOS) stop_decoding(JPGD_UNEXPECTED_MARKER);
-    read_sos_marker();
-    return true;
-}
-
-
-// Reset everything to default/uninitialized state.
-void jpeg_decoder::init(jpeg_decoder_stream *pStream)
-{
-    m_pMem_blocks = nullptr;
-    m_error_code = JPGD_SUCCESS;
-    m_ready_flag = false;
-    m_image_x_size = m_image_y_size = 0;
-    m_pStream = pStream;
-    m_progressive_flag = false;
-
-    memset(m_huff_ac, 0, sizeof(m_huff_ac));
-    memset(m_huff_num, 0, sizeof(m_huff_num));
-    memset(m_huff_val, 0, sizeof(m_huff_val));
-    memset(m_quant, 0, sizeof(m_quant));
-
-    m_scan_type = 0;
-    m_comps_in_frame = 0;
-
-    memset(m_comp_h_samp, 0, sizeof(m_comp_h_samp));
-    memset(m_comp_v_samp, 0, sizeof(m_comp_v_samp));
-    memset(m_comp_quant, 0, sizeof(m_comp_quant));
-    memset(m_comp_ident, 0, sizeof(m_comp_ident));
-    memset(m_comp_h_blocks, 0, sizeof(m_comp_h_blocks));
-    memset(m_comp_v_blocks, 0, sizeof(m_comp_v_blocks));
-
-    m_comps_in_scan = 0;
-    memset(m_comp_list, 0, sizeof(m_comp_list));
-    memset(m_comp_dc_tab, 0, sizeof(m_comp_dc_tab));
-    memset(m_comp_ac_tab, 0, sizeof(m_comp_ac_tab));
-
-    m_spectral_start = 0;
-    m_spectral_end = 0;
-    m_successive_low = 0;
-    m_successive_high = 0;
-    m_max_mcu_x_size = 0;
-    m_max_mcu_y_size = 0;
-    m_blocks_per_mcu = 0;
-    m_max_blocks_per_row = 0;
-    m_mcus_per_row = 0;
-    m_mcus_per_col = 0;
-    m_expanded_blocks_per_component = 0;
-    m_expanded_blocks_per_mcu = 0;
-    m_expanded_blocks_per_row = 0;
-    m_freq_domain_chroma_upsample = false;
-
-    memset(m_mcu_org, 0, sizeof(m_mcu_org));
-
-    m_total_lines_left = 0;
-    m_mcu_lines_left = 0;
-    m_real_dest_bytes_per_scan_line = 0;
-    m_dest_bytes_per_scan_line = 0;
-    m_dest_bytes_per_pixel = 0;
-
-    memset(m_pHuff_tabs, 0, sizeof(m_pHuff_tabs));
-
-    memset(m_dc_coeffs, 0, sizeof(m_dc_coeffs));
-    memset(m_ac_coeffs, 0, sizeof(m_ac_coeffs));
-    memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
-
-    m_eob_run = 0;
-
-    memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
-
-    m_pIn_buf_ofs = m_in_buf;
-    m_in_buf_left = 0;
-    m_eof_flag = false;
-    m_tem_flag = 0;
-
-    memset(m_in_buf_pad_start, 0, sizeof(m_in_buf_pad_start));
-    memset(m_in_buf, 0, sizeof(m_in_buf));
-    memset(m_in_buf_pad_end, 0, sizeof(m_in_buf_pad_end));
-
-    m_restart_interval = 0;
-    m_restarts_left    = 0;
-    m_next_restart_num = 0;
-
-    m_max_mcus_per_row = 0;
-    m_max_blocks_per_mcu = 0;
-    m_max_mcus_per_col = 0;
-
-    memset(m_last_dc_val, 0, sizeof(m_last_dc_val));
-    m_pMCU_coefficients = nullptr;
-    m_pSample_buf = nullptr;
-
-    m_total_bytes_read = 0;
-
-    m_pScan_line_0 = nullptr;
-    m_pScan_line_1 = nullptr;
-
-    // Ready the input buffer.
-    prep_in_buffer();
-
-    // Prime the bit buffer.
-    m_bits_left = 16;
-    m_bit_buf = 0;
-
-    get_bits(16);
-    get_bits(16);
-
-    for (int i = 0; i < JPGD_MAX_BLOCKS_PER_MCU; i++) {
-        m_mcu_block_max_zag[i] = 64;
-    }
-}
-
-#define SCALEBITS 16
-#define ONE_HALF  ((int) 1 << (SCALEBITS-1))
-#define FIX(x)    ((int) ((x) * (1L<<SCALEBITS) + 0.5f))
-
-
-// Create a few tables that allow us to quickly convert YCbCr to RGB.
-void jpeg_decoder::create_look_ups()
-{
-  for (int i = 0; i <= 255; i++) {
-      int k = i - 128;
-      m_crr[i] = ( FIX(1.40200f)  * k + ONE_HALF) >> SCALEBITS;
-      m_cbb[i] = ( FIX(1.77200f)  * k + ONE_HALF) >> SCALEBITS;
-      m_crg[i] = (-FIX(0.71414f)) * k;
-      m_cbg[i] = (-FIX(0.34414f)) * k + ONE_HALF;
-  }
-}
-
-
-// This method throws back into the stream any bytes that where read
-// into the bit buffer during initial marker scanning.
-void jpeg_decoder::fix_in_buffer()
-{
-    // In case any 0xFF's where pulled into the buffer during marker scanning.
-    JPGD_ASSERT((m_bits_left & 7) == 0);
-
-    if (m_bits_left == 16) stuff_char( (uint8_t)(m_bit_buf & 0xFF));
-    if (m_bits_left >= 8) stuff_char( (uint8_t)((m_bit_buf >> 8) & 0xFF));
-
-    stuff_char((uint8_t)((m_bit_buf >> 16) & 0xFF));
-    stuff_char((uint8_t)((m_bit_buf >> 24) & 0xFF));
-
-    m_bits_left = 16;
-    get_bits_no_markers(16);
-    get_bits_no_markers(16);
-}
-
-
-void jpeg_decoder::transform_mcu(int mcu_row)
-{
-    jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
-    uint8_t* pDst_ptr = m_pSample_buf + mcu_row * m_blocks_per_mcu * 64;
-
-    for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) {
-        idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
-        pSrc_ptr += 64;
-        pDst_ptr += 64;
-    }
-}
-
-
-static const uint8_t s_max_rc[64] =
-{
-    17, 18, 34, 50, 50, 51, 52, 52, 52, 68, 84, 84, 84, 84, 85, 86, 86, 86, 86, 86,
-    102, 118, 118, 118, 118, 118, 118, 119, 120, 120, 120, 120, 120, 120, 120, 136,
-    136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136,
-    136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136, 136
-};
-
-
-void jpeg_decoder::transform_mcu_expand(int mcu_row)
-{
-    jpgd_block_t* pSrc_ptr = m_pMCU_coefficients;
-    uint8_t* pDst_ptr = m_pSample_buf + mcu_row * m_expanded_blocks_per_mcu * 64;
-
-    // Y IDCT
-    int mcu_block;
-    for (mcu_block = 0; mcu_block < m_expanded_blocks_per_component; mcu_block++) {
-        idct(pSrc_ptr, pDst_ptr, m_mcu_block_max_zag[mcu_block]);
-        pSrc_ptr += 64;
-        pDst_ptr += 64;
-    }
-
-    // Chroma IDCT, with upsampling
-    jpgd_block_t temp_block[64];
-
-    for (int i = 0; i < 2; i++) {
-        DCT_Upsample::Matrix44 P, Q, R, S;
-        JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] >= 1);
-        JPGD_ASSERT(m_mcu_block_max_zag[mcu_block] <= 64);
-
-        int max_zag = m_mcu_block_max_zag[mcu_block++] - 1;
-        if (max_zag <= 0) max_zag = 0; // should never happen, only here to shut up static analysis
-
-        switch (s_max_rc[max_zag]) {
-            case 1*16+1:
-                DCT_Upsample::P_Q<1, 1>::calc(P, Q, pSrc_ptr);
-                DCT_Upsample::R_S<1, 1>::calc(R, S, pSrc_ptr);
-                break;
-            case 1*16+2:
-                DCT_Upsample::P_Q<1, 2>::calc(P, Q, pSrc_ptr);
-                DCT_Upsample::R_S<1, 2>::calc(R, S, pSrc_ptr);
-                break;
-            case 2*16+2:
-                DCT_Upsample::P_Q<2, 2>::calc(P, Q, pSrc_ptr);
-                DCT_Upsample::R_S<2, 2>::calc(R, S, pSrc_ptr);
-                break;
-            case 3*16+2:
-                DCT_Upsample::P_Q<3, 2>::calc(P, Q, pSrc_ptr);
-                DCT_Upsample::R_S<3, 2>::calc(R, S, pSrc_ptr);
-                break;
-            case 3*16+3:
-                DCT_Upsample::P_Q<3, 3>::calc(P, Q, pSrc_ptr);
-                DCT_Upsample::R_S<3, 3>::calc(R, S, pSrc_ptr);
-                break;
-            case 3*16+4:
-                DCT_Upsample::P_Q<3, 4>::calc(P, Q, pSrc_ptr);
-                DCT_Upsample::R_S<3, 4>::calc(R, S, pSrc_ptr);
-                break;
-            case 4*16+4:
-                DCT_Upsample::P_Q<4, 4>::calc(P, Q, pSrc_ptr);
-                DCT_Upsample::R_S<4, 4>::calc(R, S, pSrc_ptr);
-                break;
-            case 5*16+4:
-                DCT_Upsample::P_Q<5, 4>::calc(P, Q, pSrc_ptr);
-                DCT_Upsample::R_S<5, 4>::calc(R, S, pSrc_ptr);
-                break;
-            case 5*16+5:
-                DCT_Upsample::P_Q<5, 5>::calc(P, Q, pSrc_ptr);
-                DCT_Upsample::R_S<5, 5>::calc(R, S, pSrc_ptr);
-                break;
-            case 5*16+6:
-                DCT_Upsample::P_Q<5, 6>::calc(P, Q, pSrc_ptr);
-                DCT_Upsample::R_S<5, 6>::calc(R, S, pSrc_ptr);
-                break;
-            case 6*16+6:
-                DCT_Upsample::P_Q<6, 6>::calc(P, Q, pSrc_ptr);
-                DCT_Upsample::R_S<6, 6>::calc(R, S, pSrc_ptr);
-                break;
-            case 7*16+6:
-                DCT_Upsample::P_Q<7, 6>::calc(P, Q, pSrc_ptr);
-                DCT_Upsample::R_S<7, 6>::calc(R, S, pSrc_ptr);
-                break;
-            case 7*16+7:
-                DCT_Upsample::P_Q<7, 7>::calc(P, Q, pSrc_ptr);
-                DCT_Upsample::R_S<7, 7>::calc(R, S, pSrc_ptr);
-                break;
-            case 7*16+8:
-                DCT_Upsample::P_Q<7, 8>::calc(P, Q, pSrc_ptr);
-                DCT_Upsample::R_S<7, 8>::calc(R, S, pSrc_ptr);
-                break;
-            case 8*16+8:
-                DCT_Upsample::P_Q<8, 8>::calc(P, Q, pSrc_ptr);
-                DCT_Upsample::R_S<8, 8>::calc(R, S, pSrc_ptr);
-                break;
-            default:
-                TVGERR("JPG", "invalid transform_mcu_expand");
-                return;
-        }
-        DCT_Upsample::Matrix44 a(P + Q); P -= Q;
-        DCT_Upsample::Matrix44& b = P;
-        DCT_Upsample::Matrix44 c(R + S); R -= S;
-        DCT_Upsample::Matrix44& d = R;
-
-        DCT_Upsample::Matrix44::add_and_store(temp_block, a, c);
-        idct_4x4(temp_block, pDst_ptr);
-        pDst_ptr += 64;
-
-        DCT_Upsample::Matrix44::sub_and_store(temp_block, a, c);
-        idct_4x4(temp_block, pDst_ptr);
-        pDst_ptr += 64;
-
-        DCT_Upsample::Matrix44::add_and_store(temp_block, b, d);
-        idct_4x4(temp_block, pDst_ptr);
-        pDst_ptr += 64;
-
-        DCT_Upsample::Matrix44::sub_and_store(temp_block, b, d);
-        idct_4x4(temp_block, pDst_ptr);
-        pDst_ptr += 64;
-        pSrc_ptr += 64;
-    }
-}
-
-
-// Loads and dequantizes the next row of (already decoded) coefficients.
-// Progressive images only.
-void jpeg_decoder::load_next_row()
-{
-    int i;
-    jpgd_block_t *p;
-    jpgd_quant_t *q;
-    int mcu_row, mcu_block;
-    int component_num, component_id;
-    int block_x_mcu[JPGD_MAX_COMPONENTS];
-
-    memset(block_x_mcu, 0, JPGD_MAX_COMPONENTS * sizeof(int));
-
-    for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) {
-        int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
-
-        for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) {
-            component_id = m_mcu_org[mcu_block];
-            q = m_quant[m_comp_quant[component_id]];
-            p = m_pMCU_coefficients + 64 * mcu_block;
-
-            jpgd_block_t* pAC = coeff_buf_getp(m_ac_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
-            jpgd_block_t* pDC = coeff_buf_getp(m_dc_coeffs[component_id], block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
-            p[0] = pDC[0];
-            memcpy(&p[1], &pAC[1], 63 * sizeof(jpgd_block_t));
-
-            for (i = 63; i > 0; i--) {
-                if (p[g_ZAG[i]]) break;
-            }
-
-            m_mcu_block_max_zag[mcu_block] = i + 1;
-
-            for ( ; i >= 0; i--) {
-                if (p[g_ZAG[i]]) {
-                    p[g_ZAG[i]] = static_cast<jpgd_block_t>(p[g_ZAG[i]] * q[i]);
-                }
-            }
-
-            if (m_comps_in_scan == 1) block_x_mcu[component_id]++;
-            else {
-                if (++block_x_mcu_ofs == m_comp_h_samp[component_id]) block_x_mcu_ofs = 0;
-                if (++block_y_mcu_ofs == m_comp_v_samp[component_id]) {
-                    block_y_mcu_ofs = 0;
-                    block_x_mcu[component_id] += m_comp_h_samp[component_id];
-                }
-            }
-        }
-        if (m_freq_domain_chroma_upsample) transform_mcu_expand(mcu_row);
-        else transform_mcu(mcu_row);
-    }
-    if (m_comps_in_scan == 1) m_block_y_mcu[m_comp_list[0]]++;
-    else {
-        for (component_num = 0; component_num < m_comps_in_scan; component_num++) {
-            component_id = m_comp_list[component_num];
-            m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
-        }
-    }
-}
-
-
-// Restart interval processing.
-void jpeg_decoder::process_restart()
-{
-    int i;
-    int c = 0;
-
-    // Align to a byte boundary
-    // FIXME: Is this really necessary? get_bits_no_markers() never reads in markers!
-    //get_bits_no_markers(m_bits_left & 7);
-
-    // Let's scan a little bit to find the marker, but not _too_ far.
-    // 1536 is a "fudge factor" that determines how much to scan.
-    for (i = 1536; i > 0; i--) {
-        if (get_char() == 0xFF) break;
-    }
-    if (i == 0) stop_decoding(JPGD_BAD_RESTART_MARKER);
-
-    for ( ; i > 0; i--) {
-        if ((c = get_char()) != 0xFF) break;
-    }
-    if (i == 0) stop_decoding(JPGD_BAD_RESTART_MARKER);
-
-    // Is it the expected marker? If not, something bad happened.
-    if (c != (m_next_restart_num + M_RST0)) stop_decoding(JPGD_BAD_RESTART_MARKER);
-
-    // Reset each component's DC prediction values.
-    memset(&m_last_dc_val, 0, m_comps_in_frame * sizeof(uint32_t));
-
-    m_eob_run = 0;
-    m_restarts_left = m_restart_interval;
-    m_next_restart_num = (m_next_restart_num + 1) & 7;
-
-    // Get the bit buffer going again...
-    m_bits_left = 16;
-    get_bits_no_markers(16);
-    get_bits_no_markers(16);
-}
-
-
-static inline int dequantize_ac(int c, int q)
-{
-    c *= q;
-    return c;
-}
-
-// Decodes and dequantizes the next row of coefficients.
-void jpeg_decoder::decode_next_row()
-{
-    for (int mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) {
-        if ((m_restart_interval) && (m_restarts_left == 0)) process_restart();
-
-        jpgd_block_t* p = m_pMCU_coefficients;
-
-        for (int mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++, p += 64) {
-            int component_id = m_mcu_org[mcu_block];
-            jpgd_quant_t* q = m_quant[m_comp_quant[component_id]];
-
-            int r, s;
-            s = huff_decode(m_pHuff_tabs[m_comp_dc_tab[component_id]], r);
-            s = JPGD_HUFF_EXTEND(r, s);
-
-            m_last_dc_val[component_id] = (s += m_last_dc_val[component_id]);
-
-            p[0] = static_cast<jpgd_block_t>(s * q[0]);
-
-            int prev_num_set = m_mcu_block_max_zag[mcu_block];
-            huff_tables *pH = m_pHuff_tabs[m_comp_ac_tab[component_id]];
-            int k;
-            for (k = 1; k < 64; k++) {
-                int extra_bits;
-                s = huff_decode(pH, extra_bits);
-                r = s >> 4;
-                s &= 15;
-
-                if (s) {
-                    if (r) {
-                        if ((k + r) > 63) stop_decoding(JPGD_DECODE_ERROR);
-                        if (k < prev_num_set) {
-                            int n = JPGD_MIN(r, prev_num_set - k);
-                            int kt = k;
-                            while (n--) p[g_ZAG[kt++]] = 0;
-                        }
-                        k += r;
-                    }
-                    s = JPGD_HUFF_EXTEND(extra_bits, s);
-                    JPGD_ASSERT(k < 64);
-                    p[g_ZAG[k]] = static_cast<jpgd_block_t>(dequantize_ac(s, q[k])); //s * q[k];
-                } else {
-                    if (r == 15) {
-                        if ((k + 16) > 64) stop_decoding(JPGD_DECODE_ERROR);
-                        if (k < prev_num_set) {
-                            int n = JPGD_MIN(16, prev_num_set - k);
-                            int kt = k;
-                            while (n--) {
-                                JPGD_ASSERT(kt <= 63);
-                                p[g_ZAG[kt++]] = 0;
-                            }
-                        }
-                        k += 16 - 1; // - 1 because the loop counter is k
-                        JPGD_ASSERT(p[g_ZAG[k]] == 0);
-                    } else  break;
-                }
-            }
-
-            if (k < prev_num_set) {
-                int kt = k;
-                while (kt < prev_num_set) p[g_ZAG[kt++]] = 0;
-            }
-
-            m_mcu_block_max_zag[mcu_block] = k;
-        }
-        if (m_freq_domain_chroma_upsample) transform_mcu_expand(mcu_row);
-        else transform_mcu(mcu_row);
-        m_restarts_left--;
-    }
-}
-
-
-// YCbCr H1V1 (1x1:1:1, 3 m_blocks per MCU) to RGB
-void jpeg_decoder::H1V1Convert()
-{
-    int row = m_max_mcu_y_size - m_mcu_lines_left;
-    uint8_t *d = m_pScan_line_0;
-    uint8_t *s = m_pSample_buf + row * 8;
-
-    for (int i = m_max_mcus_per_row; i > 0; i--) {
-        for (int j = 0; j < 8; j++) {
-            int y = s[j];
-            int cb = s[64+j];
-            int cr = s[128+j];
-
-            d[0] = clamp(y + m_crr[cr]);
-            d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
-            d[2] = clamp(y + m_cbb[cb]);
-            d[3] = 255;
-            d += 4;
-        }
-        s += 64*3;
-    }
-}
-
-
-// YCbCr H2V1 (2x1:1:1, 4 m_blocks per MCU) to RGB
-void jpeg_decoder::H2V1Convert()
-{
-    int row = m_max_mcu_y_size - m_mcu_lines_left;
-    uint8_t *d0 = m_pScan_line_0;
-    uint8_t *y = m_pSample_buf + row * 8;
-    uint8_t *c = m_pSample_buf + 2*64 + row * 8;
-
-    for (int i = m_max_mcus_per_row; i > 0; i--) {
-        for (int l = 0; l < 2; l++) {
-            for (int j = 0; j < 4; j++) {
-                int cb = c[0];
-                int cr = c[64];
-
-                int rc = m_crr[cr];
-                int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
-                int bc = m_cbb[cb];
-
-                int yy = y[j<<1];
-                d0[0] = clamp(yy+rc);
-                d0[1] = clamp(yy+gc);
-                d0[2] = clamp(yy+bc);
-                d0[3] = 255;
-
-                yy = y[(j<<1)+1];
-                d0[4] = clamp(yy+rc);
-                d0[5] = clamp(yy+gc);
-                d0[6] = clamp(yy+bc);
-                d0[7] = 255;
-                d0 += 8;
-                c++;
-            }
-            y += 64;
-        }
-        y += 64*4 - 64*2;
-        c += 64*4 - 8;
-    }
-}
-
-
-// YCbCr H2V1 (1x2:1:1, 4 m_blocks per MCU) to RGB
-void jpeg_decoder::H1V2Convert()
-{
-    int row = m_max_mcu_y_size - m_mcu_lines_left;
-    uint8_t *d0 = m_pScan_line_0;
-    uint8_t *d1 = m_pScan_line_1;
-    uint8_t *y;
-    uint8_t *c;
-
-    if (row < 8) y = m_pSample_buf + row * 8;
-    else y = m_pSample_buf + 64*1 + (row & 7) * 8;
-
-    c = m_pSample_buf + 64*2 + (row >> 1) * 8;
-
-    for (int i = m_max_mcus_per_row; i > 0; i--) {
-        for (int j = 0; j < 8; j++) {
-            int cb = c[0+j];
-            int cr = c[64+j];
-
-            int rc = m_crr[cr];
-            int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
-            int bc = m_cbb[cb];
-
-            int yy = y[j];
-            d0[0] = clamp(yy+rc);
-            d0[1] = clamp(yy+gc);
-            d0[2] = clamp(yy+bc);
-            d0[3] = 255;
-
-            yy = y[8+j];
-            d1[0] = clamp(yy+rc);
-            d1[1] = clamp(yy+gc);
-            d1[2] = clamp(yy+bc);
-            d1[3] = 255;
-
-            d0 += 4;
-            d1 += 4;
-        }
-        y += 64*4;
-        c += 64*4;
-    }
-}
-
-
-// YCbCr H2V2 (2x2:1:1, 6 m_blocks per MCU) to RGB
-void jpeg_decoder::H2V2Convert()
-{
-    int row = m_max_mcu_y_size - m_mcu_lines_left;
-    uint8_t *d0 = m_pScan_line_0;
-    uint8_t *d1 = m_pScan_line_1;
-    uint8_t *y;
-    uint8_t *c;
-
-    if (row < 8) y = m_pSample_buf + row * 8;
-    else y = m_pSample_buf + 64*2 + (row & 7) * 8;
-
-    c = m_pSample_buf + 64*4 + (row >> 1) * 8;
-
-    for (int i = m_max_mcus_per_row; i > 0; i--) {
-        for (int l = 0; l < 2; l++) {
-            for (int j = 0; j < 8; j += 2) {
-                int cb = c[0];
-                int cr = c[64];
-
-                int rc = m_crr[cr];
-                int gc = ((m_crg[cr] + m_cbg[cb]) >> 16);
-                int bc = m_cbb[cb];
-
-                int yy = y[j];
-                d0[0] = clamp(yy+rc);
-                d0[1] = clamp(yy+gc);
-                d0[2] = clamp(yy+bc);
-                d0[3] = 255;
-
-                yy = y[j+1];
-                d0[4] = clamp(yy+rc);
-                d0[5] = clamp(yy+gc);
-                d0[6] = clamp(yy+bc);
-                d0[7] = 255;
-
-                yy = y[j+8];
-                d1[0] = clamp(yy+rc);
-                d1[1] = clamp(yy+gc);
-                d1[2] = clamp(yy+bc);
-                d1[3] = 255;
-
-                yy = y[j+8+1];
-                d1[4] = clamp(yy+rc);
-                d1[5] = clamp(yy+gc);
-                d1[6] = clamp(yy+bc);
-                d1[7] = 255;
-
-                d0 += 8;
-                d1 += 8;
-
-                c++;
-            }
-            y += 64;
-        }
-        y += 64*6 - 64*2;
-        c += 64*6 - 8;
-    }
-}
-
-
-// Y (1 block per MCU) to 8-bit grayscale
-void jpeg_decoder::gray_convert()
-{
-    int row = m_max_mcu_y_size - m_mcu_lines_left;
-    uint8_t *d = m_pScan_line_0;
-    uint8_t *s = m_pSample_buf + row * 8;
-
-    for (int i = m_max_mcus_per_row; i > 0; i--) {
-        *(uint32_t *)d = *(uint32_t *)s;
-        *(uint32_t *)(&d[4]) = *(uint32_t *)(&s[4]);
-        s += 64;
-        d += 8;
-    }
-}
-
-
-void jpeg_decoder::expanded_convert()
-{
-    int row = m_max_mcu_y_size - m_mcu_lines_left;
-    uint8_t* Py = m_pSample_buf + (row / 8) * 64 * m_comp_h_samp[0] + (row & 7) * 8;
-    uint8_t* d = m_pScan_line_0;
-
-    for (int i = m_max_mcus_per_row; i > 0; i--) {
-        for (int k = 0; k < m_max_mcu_x_size; k += 8) {
-            const int Y_ofs = k * 8;
-            const int Cb_ofs = Y_ofs + 64 * m_expanded_blocks_per_component;
-            const int Cr_ofs = Y_ofs + 64 * m_expanded_blocks_per_component * 2;
-            for (int j = 0; j < 8; j++) {
-                int y = Py[Y_ofs + j];
-                int cb = Py[Cb_ofs + j];
-                int cr = Py[Cr_ofs + j];
-
-                d[0] = clamp(y + m_crr[cr]);
-                d[1] = clamp(y + ((m_crg[cr] + m_cbg[cb]) >> 16));
-                d[2] = clamp(y + m_cbb[cb]);
-                d[3] = 255;
-
-                d += 4;
-            }
-        }
-        Py += 64 * m_expanded_blocks_per_mcu;
-    }
-}
-
-
-// Find end of image (EOI) marker, so we can return to the user the exact size of the input stream.
-void jpeg_decoder::find_eoi()
-{
-    if (!m_progressive_flag) {
-        // Attempt to read the EOI marker.
-        //get_bits_no_markers(m_bits_left & 7);
-
-        // Prime the bit buffer
-        m_bits_left = 16;
-        get_bits(16);
-        get_bits(16);
-
-        // The next marker _should_ be EOI
-        process_markers();
-    }
-    m_total_bytes_read -= m_in_buf_left;
-}
-
-
-int jpeg_decoder::decode(const void** pScan_line, uint32_t* pScan_line_len)
-{
-    if ((m_error_code) || (!m_ready_flag)) return JPGD_FAILED;
-    if (m_total_lines_left == 0) return JPGD_DONE;
-    if (m_mcu_lines_left == 0) {
-        if (setjmp(m_jmp_state)) return JPGD_FAILED;
-        if (m_progressive_flag) load_next_row();
-        else decode_next_row();
-        // Find the EOI marker if that was the last row.
-        if (m_total_lines_left <= m_max_mcu_y_size) find_eoi();
-        m_mcu_lines_left = m_max_mcu_y_size;
-    }
-
-    if (m_freq_domain_chroma_upsample) {
-        expanded_convert();
-        *pScan_line = m_pScan_line_0;
-    } else {
-        switch (m_scan_type) {
-            case JPGD_YH2V2: {
-                if ((m_mcu_lines_left & 1) == 0) {
-                    H2V2Convert();
-                    *pScan_line = m_pScan_line_0;
-                }
-              else *pScan_line = m_pScan_line_1;
-              break;
-            }
-            case JPGD_YH2V1: {
-                H2V1Convert();
-                *pScan_line = m_pScan_line_0;
-                break;
-            }
-            case JPGD_YH1V2: {
-                if ((m_mcu_lines_left & 1) == 0) {
-                    H1V2Convert();
-                    *pScan_line = m_pScan_line_0;
-                } else *pScan_line = m_pScan_line_1;
-                break;
-            }
-            case JPGD_YH1V1: {
-                H1V1Convert();
-                *pScan_line = m_pScan_line_0;
-                break;
-            }
-            case JPGD_GRAYSCALE: {
-                gray_convert();
-                *pScan_line = m_pScan_line_0;
-                break;
-            }
-        }
-    }
-
-    *pScan_line_len = m_real_dest_bytes_per_scan_line;
-    m_mcu_lines_left--;
-    m_total_lines_left--;
-
-    return JPGD_SUCCESS;
-}
-
-
-// Creates the tables needed for efficient Huffman decoding.
-void jpeg_decoder::make_huff_table(int index, huff_tables *pH)
-{
-    int p, i, l, si;
-    uint8_t huffsize[257];
-    uint32_t huffcode[257];
-    uint32_t code;
-    uint32_t subtree;
-    int code_size;
-    int lastp;
-    int nextfreeentry;
-    int currententry;
-
-    pH->ac_table = m_huff_ac[index] != 0;
-    p = 0;
-
-    for (l = 1; l <= 16; l++)  {
-        for (i = 1; i <= m_huff_num[index][l]; i++) {
-            huffsize[p++] = static_cast<uint8_t>(l);
-        }
-    }
-
-    huffsize[p] = 0;
-    lastp = p;
-    code = 0;
-    si = huffsize[0];
-    p = 0;
-
-    while (huffsize[p]) {
-        while (huffsize[p] == si) {
-            huffcode[p++] = code;
-            code++;
-        }
-        code <<= 1;
-        si++;
-    }
-
-    memset(pH->look_up, 0, sizeof(pH->look_up));
-    memset(pH->look_up2, 0, sizeof(pH->look_up2));
-    memset(pH->tree, 0, sizeof(pH->tree));
-    memset(pH->code_size, 0, sizeof(pH->code_size));
-
-    nextfreeentry = -1;
-    p = 0;
-
-    while (p < lastp) {
-        i = m_huff_val[index][p];
-        code = huffcode[p];
-        code_size = huffsize[p];
-        pH->code_size[i] = static_cast<uint8_t>(code_size);
-
-        if (code_size <= 8) {
-            code <<= (8 - code_size);
-            for (l = 1 << (8 - code_size); l > 0; l--) {
-                JPGD_ASSERT(i < 256);
-                pH->look_up[code] = i;
-                bool has_extrabits = false;
-                int extra_bits = 0;
-                int num_extra_bits = i & 15;
-                int bits_to_fetch = code_size;
-
-                if (num_extra_bits) {
-                    int total_codesize = code_size + num_extra_bits;
-                    if (total_codesize <= 8) {
-                        has_extrabits = true;
-                        extra_bits = ((1 << num_extra_bits) - 1) & (code >> (8 - total_codesize));
-                        JPGD_ASSERT(extra_bits <= 0x7FFF);
-                        bits_to_fetch += num_extra_bits;
-                    }
-                }
-                if (!has_extrabits) pH->look_up2[code] = i | (bits_to_fetch << 8);
-                else pH->look_up2[code] = i | 0x8000 | (extra_bits << 16) | (bits_to_fetch << 8);
-                code++;
-            }
-        } else {
-            subtree = (code >> (code_size - 8)) & 0xFF;
-            currententry = pH->look_up[subtree];
-
-            if (currententry == 0) {
-                pH->look_up[subtree] = currententry = nextfreeentry;
-                pH->look_up2[subtree] = currententry = nextfreeentry;
-                nextfreeentry -= 2;
-            }
-
-            code <<= (16 - (code_size - 8));
-
-            for (l = code_size; l > 9; l--) {
-                if ((code & 0x8000) == 0) currententry--;
-                if (pH->tree[-currententry - 1] == 0) {
-                    pH->tree[-currententry - 1] = nextfreeentry;
-                    currententry = nextfreeentry;
-                    nextfreeentry -= 2;
-                } else currententry = pH->tree[-currententry - 1];
-                code <<= 1;
-            }
-            if ((code & 0x8000) == 0) currententry--;
-            pH->tree[-currententry - 1] = i;
-        }
-        p++;
-    }
-}
-
-
-// Verifies the quantization tables needed for this scan are available.
-void jpeg_decoder::check_quant_tables()
-{
-    for (int i = 0; i < m_comps_in_scan; i++) {
-        if (m_quant[m_comp_quant[m_comp_list[i]]] == nullptr) stop_decoding(JPGD_UNDEFINED_QUANT_TABLE);
-    }
-}
-
-
-// Verifies that all the Huffman tables needed for this scan are available.
-void jpeg_decoder::check_huff_tables()
-{
-    for (int i = 0; i < m_comps_in_scan; i++) {
-      if ((m_spectral_start == 0) && (m_huff_num[m_comp_dc_tab[m_comp_list[i]]] == nullptr)) stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
-      if ((m_spectral_end > 0) && (m_huff_num[m_comp_ac_tab[m_comp_list[i]]] == nullptr)) stop_decoding(JPGD_UNDEFINED_HUFF_TABLE);
-    }
-
-    for (int i = 0; i < JPGD_MAX_HUFF_TABLES; i++) {
-        if (m_huff_num[i]) {
-            if (!m_pHuff_tabs[i]) m_pHuff_tabs[i] = (huff_tables *)alloc(sizeof(huff_tables));
-            make_huff_table(i, m_pHuff_tabs[i]);
-        }
-    }
-}
-
-
-// Determines the component order inside each MCU.
-// Also calcs how many MCU's are on each row, etc.
-void jpeg_decoder::calc_mcu_block_order()
-{
-    int component_num, component_id;
-    int max_h_samp = 0, max_v_samp = 0;
-
-    for (component_id = 0; component_id < m_comps_in_frame; component_id++) {
-        if (m_comp_h_samp[component_id] > max_h_samp) {
-          max_h_samp = m_comp_h_samp[component_id];
-        }
-        if (m_comp_v_samp[component_id] > max_v_samp) {
-          max_v_samp = m_comp_v_samp[component_id];
-        }
-    }
-
-    for (component_id = 0; component_id < m_comps_in_frame; component_id++) {
-        m_comp_h_blocks[component_id] = ((((m_image_x_size * m_comp_h_samp[component_id]) + (max_h_samp - 1)) / max_h_samp) + 7) / 8;
-        m_comp_v_blocks[component_id] = ((((m_image_y_size * m_comp_v_samp[component_id]) + (max_v_samp - 1)) / max_v_samp) + 7) / 8;
-    }
-
-    if (m_comps_in_scan == 1) {
-        m_mcus_per_row = m_comp_h_blocks[m_comp_list[0]];
-        m_mcus_per_col = m_comp_v_blocks[m_comp_list[0]];
-    } else {
-        m_mcus_per_row = (((m_image_x_size + 7) / 8) + (max_h_samp - 1)) / max_h_samp;
-        m_mcus_per_col = (((m_image_y_size + 7) / 8) + (max_v_samp - 1)) / max_v_samp;
-    }
-
-    if (m_comps_in_scan == 1) {
-        m_mcu_org[0] = m_comp_list[0];
-        m_blocks_per_mcu = 1;
-    } else {
-        m_blocks_per_mcu = 0;
-
-        for (component_num = 0; component_num < m_comps_in_scan; component_num++) {
-            int num_blocks;
-            component_id = m_comp_list[component_num];
-            num_blocks = m_comp_h_samp[component_id] * m_comp_v_samp[component_id];
-            while (num_blocks--) m_mcu_org[m_blocks_per_mcu++] = component_id;
-        }
-    }
-}
-
-
-// Starts a new scan.
-int jpeg_decoder::init_scan()
-{
-    if (!locate_sos_marker()) return false;
-
-    calc_mcu_block_order();
-    check_huff_tables();
-    check_quant_tables();
-
-    memset(m_last_dc_val, 0, m_comps_in_frame * sizeof(uint32_t));
-
-    m_eob_run = 0;
-
-    if (m_restart_interval) {
-        m_restarts_left = m_restart_interval;
-        m_next_restart_num = 0;
-    }
-    fix_in_buffer();
-    return true;
-}
-
-
-// Starts a frame. Determines if the number of components or sampling factors
-// are supported.
-void jpeg_decoder::init_frame()
-{
-    int i;
-
-    if (m_comps_in_frame == 1) {
-        if ((m_comp_h_samp[0] != 1) || (m_comp_v_samp[0] != 1)) stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
-        m_scan_type = JPGD_GRAYSCALE;
-        m_max_blocks_per_mcu = 1;
-        m_max_mcu_x_size = 8;
-        m_max_mcu_y_size = 8;
-    } else if (m_comps_in_frame == 3) {
-        if (((m_comp_h_samp[1] != 1) || (m_comp_v_samp[1] != 1)) || ((m_comp_h_samp[2] != 1) || (m_comp_v_samp[2] != 1)))
-            stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
-
-        if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 1)) {
-            m_scan_type = JPGD_YH1V1;
-            m_max_blocks_per_mcu = 3;
-            m_max_mcu_x_size = 8;
-            m_max_mcu_y_size = 8;
-        } else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 1)) {
-            m_scan_type = JPGD_YH2V1;
-            m_max_blocks_per_mcu = 4;
-            m_max_mcu_x_size = 16;
-            m_max_mcu_y_size = 8;
-        } else if ((m_comp_h_samp[0] == 1) && (m_comp_v_samp[0] == 2)) {
-            m_scan_type = JPGD_YH1V2;
-            m_max_blocks_per_mcu = 4;
-            m_max_mcu_x_size = 8;
-            m_max_mcu_y_size = 16;
-        } else if ((m_comp_h_samp[0] == 2) && (m_comp_v_samp[0] == 2)) {
-            m_scan_type = JPGD_YH2V2;
-            m_max_blocks_per_mcu = 6;
-            m_max_mcu_x_size = 16;
-            m_max_mcu_y_size = 16;
-        } else stop_decoding(JPGD_UNSUPPORTED_SAMP_FACTORS);
-    } else stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
-
-    m_max_mcus_per_row = (m_image_x_size + (m_max_mcu_x_size - 1)) / m_max_mcu_x_size;
-    m_max_mcus_per_col = (m_image_y_size + (m_max_mcu_y_size - 1)) / m_max_mcu_y_size;
-
-    // These values are for the *destination* pixels: after conversion.
-    if (m_scan_type == JPGD_GRAYSCALE) m_dest_bytes_per_pixel = 1;
-    else m_dest_bytes_per_pixel = 4;
-
-    m_dest_bytes_per_scan_line = ((m_image_x_size + 15) & 0xFFF0) * m_dest_bytes_per_pixel;
-    m_real_dest_bytes_per_scan_line = (m_image_x_size * m_dest_bytes_per_pixel);
-
-    // Initialize two scan line buffers.
-    m_pScan_line_0 = (uint8_t *)alloc(m_dest_bytes_per_scan_line, true);
-    if ((m_scan_type == JPGD_YH1V2) || (m_scan_type == JPGD_YH2V2)) {
-        m_pScan_line_1 = (uint8_t *)alloc(m_dest_bytes_per_scan_line, true);
-    }
-
-    m_max_blocks_per_row = m_max_mcus_per_row * m_max_blocks_per_mcu;
-
-    // Should never happen
-    if (m_max_blocks_per_row > JPGD_MAX_BLOCKS_PER_ROW) stop_decoding(JPGD_ASSERTION_ERROR);
-
-    // Allocate the coefficient buffer, enough for one MCU
-    m_pMCU_coefficients = (jpgd_block_t*)alloc(m_max_blocks_per_mcu * 64 * sizeof(jpgd_block_t));
-
-    for (i = 0; i < m_max_blocks_per_mcu; i++) {
-        m_mcu_block_max_zag[i] = 64;
-    }
-
-    m_expanded_blocks_per_component = m_comp_h_samp[0] * m_comp_v_samp[0];
-    m_expanded_blocks_per_mcu = m_expanded_blocks_per_component * m_comps_in_frame;
-    m_expanded_blocks_per_row = m_max_mcus_per_row * m_expanded_blocks_per_mcu;
-    // Freq. domain chroma upsampling is only supported for H2V2 subsampling factor (the most common one I've seen).
-    m_freq_domain_chroma_upsample = false;
-#if JPGD_SUPPORT_FREQ_DOMAIN_UPSAMPLING
-    m_freq_domain_chroma_upsample = (m_expanded_blocks_per_mcu == 4*3);
-#endif
-
-    if (m_freq_domain_chroma_upsample)
-        m_pSample_buf = (uint8_t *)alloc(m_expanded_blocks_per_row * 64);
-    else
-        m_pSample_buf = (uint8_t *)alloc(m_max_blocks_per_row * 64);
-
-    m_total_lines_left = m_image_y_size;
-    m_mcu_lines_left = 0;
-    create_look_ups();
-}
-
-
-// The coeff_buf series of methods originally stored the coefficients
-// into a "virtual" file which was located in EMS, XMS, or a disk file. A cache
-// was used to make this process more efficient. Now, we can store the entire
-// thing in RAM.
-jpeg_decoder::coeff_buf* jpeg_decoder::coeff_buf_open(int block_num_x, int block_num_y, int block_len_x, int block_len_y)
-{
-    coeff_buf* cb = (coeff_buf*)alloc(sizeof(coeff_buf));
-    cb->block_num_x = block_num_x;
-    cb->block_num_y = block_num_y;
-    cb->block_len_x = block_len_x;
-    cb->block_len_y = block_len_y;
-    cb->block_size = (block_len_x * block_len_y) * sizeof(jpgd_block_t);
-    cb->pData = (uint8_t *)alloc(cb->block_size * block_num_x * block_num_y, true);
-    return cb;
-}
-
-
-inline jpgd_block_t *jpeg_decoder::coeff_buf_getp(coeff_buf *cb, int block_x, int block_y)
-{
-    JPGD_ASSERT((block_x < cb->block_num_x) && (block_y < cb->block_num_y));
-    return (jpgd_block_t *)(cb->pData + block_x * cb->block_size + block_y * (cb->block_size * cb->block_num_x));
-}
-
-
-// The following methods decode the various types of m_blocks encountered
-// in progressively encoded images.
-void jpeg_decoder::decode_block_dc_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
-{
-    int s, r;
-    jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
-
-    if ((s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_dc_tab[component_id]])) != 0) {
-        r = pD->get_bits_no_markers(s);
-        s = JPGD_HUFF_EXTEND(r, s);
-    }
-    pD->m_last_dc_val[component_id] = (s += pD->m_last_dc_val[component_id]);
-    p[0] = static_cast<jpgd_block_t>(static_cast<unsigned int>(s) << pD->m_successive_low);
-}
-
-
-void jpeg_decoder::decode_block_dc_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
-{
-    if (pD->get_bits_no_markers(1)) {
-        jpgd_block_t *p = pD->coeff_buf_getp(pD->m_dc_coeffs[component_id], block_x, block_y);
-        p[0] |= (1 << pD->m_successive_low);
-    }
-}
-
-
-void jpeg_decoder::decode_block_ac_first(jpeg_decoder *pD, int component_id, int block_x, int block_y)
-{
-    int k, s, r;
-
-    if (pD->m_eob_run) {
-        pD->m_eob_run--;
-        return;
-    }
-    jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
-
-    for (k = pD->m_spectral_start; k <= pD->m_spectral_end; k++) {
-        s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
-        r = s >> 4;
-        s &= 15;
-        if (s) {
-            if ((k += r) > 63) pD->stop_decoding(JPGD_DECODE_ERROR);
-            r = pD->get_bits_no_markers(s);
-            s = JPGD_HUFF_EXTEND(r, s);
-            p[g_ZAG[k]] = static_cast<jpgd_block_t>(static_cast<unsigned int>(s) << pD->m_successive_low);
-        } else {
-            if (r == 15) {
-                if ((k += 15) > 63) pD->stop_decoding(JPGD_DECODE_ERROR);
-            } else {
-                pD->m_eob_run = 1 << r;
-                if (r) pD->m_eob_run += pD->get_bits_no_markers(r);
-                pD->m_eob_run--;
-                break;
-            }
-        }
-    }
-}
-
-
-void jpeg_decoder::decode_block_ac_refine(jpeg_decoder *pD, int component_id, int block_x, int block_y)
-{
-    int s, k, r;
-    int p1 = 1 << pD->m_successive_low;
-    int m1 = static_cast<unsigned int>(-1) << pD->m_successive_low;
-    jpgd_block_t *p = pD->coeff_buf_getp(pD->m_ac_coeffs[component_id], block_x, block_y);
-
-    JPGD_ASSERT(pD->m_spectral_end <= 63);
-
-    k = pD->m_spectral_start;
-
-    if (pD->m_eob_run == 0) {
-        for ( ; k <= pD->m_spectral_end; k++) {
-            s = pD->huff_decode(pD->m_pHuff_tabs[pD->m_comp_ac_tab[component_id]]);
-            r = s >> 4;
-            s &= 15;
-            if (s) {
-                if (s != 1) pD->stop_decoding(JPGD_DECODE_ERROR);
-                if (pD->get_bits_no_markers(1)) s = p1;
-                else s = m1;
-            } else {
-                if (r != 15) {
-                    pD->m_eob_run = 1 << r;
-                    if (r) pD->m_eob_run += pD->get_bits_no_markers(r);
-                    break;
-                }
-            }
-
-            do {
-                jpgd_block_t *this_coef = p + g_ZAG[k & 63];
-
-                if (*this_coef != 0) {
-                    if (pD->get_bits_no_markers(1)) {
-                        if ((*this_coef & p1) == 0) {
-                            if (*this_coef >= 0) *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
-                            else *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
-                        }
-                    }
-                } else {
-                    if (--r < 0) break;
-                }
-                k++;
-            } while (k <= pD->m_spectral_end);
-
-            if ((s) && (k < 64)) {
-              p[g_ZAG[k]] = static_cast<jpgd_block_t>(s);
-            }
-        }
-    }
-
-    if (pD->m_eob_run > 0) {
-        for ( ; k <= pD->m_spectral_end; k++) {
-            jpgd_block_t *this_coef = p + g_ZAG[k & 63]; // logical AND to shut up static code analysis
-
-            if (*this_coef != 0) {
-                if (pD->get_bits_no_markers(1)) {
-                    if ((*this_coef & p1) == 0) {
-                        if (*this_coef >= 0) *this_coef = static_cast<jpgd_block_t>(*this_coef + p1);
-                        else *this_coef = static_cast<jpgd_block_t>(*this_coef + m1);
-                    }
-                }
-            }
-        }
-        pD->m_eob_run--;
-    }
-}
-
-
-// Decode a scan in a progressively encoded image.
-void jpeg_decoder::decode_scan(pDecode_block_func decode_block_func)
-{
-    int mcu_row, mcu_col, mcu_block;
-    int block_x_mcu[JPGD_MAX_COMPONENTS], m_block_y_mcu[JPGD_MAX_COMPONENTS];
-
-    memset(m_block_y_mcu, 0, sizeof(m_block_y_mcu));
-
-    for (mcu_col = 0; mcu_col < m_mcus_per_col; mcu_col++) {
-        int component_num, component_id;
-        memset(block_x_mcu, 0, sizeof(block_x_mcu));
-
-        for (mcu_row = 0; mcu_row < m_mcus_per_row; mcu_row++) {
-            int block_x_mcu_ofs = 0, block_y_mcu_ofs = 0;
-
-            if ((m_restart_interval) && (m_restarts_left == 0)) process_restart();
-
-            for (mcu_block = 0; mcu_block < m_blocks_per_mcu; mcu_block++) {
-                component_id = m_mcu_org[mcu_block];
-                decode_block_func(this, component_id, block_x_mcu[component_id] + block_x_mcu_ofs, m_block_y_mcu[component_id] + block_y_mcu_ofs);
-
-                if (m_comps_in_scan == 1) block_x_mcu[component_id]++;
-                else {
-                    if (++block_x_mcu_ofs == m_comp_h_samp[component_id]) {
-                        block_x_mcu_ofs = 0;
-
-                        if (++block_y_mcu_ofs == m_comp_v_samp[component_id]) {
-                            block_y_mcu_ofs = 0;
-                            block_x_mcu[component_id] += m_comp_h_samp[component_id];
-                        }
-                    }
-                }
-            }
-            m_restarts_left--;
-        }
-
-        if (m_comps_in_scan == 1) m_block_y_mcu[m_comp_list[0]]++;
-        else {
-            for (component_num = 0; component_num < m_comps_in_scan; component_num++) {
-                component_id = m_comp_list[component_num];
-                m_block_y_mcu[component_id] += m_comp_v_samp[component_id];
-            }
-        }
-    }
-}
-
-
-// Decode a progressively encoded image.
-void jpeg_decoder::init_progressive()
-{
-    int i;
-
-    if (m_comps_in_frame == 4) stop_decoding(JPGD_UNSUPPORTED_COLORSPACE);
-
-    // Allocate the coefficient buffers.
-    for (i = 0; i < m_comps_in_frame; i++) {
-        m_dc_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 1, 1);
-        m_ac_coeffs[i] = coeff_buf_open(m_max_mcus_per_row * m_comp_h_samp[i], m_max_mcus_per_col * m_comp_v_samp[i], 8, 8);
-    }
-
-    while (true) {
-        int dc_only_scan, refinement_scan;
-        pDecode_block_func decode_block_func;
-
-        if (!init_scan()) break;
-
-        dc_only_scan = (m_spectral_start == 0);
-        refinement_scan = (m_successive_high != 0);
-
-        if ((m_spectral_start > m_spectral_end) || (m_spectral_end > 63)) stop_decoding(JPGD_BAD_SOS_SPECTRAL);
-
-        if (dc_only_scan) {
-            if (m_spectral_end) stop_decoding(JPGD_BAD_SOS_SPECTRAL);
-        } else if (m_comps_in_scan != 1) {  /* AC scans can only contain one component */
-            stop_decoding(JPGD_BAD_SOS_SPECTRAL);
-        }
-
-        if ((refinement_scan) && (m_successive_low != m_successive_high - 1)) stop_decoding(JPGD_BAD_SOS_SUCCESSIVE);
-
-        if (dc_only_scan) {
-            if (refinement_scan) decode_block_func = decode_block_dc_refine;
-            else decode_block_func = decode_block_dc_first;
-        } else {
-            if (refinement_scan) decode_block_func = decode_block_ac_refine;
-            else decode_block_func = decode_block_ac_first;
-        }
-        decode_scan(decode_block_func);
-        m_bits_left = 16;
-        get_bits(16);
-        get_bits(16);
-    }
-
-    m_comps_in_scan = m_comps_in_frame;
-
-    for (i = 0; i < m_comps_in_frame; i++) {
-        m_comp_list[i] = i;
-    }
-
-    calc_mcu_block_order();
-}
-
-
-void jpeg_decoder::init_sequential()
-{
-    if (!init_scan()) stop_decoding(JPGD_UNEXPECTED_MARKER);
-}
-
-
-void jpeg_decoder::decode_start()
-{
-    init_frame();
-    if (m_progressive_flag) init_progressive();
-    else init_sequential();
-}
-
-
-void jpeg_decoder::decode_init(jpeg_decoder_stream *pStream)
-{
-    init(pStream);
-    locate_sof_marker();
-}
-
-
-jpeg_decoder::jpeg_decoder(jpeg_decoder_stream *pStream)
-{
-    if (setjmp(m_jmp_state)) return;
-    decode_init(pStream);
-}
-
-
-int jpeg_decoder::begin_decoding()
-{
-    if (m_ready_flag) return JPGD_SUCCESS;
-    if (m_error_code) return JPGD_FAILED;
-    if (setjmp(m_jmp_state)) return JPGD_FAILED;
-
-    decode_start();
-    m_ready_flag = true;
-
-    return JPGD_SUCCESS;
-}
-
-
-jpeg_decoder::~jpeg_decoder()
-{
-    free_all_blocks();
-}
-
-
-void jpeg_decoder_file_stream::close()
-{
-    if (m_pFile) {
-        fclose(m_pFile);
-        m_pFile = nullptr;
-    }
-    m_eof_flag = false;
-    m_error_flag = false;
-}
-
-
-jpeg_decoder_file_stream::~jpeg_decoder_file_stream()
-{
-    close();
-}
-
-
-bool jpeg_decoder_file_stream::open(const char *Pfilename)
-{
-    close();
-
-    m_eof_flag = false;
-    m_error_flag = false;
-
-#if defined(_MSC_VER)
-    m_pFile = nullptr;
-    fopen_s(&m_pFile, Pfilename, "rb");
-#else
-    m_pFile = fopen(Pfilename, "rb");
-#endif
-    return m_pFile != nullptr;
-}
-
-
-int jpeg_decoder_file_stream::read(uint8_t *pBuf, int max_bytes_to_read, bool *pEOF_flag)
-{
-    if (!m_pFile) return -1;
-
-    if (m_eof_flag) {
-        *pEOF_flag = true;
-        return 0;
-    }
-
-    if (m_error_flag) return -1;
-
-    int bytes_read = static_cast<int>(fread(pBuf, 1, max_bytes_to_read, m_pFile));
-    if (bytes_read < max_bytes_to_read) {
-        if (ferror(m_pFile)) {
-            m_error_flag = true;
-            return -1;
-        }
-        m_eof_flag = true;
-        *pEOF_flag = true;
-    }
-    return bytes_read;
-}
-
-
-bool jpeg_decoder_mem_stream::open(const uint8_t *pSrc_data, uint32_t size)
-{
-    close();
-    m_pSrc_data = pSrc_data;
-    m_ofs = 0;
-    m_size = size;
-    return true;
-}
-
-
-int jpeg_decoder_mem_stream::read(uint8_t *pBuf, int max_bytes_to_read, bool *pEOF_flag)
-{
-    *pEOF_flag = false;
-    if (!m_pSrc_data) return -1;
-
-    uint32_t bytes_remaining = m_size - m_ofs;
-    if ((uint32_t)max_bytes_to_read > bytes_remaining) {
-        max_bytes_to_read = bytes_remaining;
-        *pEOF_flag = true;
-    }
-    memcpy(pBuf, m_pSrc_data + m_ofs, max_bytes_to_read);
-    m_ofs += max_bytes_to_read;
-
-    return max_bytes_to_read;
-}
-
-
-/************************************************************************/
-/* External Class Implementation                                        */
-/************************************************************************/
-
-
-jpeg_decoder* jpgdHeader(const char* data, int size, int* width, int* height)
-{
-    auto decoder = new jpeg_decoder(new jpeg_decoder_mem_stream((const uint8_t*)data, size));
-    if (decoder->get_error_code() != JPGD_SUCCESS) {
-        delete(decoder);
-        return nullptr;
-    }
-
-    if (width) *width = decoder->get_width();
-    if (height) *height = decoder->get_height();
-
-    return decoder;
-}
-
-
-jpeg_decoder* jpgdHeader(const char* filename, int* width, int* height)
-{
-    auto fileStream = new jpeg_decoder_file_stream();
-    if (!fileStream->open(filename)) {
-        delete(fileStream);
-        return nullptr;
-    }
-
-    auto decoder = new jpeg_decoder(fileStream);
-    if (decoder->get_error_code() != JPGD_SUCCESS) {
-        delete(fileStream);
-        delete(decoder);
-        return nullptr;
-    }
-
-    if (width) *width = decoder->get_width();
-    if (height) *height = decoder->get_height();
-
-    return decoder;
-}
-
-
-void jpgdDelete(jpeg_decoder* decoder)
-{
-    delete(decoder);
-}
-
-
-unsigned char* jpgdDecompress(jpeg_decoder* decoder)
-{
-    if (!decoder) return nullptr;
-
-    int req_comps = 4;  //TODO: fixed 4 channel components now?
-    if ((req_comps != 1) && (req_comps != 3) && (req_comps != 4)) return nullptr;
-
-    auto image_width = decoder->get_width();
-    auto image_height = decoder->get_height();
-    //auto actual_comps = decoder->get_num_components();
-
-    if (decoder->begin_decoding() != JPGD_SUCCESS) return nullptr;
-
-    const int dst_bpl = image_width * req_comps;
-    uint8_t *pImage_data = (uint8_t*)malloc(dst_bpl * image_height);
-    if (!pImage_data) return nullptr;
-
-    for (int y = 0; y < image_height; y++) {
-        const uint8_t* pScan_line;
-        uint32_t scan_line_len;
-        if (decoder->decode((const void**)&pScan_line, &scan_line_len) != JPGD_SUCCESS) {
-            free(pImage_data);
-            return nullptr;
-        }
-
-        uint8_t *pDst = pImage_data + y * dst_bpl;
-
-        //Return as BGRA
-        if ((req_comps == 4) && (decoder->get_num_components() == 3)) {
-            for (int x = 0; x < image_width; x++) {
-                pDst[0] = pScan_line[x*4+2];
-                pDst[1] = pScan_line[x*4+1];
-                pDst[2] = pScan_line[x*4+0];
-                pDst[3] = 255;
-                pDst += 4;
-            }
-        } else if (((req_comps == 1) && (decoder->get_num_components() == 1)) || ((req_comps == 4) && (decoder->get_num_components() == 3))) {
-            memcpy(pDst, pScan_line, dst_bpl);
-        } else if (decoder->get_num_components() == 1) {
-            if (req_comps == 3) {
-                for (int x = 0; x < image_width; x++) {
-                    uint8_t luma = pScan_line[x];
-                    pDst[0] = luma;
-                    pDst[1] = luma;
-                    pDst[2] = luma;
-                    pDst += 3;
-                }
-            } else {
-                for (int x = 0; x < image_width; x++) {
-                    uint8_t luma = pScan_line[x];
-                    pDst[0] = luma;
-                    pDst[1] = luma;
-                    pDst[2] = luma;
-                    pDst[3] = 255;
-                    pDst += 4;
-                }
-            }
-        } else if (decoder->get_num_components() == 3) {
-            if (req_comps == 1) {
-                const int YR = 19595, YG = 38470, YB = 7471;
-                for (int x = 0; x < image_width; x++) {
-                    int r = pScan_line[x*4+0];
-                    int g = pScan_line[x*4+1];
-                    int b = pScan_line[x*4+2];
-                    *pDst++ = static_cast<uint8_t>((r * YR + g * YG + b * YB + 32768) >> 16);
-                }
-            } else {
-                for (int x = 0; x < image_width; x++) {
-                    pDst[0] = pScan_line[x*4+0];
-                    pDst[1] = pScan_line[x*4+1];
-                    pDst[2] = pScan_line[x*4+2];
-                    pDst += 3;
-                }
-            }
-        }
-    }
-    return pImage_data;
-}
diff --git a/thirdparty/thorvg/src/loaders/jpg/tvgJpgd.h b/thirdparty/thorvg/src/loaders/jpg/tvgJpgd.h
deleted file mode 100644
index e1fe35f4887..00000000000
--- a/thirdparty/thorvg/src/loaders/jpg/tvgJpgd.h
+++ /dev/null
@@ -1,35 +0,0 @@
-/*
- * Copyright (c) 2021 - 2024 the ThorVG project. All rights reserved.
-
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
-
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
-
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-// jpgd.h - C++ class for JPEG decompression.
-// Public domain, Rich Geldreich <richgel99@gmail.com>
-#ifndef _TVG_JPGD_H_
-#define _TVG_JPGD_H_
-
-class jpeg_decoder;
-
-jpeg_decoder* jpgdHeader(const char* data, int size, int* width, int* height);
-jpeg_decoder* jpgdHeader(const char* filename, int* width, int* height);
-unsigned char* jpgdDecompress(jpeg_decoder* decoder);
-void jpgdDelete(jpeg_decoder* decoder);
-
-#endif //_TVG_JPGD_H_
diff --git a/thirdparty/thorvg/update-thorvg.sh b/thirdparty/thorvg/update-thorvg.sh
index 7c211f73e70..593e24a61a4 100755
--- a/thirdparty/thorvg/update-thorvg.sh
+++ b/thirdparty/thorvg/update-thorvg.sh
@@ -43,13 +43,13 @@ cat << EOF > ../inc/config.h
 #define THORVG_SW_RASTER_SUPPORT
 #define THORVG_SVG_LOADER_SUPPORT
 #define THORVG_PNG_LOADER_SUPPORT
-#define THORVG_JPG_LOADER_SUPPORT
 #ifndef WEB_ENABLED
 #define THORVG_THREAD_SUPPORT
 #endif
 
-// Added conditionally if webp module is enabled.
+// Added conditionally if respective modules are enabled.
 //#define THORVG_WEBP_LOADER_SUPPORT
+//#define THORVG_JPG_LOADER_SUPPORT
 
 // For internal debugging:
 //#define THORVG_LOG_ENABLED
@@ -71,8 +71,7 @@ mkdir ../src/loaders
 cp -rv src/loaders/svg src/loaders/raw  ../src/loaders/
 cp -rv src/loaders/external_png ../src/loaders/
 cp -rv src/loaders/external_webp ../src/loaders/
-# Not using external jpg as it's turbojpeg, which we don't have.
-cp -rv src/loaders/jpg ../src/loaders/
+cp -rv src/loaders/external_jpg ../src/loaders/
 
 popd
 rm -rf tmp