From bd493da2dca51849859bd3d331f0cacfdb7371d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=94=D0=BC=D0=B8=D1=82=D1=80=D0=B8=D0=B9=20=D0=A6=D0=B5?=
 =?UTF-8?q?=D0=BD=D0=B5=D0=BA=D0=BE=D0=B2?= <49730476+DimaThenekov@users.noreply.github.com>
Date: Thu, 20 Nov 2025 19:40:35 +0300
Subject: [PATCH] Add zstd compression support at tools

---
 tools/copy-to-sha256.py | 42 +++++++++++++++++++++++++++++++++--------
 tools/fs2json.py        |  6 ++++--
 2 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/tools/copy-to-sha256.py b/tools/copy-to-sha256.py
index 96e2b9b6..ec12fb22 100755
--- a/tools/copy-to-sha256.py
+++ b/tools/copy-to-sha256.py
@@ -7,8 +7,22 @@ import argparse
 import hashlib
 import shutil
 import tarfile
+import sys
+import io
 
 HASH_LENGTH = 8
+USE_COMPRESSION = True
+
+if USE_COMPRESSION:
+    if sys.version_info >= (3, 14):
+        from compression import zstd
+    else:
+        try:
+            import zstandard as zstd
+        except ImportError:
+            print("Error: zstandard module required when USE_COMPRESSION = True")
+            print("Install with: pip install zstandard")
+            sys.exit(1)
 
 def hash_file(filename) -> str:
     with open(filename, "rb", buffering=0) as f:
@@ -64,30 +78,42 @@ def handle_dir(logger, from_path: str, to_path: str):
             continue
        file_hash = hash_file(absname)
-        filename = file_hash[0:HASH_LENGTH] + ".bin"
+        filename = file_hash[0:HASH_LENGTH] + (".bin.zst" if USE_COMPRESSION else ".bin")
        to_abs = os.path.join(to_path, filename)
 
        if os.path.exists(to_abs):
            logger.info("Exists, skipped {} ({})".format(to_abs, absname))
        else:
-            logger.info("cp {} {}".format(absname, to_abs))
-            shutil.copyfile(absname, to_abs)
+            if USE_COMPRESSION:
+                logger.info("Compressing {} {}".format(absname, to_abs))
+                with open(absname, 'rb') as src_file:
+                    with open(to_abs, 'wb') as dst_file:
+                        zstd.ZstdCompressor(level=3).copy_stream(src_file, dst_file)
+            else:
+                logger.info("cp {} {}".format(absname, to_abs))
+                shutil.copyfile(absname, to_abs)
 
 
 def handle_tar(logger, tar, to_path: str):
     for member in tar.getmembers():
         if member.isfile() or member.islnk():
             f = tar.extractfile(member)
             file_hash = hash_fileobj(f)
-            filename = file_hash[0:HASH_LENGTH] + ".bin"
+            filename = file_hash[0:HASH_LENGTH] + (".bin.zst" if USE_COMPRESSION else ".bin")
             to_abs = os.path.join(to_path, filename)
 
             if os.path.exists(to_abs):
                 logger.info("Exists, skipped {} ({})".format(to_abs, member.name))
             else:
-                logger.info("Extracted {} ({})".format(to_abs, member.name))
-                to_file = open(to_abs, "wb")
-                f.seek(0)
-                shutil.copyfileobj(f, to_file)
+                if USE_COMPRESSION:
+                    logger.info("Extracted and compressing {} ({})".format(to_abs, member.name))
+                    f.seek(0)
+                    with open(to_abs, 'wb') as dst_file:
+                        zstd.ZstdCompressor(level=3).copy_stream(f, dst_file)
+                else:
+                    logger.info("Extracted {} ({})".format(to_abs, member.name))
+                    to_file = open(to_abs, "wb")
+                    f.seek(0)
+                    shutil.copyfileobj(f, to_file)
 
 
 if __name__ == "__main__":
diff --git a/tools/fs2json.py b/tools/fs2json.py
index 37b962c7..59d00bfa 100755
--- a/tools/fs2json.py
+++ b/tools/fs2json.py
@@ -17,6 +17,8 @@ import tarfile
 
 VERSION = 3
 
+USE_COMPRESSION = True
+
 IDX_NAME = 0
 IDX_SIZE = 1
 IDX_MTIME = 2
@@ -197,7 +199,7 @@ def handle_dir(logger, path, exclude):
             obj[IDX_TARGET] = target
         elif isfile:
             file_hash = hash_file(absname)
-            filename = file_hash[0:HASH_LENGTH] + ".bin"
+            filename = file_hash[0:HASH_LENGTH] + (".bin.zst" if USE_COMPRESSION else ".bin")
             existing = filename_to_hash.get(filename)
             assert existing is None or existing == file_hash, "Collision in short hash (%s and %s)" % (existing, file_hash)
             filename_to_hash[filename] = file_hash
@@ -240,7 +242,7 @@ def handle_tar(logger, tar):
             obj[IDX_MODE] |= S_IFREG
             f = tar.extractfile(member)
             file_hash = hash_fileobj(f)
-            filename = file_hash[0:HASH_LENGTH] + ".bin"
+            filename = file_hash[0:HASH_LENGTH] + (".bin.zst" if USE_COMPRESSION else ".bin")
             existing = filename_to_hash.get(filename)
             assert existing is None or existing == file_hash, "Collision in short hash (%s and %s)" % (existing, file_hash)
             filename_to_hash[filename] = file_hash
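
For context, below is a minimal round-trip sketch of the ".bin.zst" blobs the patched copy-to-sha256.py writes: hash the uncompressed file, name the blob after the first HASH_LENGTH hex digits, compress with zstd level 3, and decompress on the way back. It is not part of the patch. It assumes the third-party python-zstandard package (the copy_stream() helper used here and in the patch is that package's API, not the Python 3.14 compression.zstd module), assumes the tool's hash is SHA-256 as its name suggests, and the paths "disk.img" and "out" are placeholders.

#!/usr/bin/env python3
# Sketch only: mirrors what the patched copy-to-sha256.py does to a single file,
# plus the matching read side. Assumes "pip install zstandard"; paths are examples.
import hashlib
import io

import zstandard as zstd

HASH_LENGTH = 8

def store_compressed(src_path: str, out_dir: str) -> str:
    # Name the blob after the hash of the *uncompressed* content, as the tool does,
    # so the content-addressed name does not depend on the compression settings.
    with open(src_path, "rb") as f:
        digest = hashlib.sha256(f.read()).hexdigest()
    blob_name = digest[0:HASH_LENGTH] + ".bin.zst"
    with open(src_path, "rb") as src, open(out_dir + "/" + blob_name, "wb") as dst:
        zstd.ZstdCompressor(level=3).copy_stream(src, dst)
    return blob_name

def load_blob(blob_path: str) -> bytes:
    # Consumers of the store now have to decompress before using the data.
    buf = io.BytesIO()
    with open(blob_path, "rb") as src:
        zstd.ZstdDecompressor().copy_stream(src, buf)
    return buf.getvalue()

if __name__ == "__main__":
    name = store_compressed("disk.img", "out")   # placeholder input/output paths
    data = load_blob("out/" + name)
    assert hashlib.sha256(data).hexdigest()[0:HASH_LENGTH] == name[0:HASH_LENGTH]

Because the blob name is derived from the uncompressed bytes, the tool's existing "Exists, skipped" check keeps working unchanged; toggling USE_COMPRESSION only changes the file extension, not the hash prefix.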