Add --zstd flag and use compression level 19

This commit is contained in:
Дмитрий Ценеков 2025-11-26 16:57:32 +03:00 committed by Fabian
parent bd493da2dc
commit b91ad42fe2
3 changed files with 36 additions and 32 deletions

View file

@ -11,18 +11,6 @@ import sys
import io import io
HASH_LENGTH = 8 HASH_LENGTH = 8
USE_COMPRESSION = True
if USE_COMPRESSION:
if sys.version_info >= (3, 14):
from compression import zstd
else:
try:
import zstandard as zstd
except ImportError:
print("Error: zstandard module required when USE_COMPRESSION = True")
print("Install with: pip install zstandard")
sys.exit(1)
def hash_file(filename) -> str: def hash_file(filename) -> str:
with open(filename, "rb", buffering=0) as f: with open(filename, "rb", buffering=0) as f:
@ -43,23 +31,39 @@ def main():
formatter_class=argparse.RawTextHelpFormatter) formatter_class=argparse.RawTextHelpFormatter)
args.add_argument("from_path", metavar="from", help="from") args.add_argument("from_path", metavar="from", help="from")
args.add_argument("to_path", metavar="to", help="to") args.add_argument("to_path", metavar="to", help="to")
args.add_argument("--zstd", action="store_true", help="Use Zstandard compression")
args = args.parse_args() args = args.parse_args()
from_path = os.path.normpath(args.from_path) from_path = os.path.normpath(args.from_path)
to_path = os.path.normpath(args.to_path) to_path = os.path.normpath(args.to_path)
# Import zstd only if compression is requested
zstd_module = None
if args.zstd:
if sys.version_info >= (3, 14):
from compression import zstd
zstd_module = zstd
else:
try:
import zstandard as zstd
zstd_module = zstd
except ImportError:
print("Error: zstandard module required when using --zstd flag")
print("Install with: pip install zstandard")
sys.exit(1)
if os.path.isfile(from_path): if os.path.isfile(from_path):
tar = tarfile.open(from_path, "r") tar = tarfile.open(from_path, "r")
else: else:
tar = None tar = None
if tar: if tar:
handle_tar(logger, tar, to_path) handle_tar(logger, tar, to_path, args.zstd, zstd_module)
else: else:
handle_dir(logger, from_path, to_path) handle_dir(logger, from_path, to_path, args.zstd, zstd_module)
def handle_dir(logger, from_path: str, to_path: str): def handle_dir(logger, from_path: str, to_path: str, use_compression: bool, zstd_module):
def onerror(oserror): def onerror(oserror):
logger.warning(oserror) logger.warning(oserror)
@ -78,37 +82,37 @@ def handle_dir(logger, from_path: str, to_path: str):
continue continue
file_hash = hash_file(absname) file_hash = hash_file(absname)
filename = file_hash[0:HASH_LENGTH] + (".bin.zst" if USE_COMPRESSION else ".bin") filename = file_hash[0:HASH_LENGTH] + (".bin.zst" if use_compression else ".bin")
to_abs = os.path.join(to_path, filename) to_abs = os.path.join(to_path, filename)
if os.path.exists(to_abs): if os.path.exists(to_abs):
logger.info("Exists, skipped {} ({})".format(to_abs, absname)) logger.info("Exists, skipped {} ({})".format(to_abs, absname))
else: else:
if USE_COMPRESSION: if use_compression:
logger.info("Compressing {} {}".format(absname, to_abs)) logger.info("Compressing {} {}".format(absname, to_abs))
with open(absname, 'rb') as src_file: with open(absname, 'rb') as src_file:
with open(to_abs, 'wb') as dst_file: with open(to_abs, 'wb') as dst_file:
zstd.ZstdCompressor(level=3).copy_stream(src_file, dst_file) zstd_module.ZstdCompressor(level=19).copy_stream(src_file, dst_file)
else: else:
logger.info("cp {} {}".format(absname, to_abs)) logger.info("cp {} {}".format(absname, to_abs))
shutil.copyfile(absname, to_abs) shutil.copyfile(absname, to_abs)
def handle_tar(logger, tar, to_path: str): def handle_tar(logger, tar, to_path: str, use_compression: bool, zstd_module):
for member in tar.getmembers(): for member in tar.getmembers():
if member.isfile() or member.islnk(): if member.isfile() or member.islnk():
f = tar.extractfile(member) f = tar.extractfile(member)
file_hash = hash_fileobj(f) file_hash = hash_fileobj(f)
filename = file_hash[0:HASH_LENGTH] + (".bin.zst" if USE_COMPRESSION else ".bin") filename = file_hash[0:HASH_LENGTH] + (".bin.zst" if use_compression else ".bin")
to_abs = os.path.join(to_path, filename) to_abs = os.path.join(to_path, filename)
if os.path.exists(to_abs): if os.path.exists(to_abs):
logger.info("Exists, skipped {} ({})".format(to_abs, member.name)) logger.info("Exists, skipped {} ({})".format(to_abs, member.name))
else: else:
if USE_COMPRESSION: if use_compression:
logger.info("Extracted and compressing {} ({})".format(to_abs, member.name)) logger.info("Extracted and compressing {} ({})".format(to_abs, member.name))
f.seek(0) f.seek(0)
with open(to_abs, 'wb') as dst_file: with open(to_abs, 'wb') as dst_file:
zstd.ZstdCompressor(level=3).copy_stream(f, dst_file) zstd_module.ZstdCompressor(level=19).copy_stream(f, dst_file)
else: else:
logger.info("Extracted {} ({})".format(to_abs, member.name)) logger.info("Extracted {} ({})".format(to_abs, member.name))
to_file = open(to_abs, "wb") to_file = open(to_abs, "wb")

View file

@ -20,10 +20,10 @@ docker export "$CONTAINER_NAME" -o "$OUT_ROOTFS_TAR"
# https://github.com/iximiuz/docker-to-linux/issues/19#issuecomment-1242809707 # https://github.com/iximiuz/docker-to-linux/issues/19#issuecomment-1242809707
tar -f "$OUT_ROOTFS_TAR" --delete ".dockerenv" || true tar -f "$OUT_ROOTFS_TAR" --delete ".dockerenv" || true
../../../tools/fs2json.py --out "$OUT_FSJSON" "$OUT_ROOTFS_TAR" ../../../tools/fs2json.py --zstd --out "$OUT_FSJSON" "$OUT_ROOTFS_TAR"
# Note: Not deleting old files here # Note: Not deleting old files here
mkdir -p "$OUT_ROOTFS_FLAT" mkdir -p "$OUT_ROOTFS_FLAT"
../../../tools/copy-to-sha256.py "$OUT_ROOTFS_TAR" "$OUT_ROOTFS_FLAT" ../../../tools/copy-to-sha256.py --zstd "$OUT_ROOTFS_TAR" "$OUT_ROOTFS_FLAT"
echo "$OUT_ROOTFS_TAR", "$OUT_ROOTFS_FLAT" and "$OUT_FSJSON" created. echo "$OUT_ROOTFS_TAR", "$OUT_ROOTFS_FLAT" and "$OUT_FSJSON" created.

View file

@ -17,8 +17,6 @@ import tarfile
VERSION = 3 VERSION = 3
USE_COMPRESSION = True
IDX_NAME = 0 IDX_NAME = 0
IDX_SIZE = 1 IDX_SIZE = 1
IDX_MTIME = 2 IDX_MTIME = 2
@ -70,6 +68,8 @@ def main():
args.add_argument("path", args.add_argument("path",
metavar="path-or-tar", metavar="path-or-tar",
help="Base path or tar file to include in JSON") help="Base path or tar file to include in JSON")
args.add_argument("--zstd", action="store_true",
help="Use Zstandard compression")
args = args.parse_args() args = args.parse_args()
@ -81,9 +81,9 @@ def main():
tar = None tar = None
if tar: if tar:
(root, total_size) = handle_tar(logger, tar) (root, total_size) = handle_tar(logger, tar, args.zstd)
else: else:
(root, total_size) = handle_dir(logger, path, args.exclude) (root, total_size) = handle_dir(logger, path, args.exclude, args.zstd)
if False: if False:
# normalize the order of children, useful to debug differences between # normalize the order of children, useful to debug differences between
@ -105,7 +105,7 @@ def main():
logger.info("Creating json ...") logger.info("Creating json ...")
json.dump(result, args.out, check_circular=False, separators=(',', ':')) json.dump(result, args.out, check_circular=False, separators=(',', ':'))
def handle_dir(logger, path, exclude): def handle_dir(logger, path, exclude, use_compression):
path = path + "/" path = path + "/"
exclude = exclude or [] exclude = exclude or []
exclude = [os.path.join("/", os.path.normpath(p)) for p in exclude] exclude = [os.path.join("/", os.path.normpath(p)) for p in exclude]
@ -199,7 +199,7 @@ def handle_dir(logger, path, exclude):
obj[IDX_TARGET] = target obj[IDX_TARGET] = target
elif isfile: elif isfile:
file_hash = hash_file(absname) file_hash = hash_file(absname)
filename = file_hash[0:HASH_LENGTH] + (".bin.zst" if USE_COMPRESSION else ".bin") filename = file_hash[0:HASH_LENGTH] + (".bin.zst" if use_compression else ".bin")
existing = filename_to_hash.get(filename) existing = filename_to_hash.get(filename)
assert existing is None or existing == file_hash, "Collision in short hash (%s and %s)" % (existing, file_hash) assert existing is None or existing == file_hash, "Collision in short hash (%s and %s)" % (existing, file_hash)
filename_to_hash[filename] = file_hash filename_to_hash[filename] = file_hash
@ -214,7 +214,7 @@ def handle_dir(logger, path, exclude):
return (mainroot, total_size) return (mainroot, total_size)
def handle_tar(logger, tar): def handle_tar(logger, tar, use_compression):
mainroot = [] mainroot = []
filename_to_hash = {} filename_to_hash = {}
total_size = 0 total_size = 0
@ -242,7 +242,7 @@ def handle_tar(logger, tar):
obj[IDX_MODE] |= S_IFREG obj[IDX_MODE] |= S_IFREG
f = tar.extractfile(member) f = tar.extractfile(member)
file_hash = hash_fileobj(f) file_hash = hash_fileobj(f)
filename = file_hash[0:HASH_LENGTH] + (".bin.zst" if USE_COMPRESSION else ".bin") filename = file_hash[0:HASH_LENGTH] + (".bin.zst" if use_compression else ".bin")
existing = filename_to_hash.get(filename) existing = filename_to_hash.get(filename)
assert existing is None or existing == file_hash, "Collision in short hash (%s and %s)" % (existing, file_hash) assert existing is None or existing == file_hash, "Collision in short hash (%s and %s)" % (existing, file_hash)
filename_to_hash[filename] = file_hash filename_to_hash[filename] = file_hash