ladybird/Libraries/LibCompress/Deflate.h

83 lines
2.7 KiB
C
Raw Normal View History

/*
* Copyright (c) 2020, the SerenityOS developers.
* Copyright (c) 2021, Idan Horowitz <idan.horowitz@serenityos.org>
* Copyright (c) 2025, Altomani Gianluca <altomanigianluca@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
/*
 * LibGfx+LibCompress: WebPWriter performance regression reduction
 *
 * This moves both Gfx::CanonicalCode::write_symbol() and
 * Compress::CanonicalCode::write_symbol() inline.
 *
 * It also adds `__attribute__((always_inline))` on the arguments to visit()
 * in the latter. (ALWAYS_INLINE doesn't work on lambdas.)
 *
 * Numbers with `ministat`:
 *
 * I ran once:
 *
 *     Build/lagom/bin/image -o test.bmp Base/res/wallpapers/sunset-retro.png
 *
 * and then ran to bench:
 *
 *     ~/src/hack/bench.py -n 20 -o bench_foo1.txt \
 *         Build/lagom/bin/image -o test.webp test.bmp
 *
 * ...and then `ministat bench_foo1.txt bench_foo2.txt` to compare.
 *
 * The previous commit increased the time for this command by 38% compared to
 * the before state.
 *
 * With this, it's an 8.6% regression. So still a regression, but a smaller one.
 *
 * Or, in other words, this commit reduces times by 21% compared to the
 * previous commit.
 *
 * Numbers with hyperfine are similar -- with this on top of the previous
 * commit, this is a 7-11% regression, instead of an almost 50% regression.
 *
 * (A local branch that changes how we compute CanonicalCodes so that we
 * actually compress a bit is perf-neutral since the image writing code
 * doesn't change.)
 *
 * `hyperfine 'image -o test.webp test.bmp'`:
 * * Before: 23.7 ms ± 0.7 ms (116 runs)
 * * Previous commit: 33.2 ms ± 0.8 ms (82 runs)
 * * This commit: 25.5 ms ± 0.7 ms (102 runs)
 *
 * `hyperfine 'animation -o wow.webp giphy.gif'`:
 * * Before: 85.5 ms ± 2.0 ms (34 runs)
 * * Previous commit: 127.7 ms ± 4.4 ms (22 runs)
 * * This commit: 95.3 ms ± 2.1 ms (31 runs)
 *
 * `hyperfine 'animation -o wow.webp 7z7c.gif'`:
 * * Before: 12.6 ms ± 0.6 ms (198 runs)
 * * Previous commit: 16.5 ms ± 0.9 ms (153 runs)
 * * This commit: 13.5 ms ± 0.6 ms (186 runs)
 *
 * 2024-05-08 18:57:53 -04:00
 */
#include <AK/BitStream.h>
2023-02-09 03:11:50 +01:00
#include <AK/Stream.h>
#include <LibCompress/Zlib.h>
namespace Compress {
// A prefix-code table that can read symbols from, and write symbols to,
// little-endian bit streams. Decoding state and encoding state are kept in
// separate members (see the private section).
class CanonicalCode {
public:
    CanonicalCode() = default;

    // Reads one symbol from the stream; failures are reported through ErrorOr.
    ErrorOr<u32> read_symbol(LittleEndianInputBitStream&) const;
    // Writes the code for the given symbol to the stream.
    // Defined inline further down in this header.
    ErrorOr<void> write_symbol(LittleEndianOutputBitStream&, u32) const;

    // Shared instances for the fixed literal and fixed distance codes.
    static CanonicalCode const& fixed_literal_codes();
    static CanonicalCode const& fixed_distance_codes();

    // Builds a code from a byte span.
    // NOTE(review): presumably one code length per symbol - confirm in Deflate.cpp.
    static ErrorOr<CanonicalCode> from_bytes(ReadonlyBytes);

private:
    // Bit width that sizes m_prefix_table (1 << 8 == 256 entries).
    static constexpr size_t max_allowed_prefixed_code_length = 8;

    struct PrefixTableEntry {
        u16 symbol_value { 0 };
        u16 code_length { 0 };
    };

    // Decompression - indexed by code
    Vector<u16, 286> m_symbol_codes;
    Vector<u16, 286> m_symbol_values;

    Array<PrefixTableEntry, 1 << max_allowed_prefixed_code_length> m_prefix_table {};
    size_t m_max_prefixed_code_length { 0 };

    // Compression - indexed by symbol
    // Deflate uses a maximum of 288 symbols (maximum of 32 for distances),
    // but this is also used by webp, which can use up to 256 + 24 + (1 << 11) == 2328 symbols.
    // NOTE(review): 288 is presumably only the Vector's inline capacity, so
    // larger tables would still fit by growing - confirm against AK::Vector.
    Vector<u16, 288> m_bit_codes {};
    Vector<u16, 288> m_bit_code_lengths {};
};
/*
 * LibGfx+LibCompress: WebPWriter performance regression reduction
 *
 * This moves both Gfx::CanonicalCode::write_symbol() and
 * Compress::CanonicalCode::write_symbol() inline.
 *
 * It also adds `__attribute__((always_inline))` on the arguments to visit()
 * in the latter. (ALWAYS_INLINE doesn't work on lambdas.)
 *
 * Numbers with `ministat`:
 *
 * I ran once:
 *
 *     Build/lagom/bin/image -o test.bmp Base/res/wallpapers/sunset-retro.png
 *
 * and then ran to bench:
 *
 *     ~/src/hack/bench.py -n 20 -o bench_foo1.txt \
 *         Build/lagom/bin/image -o test.webp test.bmp
 *
 * ...and then `ministat bench_foo1.txt bench_foo2.txt` to compare.
 *
 * The previous commit increased the time for this command by 38% compared to
 * the before state.
 *
 * With this, it's an 8.6% regression. So still a regression, but a smaller one.
 *
 * Or, in other words, this commit reduces times by 21% compared to the
 * previous commit.
 *
 * Numbers with hyperfine are similar -- with this on top of the previous
 * commit, this is a 7-11% regression, instead of an almost 50% regression.
 *
 * (A local branch that changes how we compute CanonicalCodes so that we
 * actually compress a bit is perf-neutral since the image writing code
 * doesn't change.)
 *
 * `hyperfine 'image -o test.webp test.bmp'`:
 * * Before: 23.7 ms ± 0.7 ms (116 runs)
 * * Previous commit: 33.2 ms ± 0.8 ms (82 runs)
 * * This commit: 25.5 ms ± 0.7 ms (102 runs)
 *
 * `hyperfine 'animation -o wow.webp giphy.gif'`:
 * * Before: 85.5 ms ± 2.0 ms (34 runs)
 * * Previous commit: 127.7 ms ± 4.4 ms (22 runs)
 * * This commit: 95.3 ms ± 2.1 ms (31 runs)
 *
 * `hyperfine 'animation -o wow.webp 7z7c.gif'`:
 * * Before: 12.6 ms ± 0.6 ms (198 runs)
 * * Previous commit: 16.5 ms ± 0.9 ms (153 runs)
 * * This commit: 13.5 ms ± 0.6 ms (186 runs)
 *
 * 2024-05-08 18:57:53 -04:00
 */
// Looks up the bit code and bit length stored for `symbol` and hands both to
// the stream's write_bits(). A symbol that lies outside either table
// contributes a zero for the corresponding value. Any error from write_bits()
// is passed straight back to the caller.
ALWAYS_INLINE ErrorOr<void> CanonicalCode::write_symbol(LittleEndianOutputBitStream& stream, u32 symbol) const
{
    // `unsigned` on purpose: it is the type the u16 table entries promote to
    // when mixed with a `0u` fallback, so write_bits() sees the same types.
    unsigned bit_pattern = 0u;
    unsigned bit_count = 0u;

    // The two tables are bounds-checked independently of each other.
    if (symbol < m_bit_codes.size())
        bit_pattern = m_bit_codes[symbol];
    if (symbol < m_bit_code_lengths.size())
        bit_count = m_bit_code_lengths[symbol];

    return stream.write_bits(bit_pattern, bit_count);
}
// Deflate decompressor; a final subclass of GenericZlibDecompressor that adds
// two static entry points and no state of its own.
class DeflateDecompressor final : public GenericZlibDecompressor {
public:
    // Factory: takes a (possibly owned) stream and yields a heap-allocated
    // decompressor, or an error.
    static ErrorOr<NonnullOwnPtr<DeflateDecompressor>> create(MaybeOwned<Stream>);

    // Takes a byte span and yields a ByteBuffer, or an error.
    // NOTE(review): presumably the fully decompressed input - confirm in Deflate.cpp.
    static ErrorOr<ByteBuffer> decompress_all(ReadonlyBytes);

private:
    // Private constructor: moves the buffer and stream into the base class and
    // passes the z_stream pointer through unchanged.
    DeflateDecompressor(AK::FixedArray<u8> buf, MaybeOwned<Stream> io_stream, z_stream* zlib_stream)
        : GenericZlibDecompressor(move(buf), move(io_stream), zlib_stream)
    {
    }
};
// Deflate compressor; a final subclass of GenericZlibCompressor that adds two
// static entry points and no state of its own.
class DeflateCompressor final : public GenericZlibCompressor {
public:
    // Factory: takes a (possibly owned) stream and an optional compression
    // level (GenericZlibCompressionLevel::Default when omitted) and yields a
    // heap-allocated compressor, or an error.
    static ErrorOr<NonnullOwnPtr<DeflateCompressor>> create(
        MaybeOwned<Stream>,
        GenericZlibCompressionLevel = GenericZlibCompressionLevel::Default);

    // Takes a byte span and an optional compression level and yields a
    // ByteBuffer, or an error.
    // NOTE(review): presumably the fully compressed input - confirm in Deflate.cpp.
    static ErrorOr<ByteBuffer> compress_all(
        ReadonlyBytes,
        GenericZlibCompressionLevel = GenericZlibCompressionLevel::Default);

private:
    // Private constructor: moves the buffer and stream into the base class and
    // passes the z_stream pointer through unchanged.
    DeflateCompressor(AK::FixedArray<u8> buf, MaybeOwned<Stream> io_stream, z_stream* zlib_stream)
        : GenericZlibCompressor(move(buf), move(io_stream), zlib_stream)
    {
    }
};
}