// Source: ladybird/Libraries/LibGfx/ImageFormats/WebPSharedLossless.h
// (84 lines, 2.5 KiB, C; captured from the repository web viewer's blame/history view)

/*
* Copyright (c) 2024, Nico Weber <thakis@chromium.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <LibCompress/Deflate.h>
namespace Gfx {
// Permutation of the 19 code-length symbols (0..18).
// NOTE(review): the values match the kCodeLengthCodeOrder table in the WebP
// lossless bitstream specification, which gives the order in which the
// code-length code lengths appear in the stream; confirm against the users
// of this table.
constexpr Array kCodeLengthCodeOrder = {
    17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};
// Canonical prefix code as used by WebP-lossless.
//
// WebP-lossless's canonical codes are almost identical to deflate's. The one
// difference handled here: a code that contains just a single element consumes
// 0 bits to produce that element in webp-lossless, whereas
// Compress::CanonicalCode consumes 1 bit for it.
//
// This class therefore stores either
//   - a bare u32 (the only symbol of a single-element code), or
//   - a Compress::CanonicalCode, to which every other case is dispatched.
class CanonicalCode {
public:
    // Leaves m_code at its default member initializer ({ 0 }).
    CanonicalCode() = default;

    // Factory taking a read-only byte span and returning a code or an error.
    // NOTE(review): the bytes are presumably per-symbol code lengths, as in
    // Compress::CanonicalCode; the definition is not in this header.
    static ErrorOr<CanonicalCode> from_bytes(ReadonlyBytes);

    // Reads one symbol from the given little-endian bit stream.
    ErrorOr<u32> read_symbol(LittleEndianInputBitStream&) const;

    // Writes one symbol to the given little-endian bit stream.
    ErrorOr<void> write_symbol(LittleEndianOutputBitStream&, u32) const;

private:
    // Single-element code: remember only the symbol itself.
    explicit CanonicalCode(u32 single_symbol)
        : m_code(single_symbol)
    {
    }

    // General case: take ownership of an already-built deflate-style code.
    explicit CanonicalCode(Compress::CanonicalCode code)
        : m_code(move(code))
    {
    }

    // Either the lone symbol or the wrapped deflate-style code.
    Variant<u32, Compress::CanonicalCode> m_code { 0 };
};
// Blame annotation for the definition below (commit dated 2024-05-08 18:57:53 -04:00):
//
// LibGfx+LibCompress: WebPWriter performance regression reduction
//
// This moves both Gfx::CanonicalCode::write_symbol() and
// Compress::CanonicalCode::write_symbol() inline. It also adds
// `__attribute__((always_inline))` on the arguments to visit() in the latter.
// (ALWAYS_INLINE doesn't work on lambdas.)
//
// Numbers with `ministat`: I ran once:
//     Build/lagom/bin/image -o test.bmp Base/res/wallpapers/sunset-retro.png
// and then ran to bench:
//     ~/src/hack/bench.py -n 20 -o bench_foo1.txt \
//         Build/lagom/bin/image -o test.webp test.bmp
// ...and then `ministat bench_foo1.txt bench_foo2.txt` to compare.
//
// The previous commit increased the time for this command by 38% compared to
// the before state. With this, it's an 8.6% regression. So still a regression,
// but a smaller one. Or, in other words, this commit reduces times by 21%
// compared to the previous commit.
//
// Numbers with hyperfine are similar -- with this on top of the previous
// commit, this is a 7-11% regression, instead of an almost 50% regression.
// (A local branch that changes how we compute CanonicalCodes so that we
// actually compress a bit is perf-neutral since the image writing code doesn't
// change.)
//
// `hyperfine 'image -o test.webp test.bmp'`:
//   * Before:          23.7 ms ± 0.7 ms (116 runs)
//   * Previous commit: 33.2 ms ± 0.8 ms (82 runs)
//   * This commit:     25.5 ms ± 0.7 ms (102 runs)
//
// `hyperfine 'animation -o wow.webp giphy.gif'`:
//   * Before:          85.5 ms ± 2.0 ms (34 runs)
//   * Previous commit: 127.7 ms ± 4.4 ms (22 runs)
//   * This commit:     95.3 ms ± 2.1 ms (31 runs)
//
// `hyperfine 'animation -o wow.webp 7z7c.gif'`:
//   * Before:          12.6 ms ± 0.6 ms (198 runs)
//   * Previous commit: 16.5 ms ± 0.9 ms (153 runs)
//   * This commit:     13.5 ms ± 0.6 ms (186 runs)
ALWAYS_INLINE ErrorOr<void> CanonicalCode::write_symbol(LittleEndianOutputBitStream& bit_stream, u32 symbol) const
{
TRY(m_code.visit(
[&](u32 single_code) __attribute__((always_inline)) -> ErrorOr<void> { VERIFY(symbol == single_code); return {}; },
LibGfx+LibCompress: WebPWriter performance regression reduction This moves both Gfx::CanonicalCode::write_symbol() and Compress::CanonicalCode::write_symbol() inline. It also adds `__attribute__((always_inline))` on the arguments to visit() in the latter. (ALWAYS_INLINE doesn't work on lambdas.) Numbers with `ministat`: I ran once: Build/lagom/bin/image -o test.bmp Base/res/wallpapers/sunset-retro.png and then ran to bench: ~/src/hack/bench.py -n 20 -o bench_foo1.txt \ Build/lagom/bin/image -o test.webp test.bmp ...and then `ministat bench_foo1.txt bench_foo2.txt` to compare. The previous commit increased the time for this command by 38% compared to the before state. With this, it's an 8.6% regression. So still a regression, but a smaller one. Or, in other words, this commit reduces times by 21% compared to the previous commit. Numbers with hyperfine are similar -- with this on top of the previous commit, this is a 7-11% regression, instead of an almost 50% regression. (A local branch that changes how we compute CanonicalCodes so that we actually compress a bit is perf-neutral since the image writing code doesn't change.) `hyperfine 'image -o test.webp test.bmp'`: * Before: 23.7 ms ± 0.7 ms (116 runs) * Previous commit: 33.2 ms ± 0.8 ms (82 runs) * This commit: 25.5 ms ± 0.7 ms (102 runs) `hyperfine 'animation -o wow.webp giphy.gif'`: * Before: 85.5 ms ± 2.0 ms (34 runs) * Previous commit: 127.7 ms ± 4.4 ms (22 runs) * This commit: 95.3 ms ± 2.1 ms (31 runs) `hyperfine 'animation -o wow.webp 7z7c.gif'`: * Before: 12.6 ms ± 0.6 ms (198 runs) * Previous commit: 16.5 ms ± 0.9 ms (153 runs) * This commit: 13.5 ms ± 0.6 ms (186 runs)
2024-05-08 18:57:53 -04:00
[&](Compress::CanonicalCode const& code) __attribute__((always_inline)) { return code.write_symbol(bit_stream, symbol); }));
return {};
}
// https://developers.google.com/speed/webp/docs/webp_lossless_bitstream_specification#61_overview
// "From here on, we refer to this set as a prefix code group."
//
// Holds a fixed set of five CanonicalCodes, addressed by index.
// The type is movable but not copyable.
class PrefixCodeGroup {
public:
    PrefixCodeGroup() = default;
    PrefixCodeGroup(PrefixCodeGroup&&) = default;
    PrefixCodeGroup(PrefixCodeGroup const&) = delete;

    // Element access; the index is passed straight through to the underlying Array.
    CanonicalCode& operator[](int i)
    {
        return m_codes[i];
    }

    CanonicalCode const& operator[](int i) const
    {
        return m_codes[i];
    }

private:
    Array<CanonicalCode, 5> m_codes;
};
// Distinguishes the two kinds of image handled by the lossless code.
// NOTE(review): the names appear to follow the WebP lossless specification's
// "spatially-coded image" / "entropy-coded image" terminology; confirm at the
// use sites.
enum class ImageKind {
    SpatiallyCoded,
    EntropyCoded,
};
// Transform identifiers. Each enumerator's numeric value equals the two-bit
// pattern quoted from the grammar in the comment above it.
enum TransformType {
    // predictor-tx = %b00 predictor-image
    PREDICTOR_TRANSFORM = 0,

    // color-tx = %b01 color-image
    COLOR_TRANSFORM = 1,

    // subtract-green-tx = %b10
    SUBTRACT_GREEN_TRANSFORM = 2,

    // color-indexing-tx = %b11 color-indexing-image
    COLOR_INDEXING_TRANSFORM = 3,
};
}