/*
 * FFv1 codec
 *
 * Copyright (c) 2024 Lynne
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#ifndef GOLOMB
#ifdef CACHED_SYMBOL_READER
/* Workgroup-shared copy of the context currently being coded. */
shared uint8_t state[CONTEXT_SIZE];
#define WRITE(c, off, val) put_rac_direct(c, state[off], val)
#else
/* Uncached variant: addresses the state byte directly. Relies on a variable
 * named `state_off` being in scope at the point of expansion. */
#define WRITE(c, off, val) put_rac(c, uint64_t(slice_state) + (state_off + off), val)
#endif

/**
 * Range-codes one signed integer.
 *
 * State slot usage, relative to the context base:
 *   0        "value is zero" flag
 *   1..10    unary-coded exponent (index clamped at 1 + 9)
 *   11..21   sign, selected by the exponent (clamped at 11 + 10)
 *   22..31   magnitude bits below the MSB (index clamped at 22 + 9)
 *
 * Note - only handles signed values
 */
void put_symbol(inout RangeCoder c, uint state_off, int v)
{
    bool is_nil = (v == 0);
    WRITE(c, 0, is_nil);
    if (is_nil)
        return;

    const int a = abs(v);
    const int e = findMSB(a);

    /* Exponent: e "true" symbols followed by a terminating "false". */
    for (int i = 0; i < e; i++)
        WRITE(c, 1 + min(i, 9), true);
    WRITE(c, 1 + min(e, 9), false);

    /* Magnitude bits below the MSB, most significant first. */
    for (int i = e - 1; i >= 0; i--)
        WRITE(c, 22 + min(i, 9), bool(bitfieldExtract(a, i, 1)));

    /* Sign. */
    WRITE(c, 22 - 11 + min(e, 10), v < 0);
}

/**
 * Writes one line of samples as raw bits (no prediction, no contexts).
 *
 * Each sample of component `comp` is emitted as `bits` bits, MSB first,
 * through put_rac_equi(). With CACHED_SYMBOL_READER only local invocation 0
 * does any work. For non-RGB builds, planes 1 and 2 have their width and
 * start position shifted by chroma_shift.
 */
void encode_line_pcm(inout SliceContext sc, readonly uimage2D img,
                     ivec2 sp, int y, int p, int comp, int bits)
{
    int w = sc.slice_dim.x;

#ifdef CACHED_SYMBOL_READER
    if (gl_LocalInvocationID.x > 0)
        return;
#endif

#ifndef RGB
    if (p > 0 && p < 3) {
        w >>= chroma_shift.x;
        sp >>= chroma_shift;
    }
#endif

    for (int x = 0; x < w; x++) {
        uint v = imageLoad(img, sp + LADDR(ivec2(x, y)))[comp];
        for (int i = (bits - 1); i >= 0; i--)
            put_rac_equi(sc.c, bool(bitfieldExtract(v, i, 1)));
    }
}

/**
 * Range-coder variant: predicts and encodes one line of one component.
 *
 * For every sample, get_pred() yields a pair d; d[0] is used as the context
 * index and d[1] as the value subtracted from the actual sample. A negative
 * context is mirrored by negating both entries. The residual is passed
 * through fold() and coded with put_symbol() against the context located at
 * state_off + CONTEXT_SIZE*d[0].
 *
 * `run_index` is unused here; it is present so that this function has the
 * same parameter list as the Golomb variant.
 */
void encode_line(inout SliceContext sc, readonly uimage2D img, uint state_off,
                 ivec2 sp, int y, int p, int comp, int bits,
                 uint8_t quant_table_idx, const int run_index)
{
    int w = sc.slice_dim.x;

#ifndef RGB
    if (p > 0 && p < 3) {
        w >>= chroma_shift.x;
        sp >>= chroma_shift;
    }
#endif

    for (int x = 0; x < w; x++) {
        ivec2 d = get_pred(img, sp, ivec2(x, y), comp, w,
                           quant_table_idx,
                           extend_lookup[quant_table_idx] > 0);
        d[1] = int(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]) - d[1];

        if (d[0] < 0)
            d = -d;

        d[1] = fold(d[1], bits);

        uint context_off = state_off + CONTEXT_SIZE*d[0];
#ifdef CACHED_SYMBOL_READER
        /* Each invocation copies one state byte of the selected context into
         * shared memory; invocation 0 then codes the symbol against the
         * shared copy, and every invocation writes its byte back. */
        u8buf sb = u8buf(uint64_t(slice_state) + context_off + gl_LocalInvocationID.x);
        state[gl_LocalInvocationID.x] = sb.v;
        barrier();
        if (gl_LocalInvocationID.x == 0)
#endif

            put_symbol(sc.c, context_off, d[1]);

#ifdef CACHED_SYMBOL_READER
        barrier();
        sb.v = state[gl_LocalInvocationID.x];
#endif
    }
}

#else /* GOLOMB */

/**
 * Golomb variant: predicts and encodes one line of one component.
 *
 * Run mode is entered when the context index d[0] is zero. While in run
 * mode, zero residuals are only counted; a non-zero residual flushes the
 * run (escape bits followed by the remaining count), leaves run mode and is
 * then coded as a VLC symbol. A run still open at the end of the line is
 * flushed after the loop.
 *
 * `run_index` is read and updated, so its value carries over to the
 * caller's next invocation.
 */
void encode_line(inout SliceContext sc, readonly uimage2D img, uint state_off,
                 ivec2 sp, int y, int p, int comp, int bits,
                 uint8_t quant_table_idx, inout int run_index)
{
    int w = sc.slice_dim.x;

#ifndef RGB
    if (p > 0 && p < 3) {
        w >>= chroma_shift.x;
        sp >>= chroma_shift;
    }
#endif

    int run_count = 0;
    bool run_mode = false;

    for (int x = 0; x < w; x++) {
        ivec2 d = get_pred(img, sp, ivec2(x, y), comp, w,
                           quant_table_idx,
                           extend_lookup[quant_table_idx] > 0);
        d[1] = int(imageLoad(img, sp + LADDR(ivec2(x, y)))[comp]) - d[1];

        if (d[0] < 0)
            d = -d;

        d[1] = fold(d[1], bits);

        if (d[0] == 0)
            run_mode = true;

        if (run_mode) {
            if (d[1] != 0) {
                /* A very unlikely loop */
                while (run_count >= 1 << log2_run[run_index]) {
                    run_count -= 1 << log2_run[run_index];
                    run_index++;
                    put_bits(sc.pb, 1, 1);
                }

                put_bits(sc.pb, 1 + log2_run[run_index], run_count);
                if (run_index != 0)
                    run_index--;
                run_count = 0;
                run_mode  = false;
                /* A run is only ever ended by a non-zero residual, so
                 * positive values are shifted down by one before coding. */
                if (d[1] > 0)
                    d[1]--;
            } else {
                run_count++;
            }
        }

        if (!run_mode) {
            VlcState sb = VlcState(uint64_t(slice_state) + state_off + VLC_STATE_SIZE*d[0]);
            Symbol sym = get_vlc_symbol(sb, d[1], bits);
            put_bits(sc.pb, sym.bits, sym.val);
        }
    }

    /* Flush a run that reaches the end of the line. */
    if (run_mode) {
        while (run_count >= (1 << log2_run[run_index])) {
            run_count -= 1 << log2_run[run_index];
            run_index++;
            put_bits(sc.pb, 1, 1);
        }

        if (run_count > 0)
            put_bits(sc.pb, 1, 1);
    }
}
#endif

#ifdef RGB
/**
 * Loads one pixel from the source image(s) and reorders its channels.
 *
 * Reads all four channels from src[0]; when planar_rgb is non-zero,
 * channels 1 .. (2 + transparency) are instead taken from the first channel
 * of src[i]. The result is permuted through fmt_lut.
 */
ivec4 load_components(ivec2 pos)
{
    ivec4 pix = ivec4(imageLoad(src[0], pos));
    if (planar_rgb != 0) {
        for (int i = 1; i < (3 + transparency); i++)
            pix[i] = int(imageLoad(src[i], pos)[0]);
    }

    return ivec4(pix[fmt_lut[0]], pix[fmt_lut[1]],
                 pix[fmt_lut[2]], pix[fmt_lut[3]]);
}

/**
 * Applies the colour transform to a pixel in place.
 *
 * b and r become differences against g, g receives a weighted share of
 * those differences (weights in rct_coef, result shifted right by 2), and
 * rct_offset is added to b and r.
 */
void transform_sample(inout ivec4 pix, ivec2 rct_coef)
{
    pix.b -= pix.g;
    pix.r -= pix.g;
    pix.g += (pix.r*rct_coef.x + pix.b*rct_coef.y) >> 2;
    pix.b += rct_offset;
    pix.r += rct_offset;
}

/**
 * Loads line `y` of the slice into the `tmp` image.
 *
 * The invocations of the workgroup split the line between them, each
 * handling every gl_WorkGroupSize.x-th pixel. The colour transform is
 * applied when apply_rct is set.
 */
void preload_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct)
{
    for (uint x = gl_LocalInvocationID.x; x < w; x += gl_WorkGroupSize.x) {
        ivec2 lpos = sp + LADDR(ivec2(x, y));
        ivec2 pos = sc.slice_pos + ivec2(x, y);

        ivec4 pix = load_components(pos);

        if (expectEXT(apply_rct, true))
            transform_sample(pix, sc.slice_rct_coef);

        imageStore(tmp, lpos, pix);
    }
}
#endif

/**
 * Encodes every line of every component of one slice.
 *
 * In range-coder builds, slice_coding_mode == 1 selects raw (PCM) coding of
 * each sample; otherwise samples are predicted and entropy coded with
 * encode_line(). RGB builds stage each line in `tmp` through preload_rgb()
 * and code the components in the order 1, 2, 0, then 3 when transparency
 * is 1.
 */
void encode_slice(inout SliceContext sc, const uint slice_idx)
{
    ivec2 sp = sc.slice_pos;

#ifndef RGB
    int bits = bits_per_raw_sample;
#else
    /* Low bit depth content in the regular coding mode uses 9-bit residuals;
     * everything else uses the raw bit depth, plus one bit unless PCM coding
     * is selected. The test used to compare the freshly assigned constant
     * (9) against 8, which is always true; it is meant to look at the input
     * bit depth. */
    int bits = 9;
    if (bits_per_raw_sample > 8 || sc.slice_coding_mode != 0)
        bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);

    sp.y = int(gl_WorkGroupID.y)*RGB_LINECACHE;
#endif

#ifndef GOLOMB
    if (sc.slice_coding_mode == 1) {
#ifndef RGB
        for (int c = 0; c < components; c++) {
            int h = sc.slice_dim.y;
            if (c > 0 && c < 3)
                h >>= chroma_shift.y;

            /* Takes into account dual-plane YUV formats */
            int p = min(c, planes - 1);
            int comp = c - p;

            for (int y = 0; y < h; y++)
                encode_line_pcm(sc, src[p], sp, y, p, comp, bits);
        }
#else
        for (int y = 0; y < sc.slice_dim.y; y++) {
            preload_rgb(sc, sp, sc.slice_dim.x, y, false);

            encode_line_pcm(sc, tmp, sp, y, 0, 1, bits);
            encode_line_pcm(sc, tmp, sp, y, 0, 2, bits);
            encode_line_pcm(sc, tmp, sp, y, 0, 0, bits);
            if (transparency == 1)
                encode_line_pcm(sc, tmp, sp, y, 0, 3, bits);
        }
#endif
    } else
#endif
    {
        /* Components 1 and 2 share a quantization table index and a state
         * offset; component 3 gets its own. */
        u8vec4 quant_table_idx = sc.quant_table_idx.xyyz;
        u32vec4 slice_state_off = (slice_idx*codec_planes + uvec4(0, 1, 1, 2))*plane_state_size;

#ifndef RGB
        for (int c = 0; c < components; c++) {
            int run_index = 0;

            int h = sc.slice_dim.y;
            if (c > 0 && c < 3)
                h >>= chroma_shift.y;

            /* Same plane/component mapping as in the PCM path. */
            int p = min(c, planes - 1);
            int comp = c - p;

            for (int y = 0; y < h; y++)
                encode_line(sc, src[p], slice_state_off[c], sp, y, p,
                            comp, bits, quant_table_idx[c], run_index);
        }
#else
        /* A single run_index is shared by all components and lines. */
        int run_index = 0;
        for (int y = 0; y < sc.slice_dim.y; y++) {
            preload_rgb(sc, sp, sc.slice_dim.x, y, true);

            encode_line(sc, tmp, slice_state_off[0],
                        sp, y, 0, 1, bits, quant_table_idx[0], run_index);
            encode_line(sc, tmp, slice_state_off[1],
                        sp, y, 0, 2, bits, quant_table_idx[1], run_index);
            encode_line(sc, tmp, slice_state_off[2],
                        sp, y, 0, 0, bits, quant_table_idx[2], run_index);
            if (transparency == 1)
                encode_line(sc, tmp, slice_state_off[3],
                            sp, y, 0, 3, bits, quant_table_idx[3], run_index);
        }
#endif
    }
}

/**
 * Terminates the slice bitstream and appends its trailer.
 *
 * The trailer is three bytes holding the encoded length and, when `ec` is
 * non-zero, one zero byte followed by four CRC bytes computed over
 * everything written before them. The final size and the offset of the
 * slice from out_data are stored in slice_results. With
 * CACHED_SYMBOL_READER only local invocation 0 does this work.
 */
void finalize_slice(inout SliceContext sc, const uint slice_idx)
{
#ifdef CACHED_SYMBOL_READER
    if (gl_LocalInvocationID.x > 0)
        return;
#endif

#ifdef GOLOMB
    uint32_t enc_len = sc.hdr_len + flush_put_bits(sc.pb);
#else
    uint32_t enc_len = rac_terminate(sc.c);
#endif

    u8buf bs = u8buf(sc.c.bytestream_start);

    /* Append slice length */
    /* Stored as components z, y, x of unpack8() -- presumably most
     * significant byte first; confirm against unpack8()'s definition. */
    u8vec4 enc_len_p = unpack8(enc_len);
    bs[enc_len + 0].v = enc_len_p.z;
    bs[enc_len + 1].v = enc_len_p.y;
    bs[enc_len + 2].v = enc_len_p.x;
    enc_len += 3;

    /* Calculate and write CRC */
    if (ec != 0) {
        bs[enc_len].v = uint8_t(0);
        enc_len++;

        /* Table-driven CRC over the payload, the length field and the zero
         * byte written above. */
        uint32_t crc = crcref;
        for (int i = 0; i < enc_len; i++)
            crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8);

        if (crcref != 0x00000000)
            crc ^= 0x8CD88196;

        u8vec4 crc_p = unpack8(crc);
        bs[enc_len + 0].v = crc_p.x;
        bs[enc_len + 1].v = crc_p.y;
        bs[enc_len + 2].v = crc_p.z;
        bs[enc_len + 3].v = crc_p.w;
        enc_len += 4;
    }

    /* Two results per slice: total size, then offset from out_data. */
    slice_results[slice_idx*2 + 0] = enc_len;
    slice_results[slice_idx*2 + 1] = uint64_t(bs) - uint64_t(out_data);
}

/* Entry point: each workgroup encodes and finalizes one slice. */
void main(void)
{
    const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
    encode_slice(slice_ctx[slice_idx], slice_idx);
    finalize_slice(slice_ctx[slice_idx], slice_idx);
}