vulkan_ffv1: convert to compile-time SPIR-V generation

This commit is contained in:
Lynne 2026-02-03 11:41:16 +01:00
parent 82f0818ff2
commit 3dceda7769
No known key found for this signature in database
GPG key ID: A2FEA5F03F034464
13 changed files with 757 additions and 527 deletions

2
configure vendored
View file

@ -3354,7 +3354,7 @@ av1_vulkan_hwaccel_deps="vulkan"
av1_vulkan_hwaccel_select="av1_decoder"
dpx_vulkan_hwaccel_deps="vulkan spirv_compiler"
dpx_vulkan_hwaccel_select="dpx_decoder"
ffv1_vulkan_hwaccel_deps="vulkan spirv_library"
ffv1_vulkan_hwaccel_deps="vulkan spirv_compiler"
ffv1_vulkan_hwaccel_select="ffv1_decoder"
h263_vaapi_hwaccel_deps="vaapi"
h263_vaapi_hwaccel_select="h263_decoder"

View file

@ -36,6 +36,22 @@ int ff_ffv1_vk_init_quant_table_data(FFVulkanContext *s,
int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s,
FFVkBuffer *vkb, FFV1Context *f);
/**
 * Push-constant payload handed to the FFv1 Vulkan decode shaders.
 *
 * vk_ffv1_end_frame() fills one instance and uploads it with
 * ff_vk_shader_update_push_const() using sizeof(FFv1ShaderParams).
 * The members are declared in the same order as the `pushConstants`
 * block in ffv1_common.glsl; keep both declarations in sync when
 * adding, removing or resizing a field.
 */
typedef struct FFv1ShaderParams {
/* Device address of the slice bitstream data (u8buf in the shader) */
VkDeviceAddress slice_data;
/* Device address of the per-slice context state (u8buf in the shader) */
VkDeviceAddress slice_state;
/* Per quant table: non-zero when quant_tables[i][3][127] or
 * quant_tables[i][4][127] is non-zero; declared as bool[] in the shader */
uint32_t extend_lookup[8];
/* Per quant table: number of contexts (copied from FFV1Context) */
uint16_t context_count[8];
/* Component lookup used by the shader when writing out RGB pixels */
int fmt_lut[4];
/* Picture width and height (u16vec2 in the shader) */
uint16_t img_size[2];
/* Size of one plane's state inside the slice state buffer */
uint32_t plane_state_size;
/* Read as a bool by the shader; non-zero for key frames */
uint32_t key_frame;
/* Starting value of the slice CRC computed in the setup shader */
uint32_t crcref;
/* FFv1 micro version; the shaders compare it together with `version` */
int micro_version;
} FFv1ShaderParams;
typedef struct FFv1VkRCTParameters {
int fmt_lut[4];
int offset;

View file

@ -7,10 +7,13 @@ OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/common.o \
vulkan/ffv1_enc_setup.o vulkan/ffv1_enc.o \
vulkan/ffv1_rct_search.o
OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \
vulkan/rangecoder.o vulkan/ffv1_vlc.o \
vulkan/ffv1_common.o vulkan/ffv1_reset.o \
vulkan/ffv1_dec_setup.o vulkan/ffv1_dec.o
OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/ffv1_dec_setup.comp.spv.o \
vulkan/ffv1_dec_reset.comp.spv.o \
vulkan/ffv1_dec_reset_golomb.comp.spv.o \
vulkan/ffv1_dec.comp.spv.o \
vulkan/ffv1_dec_golomb.comp.spv.o \
vulkan/ffv1_dec_rgb.comp.spv.o \
vulkan/ffv1_dec_rgb_golomb.comp.spv.o
OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/prores_raw_decode.comp.spv.o \
vulkan/prores_raw_idct.comp.spv.o

View file

@ -0,0 +1,252 @@
/*
* FFv1 codec
*
* Copyright (c) 2024 Lynne <dev@lynne.ee>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef VULKAN_FFV1_COMMON_H
#define VULKAN_FFV1_COMMON_H
#include "rangecoder.comp"
#ifdef GOLOMB
#include "ffv1_vlc.comp"
#endif
#define MAX_QUANT_TABLES 8
#define MAX_CONTEXT_INPUTS 5
#define MAX_QUANT_TABLE_SIZE 256
#define MAX_QUANT_TABLE_MASK (MAX_QUANT_TABLE_SIZE - 1)
/* Specialization constants shared by every FFv1 shader that includes this
 * file. The initializers are only the defaults used when a constant is not
 * specialized.
 * NOTE(review): the host is expected to override these through a
 * VkSpecializationInfo at pipeline creation, so the constant_id values must
 * match the host-side map entries - confirm against the shader init code. */
layout (constant_id = 0) const int rgb_linecache = 2;
layout (constant_id = 1) const bool has_crc = false;
layout (constant_id = 2) const int version = 0;
layout (constant_id = 3) const int quant_table_count = 0;
layout (constant_id = 4) const bool has_extend_lookup = false;
layout (constant_id = 5) const int rct_offset = 0;
layout (constant_id = 6) const int colorspace = 0;
layout (constant_id = 7) const bool transparency = false;
layout (constant_id = 8) const bool planar_rgb = false;
layout (constant_id = 9) const int codec_planes = 0;
layout (constant_id = 10) const int color_planes = 0;
layout (constant_id = 11) const int planes = 0;
layout (constant_id = 12) const int bits_per_raw_sample = 0;
layout (constant_id = 13) const int chroma_shift_x = 0;
layout (constant_id = 14) const int chroma_shift_y = 0;
/* Convenience vector built from the two chroma shift constants above */
const ivec2 chroma_shift = ivec2(chroma_shift_x, chroma_shift_y);
/* Per-dispatch parameters. The host uploads a FFv1ShaderParams struct
 * (ffv1_vulkan.h) for this block, so the member order and sizes here must
 * stay in step with that struct. The bool members are stored as uint32_t
 * on the host side. */
layout (push_constant, scalar) uniform pushConstants {
/* Base address of the slice bitstream data */
u8buf slice_data;
/* Base address of the per-slice context state */
u8buf slice_state;
/* Per quant table: whether the 4th/5th context inputs are in use */
bool extend_lookup[MAX_QUANT_TABLES];
/* Per quant table: number of contexts */
uint16_t context_count[MAX_QUANT_TABLES];
/* Component lookup applied when RGB pixels are written out */
ivec4 fmt_lut;
/* Picture width and height */
u16vec2 img_size;
/* Size of one plane's state inside the slice state buffer */
uint plane_state_size;
bool key_frame;
/* Starting value of the slice CRC */
uint32_t crcref;
int micro_version;
};
#define TYPE int32_t
#define VTYPE2 i32vec2
#define VTYPE3 i32vec3
/* Per-slice state kept in the slice_ctx[] storage buffer. It holds the
 * range coder, a bit reader (DECODE) or bit writer (otherwise), and the
 * slice header fields. */
struct SliceContext {
RangeCoder c;
/* Exactly one of the two bitstream helpers is present, selected by DECODE */
#ifdef DECODE
GetBitContext gb;
#else
PutBitContext pb; /* 8*8 bytes */
#endif
/* Slice size and position (x, y) */
ivec2 slice_dim;
ivec2 slice_pos;
ivec2 slice_rct_coef;
/* Quant table index per codec plane; the decoder expands it with .xyyz */
u8vec3 quant_table_idx;
/* The decoder treats the value 1 as PCM (raw) coding */
uint slice_coding_mode;
/* When set, the reset shader reinitializes this slice's contexts even on
 * non-key frames */
bool slice_reset_contexts;
};
/* One SliceContext per slice, indexed by
 * gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x in the shaders */
layout (set = 1, binding = 0) buffer slice_ctx_buf {
SliceContext slice_ctx[];
};
/* Returns the start coordinate of slice boundary number sx out of
 * num_h_slices along a dimension of size width.
 *
 * Streams older than version 4, micro version 3 use a plain proportional
 * split. Newer streams round the boundary to a multiple of the chroma
 * sample size (1 << chroma_shift), with the final boundary mapped back to
 * the real width. */
uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift)
{
    const bool legacy_split = (version < 4) ||
                              ((version == 4) && (micro_version < 3));
    if (legacy_split)
        return width * sx / num_h_slices;

    /* Luma samples covered by one chroma sample, and the width padded to
     * that unit through align() */
    const uint chroma_unit = 1 << chroma_shift;
    const uint padded_width = align(width, chroma_unit);

    /* Nearest boundary that is a multiple of chroma_unit */
    const uint edge = (2 * padded_width * sx + num_h_slices * chroma_unit) /
                      (2 * num_h_slices * chroma_unit) * chroma_unit;

    /* A boundary landing on the padded width is really the picture edge */
    return edge == padded_width ? width : edge;
}
#if defined(ENCODE) || defined(DECODE)
layout (set = 0, binding = 1, scalar) readonly uniform quant_buf {
int16_t quant_table[MAX_QUANT_TABLES]
[MAX_CONTEXT_INPUTS]
[MAX_QUANT_TABLE_SIZE];
};
/* Sample prediction.
 * L is the sample at x offset -1 on the current row, top holds the samples
 * at x offsets { -1, 0 } on the row above. The result is whatever mid_pred()
 * selects among L, the gradient L + T - LT, and T. */
int predict(int L, ivec2 top)
{
    const int LT = top[0];
    const int T  = top[1];
    const int gradient = L + T - LT;

    return mid_pred(L, gradient, T);
}
/* Context index for one sample.
 * cur_l holds the current-row samples at x offsets { -2, -1 }, top_l the
 * samples at x offsets { -1, 0, 1 } on the row above, and top2 the sample
 * at x offset 0 (annotated -2 in the row direction by the original code).
 * The two extra inputs are only added when the selected quant table has a
 * non-zero entry at index 127 of table 3 or table 4. */
int get_context(VTYPE2 cur_l, VTYPE3 top_l, TYPE top2, uint8_t quant_table_idx)
{
    const int left     = cur_l[1];
    const int up_left  = top_l[0];
    const int up       = top_l[1];
    const int up_right = top_l[2];

    int ctx = quant_table[quant_table_idx][0][(left - up_left) & MAX_QUANT_TABLE_MASK] +
              quant_table[quant_table_idx][1][(up_left - up) & MAX_QUANT_TABLE_MASK] +
              quant_table[quant_table_idx][2][(up - up_right) & MAX_QUANT_TABLE_MASK];

    const bool five_inputs = (quant_table[quant_table_idx][3][127] != 0) ||
                             (quant_table[quant_table_idx][4][127] != 0);
    if (five_inputs) {
        const int left2 = cur_l[0];
        const int up2   = top2;

        ctx = ctx +
              quant_table[quant_table_idx][3][(left2 - left) & MAX_QUANT_TABLE_MASK] +
              quant_table[quant_table_idx][4][(up2 - up) & MAX_QUANT_TABLE_MASK];
    }

    return ctx;
}
/* 41-entry lookup table of non-decreasing exponents.
 * NOTE(review): presumably the run-length exponent table indexed by the
 * run_index that the Golomb decode path carries between lines - confirm
 * against the users of this table, which are not part of this file. */
const uint32_t log2_run[41] = {
0, 0, 0, 0, 1, 1, 1, 1,
2, 2, 2, 2, 3, 3, 3, 3,
4, 4, 5, 5, 6, 6, 7, 7,
8, 9, 10, 11, 12, 13, 14, 15,
16, 17, 18, 19, 20, 21, 22, 23,
24,
};
#ifdef RGB
/* RGB variant: rows live in a small line cache, so every row index is
 * wrapped with a bitwise AND against rgb_linecache - 1.
 * NOTE(review): the mask only behaves as a modulo when rgb_linecache is a
 * power of two - confirm the host never specializes it otherwise. */
#define RGB_LBUF (rgb_linecache - 1)
#define LADDR(p) (ivec2((p).x, ((p).y & RGB_LBUF)))
/* Computes the context and the prediction for the sample at offset off
 * inside the slice whose origin in pred is sp.
 *
 * pred             image holding the already decoded samples
 * comp             component of the loaded texel to use
 * sw               slice width, used to clamp the top-right neighbour
 * quant_table_idx  quant table to index
 * extend_lookup    whether the 4th/5th context inputs are enabled for
 *                  that table (also gated by has_extend_lookup)
 *
 * Returns ivec2(context, prediction). Unlike the non-RGB variant, no
 * explicit border checks are made for the first row; the inline comments
 * explain why the wrapped loads yield the required values. */
ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off,
int comp, int sw, uint8_t quant_table_idx, bool extend_lookup)
{
/* At column 0 the left-hand neighbours are fetched from one column to the
 * right and one row up */
const ivec2 yoff_border1 = expectEXT(off.x == 0, false) ? off + ivec2(1, -1) : off;
/* Thanks to the same coincidence as below, we can skip checking if off == 0, 1 */
VTYPE3 top = VTYPE3(TYPE(imageLoad(pred, sp + LADDR(yoff_border1 + ivec2(-1, -1)))[comp]),
TYPE(imageLoad(pred, sp + LADDR(off + ivec2(0, -1)))[comp]),
TYPE(imageLoad(pred, sp + LADDR(off + ivec2(min(1, sw - off.x - 1), -1)))[comp]));
/* Normally, we'd need to check if off != ivec2(0, 0) here, since otherwise, we must
* return zero. However, ivec2(-1, 0) + ivec2(1, -1) == ivec2(0, -1), e.g. previous
* row, 0 offset, same slice, which is zero since we zero out the buffer for RGB */
TYPE cur = TYPE(imageLoad(pred, sp + LADDR(yoff_border1 + ivec2(-1, 0)))[comp]);
/* Three-input context: left/top-left, top-left/top, top/top-right differences */
int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +
quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];
if (has_extend_lookup && extend_lookup) {
/* Sample two to the left; stays zero at column 0 */
TYPE cur2 = TYPE(0);
if (expectEXT(off.x > 0, true)) {
/* At column 1 the sample two to the left is taken from (0, row - 1) */
const ivec2 yoff_border2 = expectEXT(off.x == 1, false) ? ivec2(-1, -1) : ivec2(-2, 0);
cur2 = TYPE(imageLoad(pred, sp + LADDR(off + yoff_border2))[comp]);
}
base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];
/* top-2 became current upon swap when rgb_linecache == 2 */
ivec2 top2_off = off;
if (rgb_linecache != 2)
top2_off += ivec2(0, -2);
TYPE top2 = TYPE(imageLoad(pred, sp + LADDR(top2_off))[comp]);
base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
}
/* context, prediction */
return ivec2(base, predict(cur, VTYPE2(top)));
}
#else
/* Non-RGB variant: samples are addressed directly, no line cache */
#define LADDR(p) (p)
/* Computes the context and the prediction for the sample at offset off
 * inside the slice whose origin in pred is sp.
 *
 * Neighbours that fall outside the slice read as zero. At column 0 the
 * left-hand neighbours are fetched from one column to the right and one row
 * up instead. The 4th/5th context inputs are only added when both
 * has_extend_lookup and extend_lookup are set.
 *
 * Returns ivec2(context, prediction). */
ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off,
               int comp, int sw, uint8_t quant_table_idx, bool extend_lookup)
{
    const ivec2 pos  = sp + off;
    const ivec2 wrap = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);

    /* Row above: top-left, top, top-right */
    TYPE LT = TYPE(0);
    TYPE T  = TYPE(0);
    TYPE RT = TYPE(0);
    if (off.y > 0) {
        if (off != ivec2(0, 1))
            LT = TYPE(imageLoad(pred, pos + ivec2(-1, -1) + wrap)[comp]);
        T  = TYPE(imageLoad(pred, pos + ivec2(0, -1))[comp]);
        RT = TYPE(imageLoad(pred, pos + ivec2(min(1, sw - off.x - 1), -1))[comp]);
    }

    /* Left neighbour; the very first sample of the slice has none */
    TYPE L = TYPE(0);
    if (off != ivec2(0, 0))
        L = TYPE(imageLoad(pred, pos + ivec2(-1, 0) + wrap)[comp]);

    int ctx = quant_table[quant_table_idx][0][(L - LT) & MAX_QUANT_TABLE_MASK] +
              quant_table[quant_table_idx][1][(LT - T) & MAX_QUANT_TABLE_MASK] +
              quant_table[quant_table_idx][2][(T - RT) & MAX_QUANT_TABLE_MASK];

    if (has_extend_lookup && extend_lookup) {
        /* Two to the left; at column 1 it comes from (0, row - 1) */
        TYPE LL = TYPE(0);
        if (off.x > 0 && off != ivec2(1, 0)) {
            const ivec2 wrap2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
            LL = TYPE(imageLoad(pred, pos + ivec2(-2, 0) + wrap2)[comp]);
        }
        ctx += quant_table[quant_table_idx][3][(LL - L) & MAX_QUANT_TABLE_MASK];

        /* Two rows up */
        TYPE TT = TYPE(0);
        if (off.y > 1)
            TT = TYPE(imageLoad(pred, pos + ivec2(0, -2))[comp]);
        ctx += quant_table[quant_table_idx][4][(TT - T) & MAX_QUANT_TABLE_MASK];
    }

    /* context, prediction */
    return ivec2(ctx, predict(L, VTYPE2(LT, T)));
}
#endif /* RGB */
#endif /* ENCODE || DECODE */
#endif /* VULKAN_FFV1_COMMON_H */

View file

@ -20,14 +20,23 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef GOLOMB
#ifdef CACHED_SYMBOL_READER
shared uint8_t state[CONTEXT_SIZE];
#define READ(c, off) get_rac_direct(c, state[off])
#else
#define READ(c, off) get_rac(c, uint64_t(slice_state) + (state_off + off))
#endif
#pragma shader_stage(compute)
#extension GL_GOOGLE_include_directive : require
#define DECODE
#include "common.comp"
#include "ffv1_common.glsl"
layout (set = 1, binding = 1, scalar) readonly buffer slice_offsets_buf {
u32vec2 slice_offsets[];
};
layout (set = 1, binding = 2, scalar) writeonly buffer slice_status_buf {
uint32_t slice_status[];
};
layout (set = 1, binding = 3) uniform uimage2D dec[];
#ifndef GOLOMB
#define READ(c, off) get_rac(c, uint64_t(slice_state) + (state_off + off))
int get_isymbol(inout RangeCoder c, uint state_off)
{
if (READ(c, 0))
@ -56,11 +65,6 @@ int get_isymbol(inout RangeCoder c, uint state_off)
void decode_line_pcm(inout SliceContext sc, ivec2 sp, int w, int y, int p, int bits)
{
#ifdef CACHED_SYMBOL_READER
if (gl_LocalInvocationID.x > 0)
return;
#endif
#ifndef RGB
if (p > 0 && p < 3) {
w = ceil_rshift(w, chroma_shift.x);
@ -79,7 +83,7 @@ void decode_line_pcm(inout SliceContext sc, ivec2 sp, int w, int y, int p, int b
void decode_line(inout SliceContext sc, ivec2 sp, int w,
int y, int p, int bits, uint state_off,
uint8_t quant_table_idx, const int run_index)
uint8_t quant_table_idx, int run_index)
{
#ifndef RGB
if (p > 0 && p < 3) {
@ -90,34 +94,28 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
for (int x = 0; x < w; x++) {
ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w,
quant_table_idx, extend_lookup[quant_table_idx] > 0);
quant_table_idx, extend_lookup[quant_table_idx]);
uint context_off = state_off + CONTEXT_SIZE*abs(pr[0]);
#ifdef CACHED_SYMBOL_READER
u8buf sb = u8buf(uint64_t(slice_state) + context_off + gl_LocalInvocationID.x);
state[gl_LocalInvocationID.x] = sb.v;
barrier();
if (gl_LocalInvocationID.x == 0) {
#endif
int diff = get_isymbol(sc.c, context_off);
if (pr[0] < 0)
diff = -diff;
int diff = get_isymbol(sc.c, context_off);
if (pr[0] < 0)
diff = -diff;
uint v = zero_extend(pr[1] + diff, bits);
imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v));
#ifdef CACHED_SYMBOL_READER
}
barrier();
sb.v = state[gl_LocalInvocationID.x];
#endif
uint v = zero_extend(pr[1] + diff, bits);
imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v));
}
}
#else
void golomb_init(inout SliceContext sc)
{
if (version == 3 && micro_version > 1 || version > 3)
get_rac_internal(sc.c, sc.c.range * 129 >> 8);
#else /* GOLOMB */
uint64_t ac_byte_count = sc.c.bytestream - sc.c.bytestream_start - 1;
init_get_bits(sc.gb, u8buf(sc.c.bytestream_start + ac_byte_count),
int(sc.c.bytestream_end - sc.c.bytestream_start - ac_byte_count));
}
void decode_line(inout SliceContext sc, ivec2 sp, int w,
int y, int p, int bits, uint state_off,
@ -137,7 +135,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
ivec2 pos = sp + ivec2(x, y);
int diff;
ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w,
quant_table_idx, extend_lookup[quant_table_idx] > 0);
quant_table_idx, extend_lookup[quant_table_idx]);
uint context_off = state_off + VLC_STATE_SIZE*abs(pr[0]);
VlcState sb = VlcState(uint64_t(slice_state) + context_off);
@ -209,7 +207,7 @@ void writeout_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct)
pix.r = int(imageLoad(dec[2], lpos)[0]);
pix.g = int(imageLoad(dec[0], lpos)[0]);
pix.b = int(imageLoad(dec[1], lpos)[0]);
if (transparency != 0)
if (transparency)
pix.a = int(imageLoad(dec[3], lpos)[0]);
if (expectEXT(apply_rct, true))
@ -219,7 +217,7 @@ void writeout_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct)
pix[fmt_lut[2]], pix[fmt_lut[3]]);
imageStore(dst[0], pos, pix);
if (planar_rgb != 0) {
if (planar_rgb) {
for (int i = 1; i < color_planes; i++)
imageStore(dst[i], pos, ivec4(pix[i]));
}
@ -232,71 +230,73 @@ void decode_slice(inout SliceContext sc, const uint slice_idx)
int w = sc.slice_dim.x;
ivec2 sp = sc.slice_pos;
#ifndef RGB
int bits = bits_per_raw_sample;
#else
int bits = 9;
#ifdef RGB
bits = 9;
if (bits != 8 || sc.slice_coding_mode != 0)
bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);
sp.y = int(gl_WorkGroupID.y)*RGB_LINECACHE;
sp.y = int(gl_WorkGroupID.y)*rgb_linecache;
#endif
/* PCM coding */
#ifndef GOLOMB
/* PCM coding */
if (sc.slice_coding_mode == 1) {
#ifndef RGB
for (int p = 0; p < planes; p++) {
int h = sc.slice_dim.y;
if (p > 0 && p < 3)
h = ceil_rshift(h, chroma_shift.y);
for (int y = 0; y < h; y++)
decode_line_pcm(sc, sp, w, y, p, bits);
}
#else
#ifdef RGB
for (int y = 0; y < sc.slice_dim.y; y++) {
for (int p = 0; p < color_planes; p++)
decode_line_pcm(sc, sp, w, y, p, bits);
writeout_rgb(sc, sp, w, y, false);
}
#endif
} else
/* Arithmetic coding */
#endif
{
u8vec4 quant_table_idx = sc.quant_table_idx.xyyz;
u32vec4 slice_state_off = (slice_idx*codec_planes + uvec4(0, 1, 1, 2))*plane_state_size;
#ifndef RGB
#else
for (int p = 0; p < planes; p++) {
int h = sc.slice_dim.y;
if (p > 0 && p < 3)
h = ceil_rshift(h, chroma_shift.y);
int run_index = 0;
for (int y = 0; y < h; y++)
decode_line(sc, sp, w, y, p, bits,
slice_state_off[p], quant_table_idx[p], run_index);
}
#else
int run_index = 0;
for (int y = 0; y < sc.slice_dim.y; y++) {
for (int p = 0; p < color_planes; p++)
decode_line(sc, sp, w, y, p, bits,
slice_state_off[p], quant_table_idx[p], run_index);
writeout_rgb(sc, sp, w, y, true);
decode_line_pcm(sc, sp, w, y, p, bits);
}
#endif
return;
}
#endif
u8vec4 quant_table_idx = sc.quant_table_idx.xyyz;
u32vec4 slice_state_off = (slice_idx*codec_planes +
uvec4(0, 1, 1, 2))*plane_state_size;
#ifdef GOLOMB
golomb_init(sc);
#endif
#ifdef RGB
int run_index = 0;
for (int y = 0; y < sc.slice_dim.y; y++) {
for (int p = 0; p < color_planes; p++)
decode_line(sc, sp, w, y, p, bits,
slice_state_off[p], quant_table_idx[p], run_index);
writeout_rgb(sc, sp, w, y, true);
}
#else
for (int p = 0; p < planes; p++) {
int h = sc.slice_dim.y;
if (p > 0 && p < 3)
h = ceil_rshift(h, chroma_shift.y);
int run_index = 0;
for (int y = 0; y < h; y++)
decode_line(sc, sp, w, y, p, bits,
slice_state_off[p], quant_table_idx[p], run_index);
}
#endif
}
void main(void)
{
const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
decode_slice(slice_ctx[slice_idx], slice_idx);
uint32_t status = corrupt ? uint32_t(corrupt) : overread;

View file

@ -0,0 +1,27 @@
/*
* FFv1 codec
*
* Copyright (c) 2026 Lynne <dev@lynne.ee>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#pragma shader_stage(compute)
#extension GL_GOOGLE_include_directive : require
#define GOLOMB
#include "ffv1_dec.comp.glsl"

View file

@ -0,0 +1,63 @@
/*
* FFv1 codec
*
* Copyright (c) 2024 Lynne <dev@lynne.ee>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#pragma shader_stage(compute)
#extension GL_GOOGLE_include_directive : require
#include "common.comp"
#include "ffv1_common.glsl"
/* Reinitializes the context state of one plane of one slice.
 * Workgroup x/y select the slice, workgroup z selects the plane, and the
 * invocations of a workgroup stride over that plane's state together. */
void main(void)
{
    const uint slice = gl_WorkGroupID.x + gl_NumWorkGroups.x*gl_WorkGroupID.y;

    /* Nothing to do unless this is a key frame or the slice asked for a reset */
    if (!(key_frame || slice_ctx[slice].slice_reset_contexts))
        return;

    const uint plane = gl_WorkGroupID.z;
    const uint lane  = gl_LocalInvocationID.x;
    const uint lanes = gl_WorkGroupSize.x;

    const uint8_t qidx = slice_ctx[slice].quant_table_idx[plane];
    const uint nb_contexts = context_count[qidx];

    /* Start of this slice's state; each slice owns codec_planes planes */
    const uint64_t slice_base = uint64_t(slice_state) +
                                slice*plane_state_size*codec_planes;

#ifdef GOLOMB
    /* One VlcState per context, one context per invocation per pass */
    uint64_t cursor = slice_base +
                      (plane*(plane_state_size/VLC_STATE_SIZE) + lane)*VLC_STATE_SIZE;

    uint idx = lane;
    while (idx < nb_contexts) {
        VlcState vs = VlcState(cursor);
        vs.drift     =  int16_t(0);
        vs.error_sum = uint16_t(4);
        vs.bias      =   int8_t(0);
        vs.count     =  uint8_t(1);

        cursor += lanes*VLC_STATE_SIZE;
        idx    += lanes;
    }
#else
    /* Range coder states are bytes set to 0x80, written one dword
     * (4 states) per invocation per pass */
    uint64_t cursor = slice_base +
                      plane*plane_state_size +
                      (lane << 2 /* dwords */); /* Bytes */
    const uint nb_dwords = nb_contexts*(CONTEXT_SIZE /* bytes */ >> 2 /* dwords */);

    uint idx = lane;
    while (idx < nb_dwords) {
        u32buf(cursor).v = 0x80808080;

        /* Byte step per pass, kept as CONTEXT_SIZE >> 3 like the original;
         * this equals one dword per invocation only while CONTEXT_SIZE is 32 */
        cursor += lanes*(CONTEXT_SIZE >> 3 /* 1/8th of context */);
        idx    += lanes;
    }
#endif
}

View file

@ -0,0 +1,27 @@
/*
* FFv1 codec
*
* Copyright (c) 2026 Lynne <dev@lynne.ee>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#pragma shader_stage(compute)
#extension GL_GOOGLE_include_directive : require
#define GOLOMB
#include "ffv1_dec_reset.comp.glsl"

View file

@ -0,0 +1,30 @@
/*
* FFv1 codec
*
* Copyright (c) 2026 Lynne <dev@lynne.ee>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#pragma shader_stage(compute)
#extension GL_GOOGLE_include_directive : require
#extension GL_EXT_shader_image_load_formatted : require
layout (set = 1, binding = 4) writeonly uniform uimage2D dst[];
#define RGB
#include "ffv1_dec.comp.glsl"

View file

@ -0,0 +1,27 @@
/*
* FFv1 codec
*
* Copyright (c) 2026 Lynne <dev@lynne.ee>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#pragma shader_stage(compute)
#extension GL_GOOGLE_include_directive : require
#define GOLOMB
#include "ffv1_dec_rgb.comp.glsl"

View file

@ -20,6 +20,23 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#pragma shader_stage(compute)
#extension GL_GOOGLE_include_directive : require
#include "common.comp"
#include "ffv1_common.glsl"
layout (set = 0, binding = 1, scalar) uniform crc_ieee_buf {
uint32_t crc_ieee[256];
};
layout (set = 1, binding = 1, scalar) readonly buffer slice_offsets_buf {
u32vec2 slice_offsets[];
};
layout (set = 1, binding = 2, scalar) writeonly buffer slice_status_buf {
uint32_t slice_status[];
};
uint8_t setup_state[CONTEXT_SIZE];
uint get_usymbol(inout RangeCoder c)
@ -98,21 +115,9 @@ bool decode_slice_header(inout SliceContext sc)
return false;
}
void golomb_init(inout SliceContext sc)
{
if (version == 3 && micro_version > 1 || version > 3) {
setup_state[0] = uint8_t(129);
get_rac_direct(sc.c, setup_state[0]);
}
uint64_t ac_byte_count = sc.c.bytestream - sc.c.bytestream_start - 1;
init_get_bits(sc.gb, u8buf(sc.c.bytestream_start + ac_byte_count),
int(sc.c.bytestream_end - sc.c.bytestream_start - ac_byte_count));
}
void main(void)
{
const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
u8buf bs = u8buf(slice_data + slice_offsets[slice_idx].x);
uint32_t slice_size = slice_offsets[slice_idx].y;
@ -125,10 +130,7 @@ void main(void)
decode_slice_header(slice_ctx[slice_idx]);
if (golomb == 1)
golomb_init(slice_ctx[slice_idx]);
if (ec != 0 && check_crc != 0) {
if (has_crc) {
uint32_t crc = crcref;
for (int i = 0; i < slice_size; i++)
crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8);

View file

@ -20,6 +20,14 @@
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* Default range coder context size, plus the uniform buffer holding the
 * 512-byte zero_one_state table at set 0, binding 0. Both are skipped when
 * the including shader has already defined CONTEXT_SIZE.
 * NOTE(review): in that case the includer presumably has to provide its own
 * zero_one_state declaration - confirm against the other users of this file. */
#ifndef CONTEXT_SIZE
#define CONTEXT_SIZE 32
layout (set = 0, binding = 0, scalar) uniform rangecoder_buf {
uint8_t zero_one_state[512];
};
#endif
struct RangeCoder {
uint64_t bytestream_start;
uint64_t bytestream;
@ -42,8 +50,6 @@ void rac_init(out RangeCoder r, u8buf data, uint buf_size)
r.outstanding_byte = uint8_t(0xFF);
}
#if !defined(DECODE)
#ifdef FULL_RENORM
/* Full renorm version that can handle outstanding_byte == 0xFF */
void renorm_encoder(inout RangeCoder c)
@ -178,8 +184,6 @@ uint32_t rac_terminate(inout RangeCoder c)
return uint32_t(uint64_t(c.bytestream) - uint64_t(c.bytestream_start));
}
#else
/* Decoder */
uint overread = 0;
bool corrupt = false;
@ -243,5 +247,3 @@ bool get_rac_equi(inout RangeCoder c)
{
return get_rac_internal(c, c.range >> 1);
}
#endif

View file

@ -23,18 +23,30 @@
#include "ffv1.h"
#include "ffv1_vulkan.h"
#include "libavutil/vulkan_spirv.h"
#include "libavutil/mem.h"
#define RGB_LINECACHE 2
extern const char *ff_source_common_comp;
extern const char *ff_source_rangecoder_comp;
extern const char *ff_source_ffv1_vlc_comp;
extern const char *ff_source_ffv1_common_comp;
extern const char *ff_source_ffv1_dec_setup_comp;
extern const char *ff_source_ffv1_reset_comp;
extern const char *ff_source_ffv1_dec_comp;
extern const unsigned char ff_ffv1_dec_setup_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_setup_comp_spv_len;
extern const unsigned char ff_ffv1_dec_reset_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_reset_comp_spv_len;
extern const unsigned char ff_ffv1_dec_reset_golomb_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_reset_golomb_comp_spv_len;
extern const unsigned char ff_ffv1_dec_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_comp_spv_len;
extern const unsigned char ff_ffv1_dec_rgb_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_rgb_comp_spv_len;
extern const unsigned char ff_ffv1_dec_golomb_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_golomb_comp_spv_len;
extern const unsigned char ff_ffv1_dec_rgb_golomb_comp_spv_data[];
extern const unsigned int ff_ffv1_dec_rgb_golomb_comp_spv_len;
const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = {
.codec_id = AV_CODEC_ID_FFV1,
@ -64,80 +76,15 @@ typedef struct FFv1VulkanDecodeContext {
FFVulkanShader reset;
FFVulkanShader decode;
FFVkBuffer rangecoder_static_buf;
FFVkBuffer rangecoder_buf;
FFVkBuffer quant_buf;
FFVkBuffer crc_tab_buf;
FFVkBuffer crc_buf;
AVBufferPool *slice_state_pool;
AVBufferPool *slice_offset_pool;
AVBufferPool *slice_status_pool;
} FFv1VulkanDecodeContext;
typedef struct FFv1VkParameters {
VkDeviceAddress slice_data;
VkDeviceAddress slice_state;
int fmt_lut[4];
uint32_t img_size[2];
uint32_t chroma_shift[2];
uint32_t plane_state_size;
uint32_t crcref;
int rct_offset;
uint8_t extend_lookup[8];
uint8_t bits_per_raw_sample;
uint8_t quant_table_count;
uint8_t version;
uint8_t micro_version;
uint8_t key_frame;
uint8_t planes;
uint8_t codec_planes;
uint8_t color_planes;
uint8_t transparency;
uint8_t planar_rgb;
uint8_t colorspace;
uint8_t ec;
uint8_t golomb;
uint8_t check_crc;
uint8_t padding[3];
} FFv1VkParameters;
static void add_push_data(FFVulkanShader *shd)
{
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
GLSLC(1, u8buf slice_data; );
GLSLC(1, u8buf slice_state; );
GLSLC(0, );
GLSLC(1, ivec4 fmt_lut; );
GLSLC(1, uvec2 img_size; );
GLSLC(1, uvec2 chroma_shift; );
GLSLC(0, );
GLSLC(1, uint plane_state_size; );
GLSLC(1, uint32_t crcref; );
GLSLC(1, int rct_offset; );
GLSLC(0, );
GLSLC(1, uint8_t extend_lookup[8]; );
GLSLC(1, uint8_t bits_per_raw_sample; );
GLSLC(1, uint8_t quant_table_count; );
GLSLC(1, uint8_t version; );
GLSLC(1, uint8_t micro_version; );
GLSLC(1, uint8_t key_frame; );
GLSLC(1, uint8_t planes; );
GLSLC(1, uint8_t codec_planes; );
GLSLC(1, uint8_t color_planes; );
GLSLC(1, uint8_t transparency; );
GLSLC(1, uint8_t planar_rgb; );
GLSLC(1, uint8_t colorspace; );
GLSLC(1, uint8_t ec; );
GLSLC(1, uint8_t golomb; );
GLSLC(1, uint8_t check_crc; );
GLSLC(1, uint8_t padding[3]; );
GLSLC(0, }; );
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters),
VK_SHADER_STAGE_COMPUTE_BIT);
}
static int vk_ffv1_start_frame(AVCodecContext *avctx,
const AVBufferRef *buffer_ref,
av_unused const uint8_t *buffer,
@ -291,13 +238,10 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
FFV1Context *f = avctx->priv_data;
FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
FFv1VkParameters pd;
FFv1VkResetParameters pd_reset;
AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
enum AVPixelFormat sw_format = hwfc->sw_format;
int bits = f->avctx->bits_per_raw_sample > 0 ? f->avctx->bits_per_raw_sample : 8;
int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) &&
!(sw_format == AV_PIX_FMT_YA8);
int color_planes = av_pix_fmt_desc_get(avctx->sw_pix_fmt)->nb_components;
@ -408,39 +352,25 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
VK_FORMAT_UNDEFINED);
ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup);
pd = (FFv1VkParameters) {
FFv1ShaderParams pd = {
.slice_data = slices_buf->address,
.slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
.img_size[0] = f->picture.f->width,
.img_size[1] = f->picture.f->height,
.chroma_shift[0] = f->chroma_h_shift,
.chroma_shift[1] = f->chroma_v_shift,
.plane_state_size = fp->plane_state_size,
.crcref = f->crcref,
.rct_offset = 1 << bits,
.bits_per_raw_sample = bits,
.quant_table_count = f->quant_table_count,
.version = f->version,
.micro_version = f->micro_version,
.key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY,
.planes = av_pix_fmt_count_planes(sw_format),
.codec_planes = f->plane_count,
.color_planes = color_planes,
.transparency = f->transparency,
.planar_rgb = ff_vk_mt_is_np_rgb(sw_format) &&
(ff_vk_count_images((AVVkFrame *)f->picture.f->data[0]) > 1),
.colorspace = f->colorspace,
.ec = f->ec,
.golomb = f->ac == AC_GOLOMB_RICE,
.check_crc = !!(avctx->err_recognition & AV_EF_CRCCHECK),
.crcref = f->crcref,
.micro_version = f->micro_version,
};
for (int i = 0; i < f->quant_table_count; i++)
pd.extend_lookup[i] = (f->quant_tables[i][3][127] != 0) ||
(f->quant_tables[i][4][127] != 0);
for (int i = 0; i < f->quant_table_count; i++) {
pd.context_count[i] = f->context_count[i];
pd.extend_lookup[i] = f->quant_tables[i][3][127] ||
f->quant_tables[i][4][127];
}
/* For some reason the C FFv1 encoder/decoder treats these differently */
if (sw_format == AV_PIX_FMT_GBRP10 || sw_format == AV_PIX_FMT_GBRP12 ||
@ -451,7 +381,7 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
ff_vk_shader_update_push_const(&ctx->s, exec, &fv->setup,
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd), &pd);
0, sizeof(FFv1ShaderParams), &pd);
vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
@ -476,21 +406,9 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
VK_FORMAT_UNDEFINED);
ff_vk_exec_bind_shader(&ctx->s, exec, reset_shader);
pd_reset = (FFv1VkResetParameters) {
.slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
.plane_state_size = fp->plane_state_size,
.codec_planes = f->plane_count,
.key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY,
.version = f->version,
.micro_version = f->micro_version,
};
for (int i = 0; i < f->quant_table_count; i++)
pd_reset.context_count[i] = f->context_count[i];
ff_vk_shader_update_push_const(&ctx->s, exec, reset_shader,
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd_reset), &pd_reset);
0, sizeof(FFv1ShaderParams), &pd);
/* Sync between setup and reset shaders */
ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_state,
@ -530,27 +448,33 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
slice_state,
0, fp->slice_data_size*f->slice_count,
VK_FORMAT_UNDEFINED);
ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
decode_dst, decode_dst_view,
1, 1,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader,
1, 1, 0,
slice_offset,
0, 2*f->slice_count*sizeof(uint32_t),
VK_FORMAT_UNDEFINED);
ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader,
1, 2, 0,
slice_status,
0, 2*f->slice_count*sizeof(uint32_t),
VK_FORMAT_UNDEFINED);
ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
decode_dst, decode_dst_view,
1, 3,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
if (is_rgb)
ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
f->picture.f, vp->view.out,
1, 3,
1, 4,
VK_IMAGE_LAYOUT_GENERAL,
VK_NULL_HANDLE);
ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader);
ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader,
VK_SHADER_STAGE_COMPUTE_BIT,
0, sizeof(pd), &pd);
0, sizeof(FFv1ShaderParams), &pd);
/* Sync probabilities between reset and decode shaders */
ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_state,
@ -602,329 +526,175 @@ fail:
return 0;
}
/**
 * Append the definitions shared by the FFv1 decode shaders to shd->src.
 *
 * Emits the DECODE define, the RGB_LINECACHE, CONTEXT_SIZE and
 * MAX_QUANT_TABLE_MASK constants, the TYPE/VTYPE2/VTYPE3 sample type
 * defines, and finally the range coder and common FFv1 GLSL sources.
 *
 * @param shd      shader whose source buffer is appended to
 * @param use32bit nonzero selects 32-bit sample types, zero selects 16-bit
 */
static void define_shared_code(FFVulkanShader *shd, int use32bit)
{
    int sample_bits = 16;

    if (use32bit)
        sample_bits = 32;

    GLSLC(0, #define DECODE);

    /* Constants mirrored from the C side */
    av_bprintf(&shd->src, "#define RGB_LINECACHE %i\n", RGB_LINECACHE);
    av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n", CONTEXT_SIZE);
    av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n",
               MAX_QUANT_TABLE_MASK);

    /* Scalar and vector sample types, sized by sample_bits */
    GLSLF(0, #define TYPE int%i_t, sample_bits);
    GLSLF(0, #define VTYPE2 i%ivec2, sample_bits);
    GLSLF(0, #define VTYPE3 i%ivec3, sample_bits);

    /* Shared GLSL sources, appended after the defines above */
    GLSLD(ff_source_rangecoder_comp);
    GLSLD(ff_source_ffv1_common_comp);
}
/*
 * Initialise the FFv1 decoder "setup" compute shader.
 *
 * NOTE(review): this span shows the pre-change and post-change lines of the
 * same commit side by side (diff markers are missing), so it is not
 * compilable as displayed. Two variants are visible:
 *  - one takes an FFVkSPIRVCompiler, assembles GLSL into shd->src with
 *    GLSLD()/av_bprintf() and compiles it through spv->compile_shader();
 *  - the other takes a VkSpecializationInfo, calls ff_vk_shader_load() and
 *    links the prebuilt ff_ffv1_dec_setup_comp_spv_data blob.
 *
 * Both variants register a push-constant range of sizeof(FFv1ShaderParams)
 * for the compute stage, add a descriptor set of uniform buffers and a
 * descriptor set of three storage buffers, and finish with
 * ff_vk_shader_register_exec() on the given pool.
 *
 * Returns err; RET() is presumably the usual "store in err and goto fail on
 * error" wrapper (its definition is not visible here, confirm).
 */
static int init_setup_shader(FFV1Context *f, FFVulkanContext *s,
FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
FFVulkanShader *shd)
FFVkExecPool *pool, FFVulkanShader *shd,
VkSpecializationInfo *sl)
{
int err;
FFVulkanDescriptorSetBinding *desc_set;
uint8_t *spv_data;
size_t spv_len;
void *spv_opaque = NULL;
/* Workgroup size is 1x1x1 in both the ff_vk_shader_load() and the
 * ff_vk_shader_init() form below. */
ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
(uint32_t []) { 1, 1, 1 }, 0);
RET(ff_vk_shader_init(s, shd, "ffv1_dec_setup",
VK_SHADER_STAGE_COMPUTE_BIT,
(const char *[]) { "GL_EXT_buffer_reference",
"GL_EXT_buffer_reference2" }, 2,
1, 1, 1,
0));
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1ShaderParams),
VK_SHADER_STAGE_COMPUTE_BIT);
/* Common codec header */
GLSLD(ff_source_common_comp);
add_push_data(shd);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
desc_set = (FFVulkanDescriptorSetBinding []) {
{
.name = "rangecoder_static_buf",
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.mem_layout = "scalar",
.buf_content = "uint8_t zero_one_state[512];",
const FFVulkanDescriptorSetBinding desc_set_const[] = {
{ /* rangecoder_buf */
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
{
.name = "crc_ieee_buf",
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.mem_layout = "scalar",
.buf_content = "uint32_t crc_ieee[256];",
},
{
.name = "quant_buf",
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.mem_layout = "scalar",
.buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
"[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
{ /* crc_ieee_buf */
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
};
/* Uniform-buffer set: the desc_set_const form passes 2 bindings, the
 * desc_set form passes 3 (it additionally lists quant_buf). */
ff_vk_shader_add_descriptor_set(s, shd, desc_set_const, 2, 1, 0);
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 1, 0));
define_shared_code(shd, 0 /* Irrelevant */);
desc_set = (FFVulkanDescriptorSetBinding []) {
{
.name = "slice_data_buf",
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.buf_content = "SliceContext slice_ctx",
.buf_elems = f->max_slice_count,
const FFVulkanDescriptorSetBinding desc_set[] = {
{ /* slice_data_buf */
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
{
.name = "slice_offsets_buf",
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.mem_quali = "readonly",
.buf_content = "u32vec2 slice_offsets",
.buf_elems = 2*f->max_slice_count,
{ /* slice_offsets_buf */
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
{
.name = "slice_status_buf",
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.mem_quali = "writeonly",
.buf_content = "uint32_t slice_status",
.buf_elems = 2*f->max_slice_count,
{ /* slice_status_buf */
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
};
/* Storage-buffer set: three bindings (slice data, offsets, status) in
 * both forms; only one of the two calls wraps the result in RET(). */
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0));
ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0);
GLSLD(ff_source_ffv1_dec_setup_comp);
RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
&spv_opaque));
RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
RET(ff_vk_shader_link(s, shd,
ff_ffv1_dec_setup_comp_spv_data,
ff_ffv1_dec_setup_comp_spv_len, "main"));
RET(ff_vk_shader_register_exec(s, pool, shd));
fail:
/* Only the runtime-compile variant has a compiler result to release */
if (spv_opaque)
spv->free_shader(spv, &spv_opaque);
return err;
}
/*
 * Initialise the FFv1 decoder "reset" compute shader.
 *
 * NOTE(review): this span shows the pre-change and post-change lines of the
 * same commit side by side (diff markers are missing), so it is not
 * compilable as displayed. One variant builds GLSL at runtime and compiles
 * it through spv->compile_shader(); the other calls ff_vk_shader_load() with
 * a VkSpecializationInfo and links a prebuilt SPIR-V blob.
 *
 * The workgroup X size is wg_dim in both variants: the device's
 * maxComputeWorkGroupSize[0] limit capped at 1024.
 *
 * ac == AC_GOLOMB_RICE picks the Golomb flavour: via "#define GOLOMB" plus
 * ff_source_ffv1_vlc_comp in the runtime-compile lines, and via
 * ff_ffv1_dec_reset_golomb_comp_spv_data instead of
 * ff_ffv1_dec_reset_comp_spv_data in the prebuilt lines.
 *
 * Returns err; RET() is presumably the usual "store in err and goto fail on
 * error" wrapper (its definition is not visible here, confirm).
 */
static int init_reset_shader(FFV1Context *f, FFVulkanContext *s,
FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
FFVulkanShader *shd, int ac)
FFVkExecPool *pool, FFVulkanShader *shd,
VkSpecializationInfo *sl, int ac)
{
int err;
FFVulkanDescriptorSetBinding *desc_set;
uint8_t *spv_data;
size_t spv_len;
void *spv_opaque = NULL;
int wg_dim = FFMIN(s->props.properties.limits.maxComputeWorkGroupSize[0], 1024);
RET(ff_vk_shader_init(s, shd, "ffv1_dec_reset",
VK_SHADER_STAGE_COMPUTE_BIT,
(const char *[]) { "GL_EXT_buffer_reference",
"GL_EXT_buffer_reference2" }, 2,
wg_dim, 1, 1,
0));
ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
(uint32_t []) { wg_dim, 1, 1 }, 0);
if (ac == AC_GOLOMB_RICE)
av_bprintf(&shd->src, "#define GOLOMB\n");
/* Common codec header */
GLSLD(ff_source_common_comp);
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES);
GLSLC(1, u8buf slice_state; );
GLSLC(1, uint plane_state_size; );
GLSLC(1, uint8_t codec_planes; );
GLSLC(1, uint8_t key_frame; );
GLSLC(1, uint8_t version; );
GLSLC(1, uint8_t micro_version; );
GLSLC(1, uint8_t padding[1]; );
GLSLC(0, }; );
/* The push-constant range is sized by FFv1VkResetParameters in one
 * variant and by the shared FFv1ShaderParams in the other. */
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1ShaderParams),
VK_SHADER_STAGE_COMPUTE_BIT);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
desc_set = (FFVulkanDescriptorSetBinding []) {
{
.name = "rangecoder_static_buf",
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.mem_layout = "scalar",
.buf_content = "uint8_t zero_one_state[512];",
},
{
.name = "quant_buf",
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.mem_layout = "scalar",
.buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
"[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
const FFVulkanDescriptorSetBinding desc_set_const[] = {
{ /* rangecoder_buf */
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
};
/* Uniform-buffer set: 2 bindings in the desc_set form, 1 binding
 * (rangecoder_buf only) in the desc_set_const form. */
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0));
ff_vk_shader_add_descriptor_set(s, shd, desc_set_const, 1, 1, 0);
const FFVulkanDescriptorSetBinding desc_set[] = {
{ /* slice_data_buf */
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
};
ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0);
define_shared_code(shd, 0 /* Bit depth irrelevant for the reset shader */);
if (ac == AC_GOLOMB_RICE)
GLSLD(ff_source_ffv1_vlc_comp);
desc_set = (FFVulkanDescriptorSetBinding []) {
{
.name = "slice_data_buf",
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.mem_quali = "readonly",
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.buf_content = "SliceContext slice_ctx",
.buf_elems = f->max_slice_count,
},
};
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0));
GLSLD(ff_source_ffv1_reset_comp);
RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
&spv_opaque));
RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
RET(ff_vk_shader_link(s, shd,
ff_ffv1_dec_reset_golomb_comp_spv_data,
ff_ffv1_dec_reset_golomb_comp_spv_len, "main"));
else
RET(ff_vk_shader_link(s, shd,
ff_ffv1_dec_reset_comp_spv_data,
ff_ffv1_dec_reset_comp_spv_len, "main"));
RET(ff_vk_shader_register_exec(s, pool, shd));
fail:
/* Only the runtime-compile variant has a compiler result to release */
if (spv_opaque)
spv->free_shader(spv, &spv_opaque);
return err;
}
/*
 * Initialise the FFv1 "decode" compute shader.
 *
 * NOTE(review): this span shows the pre-change and post-change lines of the
 * same commit side by side (diff markers are missing), so it is not
 * compilable as displayed. One variant builds GLSL at runtime (GOLOMB, RGB
 * and CACHED_SYMBOL_READER defines, then spv->compile_shader()); the other
 * calls ff_vk_shader_load() with a VkSpecializationInfo and links one of
 * four prebuilt SPIR-V blobs chosen by ac and rgb.
 *
 * dec_frames_ctx and out_frames_ctx supply the sw_format whose plane count
 * sizes the "dec" and "dst" storage-image bindings.
 *
 * Returns err; RET() is presumably the usual "store in err and goto fail on
 * error" wrapper (its definition is not visible here, confirm).
 */
static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
FFVulkanShader *shd,
FFVkExecPool *pool, FFVulkanShader *shd,
AVHWFramesContext *dec_frames_ctx,
AVHWFramesContext *out_frames_ctx,
int ac, int rgb)
VkSpecializationInfo *sl, int ac, int rgb)
{
int err;
FFVulkanDescriptorSetBinding *desc_set;
uint8_t *spv_data;
size_t spv_len;
void *spv_opaque = NULL;
ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
(uint32_t []) { 1, 1, 1 }, 0);
/* Set for range-coded streams on the Mesa RADV driver; below it widens
 * the workgroup to CONTEXT_SIZE and defines CACHED_SYMBOL_READER. */
int use_cached_reader = ac != AC_GOLOMB_RICE &&
s->driver_props.driverID == VK_DRIVER_ID_MESA_RADV;
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1ShaderParams),
VK_SHADER_STAGE_COMPUTE_BIT);
RET(ff_vk_shader_init(s, shd, "ffv1_dec",
VK_SHADER_STAGE_COMPUTE_BIT,
(const char *[]) { "GL_EXT_buffer_reference",
"GL_EXT_buffer_reference2" }, 2,
use_cached_reader ? CONTEXT_SIZE : 1, 1, 1,
0));
if (ac == AC_GOLOMB_RICE)
av_bprintf(&shd->src, "#define GOLOMB\n");
if (rgb)
av_bprintf(&shd->src, "#define RGB\n");
if (use_cached_reader)
av_bprintf(&shd->src, "#define CACHED_SYMBOL_READER 1\n");
/* Common codec header */
GLSLD(ff_source_common_comp);
add_push_data(shd);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
desc_set = (FFVulkanDescriptorSetBinding []) {
{
.name = "rangecoder_static_buf",
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.mem_layout = "scalar",
.buf_content = "uint8_t zero_one_state[512];",
const FFVulkanDescriptorSetBinding desc_set_const[] = {
{ /* rangecoder_buf */
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
{
.name = "quant_buf",
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.mem_layout = "scalar",
.buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
"[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
{ /* quant_buf */
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
};
ff_vk_shader_add_descriptor_set(s, shd, desc_set_const, 2, 1, 0);
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0));
define_shared_code(shd, f->use32bit);
if (ac == AC_GOLOMB_RICE)
GLSLD(ff_source_ffv1_vlc_comp);
desc_set = (FFVulkanDescriptorSetBinding []) {
{
.name = "slice_data_buf",
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.buf_content = "SliceContext slice_ctx",
.buf_elems = f->max_slice_count,
const FFVulkanDescriptorSetBinding desc_set[] = {
{ /* slice_data_buf */
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
{
.name = "dec",
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.dimensions = 2,
.mem_layout = ff_vk_shader_rep_fmt(dec_frames_ctx->sw_format,
FF_VK_REP_NATIVE),
.elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format),
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
{ /* slice_offsets_buf */
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
{
.name = "slice_status_buf",
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.mem_quali = "writeonly",
.buf_content = "uint32_t slice_status",
.buf_elems = 2*f->max_slice_count,
{ /* slice_status_buf */
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
},
{
.name = "dst",
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.dimensions = 2,
.mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
FF_VK_REP_NATIVE),
.mem_quali = "writeonly",
.elems = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
{ /* dec */
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format),
},
{ /* dst */
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
.elems = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
},
};
/* The binding count grows by rgb, so the trailing "dst" entry is only
 * included when rgb is nonzero (assumes rgb is 0 or 1; confirm at the
 * caller). The base count is 3 in one variant and 4 in the other, which
 * adds a slice_offsets_buf binding. */
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3 + rgb, 0, 0));
ff_vk_shader_add_descriptor_set(s, shd, desc_set, 4 + rgb, 0, 0);
GLSLD(ff_source_ffv1_dec_comp);
RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
&spv_opaque));
RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
/* NOTE(review): unlike the link calls in init_setup_shader() and
 * init_reset_shader(), none of the four ff_vk_shader_link() calls below
 * is wrapped in RET(), so their return value is discarded here. Confirm
 * whether that is intended. */
if (ac == AC_GOLOMB_RICE) {
if (rgb)
ff_vk_shader_link(s, shd,
ff_ffv1_dec_rgb_golomb_comp_spv_data,
ff_ffv1_dec_rgb_golomb_comp_spv_len, "main");
else
ff_vk_shader_link(s, shd,
ff_ffv1_dec_golomb_comp_spv_data,
ff_ffv1_dec_golomb_comp_spv_len, "main");
} else {
if (rgb)
ff_vk_shader_link(s, shd,
ff_ffv1_dec_rgb_comp_spv_data,
ff_ffv1_dec_rgb_comp_spv_len, "main");
else
ff_vk_shader_link(s, shd,
ff_ffv1_dec_comp_spv_data,
ff_ffv1_dec_comp_spv_len, "main");
}
RET(ff_vk_shader_register_exec(s, pool, shd));
fail:
/* Only the runtime-compile variant has a compiler result to release */
if (spv_opaque)
spv->free_shader(spv, &spv_opaque);
return err;
}
@ -954,7 +724,8 @@ static int init_indirect(AVCodecContext *avctx, FFVulkanContext *s,
err = av_hwframe_ctx_init(*dst);
if (err < 0) {
av_log(avctx, AV_LOG_ERROR, "Unable to initialize frame pool with format %s: %s\n",
av_log(avctx, AV_LOG_ERROR,
"Unable to initialize frame pool with format %s: %s\n",
av_get_pix_fmt_name(sw_format), av_err2str(err));
av_buffer_unref(dst);
return err;
@ -973,9 +744,9 @@ static void vk_decode_ffv1_uninit(FFVulkanDecodeShared *ctx)
ff_vk_shader_free(&ctx->s, &fv->reset);
ff_vk_shader_free(&ctx->s, &fv->decode);
ff_vk_free_buf(&ctx->s, &fv->rangecoder_buf);
ff_vk_free_buf(&ctx->s, &fv->quant_buf);
ff_vk_free_buf(&ctx->s, &fv->rangecoder_static_buf);
ff_vk_free_buf(&ctx->s, &fv->crc_tab_buf);
ff_vk_free_buf(&ctx->s, &fv->crc_buf);
av_buffer_pool_uninit(&fv->slice_state_pool);
av_buffer_pool_uninit(&fv->slice_offset_pool);
@ -991,18 +762,11 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
FFVulkanDecodeShared *ctx = NULL;
FFv1VulkanDecodeContext *fv;
FFVkSPIRVCompiler *spv;
if (f->version < 3 ||
(f->version == 4 && f->micro_version > 3))
return AVERROR(ENOTSUP);
spv = ff_vk_spirv_init();
if (!spv) {
av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
return AVERROR_EXTERNAL;
}
err = ff_vk_decode_init(avctx);
if (err < 0)
return err;
@ -1019,6 +783,8 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
AVHWFramesContext *dctx = hwfc;
enum AVPixelFormat sw_format = hwfc->sw_format;
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(sw_format);
int color_planes = av_pix_fmt_desc_get(avctx->sw_pix_fmt)->nb_components;
int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) &&
!(sw_format == AV_PIX_FMT_YA8);
@ -1029,63 +795,78 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
dctx = (AVHWFramesContext *)fv->intermediate_frames_ref->data;
}
SPEC_LIST_CREATE(sl, 15, 15*sizeof(uint32_t))
if (RGB_LINECACHE != 2)
SPEC_LIST_ADD(sl, 0, 32, RGB_LINECACHE);
if (f->ec && !!(avctx->err_recognition & AV_EF_CRCCHECK))
SPEC_LIST_ADD(sl, 1, 32, 1);
SPEC_LIST_ADD(sl, 2, 32, f->version);
SPEC_LIST_ADD(sl, 3, 32, f->quant_table_count);
for (int i = 0; i < f->quant_table_count; i++) {
if (f->quant_tables[i][3][127] || f->quant_tables[i][4][127]) {
SPEC_LIST_ADD(sl, 4, 32, 1);
break;
}
}
int bits = f->avctx->bits_per_raw_sample > 0 ? f->avctx->bits_per_raw_sample : 8;
SPEC_LIST_ADD(sl, 5, 32, 1 << bits);
SPEC_LIST_ADD(sl, 6, 32, f->colorspace);
SPEC_LIST_ADD(sl, 7, 32, f->transparency);
SPEC_LIST_ADD(sl, 8, 32, ff_vk_mt_is_np_rgb(sw_format) &&
(desc->flags & AV_PIX_FMT_FLAG_PLANAR));
SPEC_LIST_ADD(sl, 9, 32, f->plane_count);
SPEC_LIST_ADD(sl, 10, 32, color_planes);
SPEC_LIST_ADD(sl, 11, 32, av_pix_fmt_count_planes(sw_format));
SPEC_LIST_ADD(sl, 12, 32, bits);
SPEC_LIST_ADD(sl, 13, 32, f->chroma_h_shift);
SPEC_LIST_ADD(sl, 14, 32, f->chroma_v_shift);
/* Setup shader */
RET(init_setup_shader(f, &ctx->s, &ctx->exec_pool, spv, &fv->setup));
RET(init_setup_shader(f, &ctx->s, &ctx->exec_pool, &fv->setup, sl));
/* Reset shader */
RET(init_reset_shader(f, &ctx->s, &ctx->exec_pool,
spv, &fv->reset, f->ac));
RET(init_reset_shader(f, &ctx->s, &ctx->exec_pool, &fv->reset, sl, f->ac));
/* Decode shaders */
RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool,
spv, &fv->decode,
dctx,
hwfc,
f->ac,
is_rgb));
RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool, &fv->decode,
dctx, hwfc, sl, f->ac, is_rgb));
/* Range coder data */
RET(ff_ffv1_vk_init_state_transition_data(&ctx->s,
&fv->rangecoder_static_buf,
f));
/* Quantization table data */
RET(ff_ffv1_vk_init_quant_table_data(&ctx->s,
&fv->quant_buf,
f));
/* CRC table buffer */
RET(ff_ffv1_vk_init_crc_table_data(&ctx->s,
&fv->crc_tab_buf,
f));
/* Init static data */
RET(ff_ffv1_vk_init_state_transition_data(&ctx->s, &fv->rangecoder_buf, f));
RET(ff_ffv1_vk_init_crc_table_data(&ctx->s, &fv->crc_buf, f));
RET(ff_ffv1_vk_init_quant_table_data(&ctx->s, &fv->quant_buf, f));
/* Update setup global descriptors */
RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
&fv->setup, 0, 0, 0,
&fv->rangecoder_static_buf,
0, fv->rangecoder_static_buf.size,
&fv->rangecoder_buf,
0, 512*sizeof(uint8_t),
VK_FORMAT_UNDEFINED));
RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
&fv->setup, 0, 1, 0,
&fv->crc_tab_buf,
0, fv->crc_tab_buf.size,
&fv->crc_buf,
0, 256*sizeof(uint32_t),
VK_FORMAT_UNDEFINED));
/* Update decode global descriptors */
RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
&fv->decode, 0, 0, 0,
&fv->rangecoder_static_buf,
0, fv->rangecoder_static_buf.size,
&fv->rangecoder_buf,
0, 512*sizeof(uint8_t),
VK_FORMAT_UNDEFINED));
RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
&fv->decode, 0, 1, 0,
&fv->quant_buf,
0, fv->quant_buf.size,
0, VK_WHOLE_SIZE,
VK_FORMAT_UNDEFINED));
fail:
spv->uninit(&spv);
return err;
}