mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2026-04-18 16:40:23 +00:00
vulkan_ffv1: convert to compile-time SPIR-V generation
This commit is contained in:
parent
82f0818ff2
commit
3dceda7769
13 changed files with 757 additions and 527 deletions
2
configure
vendored
2
configure
vendored
|
|
@ -3354,7 +3354,7 @@ av1_vulkan_hwaccel_deps="vulkan"
|
|||
av1_vulkan_hwaccel_select="av1_decoder"
|
||||
dpx_vulkan_hwaccel_deps="vulkan spirv_compiler"
|
||||
dpx_vulkan_hwaccel_select="dpx_decoder"
|
||||
ffv1_vulkan_hwaccel_deps="vulkan spirv_library"
|
||||
ffv1_vulkan_hwaccel_deps="vulkan spirv_compiler"
|
||||
ffv1_vulkan_hwaccel_select="ffv1_decoder"
|
||||
h263_vaapi_hwaccel_deps="vaapi"
|
||||
h263_vaapi_hwaccel_select="h263_decoder"
|
||||
|
|
|
|||
|
|
@ -36,6 +36,22 @@ int ff_ffv1_vk_init_quant_table_data(FFVulkanContext *s,
|
|||
int ff_ffv1_vk_init_crc_table_data(FFVulkanContext *s,
|
||||
FFVkBuffer *vkb, FFV1Context *f);
|
||||
|
||||
/*
 * Host-side parameter block for the FFv1 Vulkan shaders.
 *
 * The field names and their order mirror the GLSL `pushConstants` block in
 * vulkan/ffv1_common.glsl, which is declared with `scalar` layout.
 * NOTE(review): presumably both declarations must be changed together,
 * field by field - confirm against the code that uploads this struct.
 */
typedef struct FFv1ShaderParams {
|
||||
VkDeviceAddress slice_data; /* `u8buf slice_data` on the shader side */
|
||||
VkDeviceAddress slice_state; /* `u8buf slice_state` on the shader side */
|
||||
|
||||
uint32_t extend_lookup[8]; /* `bool extend_lookup[MAX_QUANT_TABLES]` in GLSL, one flag per quant table */
|
||||
uint16_t context_count[8]; /* per quant table; the reset shader uses it as the number of contexts to clear */
|
||||
|
||||
int fmt_lut[4]; /* `ivec4 fmt_lut` in GLSL */
|
||||
uint16_t img_size[2]; /* `u16vec2 img_size` in GLSL */
|
||||
|
||||
uint32_t plane_state_size; /* multiplied by a plane index in the shaders to form state offsets */
|
||||
uint32_t key_frame; /* `bool key_frame` in GLSL; when set the reset shader always clears the state */
|
||||
uint32_t crcref; /* starting value of the CRC accumulated by the setup shader */
|
||||
int micro_version; /* compared together with the `version` specialization constant */
|
||||
} FFv1ShaderParams;
|
||||
|
||||
typedef struct FFv1VkRCTParameters {
|
||||
int fmt_lut[4];
|
||||
int offset;
|
||||
|
|
|
|||
|
|
@ -7,10 +7,13 @@ OBJS-$(CONFIG_FFV1_VULKAN_ENCODER) += vulkan/common.o \
|
|||
vulkan/ffv1_enc_setup.o vulkan/ffv1_enc.o \
|
||||
vulkan/ffv1_rct_search.o
|
||||
|
||||
OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/common.o \
|
||||
vulkan/rangecoder.o vulkan/ffv1_vlc.o \
|
||||
vulkan/ffv1_common.o vulkan/ffv1_reset.o \
|
||||
vulkan/ffv1_dec_setup.o vulkan/ffv1_dec.o
|
||||
OBJS-$(CONFIG_FFV1_VULKAN_HWACCEL) += vulkan/ffv1_dec_setup.comp.spv.o \
|
||||
vulkan/ffv1_dec_reset.comp.spv.o \
|
||||
vulkan/ffv1_dec_reset_golomb.comp.spv.o \
|
||||
vulkan/ffv1_dec.comp.spv.o \
|
||||
vulkan/ffv1_dec_golomb.comp.spv.o \
|
||||
vulkan/ffv1_dec_rgb.comp.spv.o \
|
||||
vulkan/ffv1_dec_rgb_golomb.comp.spv.o
|
||||
|
||||
OBJS-$(CONFIG_PRORES_RAW_VULKAN_HWACCEL) += vulkan/prores_raw_decode.comp.spv.o \
|
||||
vulkan/prores_raw_idct.comp.spv.o
|
||||
|
|
|
|||
252
libavcodec/vulkan/ffv1_common.glsl
Normal file
252
libavcodec/vulkan/ffv1_common.glsl
Normal file
|
|
@ -0,0 +1,252 @@
|
|||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2024 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef VULKAN_FFV1_COMMON_H
|
||||
#define VULKAN_FFV1_COMMON_H
|
||||
|
||||
#include "rangecoder.comp"
|
||||
#ifdef GOLOMB
|
||||
#include "ffv1_vlc.comp"
|
||||
#endif
|
||||
|
||||
#define MAX_QUANT_TABLES 8
|
||||
#define MAX_CONTEXT_INPUTS 5
|
||||
#define MAX_QUANT_TABLE_SIZE 256
|
||||
#define MAX_QUANT_TABLE_MASK (MAX_QUANT_TABLE_SIZE - 1)
|
||||
|
||||
/*
 * Specialization constants shared by the FFv1 shaders. The values written
 * here are only the defaults used when a constant_id is not overridden at
 * pipeline creation.
 */
layout (constant_id = 0) const int rgb_linecache = 2; /* LADDR() masks y with (rgb_linecache - 1); presumably a power of two - TODO confirm */
|
||||
layout (constant_id = 1) const bool has_crc = false; /* gates the CRC loop in the setup shader */
|
||||
layout (constant_id = 2) const int version = 0; /* checked together with micro_version, e.g. in slice_coord() */
|
||||
layout (constant_id = 3) const int quant_table_count = 0;
|
||||
layout (constant_id = 4) const bool has_extend_lookup = false; /* when false, get_pred() never reads quant tables 3 and 4 */
|
||||
|
||||
layout (constant_id = 5) const int rct_offset = 0;
|
||||
layout (constant_id = 6) const int colorspace = 0;
|
||||
layout (constant_id = 7) const bool transparency = false;
|
||||
layout (constant_id = 8) const bool planar_rgb = false;
|
||||
layout (constant_id = 9) const int codec_planes = 0; /* number of per-slice state planes; scales the slice state offsets */
|
||||
layout (constant_id = 10) const int color_planes = 0;
|
||||
layout (constant_id = 11) const int planes = 0;
|
||||
layout (constant_id = 12) const int bits_per_raw_sample = 0;
|
||||
|
||||
layout (constant_id = 13) const int chroma_shift_x = 0;
|
||||
layout (constant_id = 14) const int chroma_shift_y = 0;
|
||||
/* Convenience vector built from the two scalar shift constants above. */
const ivec2 chroma_shift = ivec2(chroma_shift_x, chroma_shift_y);
|
||||
|
||||
/*
 * Per-dispatch parameters, using scalar block layout.
 * The members appear in the same order and with the same names as the C
 * struct FFv1ShaderParams.
 * NOTE(review): presumably the two must be kept in sync - confirm against
 * the host code that pushes the constants.
 */
layout (push_constant, scalar) uniform pushConstants {
|
||||
u8buf slice_data; /* base address of the slice data; the setup shader adds slice_offsets[].x to it */
|
||||
u8buf slice_state; /* base address of the coder state; cast to uint64_t for address arithmetic */
|
||||
|
||||
bool extend_lookup[MAX_QUANT_TABLES]; /* indexed by quant table index, passed to get_pred() */
|
||||
uint16_t context_count[MAX_QUANT_TABLES]; /* indexed by quant table index in the reset shader */
|
||||
|
||||
ivec4 fmt_lut;
|
||||
u16vec2 img_size;
|
||||
|
||||
uint plane_state_size; /* multiplied by a plane index to form state offsets */
|
||||
bool key_frame; /* when true the reset shader clears the state unconditionally */
|
||||
uint32_t crcref; /* starting value of the CRC in the setup shader */
|
||||
int micro_version; /* used together with the `version` specialization constant */
|
||||
};
|
||||
|
||||
#define TYPE int32_t
|
||||
#define VTYPE2 i32vec2
|
||||
#define VTYPE3 i32vec3
|
||||
|
||||
/*
 * Per-slice state kept in the slice_ctx[] storage buffer.
 * It carries a bit reader when DECODE is defined and a bit writer otherwise.
 */
struct SliceContext {
|
||||
RangeCoder c;
|
||||
|
||||
#ifdef DECODE
|
||||
GetBitContext gb;
|
||||
#else
|
||||
PutBitContext pb; /* 8*8 bytes */
|
||||
#endif
|
||||
|
||||
ivec2 slice_dim; /* the decoder uses .x as the line width and .y as the row count */
|
||||
ivec2 slice_pos; /* the decoder uses it as the base image coordinate of the slice */
|
||||
ivec2 slice_rct_coef;
|
||||
u8vec3 quant_table_idx; /* the decoder expands it with .xyyz to get one index per plane */
|
||||
|
||||
uint slice_coding_mode; /* the decoder takes the PCM path when this equals 1 */
|
||||
bool slice_reset_contexts; /* makes the reset shader clear this slice's state on non-key frames */
|
||||
};
|
||||
|
||||
layout (set = 1, binding = 0) buffer slice_ctx_buf {
|
||||
SliceContext slice_ctx[];
|
||||
};
|
||||
|
||||
/*
 * Start coordinate of slice number `sx` when `width` samples are divided
 * into `num_h_slices` slices.
 *
 * Streams older than version 4 with micro_version 3 use a plain
 * proportional split. Newer streams round the boundary to a multiple of
 * (1 << chroma_shift); a boundary that lands on the aligned width is
 * pulled back to the real width.
 */
uint slice_coord(uint width, uint sx, uint num_h_slices, uint chroma_shift)
{
    const uint unit   = 1 << chroma_shift;
    const uint padded = align(width, unit);

    const bool rounded_split = (version > 4) ||
                               ((version == 4) && (micro_version >= 3));
    if (!rounded_split)
        return width * sx / num_h_slices;

    const uint snapped = (2 * padded * sx + num_h_slices * unit) /
                         (2 * num_h_slices * unit) * unit;

    return (snapped == padded) ? width : snapped;
}
|
||||
|
||||
#if defined(ENCODE) || defined(DECODE)
|
||||
|
||||
/*
 * Read-only quantization tables, indexed as
 * quant_table[table index][context input 0..4][difference & MAX_QUANT_TABLE_MASK].
 */
layout (set = 0, binding = 1, scalar) readonly uniform quant_buf {
|
||||
int16_t quant_table[MAX_QUANT_TABLES]
|
||||
[MAX_CONTEXT_INPUTS]
|
||||
[MAX_QUANT_TABLE_SIZE];
|
||||
};
|
||||
|
||||
/* -1, { -1, 0 } */
|
||||
/*
 * Median prediction: the median of the left sample, the sample above
 * (top[1]) and the gradient left + above - top-left (top[0]).
 */
int predict(int L, ivec2 top)
{
    const int top_left = top.x;
    const int above    = top.y;
    const int gradient = L + above - top_left;

    return mid_pred(L, gradient, above);
}
|
||||
|
||||
/* { -2, -1 }, { -1, 0, 1 }, 0 */
|
||||
/*
 * Context index for one sample, built from quantized neighbour differences.
 *
 * cur_l = { left-left, left }, top_l = { top-left, top, top-right },
 * top2 = the sample two rows up. Tables 3 and 4 only contribute when at
 * least one of them has a non-zero entry at index 127.
 */
int get_context(VTYPE2 cur_l, VTYPE3 top_l, TYPE top2, uint8_t quant_table_idx)
{
    const int left      = cur_l.y; /* -1 */
    const int top_left  = top_l.x; /* -1 */
    const int above     = top_l.y; /*  0 */
    const int top_right = top_l.z; /*  1 */

    int ctx = quant_table[quant_table_idx][0][(left - top_left) & MAX_QUANT_TABLE_MASK] +
              quant_table[quant_table_idx][1][(top_left - above) & MAX_QUANT_TABLE_MASK] +
              quant_table[quant_table_idx][2][(above - top_right) & MAX_QUANT_TABLE_MASK];

    const bool extended = (quant_table[quant_table_idx][3][127] != 0) ||
                          (quant_table[quant_table_idx][4][127] != 0);
    if (extended) {
        const int above2 = top2;    /* -2 */
        const int left2  = cur_l.x; /* -2 */

        ctx = ctx +
              quant_table[quant_table_idx][3][(left2 - left) & MAX_QUANT_TABLE_MASK] +
              quant_table[quant_table_idx][4][(above2 - above) & MAX_QUANT_TABLE_MASK];
    }

    return ctx;
}
|
||||
|
||||
/*
 * Constant table of 41 exponents, rising from 0 to 24.
 * It is not referenced in the visible part of this file.
 * NOTE(review): presumably indexed by the run index of the Golomb run
 * mode - confirm against the code that reads it.
 */
const uint32_t log2_run[41] = {
|
||||
0, 0, 0, 0, 1, 1, 1, 1,
|
||||
2, 2, 2, 2, 3, 3, 3, 3,
|
||||
4, 4, 5, 5, 6, 6, 7, 7,
|
||||
8, 9, 10, 11, 12, 13, 14, 15,
|
||||
16, 17, 18, 19, 20, 21, 22, 23,
|
||||
24,
|
||||
};
|
||||
|
||||
#ifdef RGB
|
||||
#define RGB_LBUF (rgb_linecache - 1)
|
||||
#define LADDR(p) (ivec2((p).x, ((p).y & RGB_LBUF)))
|
||||
|
||||
/*
 * RGB variant: context and prediction for the sample at offset `off`
 * inside the slice whose base coordinate is `sp`.
 *
 * pred            - image the neighbouring samples are loaded from
 * comp            - component of the loaded texel to use
 * sw              - line width; clamps the top-right neighbour to the last column
 * quant_table_idx - which quantization table set to use
 * extend_lookup   - together with has_extend_lookup enables tables 3 and 4
 *
 * Every neighbour address goes through LADDR(), which wraps the row
 * with RGB_LBUF. No explicit border checks are made; the in-body
 * comments explain why the wrapped loads are relied on instead.
 *
 * Returns ivec2(context, prediction). The context is the raw signed sum;
 * the visible callers take abs() of it and negate the decoded difference
 * when it is negative.
 */
ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off,
|
||||
int comp, int sw, uint8_t quant_table_idx, bool extend_lookup)
|
||||
{
|
||||
const ivec2 yoff_border1 = expectEXT(off.x == 0, false) ? off + ivec2(1, -1) : off;
|
||||
|
||||
/* Thanks to the same coincidence as below, we can skip checking if off == 0, 1 */
|
||||
VTYPE3 top = VTYPE3(TYPE(imageLoad(pred, sp + LADDR(yoff_border1 + ivec2(-1, -1)))[comp]),
|
||||
TYPE(imageLoad(pred, sp + LADDR(off + ivec2(0, -1)))[comp]),
|
||||
TYPE(imageLoad(pred, sp + LADDR(off + ivec2(min(1, sw - off.x - 1), -1)))[comp]));
|
||||
|
||||
/* Normally, we'd need to check if off != ivec2(0, 0) here, since otherwise, we must
|
||||
* return zero. However, ivec2(-1, 0) + ivec2(1, -1) == ivec2(0, -1), e.g. previous
|
||||
* row, 0 offset, same slice, which is zero since we zero out the buffer for RGB */
|
||||
TYPE cur = TYPE(imageLoad(pred, sp + LADDR(yoff_border1 + ivec2(-1, 0)))[comp]);
|
||||
|
||||
int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +
|
||||
quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
|
||||
quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];
|
||||
|
||||
if (has_extend_lookup && extend_lookup) {
|
||||
TYPE cur2 = TYPE(0);
|
||||
if (expectEXT(off.x > 0, true)) {
|
||||
const ivec2 yoff_border2 = expectEXT(off.x == 1, false) ? ivec2(-1, -1) : ivec2(-2, 0);
|
||||
cur2 = TYPE(imageLoad(pred, sp + LADDR(off + yoff_border2))[comp]);
|
||||
}
|
||||
base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];
|
||||
|
||||
/* top-2 became current upon swap when rgb_linecache == 2 */
|
||||
ivec2 top2_off = off;
|
||||
if (rgb_linecache != 2)
|
||||
top2_off += ivec2(0, -2);
|
||||
|
||||
TYPE top2 = TYPE(imageLoad(pred, sp + LADDR(top2_off))[comp]);
|
||||
base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
|
||||
}
|
||||
|
||||
/* context, prediction */
|
||||
return ivec2(base, predict(cur, VTYPE2(top)));
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
#define LADDR(p) (p)
|
||||
|
||||
/*
 * Non-RGB variant: context and prediction for the sample at offset `off`
 * inside the slice whose base coordinate is `sp`.
 *
 * pred            - image the neighbouring samples are loaded from
 * comp            - component of the loaded texel to use
 * sw              - line width; clamps the top-right neighbour to the last column
 * quant_table_idx - which quantization table set to use
 * extend_lookup   - together with has_extend_lookup enables tables 3 and 4
 *
 * Here LADDR() is the identity, so neighbours outside the slice are
 * handled explicitly: they stay zero unless the guarding condition on
 * `off` allows the load. In the first column the left-hand neighbours are
 * redirected by yoff_border1 / yoff_border2 (+1 column, -1 row).
 *
 * Returns ivec2(context, prediction). The context is the raw signed sum;
 * the visible callers take abs() of it and negate the decoded difference
 * when it is negative.
 */
ivec2 get_pred(readonly uimage2D pred, ivec2 sp, ivec2 off,
|
||||
int comp, int sw, uint8_t quant_table_idx, bool extend_lookup)
|
||||
{
|
||||
const ivec2 yoff_border1 = off.x == 0 ? ivec2(1, -1) : ivec2(0, 0);
|
||||
/* From here on sp is the absolute coordinate of the current sample. */
sp += off;
|
||||
|
||||
VTYPE3 top = VTYPE3(TYPE(0),
|
||||
TYPE(0),
|
||||
TYPE(0));
|
||||
if (off.y > 0 && off != ivec2(0, 1))
|
||||
top[0] = TYPE(imageLoad(pred, sp + ivec2(-1, -1) + yoff_border1)[comp]);
|
||||
if (off.y > 0) {
|
||||
top[1] = TYPE(imageLoad(pred, sp + ivec2(0, -1))[comp]);
|
||||
top[2] = TYPE(imageLoad(pred, sp + ivec2(min(1, sw - off.x - 1), -1))[comp]);
|
||||
}
|
||||
|
||||
TYPE cur = TYPE(0);
|
||||
if (off != ivec2(0, 0))
|
||||
cur = TYPE(imageLoad(pred, sp + ivec2(-1, 0) + yoff_border1)[comp]);
|
||||
|
||||
int base = quant_table[quant_table_idx][0][(cur - top[0]) & MAX_QUANT_TABLE_MASK] +
|
||||
quant_table[quant_table_idx][1][(top[0] - top[1]) & MAX_QUANT_TABLE_MASK] +
|
||||
quant_table[quant_table_idx][2][(top[1] - top[2]) & MAX_QUANT_TABLE_MASK];
|
||||
|
||||
if (has_extend_lookup && extend_lookup) {
|
||||
TYPE cur2 = TYPE(0);
|
||||
if (off.x > 0 && off != ivec2(1, 0)) {
|
||||
const ivec2 yoff_border2 = off.x == 1 ? ivec2(1, -1) : ivec2(0, 0);
|
||||
cur2 = TYPE(imageLoad(pred, sp + ivec2(-2, 0) + yoff_border2)[comp]);
|
||||
}
|
||||
base += quant_table[quant_table_idx][3][(cur2 - cur) & MAX_QUANT_TABLE_MASK];
|
||||
|
||||
TYPE top2 = TYPE(0);
|
||||
if (off.y > 1)
|
||||
top2 = TYPE(imageLoad(pred, sp + ivec2(0, -2))[comp]);
|
||||
base += quant_table[quant_table_idx][4][(top2 - top[1]) & MAX_QUANT_TABLE_MASK];
|
||||
}
|
||||
|
||||
/* context, prediction */
|
||||
return ivec2(base, predict(cur, VTYPE2(top)));
|
||||
}
|
||||
|
||||
#endif /* RGB */
|
||||
|
||||
#endif /* ENCODE || DECODE */
|
||||
|
||||
#endif /* VULKAN_FFV1_COMMON_H */
|
||||
|
|
@ -20,14 +20,23 @@
|
|||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef GOLOMB
|
||||
#ifdef CACHED_SYMBOL_READER
|
||||
shared uint8_t state[CONTEXT_SIZE];
|
||||
#define READ(c, off) get_rac_direct(c, state[off])
|
||||
#else
|
||||
#define READ(c, off) get_rac(c, uint64_t(slice_state) + (state_off + off))
|
||||
#endif
|
||||
#pragma shader_stage(compute)
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
|
||||
#define DECODE
|
||||
#include "common.comp"
|
||||
#include "ffv1_common.glsl"
|
||||
|
||||
layout (set = 1, binding = 1, scalar) readonly buffer slice_offsets_buf {
|
||||
u32vec2 slice_offsets[];
|
||||
};
|
||||
layout (set = 1, binding = 2, scalar) writeonly buffer slice_status_buf {
|
||||
uint32_t slice_status[];
|
||||
};
|
||||
layout (set = 1, binding = 3) uniform uimage2D dec[];
|
||||
|
||||
#ifndef GOLOMB
|
||||
#define READ(c, off) get_rac(c, uint64_t(slice_state) + (state_off + off))
|
||||
int get_isymbol(inout RangeCoder c, uint state_off)
|
||||
{
|
||||
if (READ(c, 0))
|
||||
|
|
@ -56,11 +65,6 @@ int get_isymbol(inout RangeCoder c, uint state_off)
|
|||
|
||||
void decode_line_pcm(inout SliceContext sc, ivec2 sp, int w, int y, int p, int bits)
|
||||
{
|
||||
#ifdef CACHED_SYMBOL_READER
|
||||
if (gl_LocalInvocationID.x > 0)
|
||||
return;
|
||||
#endif
|
||||
|
||||
#ifndef RGB
|
||||
if (p > 0 && p < 3) {
|
||||
w = ceil_rshift(w, chroma_shift.x);
|
||||
|
|
@ -79,7 +83,7 @@ void decode_line_pcm(inout SliceContext sc, ivec2 sp, int w, int y, int p, int b
|
|||
|
||||
void decode_line(inout SliceContext sc, ivec2 sp, int w,
|
||||
int y, int p, int bits, uint state_off,
|
||||
uint8_t quant_table_idx, const int run_index)
|
||||
uint8_t quant_table_idx, int run_index)
|
||||
{
|
||||
#ifndef RGB
|
||||
if (p > 0 && p < 3) {
|
||||
|
|
@ -90,34 +94,28 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
|
|||
|
||||
for (int x = 0; x < w; x++) {
|
||||
ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w,
|
||||
quant_table_idx, extend_lookup[quant_table_idx] > 0);
|
||||
quant_table_idx, extend_lookup[quant_table_idx]);
|
||||
|
||||
uint context_off = state_off + CONTEXT_SIZE*abs(pr[0]);
|
||||
#ifdef CACHED_SYMBOL_READER
|
||||
u8buf sb = u8buf(uint64_t(slice_state) + context_off + gl_LocalInvocationID.x);
|
||||
state[gl_LocalInvocationID.x] = sb.v;
|
||||
barrier();
|
||||
if (gl_LocalInvocationID.x == 0) {
|
||||
|
||||
#endif
|
||||
int diff = get_isymbol(sc.c, context_off);
|
||||
if (pr[0] < 0)
|
||||
diff = -diff;
|
||||
|
||||
int diff = get_isymbol(sc.c, context_off);
|
||||
if (pr[0] < 0)
|
||||
diff = -diff;
|
||||
|
||||
uint v = zero_extend(pr[1] + diff, bits);
|
||||
imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v));
|
||||
|
||||
#ifdef CACHED_SYMBOL_READER
|
||||
}
|
||||
|
||||
barrier();
|
||||
sb.v = state[gl_LocalInvocationID.x];
|
||||
#endif
|
||||
uint v = zero_extend(pr[1] + diff, bits);
|
||||
imageStore(dec[p], sp + LADDR(ivec2(x, y)), uvec4(v));
|
||||
}
|
||||
}
|
||||
#else
|
||||
void golomb_init(inout SliceContext sc)
|
||||
{
|
||||
if (version == 3 && micro_version > 1 || version > 3)
|
||||
get_rac_internal(sc.c, sc.c.range * 129 >> 8);
|
||||
|
||||
#else /* GOLOMB */
|
||||
uint64_t ac_byte_count = sc.c.bytestream - sc.c.bytestream_start - 1;
|
||||
init_get_bits(sc.gb, u8buf(sc.c.bytestream_start + ac_byte_count),
|
||||
int(sc.c.bytestream_end - sc.c.bytestream_start - ac_byte_count));
|
||||
}
|
||||
|
||||
void decode_line(inout SliceContext sc, ivec2 sp, int w,
|
||||
int y, int p, int bits, uint state_off,
|
||||
|
|
@ -137,7 +135,7 @@ void decode_line(inout SliceContext sc, ivec2 sp, int w,
|
|||
ivec2 pos = sp + ivec2(x, y);
|
||||
int diff;
|
||||
ivec2 pr = get_pred(dec[p], sp, ivec2(x, y), 0, w,
|
||||
quant_table_idx, extend_lookup[quant_table_idx] > 0);
|
||||
quant_table_idx, extend_lookup[quant_table_idx]);
|
||||
|
||||
uint context_off = state_off + VLC_STATE_SIZE*abs(pr[0]);
|
||||
VlcState sb = VlcState(uint64_t(slice_state) + context_off);
|
||||
|
|
@ -209,7 +207,7 @@ void writeout_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct)
|
|||
pix.r = int(imageLoad(dec[2], lpos)[0]);
|
||||
pix.g = int(imageLoad(dec[0], lpos)[0]);
|
||||
pix.b = int(imageLoad(dec[1], lpos)[0]);
|
||||
if (transparency != 0)
|
||||
if (transparency)
|
||||
pix.a = int(imageLoad(dec[3], lpos)[0]);
|
||||
|
||||
if (expectEXT(apply_rct, true))
|
||||
|
|
@ -219,7 +217,7 @@ void writeout_rgb(in SliceContext sc, ivec2 sp, int w, int y, bool apply_rct)
|
|||
pix[fmt_lut[2]], pix[fmt_lut[3]]);
|
||||
|
||||
imageStore(dst[0], pos, pix);
|
||||
if (planar_rgb != 0) {
|
||||
if (planar_rgb) {
|
||||
for (int i = 1; i < color_planes; i++)
|
||||
imageStore(dst[i], pos, ivec4(pix[i]));
|
||||
}
|
||||
|
|
@ -232,71 +230,73 @@ void decode_slice(inout SliceContext sc, const uint slice_idx)
|
|||
int w = sc.slice_dim.x;
|
||||
ivec2 sp = sc.slice_pos;
|
||||
|
||||
#ifndef RGB
|
||||
int bits = bits_per_raw_sample;
|
||||
#else
|
||||
int bits = 9;
|
||||
#ifdef RGB
|
||||
bits = 9;
|
||||
if (bits != 8 || sc.slice_coding_mode != 0)
|
||||
bits = bits_per_raw_sample + int(sc.slice_coding_mode != 1);
|
||||
|
||||
sp.y = int(gl_WorkGroupID.y)*RGB_LINECACHE;
|
||||
sp.y = int(gl_WorkGroupID.y)*rgb_linecache;
|
||||
#endif
|
||||
|
||||
/* PCM coding */
|
||||
#ifndef GOLOMB
|
||||
/* PCM coding */
|
||||
if (sc.slice_coding_mode == 1) {
|
||||
#ifndef RGB
|
||||
for (int p = 0; p < planes; p++) {
|
||||
int h = sc.slice_dim.y;
|
||||
if (p > 0 && p < 3)
|
||||
h = ceil_rshift(h, chroma_shift.y);
|
||||
|
||||
for (int y = 0; y < h; y++)
|
||||
decode_line_pcm(sc, sp, w, y, p, bits);
|
||||
}
|
||||
#else
|
||||
#ifdef RGB
|
||||
for (int y = 0; y < sc.slice_dim.y; y++) {
|
||||
for (int p = 0; p < color_planes; p++)
|
||||
decode_line_pcm(sc, sp, w, y, p, bits);
|
||||
|
||||
writeout_rgb(sc, sp, w, y, false);
|
||||
}
|
||||
#endif
|
||||
} else
|
||||
|
||||
/* Arithmetic coding */
|
||||
#endif
|
||||
{
|
||||
u8vec4 quant_table_idx = sc.quant_table_idx.xyyz;
|
||||
u32vec4 slice_state_off = (slice_idx*codec_planes + uvec4(0, 1, 1, 2))*plane_state_size;
|
||||
|
||||
#ifndef RGB
|
||||
#else
|
||||
for (int p = 0; p < planes; p++) {
|
||||
int h = sc.slice_dim.y;
|
||||
if (p > 0 && p < 3)
|
||||
h = ceil_rshift(h, chroma_shift.y);
|
||||
|
||||
int run_index = 0;
|
||||
for (int y = 0; y < h; y++)
|
||||
decode_line(sc, sp, w, y, p, bits,
|
||||
slice_state_off[p], quant_table_idx[p], run_index);
|
||||
}
|
||||
#else
|
||||
int run_index = 0;
|
||||
for (int y = 0; y < sc.slice_dim.y; y++) {
|
||||
for (int p = 0; p < color_planes; p++)
|
||||
decode_line(sc, sp, w, y, p, bits,
|
||||
slice_state_off[p], quant_table_idx[p], run_index);
|
||||
|
||||
writeout_rgb(sc, sp, w, y, true);
|
||||
decode_line_pcm(sc, sp, w, y, p, bits);
|
||||
}
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
u8vec4 quant_table_idx = sc.quant_table_idx.xyyz;
|
||||
u32vec4 slice_state_off = (slice_idx*codec_planes +
|
||||
uvec4(0, 1, 1, 2))*plane_state_size;
|
||||
|
||||
#ifdef GOLOMB
|
||||
golomb_init(sc);
|
||||
#endif
|
||||
|
||||
#ifdef RGB
|
||||
int run_index = 0;
|
||||
for (int y = 0; y < sc.slice_dim.y; y++) {
|
||||
for (int p = 0; p < color_planes; p++)
|
||||
decode_line(sc, sp, w, y, p, bits,
|
||||
slice_state_off[p], quant_table_idx[p], run_index);
|
||||
|
||||
writeout_rgb(sc, sp, w, y, true);
|
||||
}
|
||||
#else
|
||||
for (int p = 0; p < planes; p++) {
|
||||
int h = sc.slice_dim.y;
|
||||
if (p > 0 && p < 3)
|
||||
h = ceil_rshift(h, chroma_shift.y);
|
||||
|
||||
int run_index = 0;
|
||||
for (int y = 0; y < h; y++)
|
||||
decode_line(sc, sp, w, y, p, bits,
|
||||
slice_state_off[p], quant_table_idx[p], run_index);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void main(void)
|
||||
{
|
||||
const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
|
||||
uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
|
||||
decode_slice(slice_ctx[slice_idx], slice_idx);
|
||||
|
||||
uint32_t status = corrupt ? uint32_t(corrupt) : overread;
|
||||
27
libavcodec/vulkan/ffv1_dec_golomb.comp.glsl
Normal file
27
libavcodec/vulkan/ffv1_dec_golomb.comp.glsl
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2026 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#pragma shader_stage(compute)
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
|
||||
#define GOLOMB
|
||||
#include "ffv1_dec.comp.glsl"
|
||||
63
libavcodec/vulkan/ffv1_dec_reset.comp.glsl
Normal file
63
libavcodec/vulkan/ffv1_dec_reset.comp.glsl
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2024 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#pragma shader_stage(compute)
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
|
||||
#include "common.comp"
|
||||
#include "ffv1_common.glsl"
|
||||
|
||||
/*
 * Resets the coder state of one slice plane to its initial values.
 *
 * The slice is selected by the x/y work group ID and the plane by the z
 * work group ID. Nothing is written unless this is a key frame or the
 * slice asks for a context reset. The invocations of a work group share
 * the work with a stride of gl_WorkGroupSize.x.
 */
void main(void)
|
||||
{
|
||||
const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
|
||||
|
||||
if (!key_frame && !slice_ctx[slice_idx].slice_reset_contexts)
|
||||
return;
|
||||
|
||||
const uint8_t qidx = slice_ctx[slice_idx].quant_table_idx[gl_WorkGroupID.z];
|
||||
uint contexts = context_count[qidx];
|
||||
/* Address of this slice's state: codec_planes planes of plane_state_size each. */
uint64_t slice_state_off = uint64_t(slice_state) +
|
||||
slice_idx*plane_state_size*codec_planes;
|
||||
|
||||
#ifdef GOLOMB
|
||||
/* One VlcState per context; each invocation starts at its own entry. */
uint64_t start = slice_state_off +
|
||||
(gl_WorkGroupID.z*(plane_state_size/VLC_STATE_SIZE) +
|
||||
gl_LocalInvocationID.x)*VLC_STATE_SIZE;
|
||||
for (uint x = gl_LocalInvocationID.x; x < contexts; x += gl_WorkGroupSize.x) {
|
||||
VlcState sb = VlcState(start);
|
||||
sb.drift = int16_t(0);
|
||||
sb.error_sum = uint16_t(4);
|
||||
sb.bias = int8_t(0);
|
||||
sb.count = uint8_t(1);
|
||||
start += gl_WorkGroupSize.x*VLC_STATE_SIZE;
|
||||
}
|
||||
#else
|
||||
/* Range coder state: every state byte is set to 0x80, written one dword at a time. */
uint64_t start = slice_state_off +
|
||||
gl_WorkGroupID.z*plane_state_size +
|
||||
(gl_LocalInvocationID.x << 2 /* dwords */); /* Bytes */
|
||||
uint count_total = contexts*(CONTEXT_SIZE /* bytes */ >> 2 /* dwords */);
|
||||
for (uint x = gl_LocalInvocationID.x; x < count_total; x += gl_WorkGroupSize.x) {
|
||||
u32buf(start).v = 0x80808080;
|
||||
/* NOTE(review): the byte stride per invocation is CONTEXT_SIZE >> 3, which
 * equals the 4-byte dword size only while CONTEXT_SIZE is 32 (the value
 * defined in rangecoder.comp). */
start += gl_WorkGroupSize.x*(CONTEXT_SIZE >> 3 /* 1/8th of context */);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
27
libavcodec/vulkan/ffv1_dec_reset_golomb.comp.glsl
Normal file
27
libavcodec/vulkan/ffv1_dec_reset_golomb.comp.glsl
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2026 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#pragma shader_stage(compute)
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
|
||||
#define GOLOMB
|
||||
#include "ffv1_dec_reset.comp.glsl"
|
||||
30
libavcodec/vulkan/ffv1_dec_rgb.comp.glsl
Normal file
30
libavcodec/vulkan/ffv1_dec_rgb.comp.glsl
Normal file
|
|
@ -0,0 +1,30 @@
|
|||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2026 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#pragma shader_stage(compute)
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
#extension GL_EXT_shader_image_load_formatted : require
|
||||
|
||||
layout (set = 1, binding = 4) writeonly uniform uimage2D dst[];
|
||||
|
||||
#define RGB
|
||||
#include "ffv1_dec.comp.glsl"
|
||||
27
libavcodec/vulkan/ffv1_dec_rgb_golomb.comp.glsl
Normal file
27
libavcodec/vulkan/ffv1_dec_rgb_golomb.comp.glsl
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
/*
|
||||
* FFv1 codec
|
||||
*
|
||||
* Copyright (c) 2026 Lynne <dev@lynne.ee>
|
||||
*
|
||||
* This file is part of FFmpeg.
|
||||
*
|
||||
* FFmpeg is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* FFmpeg is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with FFmpeg; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#pragma shader_stage(compute)
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
|
||||
#define GOLOMB
|
||||
#include "ffv1_dec_rgb.comp.glsl"
|
||||
|
|
@ -20,6 +20,23 @@
|
|||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#pragma shader_stage(compute)
|
||||
#extension GL_GOOGLE_include_directive : require
|
||||
|
||||
#include "common.comp"
|
||||
#include "ffv1_common.glsl"
|
||||
|
||||
layout (set = 0, binding = 1, scalar) uniform crc_ieee_buf {
|
||||
uint32_t crc_ieee[256];
|
||||
};
|
||||
|
||||
layout (set = 1, binding = 1, scalar) readonly buffer slice_offsets_buf {
|
||||
u32vec2 slice_offsets[];
|
||||
};
|
||||
layout (set = 1, binding = 2, scalar) writeonly buffer slice_status_buf {
|
||||
uint32_t slice_status[];
|
||||
};
|
||||
|
||||
uint8_t setup_state[CONTEXT_SIZE];
|
||||
|
||||
uint get_usymbol(inout RangeCoder c)
|
||||
|
|
@ -98,21 +115,9 @@ bool decode_slice_header(inout SliceContext sc)
|
|||
return false;
|
||||
}
|
||||
|
||||
void golomb_init(inout SliceContext sc)
|
||||
{
|
||||
if (version == 3 && micro_version > 1 || version > 3) {
|
||||
setup_state[0] = uint8_t(129);
|
||||
get_rac_direct(sc.c, setup_state[0]);
|
||||
}
|
||||
|
||||
uint64_t ac_byte_count = sc.c.bytestream - sc.c.bytestream_start - 1;
|
||||
init_get_bits(sc.gb, u8buf(sc.c.bytestream_start + ac_byte_count),
|
||||
int(sc.c.bytestream_end - sc.c.bytestream_start - ac_byte_count));
|
||||
}
|
||||
|
||||
void main(void)
|
||||
{
|
||||
const uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
|
||||
uint slice_idx = gl_WorkGroupID.y*gl_NumWorkGroups.x + gl_WorkGroupID.x;
|
||||
|
||||
u8buf bs = u8buf(slice_data + slice_offsets[slice_idx].x);
|
||||
uint32_t slice_size = slice_offsets[slice_idx].y;
|
||||
|
|
@ -125,10 +130,7 @@ void main(void)
|
|||
|
||||
decode_slice_header(slice_ctx[slice_idx]);
|
||||
|
||||
if (golomb == 1)
|
||||
golomb_init(slice_ctx[slice_idx]);
|
||||
|
||||
if (ec != 0 && check_crc != 0) {
|
||||
if (has_crc) {
|
||||
uint32_t crc = crcref;
|
||||
for (int i = 0; i < slice_size; i++)
|
||||
crc = crc_ieee[(crc & 0xFF) ^ uint32_t(bs[i].v)] ^ (crc >> 8);
|
||||
|
|
@ -20,6 +20,14 @@
|
|||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef CONTEXT_SIZE
|
||||
#define CONTEXT_SIZE 32
|
||||
|
||||
layout (set = 0, binding = 0, scalar) uniform rangecoder_buf {
|
||||
uint8_t zero_one_state[512];
|
||||
};
|
||||
#endif
|
||||
|
||||
struct RangeCoder {
|
||||
uint64_t bytestream_start;
|
||||
uint64_t bytestream;
|
||||
|
|
@ -42,8 +50,6 @@ void rac_init(out RangeCoder r, u8buf data, uint buf_size)
|
|||
r.outstanding_byte = uint8_t(0xFF);
|
||||
}
|
||||
|
||||
#if !defined(DECODE)
|
||||
|
||||
#ifdef FULL_RENORM
|
||||
/* Full renorm version that can handle outstanding_byte == 0xFF */
|
||||
void renorm_encoder(inout RangeCoder c)
|
||||
|
|
@ -178,8 +184,6 @@ uint32_t rac_terminate(inout RangeCoder c)
|
|||
return uint32_t(uint64_t(c.bytestream) - uint64_t(c.bytestream_start));
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
/* Decoder */
|
||||
uint overread = 0;
|
||||
bool corrupt = false;
|
||||
|
|
@ -243,5 +247,3 @@ bool get_rac_equi(inout RangeCoder c)
|
|||
{
|
||||
return get_rac_internal(c, c.range >> 1);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -23,18 +23,30 @@
|
|||
|
||||
#include "ffv1.h"
|
||||
#include "ffv1_vulkan.h"
|
||||
#include "libavutil/vulkan_spirv.h"
|
||||
#include "libavutil/mem.h"
|
||||
|
||||
#define RGB_LINECACHE 2
|
||||
|
||||
extern const char *ff_source_common_comp;
|
||||
extern const char *ff_source_rangecoder_comp;
|
||||
extern const char *ff_source_ffv1_vlc_comp;
|
||||
extern const char *ff_source_ffv1_common_comp;
|
||||
extern const char *ff_source_ffv1_dec_setup_comp;
|
||||
extern const char *ff_source_ffv1_reset_comp;
|
||||
extern const char *ff_source_ffv1_dec_comp;
|
||||
extern const unsigned char ff_ffv1_dec_setup_comp_spv_data[];
|
||||
extern const unsigned int ff_ffv1_dec_setup_comp_spv_len;
|
||||
|
||||
extern const unsigned char ff_ffv1_dec_reset_comp_spv_data[];
|
||||
extern const unsigned int ff_ffv1_dec_reset_comp_spv_len;
|
||||
|
||||
extern const unsigned char ff_ffv1_dec_reset_golomb_comp_spv_data[];
|
||||
extern const unsigned int ff_ffv1_dec_reset_golomb_comp_spv_len;
|
||||
|
||||
extern const unsigned char ff_ffv1_dec_comp_spv_data[];
|
||||
extern const unsigned int ff_ffv1_dec_comp_spv_len;
|
||||
|
||||
extern const unsigned char ff_ffv1_dec_rgb_comp_spv_data[];
|
||||
extern const unsigned int ff_ffv1_dec_rgb_comp_spv_len;
|
||||
|
||||
extern const unsigned char ff_ffv1_dec_golomb_comp_spv_data[];
|
||||
extern const unsigned int ff_ffv1_dec_golomb_comp_spv_len;
|
||||
|
||||
extern const unsigned char ff_ffv1_dec_rgb_golomb_comp_spv_data[];
|
||||
extern const unsigned int ff_ffv1_dec_rgb_golomb_comp_spv_len;
|
||||
|
||||
const FFVulkanDecodeDescriptor ff_vk_dec_ffv1_desc = {
|
||||
.codec_id = AV_CODEC_ID_FFV1,
|
||||
|
|
@ -64,80 +76,15 @@ typedef struct FFv1VulkanDecodeContext {
|
|||
FFVulkanShader reset;
|
||||
FFVulkanShader decode;
|
||||
|
||||
FFVkBuffer rangecoder_static_buf;
|
||||
FFVkBuffer rangecoder_buf;
|
||||
FFVkBuffer quant_buf;
|
||||
FFVkBuffer crc_tab_buf;
|
||||
FFVkBuffer crc_buf;
|
||||
|
||||
AVBufferPool *slice_state_pool;
|
||||
AVBufferPool *slice_offset_pool;
|
||||
AVBufferPool *slice_status_pool;
|
||||
} FFv1VulkanDecodeContext;
|
||||
|
||||
typedef struct FFv1VkParameters {
|
||||
VkDeviceAddress slice_data;
|
||||
VkDeviceAddress slice_state;
|
||||
|
||||
int fmt_lut[4];
|
||||
uint32_t img_size[2];
|
||||
uint32_t chroma_shift[2];
|
||||
|
||||
uint32_t plane_state_size;
|
||||
uint32_t crcref;
|
||||
int rct_offset;
|
||||
|
||||
uint8_t extend_lookup[8];
|
||||
uint8_t bits_per_raw_sample;
|
||||
uint8_t quant_table_count;
|
||||
uint8_t version;
|
||||
uint8_t micro_version;
|
||||
uint8_t key_frame;
|
||||
uint8_t planes;
|
||||
uint8_t codec_planes;
|
||||
uint8_t color_planes;
|
||||
uint8_t transparency;
|
||||
uint8_t planar_rgb;
|
||||
uint8_t colorspace;
|
||||
uint8_t ec;
|
||||
uint8_t golomb;
|
||||
uint8_t check_crc;
|
||||
uint8_t padding[3];
|
||||
} FFv1VkParameters;
|
||||
|
||||
static void add_push_data(FFVulkanShader *shd)
|
||||
{
|
||||
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
|
||||
GLSLC(1, u8buf slice_data; );
|
||||
GLSLC(1, u8buf slice_state; );
|
||||
GLSLC(0, );
|
||||
GLSLC(1, ivec4 fmt_lut; );
|
||||
GLSLC(1, uvec2 img_size; );
|
||||
GLSLC(1, uvec2 chroma_shift; );
|
||||
GLSLC(0, );
|
||||
GLSLC(1, uint plane_state_size; );
|
||||
GLSLC(1, uint32_t crcref; );
|
||||
GLSLC(1, int rct_offset; );
|
||||
GLSLC(0, );
|
||||
GLSLC(1, uint8_t extend_lookup[8]; );
|
||||
GLSLC(1, uint8_t bits_per_raw_sample; );
|
||||
GLSLC(1, uint8_t quant_table_count; );
|
||||
GLSLC(1, uint8_t version; );
|
||||
GLSLC(1, uint8_t micro_version; );
|
||||
GLSLC(1, uint8_t key_frame; );
|
||||
GLSLC(1, uint8_t planes; );
|
||||
GLSLC(1, uint8_t codec_planes; );
|
||||
GLSLC(1, uint8_t color_planes; );
|
||||
GLSLC(1, uint8_t transparency; );
|
||||
GLSLC(1, uint8_t planar_rgb; );
|
||||
GLSLC(1, uint8_t colorspace; );
|
||||
GLSLC(1, uint8_t ec; );
|
||||
GLSLC(1, uint8_t golomb; );
|
||||
GLSLC(1, uint8_t check_crc; );
|
||||
GLSLC(1, uint8_t padding[3]; );
|
||||
GLSLC(0, }; );
|
||||
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkParameters),
|
||||
VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
}
|
||||
|
||||
static int vk_ffv1_start_frame(AVCodecContext *avctx,
|
||||
const AVBufferRef *buffer_ref,
|
||||
av_unused const uint8_t *buffer,
|
||||
|
|
@ -291,13 +238,10 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
|||
|
||||
FFV1Context *f = avctx->priv_data;
|
||||
FFv1VulkanDecodeContext *fv = ctx->sd_ctx;
|
||||
FFv1VkParameters pd;
|
||||
FFv1VkResetParameters pd_reset;
|
||||
|
||||
AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
|
||||
enum AVPixelFormat sw_format = hwfc->sw_format;
|
||||
|
||||
int bits = f->avctx->bits_per_raw_sample > 0 ? f->avctx->bits_per_raw_sample : 8;
|
||||
int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) &&
|
||||
!(sw_format == AV_PIX_FMT_YA8);
|
||||
int color_planes = av_pix_fmt_desc_get(avctx->sw_pix_fmt)->nb_components;
|
||||
|
|
@ -408,39 +352,25 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
|||
VK_FORMAT_UNDEFINED);
|
||||
|
||||
ff_vk_exec_bind_shader(&ctx->s, exec, &fv->setup);
|
||||
pd = (FFv1VkParameters) {
|
||||
|
||||
FFv1ShaderParams pd = {
|
||||
.slice_data = slices_buf->address,
|
||||
.slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
|
||||
|
||||
.img_size[0] = f->picture.f->width,
|
||||
.img_size[1] = f->picture.f->height,
|
||||
.chroma_shift[0] = f->chroma_h_shift,
|
||||
.chroma_shift[1] = f->chroma_v_shift,
|
||||
|
||||
.plane_state_size = fp->plane_state_size,
|
||||
.crcref = f->crcref,
|
||||
.rct_offset = 1 << bits,
|
||||
|
||||
.bits_per_raw_sample = bits,
|
||||
.quant_table_count = f->quant_table_count,
|
||||
.version = f->version,
|
||||
.micro_version = f->micro_version,
|
||||
.key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY,
|
||||
.planes = av_pix_fmt_count_planes(sw_format),
|
||||
.codec_planes = f->plane_count,
|
||||
.color_planes = color_planes,
|
||||
.transparency = f->transparency,
|
||||
.planar_rgb = ff_vk_mt_is_np_rgb(sw_format) &&
|
||||
(ff_vk_count_images((AVVkFrame *)f->picture.f->data[0]) > 1),
|
||||
.colorspace = f->colorspace,
|
||||
.ec = f->ec,
|
||||
.golomb = f->ac == AC_GOLOMB_RICE,
|
||||
.check_crc = !!(avctx->err_recognition & AV_EF_CRCCHECK),
|
||||
.crcref = f->crcref,
|
||||
.micro_version = f->micro_version,
|
||||
};
|
||||
for (int i = 0; i < f->quant_table_count; i++)
|
||||
pd.extend_lookup[i] = (f->quant_tables[i][3][127] != 0) ||
|
||||
(f->quant_tables[i][4][127] != 0);
|
||||
|
||||
for (int i = 0; i < f->quant_table_count; i++) {
|
||||
pd.context_count[i] = f->context_count[i];
|
||||
pd.extend_lookup[i] = f->quant_tables[i][3][127] ||
|
||||
f->quant_tables[i][4][127];
|
||||
}
|
||||
|
||||
/* For some reason the C FFv1 encoder/decoder treats these differently */
|
||||
if (sw_format == AV_PIX_FMT_GBRP10 || sw_format == AV_PIX_FMT_GBRP12 ||
|
||||
|
|
@ -451,7 +381,7 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
|||
|
||||
ff_vk_shader_update_push_const(&ctx->s, exec, &fv->setup,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
0, sizeof(pd), &pd);
|
||||
0, sizeof(FFv1ShaderParams), &pd);
|
||||
|
||||
vk->CmdDispatch(exec->buf, f->num_h_slices, f->num_v_slices, 1);
|
||||
|
||||
|
|
@ -476,21 +406,9 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
|||
VK_FORMAT_UNDEFINED);
|
||||
|
||||
ff_vk_exec_bind_shader(&ctx->s, exec, reset_shader);
|
||||
|
||||
pd_reset = (FFv1VkResetParameters) {
|
||||
.slice_state = slice_state->address + f->slice_count*fp->slice_data_size,
|
||||
.plane_state_size = fp->plane_state_size,
|
||||
.codec_planes = f->plane_count,
|
||||
.key_frame = f->picture.f->flags & AV_FRAME_FLAG_KEY,
|
||||
.version = f->version,
|
||||
.micro_version = f->micro_version,
|
||||
};
|
||||
for (int i = 0; i < f->quant_table_count; i++)
|
||||
pd_reset.context_count[i] = f->context_count[i];
|
||||
|
||||
ff_vk_shader_update_push_const(&ctx->s, exec, reset_shader,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
0, sizeof(pd_reset), &pd_reset);
|
||||
0, sizeof(FFv1ShaderParams), &pd);
|
||||
|
||||
/* Sync between setup and reset shaders */
|
||||
ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_state,
|
||||
|
|
@ -530,27 +448,33 @@ static int vk_ffv1_end_frame(AVCodecContext *avctx)
|
|||
slice_state,
|
||||
0, fp->slice_data_size*f->slice_count,
|
||||
VK_FORMAT_UNDEFINED);
|
||||
ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
|
||||
decode_dst, decode_dst_view,
|
||||
1, 1,
|
||||
VK_IMAGE_LAYOUT_GENERAL,
|
||||
VK_NULL_HANDLE);
|
||||
ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader,
|
||||
1, 1, 0,
|
||||
slice_offset,
|
||||
0, 2*f->slice_count*sizeof(uint32_t),
|
||||
VK_FORMAT_UNDEFINED);
|
||||
ff_vk_shader_update_desc_buffer(&ctx->s, exec, decode_shader,
|
||||
1, 2, 0,
|
||||
slice_status,
|
||||
0, 2*f->slice_count*sizeof(uint32_t),
|
||||
VK_FORMAT_UNDEFINED);
|
||||
|
||||
ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
|
||||
decode_dst, decode_dst_view,
|
||||
1, 3,
|
||||
VK_IMAGE_LAYOUT_GENERAL,
|
||||
VK_NULL_HANDLE);
|
||||
if (is_rgb)
|
||||
ff_vk_shader_update_img_array(&ctx->s, exec, decode_shader,
|
||||
f->picture.f, vp->view.out,
|
||||
1, 3,
|
||||
1, 4,
|
||||
VK_IMAGE_LAYOUT_GENERAL,
|
||||
VK_NULL_HANDLE);
|
||||
|
||||
ff_vk_exec_bind_shader(&ctx->s, exec, decode_shader);
|
||||
ff_vk_shader_update_push_const(&ctx->s, exec, decode_shader,
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
0, sizeof(pd), &pd);
|
||||
0, sizeof(FFv1ShaderParams), &pd);
|
||||
|
||||
/* Sync probabilities between reset and decode shaders */
|
||||
ff_vk_buf_barrier(buf_bar[nb_buf_bar++], slice_state,
|
||||
|
|
@ -602,329 +526,175 @@ fail:
|
|||
return 0;
|
||||
}
|
||||
|
||||
static void define_shared_code(FFVulkanShader *shd, int use32bit)
|
||||
{
|
||||
int smp_bits = use32bit ? 32 : 16;
|
||||
|
||||
GLSLC(0, #define DECODE );
|
||||
|
||||
av_bprintf(&shd->src, "#define RGB_LINECACHE %i\n" ,RGB_LINECACHE);
|
||||
av_bprintf(&shd->src, "#define CONTEXT_SIZE %i\n" ,CONTEXT_SIZE);
|
||||
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_MASK 0x%x\n" ,MAX_QUANT_TABLE_MASK);
|
||||
|
||||
GLSLF(0, #define TYPE int%i_t ,smp_bits);
|
||||
GLSLF(0, #define VTYPE2 i%ivec2 ,smp_bits);
|
||||
GLSLF(0, #define VTYPE3 i%ivec3 ,smp_bits);
|
||||
GLSLD(ff_source_rangecoder_comp);
|
||||
GLSLD(ff_source_ffv1_common_comp);
|
||||
}
|
||||
|
||||
static int init_setup_shader(FFV1Context *f, FFVulkanContext *s,
|
||||
FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
|
||||
FFVulkanShader *shd)
|
||||
FFVkExecPool *pool, FFVulkanShader *shd,
|
||||
VkSpecializationInfo *sl)
|
||||
{
|
||||
int err;
|
||||
FFVulkanDescriptorSetBinding *desc_set;
|
||||
|
||||
uint8_t *spv_data;
|
||||
size_t spv_len;
|
||||
void *spv_opaque = NULL;
|
||||
ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
|
||||
(uint32_t []) { 1, 1, 1 }, 0);
|
||||
|
||||
RET(ff_vk_shader_init(s, shd, "ffv1_dec_setup",
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
(const char *[]) { "GL_EXT_buffer_reference",
|
||||
"GL_EXT_buffer_reference2" }, 2,
|
||||
1, 1, 1,
|
||||
0));
|
||||
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1ShaderParams),
|
||||
VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
|
||||
/* Common codec header */
|
||||
GLSLD(ff_source_common_comp);
|
||||
|
||||
add_push_data(shd);
|
||||
|
||||
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
|
||||
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
|
||||
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
|
||||
|
||||
desc_set = (FFVulkanDescriptorSetBinding []) {
|
||||
{
|
||||
.name = "rangecoder_static_buf",
|
||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.mem_layout = "scalar",
|
||||
.buf_content = "uint8_t zero_one_state[512];",
|
||||
const FFVulkanDescriptorSetBinding desc_set_const[] = {
|
||||
{ /* rangecoder_buf */
|
||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
{
|
||||
.name = "crc_ieee_buf",
|
||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.mem_layout = "scalar",
|
||||
.buf_content = "uint32_t crc_ieee[256];",
|
||||
},
|
||||
{
|
||||
.name = "quant_buf",
|
||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.mem_layout = "scalar",
|
||||
.buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
|
||||
"[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
|
||||
{ /* crc_ieee_buf */
|
||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
};
|
||||
ff_vk_shader_add_descriptor_set(s, shd, desc_set_const, 2, 1, 0);
|
||||
|
||||
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 1, 0));
|
||||
|
||||
define_shared_code(shd, 0 /* Irrelevant */);
|
||||
|
||||
desc_set = (FFVulkanDescriptorSetBinding []) {
|
||||
{
|
||||
.name = "slice_data_buf",
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.buf_content = "SliceContext slice_ctx",
|
||||
.buf_elems = f->max_slice_count,
|
||||
const FFVulkanDescriptorSetBinding desc_set[] = {
|
||||
{ /* slice_data_buf */
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
{
|
||||
.name = "slice_offsets_buf",
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.mem_quali = "readonly",
|
||||
.buf_content = "u32vec2 slice_offsets",
|
||||
.buf_elems = 2*f->max_slice_count,
|
||||
{ /* slice_offsets_buf */
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
{
|
||||
.name = "slice_status_buf",
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.mem_quali = "writeonly",
|
||||
.buf_content = "uint32_t slice_status",
|
||||
.buf_elems = 2*f->max_slice_count,
|
||||
{ /* slice_status_buf */
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
};
|
||||
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0));
|
||||
ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3, 0, 0);
|
||||
|
||||
GLSLD(ff_source_ffv1_dec_setup_comp);
|
||||
|
||||
RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
|
||||
&spv_opaque));
|
||||
RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
|
||||
RET(ff_vk_shader_link(s, shd,
|
||||
ff_ffv1_dec_setup_comp_spv_data,
|
||||
ff_ffv1_dec_setup_comp_spv_len, "main"));
|
||||
|
||||
RET(ff_vk_shader_register_exec(s, pool, shd));
|
||||
|
||||
fail:
|
||||
if (spv_opaque)
|
||||
spv->free_shader(spv, &spv_opaque);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int init_reset_shader(FFV1Context *f, FFVulkanContext *s,
|
||||
FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
|
||||
FFVulkanShader *shd, int ac)
|
||||
FFVkExecPool *pool, FFVulkanShader *shd,
|
||||
VkSpecializationInfo *sl, int ac)
|
||||
{
|
||||
int err;
|
||||
FFVulkanDescriptorSetBinding *desc_set;
|
||||
|
||||
uint8_t *spv_data;
|
||||
size_t spv_len;
|
||||
void *spv_opaque = NULL;
|
||||
int wg_dim = FFMIN(s->props.properties.limits.maxComputeWorkGroupSize[0], 1024);
|
||||
|
||||
RET(ff_vk_shader_init(s, shd, "ffv1_dec_reset",
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
(const char *[]) { "GL_EXT_buffer_reference",
|
||||
"GL_EXT_buffer_reference2" }, 2,
|
||||
wg_dim, 1, 1,
|
||||
0));
|
||||
ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
|
||||
(uint32_t []) { wg_dim, 1, 1 }, 0);
|
||||
|
||||
if (ac == AC_GOLOMB_RICE)
|
||||
av_bprintf(&shd->src, "#define GOLOMB\n");
|
||||
|
||||
/* Common codec header */
|
||||
GLSLD(ff_source_common_comp);
|
||||
|
||||
GLSLC(0, layout(push_constant, scalar) uniform pushConstants { );
|
||||
GLSLF(1, uint context_count[%i]; ,MAX_QUANT_TABLES);
|
||||
GLSLC(1, u8buf slice_state; );
|
||||
GLSLC(1, uint plane_state_size; );
|
||||
GLSLC(1, uint8_t codec_planes; );
|
||||
GLSLC(1, uint8_t key_frame; );
|
||||
GLSLC(1, uint8_t version; );
|
||||
GLSLC(1, uint8_t micro_version; );
|
||||
GLSLC(1, uint8_t padding[1]; );
|
||||
GLSLC(0, }; );
|
||||
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1VkResetParameters),
|
||||
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1ShaderParams),
|
||||
VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
|
||||
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
|
||||
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
|
||||
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
|
||||
|
||||
desc_set = (FFVulkanDescriptorSetBinding []) {
|
||||
{
|
||||
.name = "rangecoder_static_buf",
|
||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.mem_layout = "scalar",
|
||||
.buf_content = "uint8_t zero_one_state[512];",
|
||||
},
|
||||
{
|
||||
.name = "quant_buf",
|
||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.mem_layout = "scalar",
|
||||
.buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
|
||||
"[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
|
||||
const FFVulkanDescriptorSetBinding desc_set_const[] = {
|
||||
{ /* rangecoder_buf */
|
||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
};
|
||||
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0));
|
||||
ff_vk_shader_add_descriptor_set(s, shd, desc_set_const, 1, 1, 0);
|
||||
|
||||
const FFVulkanDescriptorSetBinding desc_set[] = {
|
||||
{ /* slice_data_buf */
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
};
|
||||
ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0);
|
||||
|
||||
define_shared_code(shd, 0 /* Bit depth irrelevant for the reset shader */);
|
||||
if (ac == AC_GOLOMB_RICE)
|
||||
GLSLD(ff_source_ffv1_vlc_comp);
|
||||
|
||||
desc_set = (FFVulkanDescriptorSetBinding []) {
|
||||
{
|
||||
.name = "slice_data_buf",
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.mem_quali = "readonly",
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.buf_content = "SliceContext slice_ctx",
|
||||
.buf_elems = f->max_slice_count,
|
||||
},
|
||||
};
|
||||
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 1, 0, 0));
|
||||
|
||||
GLSLD(ff_source_ffv1_reset_comp);
|
||||
|
||||
RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
|
||||
&spv_opaque));
|
||||
RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
|
||||
RET(ff_vk_shader_link(s, shd,
|
||||
ff_ffv1_dec_reset_golomb_comp_spv_data,
|
||||
ff_ffv1_dec_reset_golomb_comp_spv_len, "main"));
|
||||
else
|
||||
RET(ff_vk_shader_link(s, shd,
|
||||
ff_ffv1_dec_reset_comp_spv_data,
|
||||
ff_ffv1_dec_reset_comp_spv_len, "main"));
|
||||
|
||||
RET(ff_vk_shader_register_exec(s, pool, shd));
|
||||
|
||||
fail:
|
||||
if (spv_opaque)
|
||||
spv->free_shader(spv, &spv_opaque);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
static int init_decode_shader(FFV1Context *f, FFVulkanContext *s,
|
||||
FFVkExecPool *pool, FFVkSPIRVCompiler *spv,
|
||||
FFVulkanShader *shd,
|
||||
FFVkExecPool *pool, FFVulkanShader *shd,
|
||||
AVHWFramesContext *dec_frames_ctx,
|
||||
AVHWFramesContext *out_frames_ctx,
|
||||
int ac, int rgb)
|
||||
VkSpecializationInfo *sl, int ac, int rgb)
|
||||
{
|
||||
int err;
|
||||
FFVulkanDescriptorSetBinding *desc_set;
|
||||
|
||||
uint8_t *spv_data;
|
||||
size_t spv_len;
|
||||
void *spv_opaque = NULL;
|
||||
ff_vk_shader_load(shd, VK_SHADER_STAGE_COMPUTE_BIT, sl,
|
||||
(uint32_t []) { 1, 1, 1 }, 0);
|
||||
|
||||
int use_cached_reader = ac != AC_GOLOMB_RICE &&
|
||||
s->driver_props.driverID == VK_DRIVER_ID_MESA_RADV;
|
||||
ff_vk_shader_add_push_const(shd, 0, sizeof(FFv1ShaderParams),
|
||||
VK_SHADER_STAGE_COMPUTE_BIT);
|
||||
|
||||
RET(ff_vk_shader_init(s, shd, "ffv1_dec",
|
||||
VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
(const char *[]) { "GL_EXT_buffer_reference",
|
||||
"GL_EXT_buffer_reference2" }, 2,
|
||||
use_cached_reader ? CONTEXT_SIZE : 1, 1, 1,
|
||||
0));
|
||||
|
||||
if (ac == AC_GOLOMB_RICE)
|
||||
av_bprintf(&shd->src, "#define GOLOMB\n");
|
||||
|
||||
if (rgb)
|
||||
av_bprintf(&shd->src, "#define RGB\n");
|
||||
|
||||
if (use_cached_reader)
|
||||
av_bprintf(&shd->src, "#define CACHED_SYMBOL_READER 1\n");
|
||||
|
||||
/* Common codec header */
|
||||
GLSLD(ff_source_common_comp);
|
||||
|
||||
add_push_data(shd);
|
||||
|
||||
av_bprintf(&shd->src, "#define MAX_QUANT_TABLES %i\n", MAX_QUANT_TABLES);
|
||||
av_bprintf(&shd->src, "#define MAX_CONTEXT_INPUTS %i\n", MAX_CONTEXT_INPUTS);
|
||||
av_bprintf(&shd->src, "#define MAX_QUANT_TABLE_SIZE %i\n", MAX_QUANT_TABLE_SIZE);
|
||||
|
||||
desc_set = (FFVulkanDescriptorSetBinding []) {
|
||||
{
|
||||
.name = "rangecoder_static_buf",
|
||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.mem_layout = "scalar",
|
||||
.buf_content = "uint8_t zero_one_state[512];",
|
||||
const FFVulkanDescriptorSetBinding desc_set_const[] = {
|
||||
{ /* rangecoder_buf */
|
||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
{
|
||||
.name = "quant_buf",
|
||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.mem_layout = "scalar",
|
||||
.buf_content = "int16_t quant_table[MAX_QUANT_TABLES]"
|
||||
"[MAX_CONTEXT_INPUTS][MAX_QUANT_TABLE_SIZE];",
|
||||
{ /* quant_buf */
|
||||
.type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
};
|
||||
ff_vk_shader_add_descriptor_set(s, shd, desc_set_const, 2, 1, 0);
|
||||
|
||||
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 2, 1, 0));
|
||||
|
||||
define_shared_code(shd, f->use32bit);
|
||||
if (ac == AC_GOLOMB_RICE)
|
||||
GLSLD(ff_source_ffv1_vlc_comp);
|
||||
|
||||
desc_set = (FFVulkanDescriptorSetBinding []) {
|
||||
{
|
||||
.name = "slice_data_buf",
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.buf_content = "SliceContext slice_ctx",
|
||||
.buf_elems = f->max_slice_count,
|
||||
const FFVulkanDescriptorSetBinding desc_set[] = {
|
||||
{ /* slice_data_buf */
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
{
|
||||
.name = "dec",
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.dimensions = 2,
|
||||
.mem_layout = ff_vk_shader_rep_fmt(dec_frames_ctx->sw_format,
|
||||
FF_VK_REP_NATIVE),
|
||||
.elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format),
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
{ /* slice_offsets_buf */
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
{
|
||||
.name = "slice_status_buf",
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.mem_quali = "writeonly",
|
||||
.buf_content = "uint32_t slice_status",
|
||||
.buf_elems = 2*f->max_slice_count,
|
||||
{ /* slice_status_buf */
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
},
|
||||
{
|
||||
.name = "dst",
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.dimensions = 2,
|
||||
.mem_layout = ff_vk_shader_rep_fmt(out_frames_ctx->sw_format,
|
||||
FF_VK_REP_NATIVE),
|
||||
.mem_quali = "writeonly",
|
||||
.elems = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
{ /* dec */
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.elems = av_pix_fmt_count_planes(dec_frames_ctx->sw_format),
|
||||
},
|
||||
{ /* dst */
|
||||
.type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
|
||||
.stages = VK_SHADER_STAGE_COMPUTE_BIT,
|
||||
.elems = av_pix_fmt_count_planes(out_frames_ctx->sw_format),
|
||||
},
|
||||
};
|
||||
RET(ff_vk_shader_add_descriptor_set(s, shd, desc_set, 3 + rgb, 0, 0));
|
||||
ff_vk_shader_add_descriptor_set(s, shd, desc_set, 4 + rgb, 0, 0);
|
||||
|
||||
GLSLD(ff_source_ffv1_dec_comp);
|
||||
|
||||
RET(spv->compile_shader(s, spv, shd, &spv_data, &spv_len, "main",
|
||||
&spv_opaque));
|
||||
RET(ff_vk_shader_link(s, shd, spv_data, spv_len, "main"));
|
||||
if (ac == AC_GOLOMB_RICE) {
|
||||
if (rgb)
|
||||
ff_vk_shader_link(s, shd,
|
||||
ff_ffv1_dec_rgb_golomb_comp_spv_data,
|
||||
ff_ffv1_dec_rgb_golomb_comp_spv_len, "main");
|
||||
else
|
||||
ff_vk_shader_link(s, shd,
|
||||
ff_ffv1_dec_golomb_comp_spv_data,
|
||||
ff_ffv1_dec_golomb_comp_spv_len, "main");
|
||||
} else {
|
||||
if (rgb)
|
||||
ff_vk_shader_link(s, shd,
|
||||
ff_ffv1_dec_rgb_comp_spv_data,
|
||||
ff_ffv1_dec_rgb_comp_spv_len, "main");
|
||||
else
|
||||
ff_vk_shader_link(s, shd,
|
||||
ff_ffv1_dec_comp_spv_data,
|
||||
ff_ffv1_dec_comp_spv_len, "main");
|
||||
}
|
||||
|
||||
RET(ff_vk_shader_register_exec(s, pool, shd));
|
||||
|
||||
fail:
|
||||
if (spv_opaque)
|
||||
spv->free_shader(spv, &spv_opaque);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
|
@ -954,7 +724,8 @@ static int init_indirect(AVCodecContext *avctx, FFVulkanContext *s,
|
|||
|
||||
err = av_hwframe_ctx_init(*dst);
|
||||
if (err < 0) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Unable to initialize frame pool with format %s: %s\n",
|
||||
av_log(avctx, AV_LOG_ERROR,
|
||||
"Unable to initialize frame pool with format %s: %s\n",
|
||||
av_get_pix_fmt_name(sw_format), av_err2str(err));
|
||||
av_buffer_unref(dst);
|
||||
return err;
|
||||
|
|
@ -973,9 +744,9 @@ static void vk_decode_ffv1_uninit(FFVulkanDecodeShared *ctx)
|
|||
ff_vk_shader_free(&ctx->s, &fv->reset);
|
||||
ff_vk_shader_free(&ctx->s, &fv->decode);
|
||||
|
||||
ff_vk_free_buf(&ctx->s, &fv->rangecoder_buf);
|
||||
ff_vk_free_buf(&ctx->s, &fv->quant_buf);
|
||||
ff_vk_free_buf(&ctx->s, &fv->rangecoder_static_buf);
|
||||
ff_vk_free_buf(&ctx->s, &fv->crc_tab_buf);
|
||||
ff_vk_free_buf(&ctx->s, &fv->crc_buf);
|
||||
|
||||
av_buffer_pool_uninit(&fv->slice_state_pool);
|
||||
av_buffer_pool_uninit(&fv->slice_offset_pool);
|
||||
|
|
@ -991,18 +762,11 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
|
|||
FFVulkanDecodeContext *dec = avctx->internal->hwaccel_priv_data;
|
||||
FFVulkanDecodeShared *ctx = NULL;
|
||||
FFv1VulkanDecodeContext *fv;
|
||||
FFVkSPIRVCompiler *spv;
|
||||
|
||||
if (f->version < 3 ||
|
||||
(f->version == 4 && f->micro_version > 3))
|
||||
return AVERROR(ENOTSUP);
|
||||
|
||||
spv = ff_vk_spirv_init();
|
||||
if (!spv) {
|
||||
av_log(avctx, AV_LOG_ERROR, "Unable to initialize SPIR-V compiler!\n");
|
||||
return AVERROR_EXTERNAL;
|
||||
}
|
||||
|
||||
err = ff_vk_decode_init(avctx);
|
||||
if (err < 0)
|
||||
return err;
|
||||
|
|
@ -1019,6 +783,8 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
|
|||
AVHWFramesContext *hwfc = (AVHWFramesContext *)avctx->hw_frames_ctx->data;
|
||||
AVHWFramesContext *dctx = hwfc;
|
||||
enum AVPixelFormat sw_format = hwfc->sw_format;
|
||||
const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(sw_format);
|
||||
int color_planes = av_pix_fmt_desc_get(avctx->sw_pix_fmt)->nb_components;
|
||||
int is_rgb = !(f->colorspace == 0 && sw_format != AV_PIX_FMT_YA8) &&
|
||||
!(sw_format == AV_PIX_FMT_YA8);
|
||||
|
||||
|
|
@ -1029,63 +795,78 @@ static int vk_decode_ffv1_init(AVCodecContext *avctx)
|
|||
dctx = (AVHWFramesContext *)fv->intermediate_frames_ref->data;
|
||||
}
|
||||
|
||||
SPEC_LIST_CREATE(sl, 15, 15*sizeof(uint32_t))
|
||||
|
||||
if (RGB_LINECACHE != 2)
|
||||
SPEC_LIST_ADD(sl, 0, 32, RGB_LINECACHE);
|
||||
|
||||
if (f->ec && !!(avctx->err_recognition & AV_EF_CRCCHECK))
|
||||
SPEC_LIST_ADD(sl, 1, 32, 1);
|
||||
|
||||
SPEC_LIST_ADD(sl, 2, 32, f->version);
|
||||
SPEC_LIST_ADD(sl, 3, 32, f->quant_table_count);
|
||||
|
||||
for (int i = 0; i < f->quant_table_count; i++) {
|
||||
if (f->quant_tables[i][3][127] || f->quant_tables[i][4][127]) {
|
||||
SPEC_LIST_ADD(sl, 4, 32, 1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
int bits = f->avctx->bits_per_raw_sample > 0 ? f->avctx->bits_per_raw_sample : 8;
|
||||
SPEC_LIST_ADD(sl, 5, 32, 1 << bits);
|
||||
SPEC_LIST_ADD(sl, 6, 32, f->colorspace);
|
||||
SPEC_LIST_ADD(sl, 7, 32, f->transparency);
|
||||
SPEC_LIST_ADD(sl, 8, 32, ff_vk_mt_is_np_rgb(sw_format) &&
|
||||
(desc->flags & AV_PIX_FMT_FLAG_PLANAR));
|
||||
SPEC_LIST_ADD(sl, 9, 32, f->plane_count);
|
||||
SPEC_LIST_ADD(sl, 10, 32, color_planes);
|
||||
SPEC_LIST_ADD(sl, 11, 32, av_pix_fmt_count_planes(sw_format));
|
||||
SPEC_LIST_ADD(sl, 12, 32, bits);
|
||||
|
||||
SPEC_LIST_ADD(sl, 13, 32, f->chroma_h_shift);
|
||||
SPEC_LIST_ADD(sl, 14, 32, f->chroma_v_shift);
|
||||
|
||||
/* Setup shader */
|
||||
RET(init_setup_shader(f, &ctx->s, &ctx->exec_pool, spv, &fv->setup));
|
||||
RET(init_setup_shader(f, &ctx->s, &ctx->exec_pool, &fv->setup, sl));
|
||||
|
||||
/* Reset shader */
|
||||
RET(init_reset_shader(f, &ctx->s, &ctx->exec_pool,
|
||||
spv, &fv->reset, f->ac));
|
||||
RET(init_reset_shader(f, &ctx->s, &ctx->exec_pool, &fv->reset, sl, f->ac));
|
||||
|
||||
/* Decode shaders */
|
||||
RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool,
|
||||
spv, &fv->decode,
|
||||
dctx,
|
||||
hwfc,
|
||||
f->ac,
|
||||
is_rgb));
|
||||
RET(init_decode_shader(f, &ctx->s, &ctx->exec_pool, &fv->decode,
|
||||
dctx, hwfc, sl, f->ac, is_rgb));
|
||||
|
||||
/* Range coder data */
|
||||
RET(ff_ffv1_vk_init_state_transition_data(&ctx->s,
|
||||
&fv->rangecoder_static_buf,
|
||||
f));
|
||||
|
||||
/* Quantization table data */
|
||||
RET(ff_ffv1_vk_init_quant_table_data(&ctx->s,
|
||||
&fv->quant_buf,
|
||||
f));
|
||||
|
||||
/* CRC table buffer */
|
||||
RET(ff_ffv1_vk_init_crc_table_data(&ctx->s,
|
||||
&fv->crc_tab_buf,
|
||||
f));
|
||||
/* Init static data */
|
||||
RET(ff_ffv1_vk_init_state_transition_data(&ctx->s, &fv->rangecoder_buf, f));
|
||||
RET(ff_ffv1_vk_init_crc_table_data(&ctx->s, &fv->crc_buf, f));
|
||||
RET(ff_ffv1_vk_init_quant_table_data(&ctx->s, &fv->quant_buf, f));
|
||||
|
||||
/* Update setup global descriptors */
|
||||
RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
|
||||
&fv->setup, 0, 0, 0,
|
||||
&fv->rangecoder_static_buf,
|
||||
0, fv->rangecoder_static_buf.size,
|
||||
&fv->rangecoder_buf,
|
||||
0, 512*sizeof(uint8_t),
|
||||
VK_FORMAT_UNDEFINED));
|
||||
RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
|
||||
&fv->setup, 0, 1, 0,
|
||||
&fv->crc_tab_buf,
|
||||
0, fv->crc_tab_buf.size,
|
||||
&fv->crc_buf,
|
||||
0, 256*sizeof(uint32_t),
|
||||
VK_FORMAT_UNDEFINED));
|
||||
|
||||
/* Update decode global descriptors */
|
||||
RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
|
||||
&fv->decode, 0, 0, 0,
|
||||
&fv->rangecoder_static_buf,
|
||||
0, fv->rangecoder_static_buf.size,
|
||||
&fv->rangecoder_buf,
|
||||
0, 512*sizeof(uint8_t),
|
||||
VK_FORMAT_UNDEFINED));
|
||||
RET(ff_vk_shader_update_desc_buffer(&ctx->s, &ctx->exec_pool.contexts[0],
|
||||
&fv->decode, 0, 1, 0,
|
||||
&fv->quant_buf,
|
||||
0, fv->quant_buf.size,
|
||||
0, VK_WHOLE_SIZE,
|
||||
VK_FORMAT_UNDEFINED));
|
||||
|
||||
fail:
|
||||
spv->uninit(&spv);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue