swscale/ops_backend: avoid UB from incorrect function signature

Annoying C-ism; we can't overload the function type even though they will
always be pointers. We can't even get away with using (void *) in the
function signature, despite casts to void * being technically valid.

Avoid the issue altogether by just moving the process loop into the
type-specific template altogether, and just referring to the correct
compiled process function at runtime. Hopefully, the compiler should be
able to optimize these into a single implementation. GCC, at least, compiles
these down into a single implementation plus three stubs that just jmp
to the correct one.

Signed-off-by: Niklas Haas <git@haasn.dev>
This commit is contained in:
Niklas Haas 2026-02-24 12:54:27 +01:00 committed by Niklas Haas
parent ec959e20c5
commit e4abfb8e51
2 changed files with 36 additions and 24 deletions

View file

@ -48,29 +48,6 @@ typedef float f32block_t[SWS_BLOCK_SIZE];
# include "ops_tmpl_float.c"
#undef BIT_DEPTH
static void process(const SwsOpExec *exec, const void *priv,
const int bx_start, const int y_start, int bx_end, int y_end)
{
const SwsOpChain *chain = priv;
const SwsOpImpl *impl = chain->impl;
u32block_t x, y, z, w; /* allocate enough space for any intermediate */
SwsOpIter iterdata;
SwsOpIter *iter = &iterdata; /* for CONTINUE() macro to work */
for (iter->y = y_start; iter->y < y_end; iter->y++) {
for (int i = 0; i < 4; i++) {
iter->in[i] = exec->in[i] + (iter->y - y_start) * exec->in_stride[i];
iter->out[i] = exec->out[i] + (iter->y - y_start) * exec->out_stride[i];
}
for (int block = bx_start; block < bx_end; block++) {
iter->x = block * SWS_BLOCK_SIZE;
CONTINUE(u32block_t, x, y, z, w);
}
}
}
static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
{
int ret;
@ -79,6 +56,9 @@ static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
if (!chain)
return AVERROR(ENOMEM);
av_assert0(ops->num_ops > 0);
const SwsPixelType read_type = ops->ops[0].type;
static const SwsOpTable *const tables[] = {
&bitfn(op_table_int, u8),
&bitfn(op_table_int, u16),
@ -96,12 +76,20 @@ static int compile(SwsContext *ctx, SwsOpList *ops, SwsCompiledOp *out)
}
*out = (SwsCompiledOp) {
.func = process,
.block_size = SWS_BLOCK_SIZE,
.cpu_flags = chain->cpu_flags,
.priv = chain,
.free = ff_sws_op_chain_free_cb,
};
switch (read_type) {
case SWS_PIXEL_U8: out->func = process_u8; break;
case SWS_PIXEL_U16: out->func = process_u16; break;
case SWS_PIXEL_U32: out->func = process_u32; break;
case SWS_PIXEL_F32: out->func = process_f32; break;
default: return AVERROR(EINVAL);
}
return 0;
}

View file

@ -175,3 +175,27 @@ WRAP_COMMON_PATTERNS(scale,
.setup = ff_sws_setup_q,
.flexible = true,
);
static void fn(process)(const SwsOpExec *exec, const void *priv,
const int bx_start, const int y_start,
int bx_end, int y_end)
{
const SwsOpChain *chain = priv;
const SwsOpImpl *impl = chain->impl;
u32block_t x, y, z, w; /* allocate enough space for any intermediate */
SwsOpIter iterdata;
SwsOpIter *iter = &iterdata; /* for CONTINUE() macro to work */
for (iter->y = y_start; iter->y < y_end; iter->y++) {
for (int i = 0; i < 4; i++) {
iter->in[i] = exec->in[i] + (iter->y - y_start) * exec->in_stride[i];
iter->out[i] = exec->out[i] + (iter->y - y_start) * exec->out_stride[i];
}
for (int block = bx_start; block < bx_end; block++) {
iter->x = block * SWS_BLOCK_SIZE;
CONTINUE(block_t, (void *) x, (void *) y, (void *) z, (void *) w);
}
}
}