mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2026-04-18 00:20:21 +00:00
650 lines
22 KiB
C
650 lines
22 KiB
C
/**
 * Copyright (C) 2025 Niklas Haas
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
|
|
|
#include "libavutil/avassert.h"
|
|
#include "libavutil/cpu.h"
|
|
#include "libavutil/mathematics.h"
|
|
#include "libavutil/mem.h"
|
|
#include "libavutil/mem_internal.h"
|
|
#include "libavutil/refstruct.h"
|
|
|
|
#include "ops.h"
|
|
#include "ops_internal.h"
|
|
#include "ops_dispatch.h"
|
|
|
|
typedef struct SwsOpPass {
|
|
SwsCompiledOp comp;
|
|
SwsOpExec exec_base;
|
|
SwsOpExec exec_tail;
|
|
size_t num_blocks;
|
|
int tail_off_in;
|
|
int tail_off_out;
|
|
int tail_size_in;
|
|
int tail_size_out;
|
|
int planes_in;
|
|
int planes_out;
|
|
int pixel_bits_in;
|
|
int pixel_bits_out;
|
|
int idx_in[4];
|
|
int idx_out[4];
|
|
int *offsets_y;
|
|
int filter_size;
|
|
bool memcpy_first;
|
|
bool memcpy_last;
|
|
bool memcpy_out;
|
|
size_t tail_blocks;
|
|
uint8_t *tail_buf; /* extra memory for fixing unpadded tails */
|
|
unsigned int tail_buf_size;
|
|
} SwsOpPass;
|
|
|
|
int ff_sws_ops_compile_backend(SwsContext *ctx, const SwsOpBackend *backend,
|
|
const SwsOpList *ops, SwsCompiledOp *out)
|
|
{
|
|
SwsOpList *copy;
|
|
SwsCompiledOp compiled = {0};
|
|
int ret = 0;
|
|
|
|
copy = ff_sws_op_list_duplicate(ops);
|
|
if (!copy)
|
|
return AVERROR(ENOMEM);
|
|
|
|
/* Ensure these are always set during compilation */
|
|
ff_sws_op_list_update_comps(copy);
|
|
|
|
ret = backend->compile(ctx, copy, &compiled);
|
|
if (ret < 0) {
|
|
int msg_lev = ret == AVERROR(ENOTSUP) ? AV_LOG_TRACE : AV_LOG_ERROR;
|
|
av_log(ctx, msg_lev, "Backend '%s' failed to compile operations: %s\n",
|
|
backend->name, av_err2str(ret));
|
|
} else {
|
|
*out = compiled;
|
|
}
|
|
|
|
ff_sws_op_list_free(©);
|
|
return ret;
|
|
}
|
|
|
|
/**
 * Compile an operation list with the first backend that accepts it.
 *
 * Backends are tried in the order of the NULL-terminated
 * `ff_sws_op_backends` table. A backend is skipped if its hardware format
 * differs from the source or destination hardware format of `ops`, or if it
 * fails to compile the list.
 *
 * @return 0 on success, AVERROR(ENOTSUP) if no backend succeeded
 */
int ff_sws_ops_compile(SwsContext *ctx, const SwsOpList *ops, SwsCompiledOp *out)
{
    const SwsOpBackend *backend;

    for (int idx = 0; (backend = ff_sws_op_backends[idx]); idx++) {
        const bool hw_match = ops->src.hw_format == backend->hw_format &&
                              ops->dst.hw_format == backend->hw_format;
        if (!hw_match)
            continue;

        if (ff_sws_ops_compile_backend(ctx, backend, ops, out) >= 0) {
            av_log(ctx, AV_LOG_VERBOSE, "Compiled using backend '%s': "
                   "block size = %d, over-read = %d, over-write = %d, cpu flags = 0x%x\n",
                   backend->name, out->block_size, out->over_read, out->over_write,
                   out->cpu_flags);

            ff_sws_op_list_print(ctx, AV_LOG_VERBOSE, AV_LOG_TRACE, ops);
            return 0;
        }
    }

    return AVERROR(ENOTSUP);
}
|
|
|
|
/**
 * Release the private data of a compiled operation (through its `free`
 * callback, if one is set) and reset the whole structure to zero.
 */
void ff_sws_compiled_op_unref(SwsCompiledOp *comp)
{
    const SwsCompiledOp cleared = {0};

    if (comp->free) {
        comp->free(comp->priv);
    }

    *comp = cleared;
}
|
|
|
|
static void op_pass_free(void *ptr)
|
|
{
|
|
SwsOpPass *p = ptr;
|
|
if (!p)
|
|
return;
|
|
|
|
ff_sws_compiled_op_unref(&p->comp);
|
|
av_refstruct_unref(&p->offsets_y);
|
|
av_free(p->exec_base.in_bump_y);
|
|
av_free(p->exec_base.in_offset_x);
|
|
av_free(p->tail_buf);
|
|
av_free(p);
|
|
}
|
|
|
|
static inline void get_row_data(const SwsOpPass *p, const int y_dst,
|
|
const uint8_t *in[4], uint8_t *out[4])
|
|
{
|
|
const SwsOpExec *base = &p->exec_base;
|
|
const int y_src = p->offsets_y ? p->offsets_y[y_dst] : y_dst;
|
|
for (int i = 0; i < p->planes_in; i++)
|
|
in[i] = base->in[i] + (y_src >> base->in_sub_y[i]) * base->in_stride[i];
|
|
for (int i = 0; i < p->planes_out; i++)
|
|
out[i] = base->out[i] + (y_dst >> base->out_sub_y[i]) * base->out_stride[i];
|
|
}
|
|
|
|
static inline size_t pixel_bytes(size_t pixels, int pixel_bits,
|
|
enum AVRounding rounding)
|
|
{
|
|
const uint64_t bits = (uint64_t) pixels * pixel_bits;
|
|
switch (rounding) {
|
|
case AV_ROUND_ZERO:
|
|
case AV_ROUND_DOWN:
|
|
return bits >> 3;
|
|
case AV_ROUND_INF:
|
|
case AV_ROUND_UP:
|
|
return (bits + 7) >> 3;
|
|
default:
|
|
av_unreachable("Invalid rounding mode");
|
|
return (size_t) -1;
|
|
}
|
|
}
|
|
|
|
/**
 * Number of bytes in a line of the given (possibly negative) linesize that
 * remain after subtracting `plane_pad` bytes; clamped to zero.
 */
static size_t safe_bytes_pad(int linesize, int plane_pad)
{
    av_assert1(linesize);

    const int64_t line_bytes = FFABS((int64_t) linesize);
    const int64_t remaining  = line_bytes - plane_pad;
    if (remaining > 0)
        return remaining;
    return 0;
}
|
|
|
|
/**
 * Find the largest block count (at most `num_blocks`) such that the byte
 * offset of the last pixel of the last block does not exceed `safe_offset`.
 *
 * @param offset_bytes per-pixel byte offsets; must hold at least
 *                     `num_blocks * block_size` entries
 * @return the number of leading blocks that satisfy the limit, possibly 0
 */
static size_t safe_blocks_offset(size_t num_blocks, unsigned block_size,
                                 ptrdiff_t safe_offset,
                                 const int32_t *offset_bytes)
{
    for (size_t blocks = num_blocks; blocks > 0; blocks--) {
        const size_t last_pixel = blocks * block_size - 1;
        if (offset_bytes[last_pixel] <= safe_offset)
            return blocks;
    }

    return 0;
}
|
|
|
|
static int op_pass_setup(const SwsFrame *out, const SwsFrame *in,
|
|
const SwsPass *pass)
|
|
{
|
|
const AVPixFmtDescriptor *indesc = av_pix_fmt_desc_get(in->format);
|
|
const AVPixFmtDescriptor *outdesc = av_pix_fmt_desc_get(out->format);
|
|
|
|
SwsOpPass *p = pass->priv;
|
|
SwsOpExec *exec = &p->exec_base;
|
|
const SwsCompiledOp *comp = &p->comp;
|
|
|
|
/* Set up main loop parameters */
|
|
const unsigned block_size = comp->block_size;
|
|
const size_t num_blocks = (pass->width + block_size - 1) / block_size;
|
|
const size_t aligned_w = num_blocks * block_size;
|
|
if (aligned_w < pass->width) /* overflow */
|
|
return AVERROR(EINVAL);
|
|
p->num_blocks = num_blocks;
|
|
p->memcpy_first = false;
|
|
p->memcpy_last = false;
|
|
p->memcpy_out = false;
|
|
|
|
size_t safe_blocks = num_blocks;
|
|
for (int i = 0; i < p->planes_in; i++) {
|
|
int idx = p->idx_in[i];
|
|
int chroma = idx == 1 || idx == 2;
|
|
int sub_x = chroma ? indesc->log2_chroma_w : 0;
|
|
int sub_y = chroma ? indesc->log2_chroma_h : 0;
|
|
size_t safe_bytes = safe_bytes_pad(in->linesize[idx], comp->over_read);
|
|
size_t safe_blocks_in;
|
|
if (exec->in_offset_x) {
|
|
size_t filter_size = pixel_bytes(p->filter_size, p->pixel_bits_in,
|
|
AV_ROUND_UP);
|
|
safe_blocks_in = safe_blocks_offset(num_blocks, block_size,
|
|
safe_bytes - filter_size,
|
|
exec->in_offset_x);
|
|
} else {
|
|
safe_blocks_in = safe_bytes / exec->block_size_in;
|
|
}
|
|
|
|
if (safe_blocks_in < num_blocks) {
|
|
p->memcpy_first |= in->linesize[idx] < 0;
|
|
p->memcpy_last |= in->linesize[idx] > 0;
|
|
safe_blocks = FFMIN(safe_blocks, safe_blocks_in);
|
|
}
|
|
|
|
size_t loop_size = num_blocks * exec->block_size_in;
|
|
exec->in[i] = in->data[idx];
|
|
exec->in_stride[i] = in->linesize[idx];
|
|
exec->in_bump[i] = in->linesize[idx] - loop_size;
|
|
exec->in_sub_y[i] = sub_y;
|
|
exec->in_sub_x[i] = sub_x;
|
|
}
|
|
|
|
for (int i = 0; i < p->planes_out; i++) {
|
|
int idx = p->idx_out[i];
|
|
int chroma = idx == 1 || idx == 2;
|
|
int sub_x = chroma ? outdesc->log2_chroma_w : 0;
|
|
int sub_y = chroma ? outdesc->log2_chroma_h : 0;
|
|
size_t safe_bytes = safe_bytes_pad(out->linesize[idx], comp->over_write);
|
|
size_t safe_blocks_out = safe_bytes / exec->block_size_out;
|
|
if (safe_blocks_out < num_blocks) {
|
|
p->memcpy_out = true;
|
|
safe_blocks = FFMIN(safe_blocks, safe_blocks_out);
|
|
}
|
|
|
|
size_t loop_size = num_blocks * exec->block_size_out;
|
|
exec->out[i] = out->data[idx];
|
|
exec->out_stride[i] = out->linesize[idx];
|
|
exec->out_bump[i] = out->linesize[idx] - loop_size;
|
|
exec->out_sub_y[i] = sub_y;
|
|
exec->out_sub_x[i] = sub_x;
|
|
}
|
|
|
|
const bool memcpy_in = p->memcpy_first || p->memcpy_last;
|
|
if (!memcpy_in && !p->memcpy_out) {
|
|
av_assert0(safe_blocks == num_blocks);
|
|
return 0;
|
|
}
|
|
|
|
/* Set-up tail section parameters and buffers */
|
|
SwsOpExec *tail = &p->exec_tail;
|
|
const int align = av_cpu_max_align();
|
|
size_t alloc_size = 0;
|
|
*tail = *exec;
|
|
|
|
const size_t safe_width = safe_blocks * block_size;
|
|
const size_t tail_size = pass->width - safe_width;
|
|
p->tail_off_out = pixel_bytes(safe_width, p->pixel_bits_out, AV_ROUND_DOWN);
|
|
p->tail_size_out = pixel_bytes(tail_size, p->pixel_bits_out, AV_ROUND_UP);
|
|
p->tail_blocks = num_blocks - safe_blocks;
|
|
|
|
if (exec->in_offset_x) {
|
|
p->tail_off_in = exec->in_offset_x[safe_width];
|
|
p->tail_size_in = exec->in_offset_x[pass->width - 1] - p->tail_off_in;
|
|
p->tail_size_in += pixel_bytes(p->filter_size, p->pixel_bits_in, AV_ROUND_UP);
|
|
} else {
|
|
p->tail_off_in = pixel_bytes(safe_width, p->pixel_bits_in, AV_ROUND_DOWN);
|
|
p->tail_size_in = pixel_bytes(tail_size, p->pixel_bits_in, AV_ROUND_UP);
|
|
}
|
|
|
|
const size_t alloc_width = aligned_w - safe_width;
|
|
for (int i = 0; memcpy_in && i < p->planes_in; i++) {
|
|
size_t needed_size;
|
|
if (exec->in_offset_x) {
|
|
/* The input offset map is already padded to multiples of the block
|
|
* size, and clamps the input offsets to the image boundaries; so
|
|
* we just need to compensate for the comp->over_read */
|
|
needed_size = p->tail_size_in;
|
|
} else {
|
|
needed_size = pixel_bytes(alloc_width, p->pixel_bits_in, AV_ROUND_UP);
|
|
}
|
|
size_t loop_size = p->tail_blocks * exec->block_size_in;
|
|
tail->in_stride[i] = FFALIGN(needed_size + comp->over_read, align);
|
|
tail->in_bump[i] = tail->in_stride[i] - loop_size;
|
|
alloc_size += tail->in_stride[i] * in->height;
|
|
}
|
|
|
|
for (int i = 0; p->memcpy_out && i < p->planes_out; i++) {
|
|
size_t needed_size = pixel_bytes(alloc_width, p->pixel_bits_out, AV_ROUND_UP);
|
|
size_t loop_size = p->tail_blocks * exec->block_size_out;
|
|
tail->out_stride[i] = FFALIGN(needed_size + comp->over_write, align);
|
|
tail->out_bump[i] = tail->out_stride[i] - loop_size;
|
|
alloc_size += tail->out_stride[i] * out->height;
|
|
}
|
|
|
|
if (memcpy_in && exec->in_offset_x) {
|
|
/* `in_offset_x` is indexed relative to the line start, not the start
|
|
* of the section being processed; so we need to over-allocate this
|
|
* array to the full width of the image, even though we will only
|
|
* partially fill in the offsets relevant to the tail region */
|
|
alloc_size += aligned_w * sizeof(*exec->in_offset_x);
|
|
}
|
|
|
|
av_fast_mallocz(&p->tail_buf, &p->tail_buf_size, alloc_size);
|
|
if (!p->tail_buf)
|
|
return AVERROR(ENOMEM);
|
|
|
|
uint8_t *tail_buf = p->tail_buf;
|
|
for (int i = 0; memcpy_in && i < p->planes_in; i++) {
|
|
tail->in[i] = tail_buf;
|
|
tail_buf += tail->in_stride[i] * in->height;
|
|
}
|
|
|
|
for (int i = 0; p->memcpy_out && i < p->planes_out; i++) {
|
|
tail->out[i] = tail_buf;
|
|
tail_buf += tail->out_stride[i] * out->height;
|
|
}
|
|
|
|
if (memcpy_in && exec->in_offset_x) {
|
|
tail->in_offset_x = (int32_t *) tail_buf;
|
|
for (int i = safe_width; i < aligned_w; i++)
|
|
tail->in_offset_x[i] = exec->in_offset_x[i] - p->tail_off_in;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/**
 * Copy `h` lines of `bytes` bytes each from `src` to `dst`, advancing each
 * pointer by its own stride after every line. Does nothing if `h` <= 0.
 */
static void copy_lines(uint8_t *dst, const size_t dst_stride,
                       const uint8_t *src, const size_t src_stride,
                       const int h, const size_t bytes)
{
    for (int remaining = h; remaining > 0; remaining--) {
        memcpy(dst, src, bytes);
        src += src_stride;
        dst += dst_stride;
    }
}
|
|
|
|
static void op_pass_run(const SwsFrame *out, const SwsFrame *in, const int y,
|
|
const int h, const SwsPass *pass)
|
|
{
|
|
const SwsOpPass *p = pass->priv;
|
|
const SwsCompiledOp *comp = &p->comp;
|
|
|
|
/* Fill exec metadata for this slice */
|
|
DECLARE_ALIGNED_32(SwsOpExec, exec) = p->exec_base;
|
|
exec.slice_y = y;
|
|
exec.slice_h = h;
|
|
|
|
/**
|
|
* To ensure safety, we need to consider the following:
|
|
*
|
|
* 1. We can overread the input, unless this is the last line of an
|
|
* unpadded buffer. All defined operations can handle arbitrary pixel
|
|
* input, so overread of arbitrary data is fine. For flipped images,
|
|
* this condition is actually *inverted* to where the first line is
|
|
* the one at the end of the buffer.
|
|
*
|
|
* 2. We can overwrite the output, as long as we don't write more than the
|
|
* amount of pixels that fit into one linesize. So we always need to
|
|
* memcpy the last column on the output side if unpadded.
|
|
*/
|
|
|
|
const bool memcpy_in = p->memcpy_last && y + h == pass->height ||
|
|
p->memcpy_first && y == 0;
|
|
const bool memcpy_out = p->memcpy_out;
|
|
const size_t num_blocks = p->num_blocks;
|
|
const size_t tail_blocks = p->tail_blocks;
|
|
|
|
get_row_data(p, y, exec.in, exec.out);
|
|
if (!memcpy_in && !memcpy_out) {
|
|
/* Fast path (fully aligned/padded inputs and outputs) */
|
|
comp->func(&exec, comp->priv, 0, y, num_blocks, y + h);
|
|
return;
|
|
}
|
|
|
|
/* Non-aligned case (slow path); process main blocks as normal, and
|
|
* a separate tail (via memcpy into an appropriately padded buffer) */
|
|
if (num_blocks > tail_blocks) {
|
|
for (int i = 0; i < 4; i++) {
|
|
/* We process fewer blocks, so the in_bump needs to be increased
|
|
* to reflect that the plane pointers are left on the last block,
|
|
* not the end of the processed line, after each loop iteration */
|
|
exec.in_bump[i] += exec.block_size_in * tail_blocks;
|
|
exec.out_bump[i] += exec.block_size_out * tail_blocks;
|
|
}
|
|
|
|
comp->func(&exec, comp->priv, 0, y, num_blocks - tail_blocks, y + h);
|
|
}
|
|
|
|
DECLARE_ALIGNED_32(SwsOpExec, tail) = p->exec_tail;
|
|
tail.slice_y = y;
|
|
tail.slice_h = h;
|
|
|
|
for (int i = 0; i < p->planes_in; i++) {
|
|
/* Input offsets are relative to the base pointer */
|
|
if (!exec.in_offset_x || memcpy_in)
|
|
exec.in[i] += p->tail_off_in;
|
|
tail.in[i] += y * tail.in_stride[i];
|
|
}
|
|
for (int i = 0; i < p->planes_out; i++) {
|
|
exec.out[i] += p->tail_off_out;
|
|
tail.out[i] += y * tail.out_stride[i];
|
|
}
|
|
|
|
for (int i = 0; i < p->planes_in; i++) {
|
|
if (memcpy_in) {
|
|
copy_lines((uint8_t *) tail.in[i], tail.in_stride[i],
|
|
exec.in[i], exec.in_stride[i], h, p->tail_size_in);
|
|
} else {
|
|
/* Reuse input pointers directly */
|
|
const size_t loop_size = tail_blocks * exec.block_size_in;
|
|
tail.in[i] = exec.in[i];
|
|
tail.in_stride[i] = exec.in_stride[i];
|
|
tail.in_bump[i] = exec.in_stride[i] - loop_size;
|
|
}
|
|
}
|
|
|
|
for (int i = 0; !memcpy_out && i < p->planes_out; i++) {
|
|
/* Reuse output pointers directly */
|
|
const size_t loop_size = tail_blocks * exec.block_size_out;
|
|
tail.out[i] = exec.out[i];
|
|
tail.out_stride[i] = exec.out_stride[i];
|
|
tail.out_bump[i] = exec.out_stride[i] - loop_size;
|
|
}
|
|
|
|
/* Dispatch kernel over tail */
|
|
av_assert1(tail_blocks > 0);
|
|
comp->func(&tail, comp->priv, num_blocks - tail_blocks, y, num_blocks, y + h);
|
|
|
|
for (int i = 0; memcpy_out && i < p->planes_out; i++) {
|
|
copy_lines(exec.out[i], exec.out_stride[i],
|
|
tail.out[i], tail.out_stride[i], h, p->tail_size_out);
|
|
}
|
|
}
|
|
|
|
static int rw_planes(const SwsOp *op)
|
|
{
|
|
return op->rw.packed ? 1 : op->rw.elems;
|
|
}
|
|
|
|
static int rw_pixel_bits(const SwsOp *op)
|
|
{
|
|
const int elems = op->rw.packed ? op->rw.elems : 1;
|
|
const int size = ff_sws_pixel_type_size(op->type);
|
|
const int bits = 8 >> op->rw.frac;
|
|
av_assert1(bits >= 1);
|
|
return elems * size * bits;
|
|
}
|
|
|
|
/**
 * Raise the width alignment and width padding of a pass's output buffer so
 * that it satisfies a kernel's block size and over-read/over-write (given in
 * bytes). Existing, stricter requirements are kept. No-op for a NULL pass.
 */
static void align_pass(SwsPass *pass, int block_size, int over_rw, int pixel_bits)
{
    if (!pass)
        return;

    /* Add at least as many pixels as needed to cover the padding requirement */
    const int pad = (over_rw * 8 + pixel_bits - 1) / pixel_bits;

    SwsPassBuffer *buf = pass->output;
    if (buf->width_align < block_size)
        buf->width_align = block_size;
    if (buf->width_pad < pad)
        buf->width_pad = pad;
}
|
|
|
|
/**
 * Compile one operation list and append the resulting pass to the graph.
 *
 * Opaque compiled operations are added to the graph directly, using their
 * own run function and private data. Otherwise a SwsOpPass is built that
 * drives the kernel through op_pass_setup() / op_pass_run(), including the
 * per-line bump table (vertical filter) or per-pixel offset map (horizontal
 * filter) derived from the read operation's filter kernel.
 *
 * @param graph  graph to add the pass to
 * @param ops    operation list; expected to begin with a read and end with
 *               a write operation (checked by the caller)
 * @param input  pass feeding this one, may be NULL
 * @param output receives the newly added pass
 * @return 0 on success, a negative AVERROR code on failure
 */
static int compile(SwsGraph *graph, const SwsOpList *ops, SwsPass *input,
                   SwsPass **output)
{
    SwsContext *ctx = graph->ctx;
    SwsOpPass *p = av_mallocz(sizeof(*p));
    if (!p)
        return AVERROR(ENOMEM);

    int ret = ff_sws_ops_compile(ctx, ops, &p->comp);
    if (ret < 0)
        goto fail;

    const SwsCompiledOp *comp = &p->comp;
    const SwsFormat *dst = &ops->dst;
    if (p->comp.opaque) {
        /* The compiled op is handed to the graph as-is; only the wrapper
         * allocation is dropped here */
        SwsCompiledOp c = *comp;
        av_free(p);
        return ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
                                     input, c.slice_align, c.func_opaque,
                                     NULL, c.priv, c.free, output);
    }

    const SwsOp *read  = ff_sws_op_list_input(ops);
    const SwsOp *write = ff_sws_op_list_output(ops);
    p->planes_in      = rw_planes(read);
    p->planes_out     = rw_planes(write);
    p->pixel_bits_in  = rw_pixel_bits(read);
    p->pixel_bits_out = rw_pixel_bits(write);
    p->exec_base = (SwsOpExec) {
        .width  = dst->width,
        .height = dst->height,
    };

    /* A block must cover a whole number of bytes on both sides */
    const int64_t block_bits_in  = (int64_t) comp->block_size * p->pixel_bits_in;
    const int64_t block_bits_out = (int64_t) comp->block_size * p->pixel_bits_out;
    if (block_bits_in & 0x7 || block_bits_out & 0x7) {
        av_log(ctx, AV_LOG_ERROR, "Block size must be a multiple of the pixel size.\n");
        ret = AVERROR(EINVAL);
        goto fail;
    }

    p->exec_base.block_size_in  = block_bits_in  >> 3;
    p->exec_base.block_size_out = block_bits_out >> 3;

    for (int i = 0; i < 4; i++) {
        p->idx_in[i]  = i < p->planes_in  ? ops->plane_src[i] : -1;
        p->idx_out[i] = i < p->planes_out ? ops->plane_dst[i] : -1;
    }

    const SwsFilterWeights *filter = read->rw.kernel;
    if (read->rw.filter == SWS_OP_FILTER_V) {
        p->offsets_y = av_refstruct_ref(filter->offsets);

        /* Compute relative pointer bumps for each output line */
        int32_t *bump = av_malloc_array(filter->dst_size, sizeof(*bump));
        if (!bump) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }

        int line = filter->offsets[0];
        for (int y = 0; y < filter->dst_size - 1; y++) {
            int next = filter->offsets[y + 1];
            bump[y] = next - line - 1;
            line = next;
        }
        bump[filter->dst_size - 1] = 0;
        p->exec_base.in_bump_y = bump;
    } else if (read->rw.filter == SWS_OP_FILTER_H) {
        /* Compute pixel offset map for each output line */
        const int pixels = FFALIGN(filter->dst_size, p->comp.block_size);
        int32_t *offset = av_malloc_array(pixels, sizeof(*offset));
        if (!offset) {
            ret = AVERROR(ENOMEM);
            goto fail;
        }

        /* Transfer ownership right away, so that the array is released by
         * op_pass_free() if the sanity check below bails out */
        p->exec_base.in_offset_x = offset;

        for (int x = 0; x < filter->dst_size; x++) {
            /* Sanity check; if the tap would land on a half-pixel, we cannot
             * reasonably expect the implementation to know about this. Just
             * error out in such (theoretical) cases. */
            int64_t bits = (int64_t) filter->offsets[x] * p->pixel_bits_in;
            if ((bits & 0x7) || (bits >> 3) > INT32_MAX) {
                ret = AVERROR(EINVAL);
                goto fail;
            }
            offset[x] = bits >> 3;
        }

        /* Pad the map up to a whole block by repeating the last offset */
        for (int x = filter->dst_size; x < pixels; x++)
            offset[x] = offset[filter->dst_size - 1];
        p->exec_base.block_size_in = 0; /* ptr does not advance */
        p->filter_size = filter->filter_size;
    }

    /* NOTE(review): `p` is not freed here on failure; presumably
     * ff_sws_graph_add_pass() invokes the free callback itself in that
     * case (the opaque branch above relies on the same) - confirm. */
    ret = ff_sws_graph_add_pass(graph, dst->format, dst->width, dst->height,
                                input, comp->slice_align, op_pass_run,
                                op_pass_setup, p, op_pass_free, output);
    if (ret < 0)
        return ret;

    align_pass(input,   comp->block_size, comp->over_read,  p->pixel_bits_in);
    align_pass(*output, comp->block_size, comp->over_write, p->pixel_bits_out);
    return 0;

fail:
    op_pass_free(p);
    return ret;
}
|
|
|
|
/**
 * Compile an operation list into one or more graph passes.
 *
 * A no-op list simply forwards `input`. Otherwise the list is (optionally)
 * optimized and compiled as a single pass; if that is not supported, it is
 * split into sub-passes that are compiled one after another. On any failure
 * the graph is rolled back to the number of passes it had on entry.
 *
 * Takes ownership of `*pops`: the list is always freed and `*pops` is set to
 * NULL before returning.
 *
 * @return 0 (or a non-negative value) on success, negative AVERROR on failure
 */
int ff_sws_compile_pass(SwsGraph *graph, SwsOpList **pops, int flags,
                        SwsPass *input, SwsPass **output)
{
    const int passes_orig = graph->num_passes;
    SwsContext *ctx = graph->ctx;
    SwsOpList *ops = *pops;
    int ret = 0;

    /* Check if the whole operation graph is an end-to-end no-op */
    if (ff_sws_op_list_is_noop(ops)) {
        *output = input;
        goto out;
    }

    const SwsOp *read  = ff_sws_op_list_input(ops);
    const SwsOp *write = ff_sws_op_list_output(ops);
    if (!(read && write)) {
        av_log(ctx, AV_LOG_ERROR, "First and last operations must be a read "
               "and write, respectively.\n");
        ret = AVERROR(EINVAL);
        goto out;
    }

    if (flags & SWS_OP_FLAG_OPTIMIZE) {
        ret = ff_sws_op_list_optimize(ops);
        if (ret < 0)
            goto out;
        av_log(ctx, AV_LOG_DEBUG, "Operation list after optimizing:\n");
        ff_sws_op_list_print(ctx, AV_LOG_DEBUG, AV_LOG_TRACE, ops);
    }

    /* First attempt: the whole list as one pass */
    ret = compile(graph, ops, input, output);
    if (ret != AVERROR(ENOTSUP))
        goto out;

    av_log(ctx, AV_LOG_DEBUG, "Retrying with separated filter passes.\n");
    SwsPass *last = input;
    while (ops) {
        SwsOpList *remaining;
        ret = ff_sws_op_list_subpass(ops, &remaining);
        if (ret < 0)
            goto out;

        if (last == input && !remaining) {
            /* No point in compiling an unsplit pass again */
            ret = AVERROR(ENOTSUP);
            goto out;
        }

        ret = compile(graph, ops, last, &last);
        if (ret < 0) {
            ff_sws_op_list_free(&remaining);
            goto out;
        }

        /* This sub-pass is done; continue with whatever was split off */
        ff_sws_op_list_free(&ops);
        ops = remaining;
    }

    /* Return last subpass successfully compiled */
    av_log(ctx, AV_LOG_VERBOSE, "Using %d separate passes.\n",
           graph->num_passes - passes_orig);
    *output = last;

out:
    if (ret == AVERROR(ENOTSUP)) {
        av_log(ctx, AV_LOG_WARNING, "No backend found for operations:\n");
        ff_sws_op_list_print(ctx, AV_LOG_WARNING, AV_LOG_TRACE, ops);
    }
    if (ret < 0)
        ff_sws_graph_rollback(graph, passes_orig);
    ff_sws_op_list_free(&ops);
    *pops = NULL;
    return ret;
}
|