ffmpeg/libavcodec/osq.c
Andreas Rheinhardt 12c2a4e117 avcodec: Don't set AVCodec.sample_fmts,ch_layouts for decoders
It is pointless for them given that these values will
be overridden by the decoder lateron anyway.
The only exceptions to this are scenarios where the decoder
actually checks request_sample_fmt or where there are both
fixed- and floating point decoders.

(IMO something like get_format() for audio would be better for
both cases.)

Reviewed-by: James Almer <jamrial@gmail.com>
Signed-off-by: Andreas Rheinhardt <andreas.rheinhardt@outlook.com>
2026-02-11 12:07:35 +01:00

499 lines
14 KiB
C

/*
* OSQ audio decoder
* Copyright (c) 2023 Paul B Mahol
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/internal.h"
#include "libavutil/intreadwrite.h"
#include "libavutil/mem.h"
#include "avcodec.h"
#include "codec_internal.h"
#include "decode.h"
#include "internal.h"
#define BITSTREAM_READER_LE
#include "get_bits.h"
#include "unary.h"
#define OFFSET 5
typedef struct OSQChannel {
unsigned prediction;
unsigned coding_mode;
unsigned residue_parameter;
unsigned residue_bits;
unsigned history[3];
unsigned pos, count;
double sum;
int32_t prev;
} OSQChannel;
typedef struct OSQContext {
GetBitContext gb;
OSQChannel ch[2];
uint8_t *bitstream;
size_t max_framesize;
size_t bitstream_size;
int factor;
int decorrelate;
int frame_samples;
uint64_t nb_samples;
int32_t *decode_buffer[2];
AVPacket *pkt;
int pkt_offset;
} OSQContext;
static av_cold void osq_flush(AVCodecContext *avctx)
{
OSQContext *s = avctx->priv_data;
s->bitstream_size = 0;
s->pkt_offset = 0;
}
static av_cold int osq_close(AVCodecContext *avctx)
{
OSQContext *s = avctx->priv_data;
av_freep(&s->bitstream);
s->bitstream_size = 0;
for (int ch = 0; ch < FF_ARRAY_ELEMS(s->decode_buffer); ch++)
av_freep(&s->decode_buffer[ch]);
return 0;
}
static av_cold int osq_init(AVCodecContext *avctx)
{
OSQContext *s = avctx->priv_data;
if (avctx->extradata_size < 48)
return AVERROR(EINVAL);
if (avctx->extradata[0] != 1) {
av_log(avctx, AV_LOG_ERROR, "Unsupported version.\n");
return AVERROR_INVALIDDATA;
}
avctx->sample_rate = AV_RL32(avctx->extradata + 4);
if (avctx->sample_rate < 1)
return AVERROR_INVALIDDATA;
av_channel_layout_uninit(&avctx->ch_layout);
avctx->ch_layout.order = AV_CHANNEL_ORDER_UNSPEC;
avctx->ch_layout.nb_channels = avctx->extradata[3];
if (avctx->ch_layout.nb_channels < 1)
return AVERROR_INVALIDDATA;
if (avctx->ch_layout.nb_channels > FF_ARRAY_ELEMS(s->decode_buffer))
return AVERROR_INVALIDDATA;
s->factor = 1;
switch (avctx->extradata[2]) {
case 8: avctx->sample_fmt = AV_SAMPLE_FMT_U8P; break;
case 16: avctx->sample_fmt = AV_SAMPLE_FMT_S16P; break;
case 20:
case 24: s->factor = 256;
avctx->sample_fmt = AV_SAMPLE_FMT_S32P; break;
default: return AVERROR_INVALIDDATA;
}
avctx->bits_per_raw_sample = avctx->extradata[2];
s->nb_samples = AV_RL64(avctx->extradata + 16);
s->frame_samples = AV_RL16(avctx->extradata + 8);
s->max_framesize = (s->frame_samples * 16 + 1024) * avctx->ch_layout.nb_channels;
s->bitstream = av_calloc(s->max_framesize + AV_INPUT_BUFFER_PADDING_SIZE, sizeof(*s->bitstream));
if (!s->bitstream)
return AVERROR(ENOMEM);
for (int ch = 0; ch < avctx->ch_layout.nb_channels; ch++) {
s->decode_buffer[ch] = av_calloc(s->frame_samples + OFFSET,
sizeof(*s->decode_buffer[ch]));
if (!s->decode_buffer[ch])
return AVERROR(ENOMEM);
}
s->pkt = avctx->internal->in_pkt;
return 0;
}
static void reset_stats(OSQChannel *cb)
{
memset(cb->history, 0, sizeof(cb->history));
cb->pos = cb->count = cb->sum = 0;
}
static void update_stats(OSQChannel *cb, int val)
{
cb->sum += FFABS((int64_t)val) - cb->history[cb->pos];
cb->history[cb->pos] = FFABS((int64_t)val);
cb->pos++;
cb->count++;
//NOTE for this to make sense count would need to be limited to FF_ARRAY_ELEMS(cb->history)
//Otherwise the average computation later makes no sense
if (cb->pos >= FF_ARRAY_ELEMS(cb->history))
cb->pos = 0;
}
static int update_residue_parameter(OSQChannel *cb)
{
double sum, x;
int rice_k;
sum = cb->sum;
if (!sum)
return 0;
x = sum / cb->count;
av_assert2(x <= 0x80000000U);
rice_k = av_ceil_log2(x);
if (rice_k >= 30) {
double f = floor(sum / 1.4426952 + 0.5);
if (f <= 1) {
rice_k = 1;
} else if (f >= 31) {
rice_k = 31;
} else
rice_k = f;
}
return rice_k;
}
static uint32_t get_urice(GetBitContext *gb, int k)
{
uint32_t z, x, b;
x = get_unary(gb, 1, 512);
b = get_bits_long(gb, k);
z = b | x << k;
return z;
}
static int32_t get_srice(GetBitContext *gb, int x)
{
uint32_t y = get_urice(gb, x);
return get_bits1(gb) ? -y : y;
}
static int osq_channel_parameters(AVCodecContext *avctx, int ch)
{
OSQContext *s = avctx->priv_data;
OSQChannel *cb = &s->ch[ch];
GetBitContext *gb = &s->gb;
cb->prev = 0;
cb->prediction = get_urice(gb, 5);
cb->coding_mode = get_urice(gb, 3);
if (cb->prediction >= 15)
return AVERROR_INVALIDDATA;
if (cb->coding_mode > 0 && cb->coding_mode < 3) {
cb->residue_parameter = get_urice(gb, 4);
if (!cb->residue_parameter || cb->residue_parameter >= 31)
return AVERROR_INVALIDDATA;
if (cb->coding_mode == 2)
avpriv_request_sample(avctx, "coding mode 2");
} else if (cb->coding_mode == 3) {
cb->residue_bits = get_urice(gb, 4);
if (!cb->residue_bits || cb->residue_bits >= 31)
return AVERROR_INVALIDDATA;
} else if (cb->coding_mode) {
return AVERROR_INVALIDDATA;
}
if (cb->coding_mode == 2)
reset_stats(cb);
return 0;
}
#define A (-1)
#define B (-2)
#define C (-3)
#define D (-4)
#define E (-5)
#define P2 (((unsigned)dst[A] + dst[A]) - dst[B])
#define P3 (((unsigned)dst[A] - dst[B]) * 3 + dst[C])
static int do_decode(AVCodecContext *avctx, AVFrame *frame, int decorrelate, int downsample)
{
OSQContext *s = avctx->priv_data;
const int nb_channels = avctx->ch_layout.nb_channels;
const int nb_samples = frame->nb_samples;
GetBitContext *gb = &s->gb;
for (int n = 0; n < nb_samples; n++) {
for (int ch = 0; ch < nb_channels; ch++) {
OSQChannel *cb = &s->ch[ch];
int32_t *dst = s->decode_buffer[ch] + OFFSET;
int32_t p, prev = cb->prev;
if (nb_channels == 2 && ch == 1 && decorrelate != s->decorrelate) {
if (!decorrelate) {
s->decode_buffer[1][OFFSET+A] += s->decode_buffer[0][OFFSET+B];
s->decode_buffer[1][OFFSET+B] += s->decode_buffer[0][OFFSET+C];
s->decode_buffer[1][OFFSET+C] += s->decode_buffer[0][OFFSET+D];
s->decode_buffer[1][OFFSET+D] += s->decode_buffer[0][OFFSET+E];
} else {
s->decode_buffer[1][OFFSET+A] -= s->decode_buffer[0][OFFSET+B];
s->decode_buffer[1][OFFSET+B] -= s->decode_buffer[0][OFFSET+C];
s->decode_buffer[1][OFFSET+C] -= s->decode_buffer[0][OFFSET+D];
s->decode_buffer[1][OFFSET+D] -= s->decode_buffer[0][OFFSET+E];
}
s->decorrelate = decorrelate;
}
if (!cb->coding_mode) {
dst[n] = 0;
} else if (cb->coding_mode == 3) {
dst[n] = get_sbits_long(gb, cb->residue_bits);
} else {
dst[n] = get_srice(gb, cb->residue_parameter);
}
if (get_bits_left(gb) < 0) {
av_log(avctx, AV_LOG_ERROR, "overread!\n");
return AVERROR_INVALIDDATA;
}
p = prev / 2;
prev = dst[n];
switch (cb->prediction) {
case 0:
break;
case 1:
dst[n] += (unsigned)dst[A];
break;
case 2:
dst[n] += (unsigned)dst[A] + p;
break;
case 3:
dst[n] += P2;
break;
case 4:
dst[n] += P2 + p;
break;
case 5:
dst[n] += P3;
break;
case 6:
dst[n] += P3 + p;
break;
case 7:
dst[n] += (int)(P2 + P3) / 2 + (unsigned)p;
break;
case 8:
dst[n] += (int)(P2 + P3) / 2 + 0U;
break;
case 9:
dst[n] += (int)(P2 * 2 + P3) / 3 + (unsigned)p;
break;
case 10:
dst[n] += (int)(P2 + P3 * 2) / 3 + (unsigned)p;
break;
case 11:
dst[n] += (int)((unsigned)dst[A] + dst[B]) / 2 + 0U;
break;
case 12:
dst[n] += (unsigned)dst[B];
break;
case 13:
dst[n] += (int)((unsigned)dst[D] + dst[B]) / 2 + 0U;
break;
case 14:
dst[n] += (int)((unsigned)P2 + dst[A]) / 2 + (unsigned)p;
break;
default:
return AVERROR_INVALIDDATA;
}
cb->prev = prev;
if (downsample)
dst[n] *= 256U;
dst[E] = dst[D];
dst[D] = dst[C];
dst[C] = dst[B];
dst[B] = dst[A];
dst[A] = dst[n];
if (cb->coding_mode == 2) {
update_stats(cb, dst[n]);
cb->residue_parameter = update_residue_parameter(cb);
}
if (nb_channels == 2 && ch == 1) {
if (decorrelate)
dst[n] += (unsigned)s->decode_buffer[0][OFFSET+n];
}
if (downsample)
dst[A] /= 256;
}
}
return 0;
}
static int osq_decode_block(AVCodecContext *avctx, AVFrame *frame)
{
const int nb_channels = avctx->ch_layout.nb_channels;
const int nb_samples = frame->nb_samples;
OSQContext *s = avctx->priv_data;
const unsigned factor = s->factor;
int ret, decorrelate, downsample;
GetBitContext *gb = &s->gb;
skip_bits1(gb);
decorrelate = get_bits1(gb);
downsample = get_bits1(gb);
for (int ch = 0; ch < nb_channels; ch++) {
if ((ret = osq_channel_parameters(avctx, ch)) < 0) {
av_log(avctx, AV_LOG_ERROR, "invalid channel parameters\n");
return ret;
}
}
if ((ret = do_decode(avctx, frame, decorrelate, downsample)) < 0)
return ret;
align_get_bits(gb);
switch (avctx->sample_fmt) {
case AV_SAMPLE_FMT_U8P:
for (int ch = 0; ch < nb_channels; ch++) {
uint8_t *dst = (uint8_t *)frame->extended_data[ch];
int32_t *src = s->decode_buffer[ch] + OFFSET;
for (int n = 0; n < nb_samples; n++)
dst[n] = av_clip_uint8(src[n] + 0x80ll);
}
break;
case AV_SAMPLE_FMT_S16P:
for (int ch = 0; ch < nb_channels; ch++) {
int16_t *dst = (int16_t *)frame->extended_data[ch];
int32_t *src = s->decode_buffer[ch] + OFFSET;
for (int n = 0; n < nb_samples; n++)
dst[n] = (int16_t)src[n];
}
break;
case AV_SAMPLE_FMT_S32P:
for (int ch = 0; ch < nb_channels; ch++) {
int32_t *dst = (int32_t *)frame->extended_data[ch];
int32_t *src = s->decode_buffer[ch] + OFFSET;
for (int n = 0; n < nb_samples; n++)
dst[n] = src[n] * factor;
}
break;
default:
return AVERROR_BUG;
}
return 0;
}
static int osq_receive_frame(AVCodecContext *avctx, AVFrame *frame)
{
OSQContext *s = avctx->priv_data;
GetBitContext *gb = &s->gb;
int ret, n;
while (s->bitstream_size < s->max_framesize) {
int size;
if (!s->pkt->data) {
ret = ff_decode_get_packet(avctx, s->pkt);
if (ret == AVERROR_EOF && s->bitstream_size > 0)
break;
if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN))
return ret;
if (ret < 0)
goto fail;
}
size = FFMIN(s->pkt->size - s->pkt_offset, s->max_framesize - s->bitstream_size);
memcpy(s->bitstream + s->bitstream_size, s->pkt->data + s->pkt_offset, size);
s->bitstream_size += size;
s->pkt_offset += size;
if (s->pkt_offset == s->pkt->size) {
av_packet_unref(s->pkt);
s->pkt_offset = 0;
}
}
frame->nb_samples = FFMIN(s->frame_samples, s->nb_samples);
if (frame->nb_samples <= 0)
return AVERROR_EOF;
if ((ret = ff_get_buffer(avctx, frame, 0)) < 0)
goto fail;
if ((ret = init_get_bits8(gb, s->bitstream, s->bitstream_size)) < 0)
goto fail;
if ((ret = osq_decode_block(avctx, frame)) < 0)
goto fail;
s->nb_samples -= frame->nb_samples;
n = get_bits_count(gb) / 8;
if (n > s->bitstream_size) {
ret = AVERROR_INVALIDDATA;
goto fail;
}
memmove(s->bitstream, &s->bitstream[n], s->bitstream_size - n);
s->bitstream_size -= n;
return 0;
fail:
s->bitstream_size = 0;
s->pkt_offset = 0;
av_packet_unref(s->pkt);
return ret;
}
const FFCodec ff_osq_decoder = {
.p.name = "osq",
CODEC_LONG_NAME("OSQ (Original Sound Quality)"),
.p.type = AVMEDIA_TYPE_AUDIO,
.p.id = AV_CODEC_ID_OSQ,
.priv_data_size = sizeof(OSQContext),
.init = osq_init,
FF_CODEC_RECEIVE_FRAME_CB(osq_receive_frame),
.close = osq_close,
.p.capabilities = AV_CODEC_CAP_CHANNEL_CONF |
AV_CODEC_CAP_DR1,
.caps_internal = FF_CODEC_CAP_INIT_CLEANUP,
.flush = osq_flush,
};