mirror of
https://git.ffmpeg.org/ffmpeg.git
synced 2026-06-17 12:55:22 +00:00
Add NEON-optimized implementations for HEVC angular intra prediction
modes 10 (pure horizontal) and 26 (pure vertical) at 8-bit depth.
Mode 10 (Horizontal):
- Broadcasts left[y] to fill each row using ld2r/ld4r for efficiency
- Applies edge smoothing for luma blocks smaller than 32x32
Mode 26 (Vertical):
- Copies top reference row to all output rows
- Applies edge smoothing for luma blocks smaller than 32x32
Edge smoothing uses uhsub+usqadd to compute the filtered result
directly in 8-bit, avoiding widening to 16-bit intermediates.
The C pred_angular wrappers are made non-static with ff_ prefix to
allow the NEON dispatch to fall back to C for modes not yet optimized.
This will be reverted once all angular modes are implemented.
Note: since pred_angular[] is a per-size function pointer (not
per-mode), checkasm benchmarks will show '_neon' for all 33 modes
even though only modes 10/26 are truly accelerated; unoptimized
modes show ~1.0x speedup as they pass through the NEON wrapper to
the C fallback with negligible overhead.
Speedup over C on Apple M4 (checkasm --bench, 15-run average):
Mode 10 (Horizontal):
4x4: 4.66x 8x8: 5.80x 16x16: 16.86x 32x32: 24.89x
Mode 26 (Vertical):
4x4: 1.16x 8x8: 1.83x 16x16: 2.45x 32x32: 4.50x
Signed-off-by: Jun Zhao <barryjzhao@tencent.com>
77 lines
3.3 KiB
C
77 lines
3.3 KiB
C
/*
 * HEVC video Decoder
 *
 * Copyright (C) 2012 - 2013 Guillaume Martres
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
|
|
|
|
#ifndef AVCODEC_HEVC_PRED_H
|
|
#define AVCODEC_HEVC_PRED_H
|
|
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
|
|
/* Only pointers to these types are used here, so forward declarations suffice. */
struct HEVCLocalContext;
struct HEVCPPS;

/**
 * Dispatch table of HEVC intra prediction routines.
 *
 * Every member is a function pointer, or a fixed-size array of function
 * pointers, to be filled in by one of the ff_hevc_pred_init*() functions.
 *
 * NOTE(review): the four-entry arrays presumably hold one entry per block
 * size (4x4, 8x8, 16x16, 32x32), in that order - confirm against the
 * initialisation code.
 */
typedef struct HEVCPredContext {
    /**
     * Top-level intra prediction entry points.
     * They take the local decoding context, the active PPS, a position
     * (x0, y0) and a component index.
     */
    void (*intra_pred[4])(struct HEVCLocalContext *lc,
                          const struct HEVCPPS *pps,
                          int x0, int y0, int c_idx);

    /**
     * Planar predictors. They write to src, using stride, and read the
     * top and left reference arrays.
     */
    void (*pred_planar[4])(uint8_t *src, const uint8_t *top,
                           const uint8_t *left, ptrdiff_t stride);

    /**
     * DC predictor. A single routine covers all block sizes; the size is
     * passed at call time as log2_size.
     */
    void (*pred_dc)(uint8_t *src, const uint8_t *top, const uint8_t *left,
                    ptrdiff_t stride, int log2_size, int c_idx);

    /**
     * Angular predictors. The angular mode is a call-time argument, so
     * each entry handles every mode.
     */
    void (*pred_angular[4])(uint8_t *src, const uint8_t *top,
                            const uint8_t *left, ptrdiff_t stride,
                            int c_idx, int mode);

    /**
     * Three-tap reference filters. They read left/top and write
     * filtered_left/filtered_top; size is passed at call time.
     * NOTE(review): the meaning of the three entries is not visible in this
     * header - confirm against the initialisation code.
     */
    void (*ref_filter_3tap[3])(uint8_t *filtered_left, uint8_t *filtered_top,
                               const uint8_t *left, const uint8_t *top,
                               int size);

    /**
     * Strong reference filter. left is not const-qualified, so it is
     * presumably updated in place - confirm against the implementation.
     */
    void (*ref_filter_strong)(uint8_t *filtered_top, uint8_t *left,
                              const uint8_t *top);
} HEVCPredContext;
|
|
|
|
/*
 * Initialisers for an HEVCPredContext.
 *
 * The definitions live outside this header.
 * NOTE(review): presumably ff_hevc_pred_init() fills hpc with the routines for
 * the given bit_depth, and the _mips / _aarch64 variants replace entries with
 * architecture-specific versions - confirm against the implementations.
 */
void ff_hevc_pred_init(HEVCPredContext *hpc, int bit_depth);
void ff_hevc_pred_init_mips(HEVCPredContext *hpc, int bit_depth);
void ff_hevc_pred_init_aarch64(HEVCPredContext *hpc, int bit_depth);
|
|
|
|
/*
 * C angular prediction fallbacks.
 *
 * These are declared with external linkage (ff_ prefix) so that
 * arch-specific code which overrides only part of the angular prediction
 * can still call the C version for the rest.
 *
 * HEVC_PRED_ANGULAR_DECL(depth) expands to four prototypes,
 * ff_hevc_pred_angular_{0,1,2,3}_<depth>. The macro is local to this header
 * and is undefined again right after use.
 *
 * NOTE(review): the numeric suffix 0..3 presumably selects the block size in
 * the same order as the pred_angular[] table - confirm against the
 * definitions.
 */
#define HEVC_PRED_ANGULAR_DECL(depth)                                         \
void ff_hevc_pred_angular_0_ ## depth(uint8_t *src, const uint8_t *top,       \
                                      const uint8_t *left, ptrdiff_t stride,  \
                                      int c_idx, int mode);                   \
void ff_hevc_pred_angular_1_ ## depth(uint8_t *src, const uint8_t *top,       \
                                      const uint8_t *left, ptrdiff_t stride,  \
                                      int c_idx, int mode);                   \
void ff_hevc_pred_angular_2_ ## depth(uint8_t *src, const uint8_t *top,       \
                                      const uint8_t *left, ptrdiff_t stride,  \
                                      int c_idx, int mode);                   \
void ff_hevc_pred_angular_3_ ## depth(uint8_t *src, const uint8_t *top,       \
                                      const uint8_t *left, ptrdiff_t stride,  \
                                      int c_idx, int mode);

/* The macro body already ends in ';', so no semicolon follows each use. */
HEVC_PRED_ANGULAR_DECL(8)
HEVC_PRED_ANGULAR_DECL(9)
HEVC_PRED_ANGULAR_DECL(10)
HEVC_PRED_ANGULAR_DECL(12)

#undef HEVC_PRED_ANGULAR_DECL
|
|
|
|
#endif /* AVCODEC_HEVC_PRED_H */
|