opus: Update to upstream version 1.1.5

(cherry picked from commit 47e0e530a9)
This commit is contained in:
Rémi Verschelde 2017-05-28 21:15:14 +02:00
parent 15ab3804df
commit 75a6e0f65a
40 changed files with 426 additions and 251 deletions

View file

@ -235,7 +235,7 @@ Files extracted from the upstream source:
## opus ## opus
- Upstream: https://opus-codec.org - Upstream: https://opus-codec.org
- Version: 1.1.4 (opus) and 0.8 (opusfile) - Version: 1.1.5 (opus) and 0.8 (opusfile)
- License: BSD-3-Clause - License: BSD-3-Clause
Files extracted from upstream source: Files extracted from upstream source:

View file

@ -78,6 +78,15 @@ static OPUS_INLINE void _celt_fatal(const char *str, const char *file, int line)
#define UADD32(a,b) ((a)+(b)) #define UADD32(a,b) ((a)+(b))
#define USUB32(a,b) ((a)-(b)) #define USUB32(a,b) ((a)-(b))
/* Set this if opus_int64 is a native type of the CPU. */
/* Assume that all LP64 architectures have fast 64-bit types; also x86_64
(which can be ILP32 for x32) and Win64 (which is LLP64). */
#if defined(__x86_64__) || defined(__LP64__) || defined(_WIN64)
#define OPUS_FAST_INT64 1
#else
#define OPUS_FAST_INT64 0
#endif
#define PRINT_MIPS(file) #define PRINT_MIPS(file)
#ifdef FIXED_POINT #ifdef FIXED_POINT
@ -118,7 +127,9 @@ static OPUS_INLINE opus_int16 SAT16(opus_int32 x) {
#include "fixed_generic.h" #include "fixed_generic.h"
#ifdef OPUS_ARM_INLINE_EDSP #ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR
#include "arm/fixed_arm64.h"
#elif OPUS_ARM_INLINE_EDSP
#include "arm/fixed_armv5e.h" #include "arm/fixed_armv5e.h"
#elif defined (OPUS_ARM_INLINE_ASM) #elif defined (OPUS_ARM_INLINE_ASM)
#include "arm/fixed_armv4.h" #include "arm/fixed_armv4.h"

View file

@ -36,6 +36,9 @@
#if defined(OPUS_HAVE_RTCD) #if defined(OPUS_HAVE_RTCD)
# if defined(FIXED_POINT) # if defined(FIXED_POINT)
# if ((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
(defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
(defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int , int) = { const opus_val16 *, opus_val32 *, int , int) = {
celt_pitch_xcorr_c, /* ARMv4 */ celt_pitch_xcorr_c, /* ARMv4 */
@ -43,8 +46,10 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */ MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */ MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */
}; };
# endif
# else /* !FIXED_POINT */ # else /* !FIXED_POINT */
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) # if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *, void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int) = { const opus_val16 *, opus_val32 *, int, int) = {
celt_pitch_xcorr_c, /* ARMv4 */ celt_pitch_xcorr_c, /* ARMv4 */
@ -55,6 +60,23 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
# endif # endif
# endif /* FIXED_POINT */ # endif /* FIXED_POINT */
#if defined(FIXED_POINT) && defined(OPUS_HAVE_RTCD) && \
defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR)
/* Run-time dispatch table for the fixed-point xcorr kernel.
 * It holds OPUS_ARCHMASK+1 function pointers, one per ARM arch level, in the
 * order listed below. Only the NEON slot points at a specialised routine;
 * every lower arch level uses the generic C kernel. */
void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len
) = {
xcorr_kernel_c, /* ARMv4 */
xcorr_kernel_c, /* EDSP */
xcorr_kernel_c, /* Media */
xcorr_kernel_neon_fixed, /* Neon */
};
#endif
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) # if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
# if defined(HAVE_ARM_NE10) # if defined(HAVE_ARM_NE10)
# if defined(CUSTOM_MODES) # if defined(CUSTOM_MODES)

View file

@ -37,11 +37,12 @@
#include "cpu_support.h" #include "cpu_support.h"
#include "os_support.h" #include "os_support.h"
#include "opus_types.h" #include "opus_types.h"
#include "arch.h"
#define OPUS_CPU_ARM_V4 (1) #define OPUS_CPU_ARM_V4_FLAG (1<<OPUS_ARCH_ARM_V4)
#define OPUS_CPU_ARM_EDSP (1<<1) #define OPUS_CPU_ARM_EDSP_FLAG (1<<OPUS_ARCH_ARM_EDSP)
#define OPUS_CPU_ARM_MEDIA (1<<2) #define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA)
#define OPUS_CPU_ARM_NEON (1<<3) #define OPUS_CPU_ARM_NEON_FLAG (1<<OPUS_ARCH_ARM_NEON)
#if defined(_MSC_VER) #if defined(_MSC_VER)
/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
@ -55,20 +56,22 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
/* MSVC has no OPUS_INLINE __asm support for ARM, but it does let you __emit /* MSVC has no OPUS_INLINE __asm support for ARM, but it does let you __emit
* instructions via their assembled hex code. * instructions via their assembled hex code.
* All of these instructions should be essentially nops. */ * All of these instructions should be essentially nops. */
# if defined(OPUS_ARM_MAY_HAVE_EDSP) # if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
__try{ __try{
/*PLD [r13]*/ /*PLD [r13]*/
__emit(0xF5DDF000); __emit(0xF5DDF000);
flags|=OPUS_CPU_ARM_EDSP; flags|=OPUS_CPU_ARM_EDSP_FLAG;
} }
__except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
/*Ignore exception.*/ /*Ignore exception.*/
} }
# if defined(OPUS_ARM_MAY_HAVE_MEDIA) # if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
__try{ __try{
/*SHADD8 r3,r3,r3*/ /*SHADD8 r3,r3,r3*/
__emit(0xE6333F93); __emit(0xE6333F93);
flags|=OPUS_CPU_ARM_MEDIA; flags|=OPUS_CPU_ARM_MEDIA_FLAG;
} }
__except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
/*Ignore exception.*/ /*Ignore exception.*/
@ -77,7 +80,7 @@ static OPUS_INLINE opus_uint32 opus_cpu_capabilities(void){
__try{ __try{
/*VORR q0,q0,q0*/ /*VORR q0,q0,q0*/
__emit(0xF2200150); __emit(0xF2200150);
flags|=OPUS_CPU_ARM_NEON; flags|=OPUS_CPU_ARM_NEON_FLAG;
} }
__except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){ __except(GetExceptionCode()==EXCEPTION_ILLEGAL_INSTRUCTION){
/*Ignore exception.*/ /*Ignore exception.*/
@ -107,26 +110,26 @@ opus_uint32 opus_cpu_capabilities(void)
while(fgets(buf, 512, cpuinfo) != NULL) while(fgets(buf, 512, cpuinfo) != NULL)
{ {
# if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) # if defined(OPUS_ARM_MAY_HAVE_EDSP) || defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
/* Search for edsp and neon flag */ /* Search for edsp and neon flag */
if(memcmp(buf, "Features", 8) == 0) if(memcmp(buf, "Features", 8) == 0)
{ {
char *p; char *p;
# if defined(OPUS_ARM_MAY_HAVE_EDSP)
p = strstr(buf, " edsp"); p = strstr(buf, " edsp");
if(p != NULL && (p[5] == ' ' || p[5] == '\n')) if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
flags |= OPUS_CPU_ARM_EDSP; flags |= OPUS_CPU_ARM_EDSP_FLAG;
# endif
# if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR) # if defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
p = strstr(buf, " neon"); p = strstr(buf, " neon");
if(p != NULL && (p[5] == ' ' || p[5] == '\n')) if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
flags |= OPUS_CPU_ARM_NEON; flags |= OPUS_CPU_ARM_NEON_FLAG;
# endif # endif
} }
# endif # endif
# if defined(OPUS_ARM_MAY_HAVE_MEDIA) # if defined(OPUS_ARM_MAY_HAVE_MEDIA) \
|| defined(OPUS_ARM_MAY_HAVE_NEON) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
/* Search for media capabilities (>= ARMv6) */ /* Search for media capabilities (>= ARMv6) */
if(memcmp(buf, "CPU architecture:", 17) == 0) if(memcmp(buf, "CPU architecture:", 17) == 0)
{ {
@ -134,7 +137,7 @@ opus_uint32 opus_cpu_capabilities(void)
version = atoi(buf+17); version = atoi(buf+17);
if(version >= 6) if(version >= 6)
flags |= OPUS_CPU_ARM_MEDIA; flags |= OPUS_CPU_ARM_MEDIA_FLAG;
} }
# endif # endif
} }
@ -156,18 +159,26 @@ int opus_select_arch(void)
opus_uint32 flags = opus_cpu_capabilities(); opus_uint32 flags = opus_cpu_capabilities();
int arch = 0; int arch = 0;
if(!(flags & OPUS_CPU_ARM_EDSP)) if(!(flags & OPUS_CPU_ARM_EDSP_FLAG)) {
/* Asserts ensure arch values are sequential */
celt_assert(arch == OPUS_ARCH_ARM_V4);
return arch; return arch;
}
arch++; arch++;
if(!(flags & OPUS_CPU_ARM_MEDIA)) if(!(flags & OPUS_CPU_ARM_MEDIA_FLAG)) {
celt_assert(arch == OPUS_ARCH_ARM_EDSP);
return arch; return arch;
}
arch++; arch++;
if(!(flags & OPUS_CPU_ARM_NEON)) if(!(flags & OPUS_CPU_ARM_NEON_FLAG)) {
celt_assert(arch == OPUS_ARCH_ARM_MEDIA);
return arch; return arch;
}
arch++; arch++;
celt_assert(arch == OPUS_ARCH_ARM_NEON);
return arch; return arch;
} }

View file

@ -66,6 +66,12 @@
# if defined(OPUS_HAVE_RTCD) # if defined(OPUS_HAVE_RTCD)
int opus_select_arch(void); int opus_select_arch(void);
#define OPUS_ARCH_ARM_V4 (0)
#define OPUS_ARCH_ARM_EDSP (1)
#define OPUS_ARCH_ARM_MEDIA (2)
#define OPUS_ARCH_ARM_NEON (3)
# endif # endif
#endif #endif

View file

@ -37,7 +37,66 @@
#include <arm_neon.h> #include <arm_neon.h>
#include "../pitch.h" #include "../pitch.h"
#if !defined(FIXED_POINT) #if defined(FIXED_POINT)
/* Fixed-point NEON cross-correlation kernel.
 *
 * For k = 0..3, adds to sum[k] the products x[j]*y[j+k] over j = 0..len-1,
 * using widening 16x16->32 multiply-accumulates.
 *
 * x:   len input samples.
 * y:   input samples; y[0] through y[len+2] contribute to the result and
 *      nothing beyond y[len+2] is read.
 * sum: four running accumulators, loaded on entry and stored back on exit.
 * len: number of x samples to process; must be > 0 because y[0...3] is
 *      loaded unconditionally.
 */
void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
{
   int j;
   int32x4_t a = vld1q_s32(sum);
   /* Load y[0...3] */
   /* This requires len>0 to always be valid (which we assert in the C code). */
   int16x4_t y0 = vld1_s16(y);
   y += 4;

   /* Each pass loads y[j+4...j+11] but only multiplies values up to y[j+10].
      Stop while more than 8 samples remain (j+8 < len rather than <=) so the
      pass that would end exactly at len never loads the unused y[len+3]; the
      remaining samples are handled one at a time below. */
   for (j = 0; j + 8 < len; j += 8)
   {
      /* Load x[0...7] */
      int16x8_t xx = vld1q_s16(x);
      int16x4_t x0 = vget_low_s16(xx);
      int16x4_t x4 = vget_high_s16(xx);
      /* Load y[4...11] */
      int16x8_t yy = vld1q_s16(y);
      int16x4_t y4 = vget_low_s16(yy);
      int16x4_t y8 = vget_high_s16(yy);
      int32x4_t a0 = vmlal_lane_s16(a, y0, x0, 0);
      int32x4_t a1 = vmlal_lane_s16(a0, y4, x4, 0);

      int16x4_t y1 = vext_s16(y0, y4, 1);
      int16x4_t y5 = vext_s16(y4, y8, 1);
      int32x4_t a2 = vmlal_lane_s16(a1, y1, x0, 1);
      int32x4_t a3 = vmlal_lane_s16(a2, y5, x4, 1);

      int16x4_t y2 = vext_s16(y0, y4, 2);
      int16x4_t y6 = vext_s16(y4, y8, 2);
      int32x4_t a4 = vmlal_lane_s16(a3, y2, x0, 2);
      int32x4_t a5 = vmlal_lane_s16(a4, y6, x4, 2);

      int16x4_t y3 = vext_s16(y0, y4, 3);
      int16x4_t y7 = vext_s16(y4, y8, 3);
      int32x4_t a6 = vmlal_lane_s16(a5, y3, x0, 3);
      int32x4_t a7 = vmlal_lane_s16(a6, y7, x4, 3);

      /* Carry the last four loaded y values and the accumulators forward. */
      y0 = y8;
      a = a7;
      x += 8;
      y += 8;
   }

   for (; j < len; j++)
   {
      int16x4_t x0 = vld1_dup_s16(x);  /* load next x */
      a = vmlal_s16(a, y0, x0);

      /* The y value shifted in here is only consumed by the next iteration,
         so skip the load on the final one instead of reading y[len+3]. */
      if (j + 1 < len)
      {
         int16x4_t y4 = vld1_dup_s16(y);  /* load next y */
         y0 = vext_s16(y0, y4, 1);
      }
      x++;
      y++;
   }

   vst1q_s32(sum, a);
}
#else
/* /*
* Function: xcorr_kernel_neon_float * Function: xcorr_kernel_neon_float
* --------------------------------- * ---------------------------------

View file

@ -46,10 +46,53 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch); opus_val32 *xcorr, int len, int max_pitch);
# endif # endif
# if !defined(OPUS_HAVE_RTCD) # if defined(OPUS_HAVE_RTCD) && \
((defined(OPUS_ARM_MAY_HAVE_NEON) && !defined(OPUS_ARM_PRESUME_NEON)) || \
(defined(OPUS_ARM_MAY_HAVE_MEDIA) && !defined(OPUS_ARM_PRESUME_MEDIA)) || \
(defined(OPUS_ARM_MAY_HAVE_EDSP) && !defined(OPUS_ARM_PRESUME_EDSP)))
extern opus_val32
(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int);
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
xcorr, len, max_pitch))
# elif defined(OPUS_ARM_PRESUME_EDSP) || \
defined(OPUS_ARM_PRESUME_MEDIA) || \
defined(OPUS_ARM_PRESUME_NEON)
# define OVERRIDE_PITCH_XCORR (1) # define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \ # define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch)) ((void)(arch),PRESUME_NEON(celt_pitch_xcorr)(_x, _y, xcorr, len, max_pitch))
# endif
# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void xcorr_kernel_neon_fixed(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len);
# endif
# if defined(OPUS_HAVE_RTCD) && \
(defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y,
opus_val32 sum[4],
int len);
# define OVERRIDE_XCORR_KERNEL (1)
# define xcorr_kernel(x, y, sum, len, arch) \
((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
# define OVERRIDE_XCORR_KERNEL (1)
# define xcorr_kernel(x, y, sum, len, arch) \
((void)arch, xcorr_kernel_neon_fixed(x, y, sum, len))
# endif # endif
#else /* Start !FIXED_POINT */ #else /* Start !FIXED_POINT */
@ -57,12 +100,27 @@ opus_val32 celt_pitch_xcorr_edsp(const opus_val16 *_x, const opus_val16 *_y,
#if defined(OPUS_ARM_MAY_HAVE_NEON_INTR) #if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y, void celt_pitch_xcorr_float_neon(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch); opus_val32 *xcorr, int len, int max_pitch);
#if !defined(OPUS_HAVE_RTCD) || defined(OPUS_ARM_PRESUME_NEON_INTR)
#define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch))
#endif
#endif #endif
# if defined(OPUS_HAVE_RTCD) && \
(defined(OPUS_ARM_MAY_HAVE_NEON_INTR) && !defined(OPUS_ARM_PRESUME_NEON_INTR))
extern void
(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int);
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
xcorr, len, max_pitch))
# elif defined(OPUS_ARM_PRESUME_NEON_INTR)
# define OVERRIDE_PITCH_XCORR (1)
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((void)(arch),celt_pitch_xcorr_float_neon(_x, _y, xcorr, len, max_pitch))
# endif
#endif /* end !FIXED_POINT */ #endif /* end !FIXED_POINT */
#endif #endif

View file

@ -414,7 +414,7 @@ static void stereo_merge(celt_norm * OPUS_RESTRICT X, celt_norm * OPUS_RESTRICT
/* Compensating for the mid normalization */ /* Compensating for the mid normalization */
xp = MULT16_32_Q15(mid, xp); xp = MULT16_32_Q15(mid, xp);
/* mid and side are in Q15, not Q14 like X and Y */ /* mid and side are in Q15, not Q14 like X and Y */
mid2 = SHR32(mid, 1); mid2 = SHR16(mid, 1);
El = MULT16_16(mid2, mid2) + side - 2*xp; El = MULT16_16(mid2, mid2) + side - 2*xp;
Er = MULT16_16(mid2, mid2) + side + 2*xp; Er = MULT16_16(mid2, mid2) + side + 2*xp;
if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28)) if (Er < QCONST32(6e-4f, 28) || El < QCONST32(6e-4f, 28))
@ -714,7 +714,7 @@ static void compute_theta(struct band_ctx *ctx, struct split_ctx *sctx,
if (qn!=1) if (qn!=1)
{ {
if (encode) if (encode)
itheta = (itheta*qn+8192)>>14; itheta = (itheta*(opus_int32)qn+8192)>>14;
/* Entropy coding of the angle. We use a uniform pdf for the /* Entropy coding of the angle. We use a uniform pdf for the
time split, a step for stereo, and a triangular one for the rest. */ time split, a step for stereo, and a triangular one for the rest. */

View file

@ -209,7 +209,7 @@ void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
#endif #endif
#ifndef OVERRIDE_COMB_FILTER_CONST #ifndef OVERRIDE_COMB_FILTER_CONST
# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ # define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12)) ((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12))
#endif #endif

View file

@ -82,6 +82,7 @@ struct OpusCustomDecoder {
int error; int error;
int last_pitch_index; int last_pitch_index;
int loss_count; int loss_count;
int skip_plc;
int postfilter_period; int postfilter_period;
int postfilter_period_old; int postfilter_period_old;
opus_val16 postfilter_gain; opus_val16 postfilter_gain;
@ -164,8 +165,6 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_init(CELTDecoder *st, const CELTMod
st->signalling = 1; st->signalling = 1;
st->arch = opus_select_arch(); st->arch = opus_select_arch();
st->loss_count = 0;
opus_custom_decoder_ctl(st, OPUS_RESET_STATE); opus_custom_decoder_ctl(st, OPUS_RESET_STATE);
return OPUS_OK; return OPUS_OK;
@ -447,7 +446,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
loss_count = st->loss_count; loss_count = st->loss_count;
start = st->start; start = st->start;
noise_based = loss_count >= 5 || start != 0; noise_based = loss_count >= 5 || start != 0 || st->skip_plc;
if (noise_based) if (noise_based)
{ {
/* Noise-based PLC/CNG */ /* Noise-based PLC/CNG */
@ -832,6 +831,10 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
return frame_size/st->downsample; return frame_size/st->downsample;
} }
/* Check if there are at least two packets received consecutively before
* turning on the pitch-based PLC */
st->skip_plc = st->loss_count != 0;
if (dec == NULL) if (dec == NULL)
{ {
ec_dec_init(&_dec,(unsigned char*)data,len); ec_dec_init(&_dec,(unsigned char*)data,len);
@ -1198,6 +1201,7 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
((char*)&st->DECODER_RESET_START - (char*)st)); ((char*)&st->DECODER_RESET_START - (char*)st));
for (i=0;i<2*st->mode->nbEBands;i++) for (i=0;i<2*st->mode->nbEBands;i++)
oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT); oldLogE[i]=oldLogE2[i]=-QCONST16(28.f,DB_SHIFT);
st->skip_plc = 1;
} }
break; break;
case OPUS_GET_PITCH_REQUEST: case OPUS_GET_PITCH_REQUEST:

View file

@ -1175,10 +1175,10 @@ static int run_prefilter(CELTEncoder *st, celt_sig *in, celt_sig *prefilter_mem,
if (N>COMBFILTER_MAXPERIOD) if (N>COMBFILTER_MAXPERIOD)
{ {
OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD); OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD, pre[c]+N, COMBFILTER_MAXPERIOD);
} else { } else {
OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N); OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD, prefilter_mem+c*COMBFILTER_MAXPERIOD+N, COMBFILTER_MAXPERIOD-N);
OPUS_MOVE(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N); OPUS_COPY(prefilter_mem+c*COMBFILTER_MAXPERIOD+COMBFILTER_MAXPERIOD-N, pre[c]+COMBFILTER_MAXPERIOD, N);
} }
} while (++c<CC); } while (++c<CC);
@ -1281,12 +1281,15 @@ static int compute_vbr(const CELTMode *mode, AnalysisInfo *analysis, opus_int32
if ((!has_surround_mask||lfe) && (constrained_vbr || bitrate<64000)) if ((!has_surround_mask||lfe) && (constrained_vbr || bitrate<64000))
{ {
opus_val16 rate_factor; opus_val16 rate_factor = Q15ONE;
if (bitrate < 64000)
{
#ifdef FIXED_POINT #ifdef FIXED_POINT
rate_factor = MAX16(0,(bitrate-32000)); rate_factor = MAX16(0,(bitrate-32000));
#else #else
rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000)); rate_factor = MAX16(0,(1.f/32768)*(bitrate-32000));
#endif #endif
}
if (constrained_vbr) if (constrained_vbr)
rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15)); rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15));
target = base_target + (opus_int32)MULT16_32_Q15(rate_factor, target-base_target); target = base_target + (opus_int32)MULT16_32_Q15(rate_factor, target-base_target);

View file

@ -49,8 +49,7 @@ int p
float *lpc = _lpc; float *lpc = _lpc;
#endif #endif
for (i = 0; i < p; i++) OPUS_CLEAR(lpc, p);
lpc[i] = 0;
if (ac[0] != 0) if (ac[0] != 0)
{ {
for (i = 0; i < p; i++) { for (i = 0; i < p; i++) {

View file

@ -74,7 +74,7 @@ int log2_frac(opus_uint32 val, int frac)
/*Although derived separately, the pulse vector coding scheme is equivalent to /*Although derived separately, the pulse vector coding scheme is equivalent to
a Pyramid Vector Quantizer \cite{Fis86}. a Pyramid Vector Quantizer \cite{Fis86}.
Some additional notes about an early version appear at Some additional notes about an early version appear at
http://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering https://people.xiph.org/~tterribe/notes/cwrs.html, but the codebook ordering
and the definitions of some terms have evolved since that was written. and the definitions of some terms have evolved since that was written.
The conversion from a pulse vector to an integer index (encoding) and back The conversion from a pulse vector to an integer index (encoding) and back

View file

@ -37,16 +37,32 @@
#define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b)) #define MULT16_16SU(a,b) ((opus_val32)(opus_val16)(a)*(opus_val32)(opus_uint16)(b))
/** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */ /** 16x32 multiplication, followed by a 16-bit shift right. Results fits in 32 bits */
#if OPUS_FAST_INT64
#define MULT16_32_Q16(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),16))
#else
#define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16)) #define MULT16_32_Q16(a,b) ADD32(MULT16_16((a),SHR((b),16)), SHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
#endif
/** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */ /** 16x32 multiplication, followed by a 16-bit shift right (round-to-nearest). Results fits in 32 bits */
#if OPUS_FAST_INT64
#define MULT16_32_P16(a,b) ((opus_val32)PSHR((opus_int64)((opus_val16)(a))*(b),16))
#else
#define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16)) #define MULT16_32_P16(a,b) ADD32(MULT16_16((a),SHR((b),16)), PSHR(MULT16_16SU((a),((b)&0x0000ffff)),16))
#endif
/** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */ /** 16x32 multiplication, followed by a 15-bit shift right. Results fits in 32 bits */
#if OPUS_FAST_INT64
#define MULT16_32_Q15(a,b) ((opus_val32)SHR((opus_int64)((opus_val16)(a))*(b),15))
#else
#define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15)) #define MULT16_32_Q15(a,b) ADD32(SHL(MULT16_16((a),SHR((b),16)),1), SHR(MULT16_16SU((a),((b)&0x0000ffff)),15))
#endif
/** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */ /** 32x32 multiplication, followed by a 31-bit shift right. Results fits in 32 bits */
#if OPUS_FAST_INT64
#define MULT32_32_Q31(a,b) ((opus_val32)SHR((opus_int64)(a)*(opus_int64)(b),31))
#else
#define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15)) #define MULT32_32_Q31(a,b) ADD32(ADD32(SHL(MULT16_16(SHR((a),16),SHR((b),16)),1), SHR(MULT16_16SU(SHR((a),16),((b)&0x0000ffff)),15)), SHR(MULT16_16SU(SHR((b),16),((a)&0x0000ffff)),15))
#endif
/** Compile-time conversion of float constant to 16-bit value */ /** Compile-time conversion of float constant to 16-bit value */
#define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits)))) #define QCONST16(x,bits) ((opus_val16)(.5+(x)*(((opus_val32)1)<<(bits))))

View file

@ -191,7 +191,7 @@ static void kf_bfly3(
kiss_fft_cpx * Fout_beg = Fout; kiss_fft_cpx * Fout_beg = Fout;
#ifdef FIXED_POINT #ifdef FIXED_POINT
epi3.r = -16384; /*epi3.r = -16384;*/ /* Unused */
epi3.i = -28378; epi3.i = -28378;
#else #else
epi3 = st->twiddles[fstride*m]; epi3 = st->twiddles[fstride*m];

View file

@ -164,7 +164,7 @@ opus_val16 celt_cos_norm(opus_val32 x)
{ {
return _celt_cos_pi_2(EXTRACT16(x)); return _celt_cos_pi_2(EXTRACT16(x));
} else { } else {
return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x))); return NEG16(_celt_cos_pi_2(EXTRACT16(65536-x)));
} }
} else { } else {
if (x&0x0000ffff) if (x&0x0000ffff)

View file

@ -412,6 +412,41 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
RESTORE_STACK; RESTORE_STACK;
} }
#ifdef FIXED_POINT
/* Fixed-point pitch gain from the cross term xy and the energy terms xx, yy.
 * Returns 0 when any input is zero; otherwise scales xy by the value that
 * celt_rsqrt_norm() returns for the normalised xx*yy product, undoes the
 * normalisation shifts, and caps the result at Q15ONE before narrowing to
 * 16 bits. */
static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
{
   int exp_x, exp_y, exp_total;
   int out_shift;
   opus_val32 energy;
   opus_val32 gain;
   opus_val16 inv_root;

   if (xy == 0 || xx == 0 || yy == 0)
      return 0;

   /* Shift amounts applied to xx and yy before they are multiplied. */
   exp_x = celt_ilog2(xx)-14;
   exp_y = celt_ilog2(yy)-14;
   exp_total = exp_x + exp_y;
   energy = MULT16_16_Q14(VSHR32(xx, exp_x), VSHR32(yy, exp_y));

   /* An odd combined shift is moved to the neighbouring even value (with the
      product rescaled by one bit to match) before it is halved below. */
   if ((exp_total & 1) != 0)
   {
      if (energy >= 32768)
      {
         energy >>= 1;
         exp_total++;
      } else {
         energy <<= 1;
         exp_total--;
      }
   }

   inv_root = celt_rsqrt_norm(energy);
   gain = MULT16_32_Q15(inv_root, xy);
   out_shift = (exp_total>>1)-1;
   gain = VSHR32(gain, out_shift);
   return EXTRACT16(MIN32(gain, Q15ONE));
}
#else
/* Floating-point pitch gain: xy divided by the square root of 1+xx*yy. */
static opus_val16 compute_pitch_gain(opus_val32 xy, opus_val32 xx, opus_val32 yy)
{
   return xy/celt_sqrt(1+xx*yy);
}
#endif
static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2}; static const int second_check[16] = {0, 0, 3, 2, 3, 2, 5, 2, 3, 2, 3, 2, 5, 2, 3, 2};
opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch) int N, int *T0_, int prev_period, opus_val16 prev_gain, int arch)
@ -450,18 +485,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
yy = yy_lookup[T0]; yy = yy_lookup[T0];
best_xy = xy; best_xy = xy;
best_yy = yy; best_yy = yy;
#ifdef FIXED_POINT g = g0 = compute_pitch_gain(xy, xx, yy);
{
opus_val32 x2y2;
int sh, t;
x2y2 = 1+HALF32(MULT32_32_Q31(xx,yy));
sh = celt_ilog2(x2y2)>>1;
t = VSHR32(x2y2, 2*(sh-7));
g = g0 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
}
#else
g = g0 = xy/celt_sqrt(1+xx*yy);
#endif
/* Look for any pitch at T/k */ /* Look for any pitch at T/k */
for (k=2;k<=15;k++) for (k=2;k<=15;k++)
{ {
@ -484,24 +508,13 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
T1b = celt_udiv(2*second_check[k]*T0+k, 2*k); T1b = celt_udiv(2*second_check[k]*T0+k, 2*k);
} }
dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch); dual_inner_prod(x, &x[-T1], &x[-T1b], N, &xy, &xy2, arch);
xy += xy2; xy = HALF32(xy + xy2);
yy = yy_lookup[T1] + yy_lookup[T1b]; yy = HALF32(yy_lookup[T1] + yy_lookup[T1b]);
#ifdef FIXED_POINT g1 = compute_pitch_gain(xy, xx, yy);
{
opus_val32 x2y2;
int sh, t;
x2y2 = 1+MULT32_32_Q31(xx,yy);
sh = celt_ilog2(x2y2)>>1;
t = VSHR32(x2y2, 2*(sh-7));
g1 = VSHR32(MULT16_32_Q15(celt_rsqrt_norm(t), xy),sh+1);
}
#else
g1 = xy/celt_sqrt(1+2.f*xx*1.f*yy);
#endif
if (abs(T1-prev_period)<=1) if (abs(T1-prev_period)<=1)
cont = prev_gain; cont = prev_gain;
else if (abs(T1-prev_period)<=2 && 5*k*k < T0) else if (abs(T1-prev_period)<=2 && 5*k*k < T0)
cont = HALF32(prev_gain); cont = HALF16(prev_gain);
else else
cont = 0; cont = 0;
thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont); thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7f,15),g0)-cont);

View file

@ -187,25 +187,6 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch); opus_val32 *xcorr, int len, int max_pitch);
#if !defined(OVERRIDE_PITCH_XCORR) #if !defined(OVERRIDE_PITCH_XCORR)
/*Is run-time CPU detection enabled on this platform?*/
# if defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_ASM) \
|| (defined(OPUS_ARM_MAY_HAVE_NEON_INTR) \
&& !defined(OPUS_ARM_PRESUME_NEON_INTR)))
extern
# if defined(FIXED_POINT)
opus_val32
# else
void
# endif
(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int);
# define OVERRIDE_PITCH_XCORR
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
xcorr, len, max_pitch))
# else
#ifdef FIXED_POINT #ifdef FIXED_POINT
opus_val32 opus_val32
#else #else
@ -214,7 +195,6 @@ void
celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y, celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
opus_val32 *xcorr, int len, int max_pitch, int arch); opus_val32 *xcorr, int len, int max_pitch, int arch);
# endif
#endif #endif
#endif #endif

View file

@ -296,7 +296,7 @@ static OPUS_INLINE int interp_bits2pulses(const CELTMode *m, int start, int end,
done = 0; done = 0;
for (j=end;j-->start;) for (j=end;j-->start;)
{ {
int tmp = bits1[j] + (lo*bits2[j]>>ALLOC_STEPS); int tmp = bits1[j] + ((opus_int32)lo*bits2[j]>>ALLOC_STEPS);
if (tmp < thresh[j] && !done) if (tmp < thresh[j] && !done)
{ {
if (tmp >= alloc_floor) if (tmp >= alloc_floor)

View file

@ -271,7 +271,7 @@ unsigned alg_quant(celt_norm *X, int N, int K, int spread, int B, ec_enc *enc
best_id = 0; best_id = 0;
/* The squared magnitude term gets added anyway, so we might as well /* The squared magnitude term gets added anyway, so we might as well
add it outside the loop */ add it outside the loop */
yy = ADD32(yy, 1); yy = ADD16(yy, 1);
j=0; j=0;
do { do {
opus_val16 Rxy, Ryy; opus_val16 Rxy, Ryy;

View file

@ -102,21 +102,21 @@ opus_val32 celt_inner_prod_sse(
#if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT) #if defined(OPUS_X86_PRESUME_SSE4_1) && defined(FIXED_POINT)
#define OVERRIDE_CELT_INNER_PROD #define OVERRIDE_CELT_INNER_PROD
#define celt_inner_prod(x, y, N, arch) \ #define celt_inner_prod(x, y, N, arch) \
((void)arch, celt_inner_prod_sse4_1(x, y, N)) ((void)arch, celt_inner_prod_sse4_1(x, y, N))
#elif defined(OPUS_X86_PRESUME_SSE2) && defined(FIXED_POINT) && !defined(OPUS_X86_MAY_HAVE_SSE4_1) #elif defined(OPUS_X86_PRESUME_SSE2) && defined(FIXED_POINT) && !defined(OPUS_X86_MAY_HAVE_SSE4_1)
#define OVERRIDE_CELT_INNER_PROD #define OVERRIDE_CELT_INNER_PROD
#define celt_inner_prod(x, y, N, arch) \ #define celt_inner_prod(x, y, N, arch) \
((void)arch, celt_inner_prod_sse2(x, y, N)) ((void)arch, celt_inner_prod_sse2(x, y, N))
#elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT) #elif defined(OPUS_X86_PRESUME_SSE) && !defined(FIXED_POINT)
#define OVERRIDE_CELT_INNER_PROD #define OVERRIDE_CELT_INNER_PROD
#define celt_inner_prod(x, y, N, arch) \ #define celt_inner_prod(x, y, N, arch) \
((void)arch, celt_inner_prod_sse(x, y, N)) ((void)arch, celt_inner_prod_sse(x, y, N))
#elif ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \ #elif ((defined(OPUS_X86_MAY_HAVE_SSE4_1) || defined(OPUS_X86_MAY_HAVE_SSE2)) && defined(FIXED_POINT)) || \
(defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)) (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT))
extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x, const opus_val16 *x,
@ -138,19 +138,19 @@ extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
#undef comb_filter_const #undef comb_filter_const
void dual_inner_prod_sse(const opus_val16 *x, void dual_inner_prod_sse(const opus_val16 *x,
const opus_val16 *y01, const opus_val16 *y01,
const opus_val16 *y02, const opus_val16 *y02,
int N, int N,
opus_val32 *xy1, opus_val32 *xy1,
opus_val32 *xy2); opus_val32 *xy2);
void comb_filter_const_sse(opus_val32 *y, void comb_filter_const_sse(opus_val32 *y,
opus_val32 *x, opus_val32 *x,
int T, int T,
int N, int N,
opus_val16 g10, opus_val16 g10,
opus_val16 g11, opus_val16 g11,
opus_val16 g12); opus_val16 g12);
#if defined(OPUS_X86_PRESUME_SSE) #if defined(OPUS_X86_PRESUME_SSE)
@ -169,7 +169,7 @@ extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
opus_val32 *xy1, opus_val32 *xy1,
opus_val32 *xy2); opus_val32 *xy2);
#define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \ #define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
((*DUAL_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2)) ((*DUAL_INNER_PROD_IMPL[(arch) & OPUS_ARCHMASK])(x, y01, y02, N, xy1, xy2))
extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])( extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
@ -181,7 +181,7 @@ extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
opus_val16 g11, opus_val16 g11,
opus_val16 g12); opus_val16 g12);
#define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \ #define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
((*COMB_FILTER_CONST_IMPL[(arch) & OPUS_ARCHMASK])(y, x, T, N, g10, g11, g12)) ((*COMB_FILTER_CONST_IMPL[(arch) & OPUS_ARCHMASK])(y, x, T, N, g10, g11, g12))
#define NON_STATIC_COMB_FILTER_CONST_C #define NON_STATIC_COMB_FILTER_CONST_C

View file

@ -72,7 +72,7 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
#endif #endif
#if (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \ #if (defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
(!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) (!defined(OPUS_X86_MAY_HAVE_SSE_4_1) && defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2))
opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])( opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x, const opus_val16 *x,

View file

@ -46,7 +46,7 @@
#include <intrin.h> #include <intrin.h>
static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType) static _inline void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
{ {
__cpuid((int*)CPUInfo, InfoType); __cpuid((int*)CPUInfo, InfoType);
} }
#else #else

View file

@ -277,7 +277,7 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b
sum = celt_inner_prod(in, in, frame_size+overlap, 0); sum = celt_inner_prod(in, in, frame_size+overlap, 0);
/* This should filter out both NaNs and ridiculous signals that could /* This should filter out both NaNs and ridiculous signals that could
cause NaNs further down. */ cause NaNs further down. */
if (!(sum < 1e9f) || celt_isnan(sum)) if (!(sum < 1e18f) || celt_isnan(sum))
{ {
OPUS_CLEAR(in, frame_size+overlap); OPUS_CLEAR(in, frame_size+overlap);
preemph_mem[c] = 0; preemph_mem[c] = 0;

View file

@ -34,9 +34,8 @@ POSSIBILITY OF SUCH DAMAGE.
/* Generates excitation for CNG LPC synthesis */ /* Generates excitation for CNG LPC synthesis */
static OPUS_INLINE void silk_CNG_exc( static OPUS_INLINE void silk_CNG_exc(
opus_int32 exc_Q10[], /* O CNG excitation signal Q10 */ opus_int32 exc_Q14[], /* O CNG excitation signal Q14 */
opus_int32 exc_buf_Q14[], /* I Random samples buffer Q14 */ opus_int32 exc_buf_Q14[], /* I Random samples buffer Q14 */
opus_int32 Gain_Q16, /* I Gain to apply */
opus_int length, /* I Length */ opus_int length, /* I Length */
opus_int32 *rand_seed /* I/O Seed to random index generator */ opus_int32 *rand_seed /* I/O Seed to random index generator */
) )
@ -55,7 +54,7 @@ static OPUS_INLINE void silk_CNG_exc(
idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask ); idx = (opus_int)( silk_RSHIFT( seed, 24 ) & exc_mask );
silk_assert( idx >= 0 ); silk_assert( idx >= 0 );
silk_assert( idx <= CNG_BUF_MASK_MAX ); silk_assert( idx <= CNG_BUF_MASK_MAX );
exc_Q10[ i ] = (opus_int16)silk_SAT16( silk_SMULWW( exc_buf_Q14[ idx ], Gain_Q16 >> 4 ) ); exc_Q14[ i ] = exc_buf_Q14[ idx ];
} }
*rand_seed = seed; *rand_seed = seed;
} }
@ -85,7 +84,7 @@ void silk_CNG(
) )
{ {
opus_int i, subfr; opus_int i, subfr;
opus_int32 sum_Q6, max_Gain_Q16, gain_Q16; opus_int32 LPC_pred_Q10, max_Gain_Q16, gain_Q16, gain_Q10;
opus_int16 A_Q12[ MAX_LPC_ORDER ]; opus_int16 A_Q12[ MAX_LPC_ORDER ];
silk_CNG_struct *psCNG = &psDec->sCNG; silk_CNG_struct *psCNG = &psDec->sCNG;
SAVE_STACK; SAVE_STACK;
@ -124,8 +123,8 @@ void silk_CNG(
/* Add CNG when packet is lost or during DTX */ /* Add CNG when packet is lost or during DTX */
if( psDec->lossCnt ) { if( psDec->lossCnt ) {
VARDECL( opus_int32, CNG_sig_Q10 ); VARDECL( opus_int32, CNG_sig_Q14 );
ALLOC( CNG_sig_Q10, length + MAX_LPC_ORDER, opus_int32 ); ALLOC( CNG_sig_Q14, length + MAX_LPC_ORDER, opus_int32 );
/* Generate CNG excitation */ /* Generate CNG excitation */
gain_Q16 = silk_SMULWW( psDec->sPLC.randScale_Q14, psDec->sPLC.prevGain_Q16[1] ); gain_Q16 = silk_SMULWW( psDec->sPLC.randScale_Q14, psDec->sPLC.prevGain_Q16[1] );
@ -138,42 +137,46 @@ void silk_CNG(
gain_Q16 = silk_SUB_LSHIFT32(silk_SMULWW( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 ); gain_Q16 = silk_SUB_LSHIFT32(silk_SMULWW( psCNG->CNG_smth_Gain_Q16, psCNG->CNG_smth_Gain_Q16 ), gain_Q16, 5 );
gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 ); gain_Q16 = silk_LSHIFT32( silk_SQRT_APPROX( gain_Q16 ), 8 );
} }
silk_CNG_exc( CNG_sig_Q10 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, gain_Q16, length, &psCNG->rand_seed ); gain_Q10 = silk_RSHIFT( gain_Q16, 6 );
silk_CNG_exc( CNG_sig_Q14 + MAX_LPC_ORDER, psCNG->CNG_exc_buf_Q14, length, &psCNG->rand_seed );
/* Convert CNG NLSF to filter representation */ /* Convert CNG NLSF to filter representation */
silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order ); silk_NLSF2A( A_Q12, psCNG->CNG_smth_NLSF_Q15, psDec->LPC_order );
/* Generate CNG signal, by synthesis filtering */ /* Generate CNG signal, by synthesis filtering */
silk_memcpy( CNG_sig_Q10, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) ); silk_memcpy( CNG_sig_Q14, psCNG->CNG_synth_state, MAX_LPC_ORDER * sizeof( opus_int32 ) );
for( i = 0; i < length; i++ ) { for( i = 0; i < length; i++ ) {
silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 ); silk_assert( psDec->LPC_order == 10 || psDec->LPC_order == 16 );
/* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */ /* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
sum_Q6 = silk_RSHIFT( psDec->LPC_order, 1 ); LPC_pred_Q10 = silk_RSHIFT( psDec->LPC_order, 1 );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 1 ], A_Q12[ 0 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 2 ], A_Q12[ 1 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 2 ], A_Q12[ 1 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 3 ], A_Q12[ 2 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 3 ], A_Q12[ 2 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 4 ], A_Q12[ 3 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 4 ], A_Q12[ 3 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 5 ], A_Q12[ 4 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 5 ], A_Q12[ 4 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 6 ], A_Q12[ 5 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 6 ], A_Q12[ 5 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 7 ], A_Q12[ 6 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 7 ], A_Q12[ 6 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 8 ], A_Q12[ 7 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 8 ], A_Q12[ 7 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 9 ], A_Q12[ 8 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 9 ], A_Q12[ 8 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 10 ], A_Q12[ 9 ] );
if( psDec->LPC_order == 16 ) { if( psDec->LPC_order == 16 ) {
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 11 ], A_Q12[ 10 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 12 ], A_Q12[ 11 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 13 ], A_Q12[ 12 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 14 ], A_Q12[ 13 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 15 ], A_Q12[ 14 ] );
sum_Q6 = silk_SMLAWB( sum_Q6, CNG_sig_Q10[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] ); LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, CNG_sig_Q14[ MAX_LPC_ORDER + i - 16 ], A_Q12[ 15 ] );
} }
/* Update states */ /* Update states */
CNG_sig_Q10[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q10[ MAX_LPC_ORDER + i ], sum_Q6, 4 ); CNG_sig_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT( CNG_sig_Q14[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 );
frame[ i ] = silk_ADD_SAT16( frame[ i ], silk_RSHIFT_ROUND( CNG_sig_Q10[ MAX_LPC_ORDER + i ], 10 ) ); /* Scale with Gain and add to input signal */
frame[ i ] = (opus_int16)silk_ADD_SAT16( frame[ i ], silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( CNG_sig_Q14[ MAX_LPC_ORDER + i ], gain_Q10 ), 8 ) ) );
} }
silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q10[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) ); silk_memcpy( psCNG->CNG_synth_state, &CNG_sig_Q14[ length ], MAX_LPC_ORDER * sizeof( opus_int32 ) );
} else { } else {
silk_memset( psCNG->CNG_synth_state, 0, psDec->LPC_order * sizeof( opus_int32 ) ); silk_memset( psCNG->CNG_synth_state, 0, psDec->LPC_order * sizeof( opus_int32 ) );
} }

View file

@ -46,8 +46,9 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
) )
{ {
opus_int i, j, nStates, ind_tmp, ind_min_max, ind_max_min, in_Q10, res_Q10; opus_int i, j, nStates, ind_tmp, ind_min_max, ind_max_min, in_Q10, res_Q10;
opus_int pred_Q10, diff_Q10, out0_Q10, out1_Q10, rate0_Q5, rate1_Q5; opus_int pred_Q10, diff_Q10, rate0_Q5, rate1_Q5;
opus_int32 RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25, pred_coef_Q16; opus_int16 out0_Q10, out1_Q10;
opus_int32 RD_tmp_Q25, min_Q25, min_max_Q25, max_min_Q25;
opus_int ind_sort[ NLSF_QUANT_DEL_DEC_STATES ]; opus_int ind_sort[ NLSF_QUANT_DEL_DEC_STATES ];
opus_int8 ind[ NLSF_QUANT_DEL_DEC_STATES ][ MAX_LPC_ORDER ]; opus_int8 ind[ NLSF_QUANT_DEL_DEC_STATES ][ MAX_LPC_ORDER ];
opus_int16 prev_out_Q10[ 2 * NLSF_QUANT_DEL_DEC_STATES ]; opus_int16 prev_out_Q10[ 2 * NLSF_QUANT_DEL_DEC_STATES ];
@ -74,8 +75,8 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); out0_Q10 = silk_ADD16( out0_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) ); out1_Q10 = silk_ADD16( out1_Q10, SILK_FIX_CONST( NLSF_QUANT_LEVEL_ADJ, 10 ) );
} }
out0_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_SMULWB( (opus_int32)out0_Q10, quant_step_size_Q16 ); out0_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_RSHIFT( silk_SMULBB( out0_Q10, quant_step_size_Q16 ), 16 );
out1_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_SMULWB( (opus_int32)out1_Q10, quant_step_size_Q16 ); out1_Q10_table[ i + NLSF_QUANT_MAX_AMPLITUDE_EXT ] = silk_RSHIFT( silk_SMULBB( out1_Q10, quant_step_size_Q16 ), 16 );
} }
silk_assert( (NLSF_QUANT_DEL_DEC_STATES & (NLSF_QUANT_DEL_DEC_STATES-1)) == 0 ); /* must be power of two */ silk_assert( (NLSF_QUANT_DEL_DEC_STATES & (NLSF_QUANT_DEL_DEC_STATES-1)) == 0 ); /* must be power of two */
@ -85,12 +86,11 @@ opus_int32 silk_NLSF_del_dec_quant( /* O Returns
prev_out_Q10[ 0 ] = 0; prev_out_Q10[ 0 ] = 0;
for( i = order - 1; ; i-- ) { for( i = order - 1; ; i-- ) {
rates_Q5 = &ec_rates_Q5[ ec_ix[ i ] ]; rates_Q5 = &ec_rates_Q5[ ec_ix[ i ] ];
pred_coef_Q16 = silk_LSHIFT( (opus_int32)pred_coef_Q8[ i ], 8 );
in_Q10 = x_Q10[ i ]; in_Q10 = x_Q10[ i ];
for( j = 0; j < nStates; j++ ) { for( j = 0; j < nStates; j++ ) {
pred_Q10 = silk_SMULWB( pred_coef_Q16, prev_out_Q10[ j ] ); pred_Q10 = silk_RSHIFT( silk_SMULBB( (opus_int16)pred_coef_Q8[ i ], prev_out_Q10[ j ] ), 8 );
res_Q10 = silk_SUB16( in_Q10, pred_Q10 ); res_Q10 = silk_SUB16( in_Q10, pred_Q10 );
ind_tmp = silk_SMULWB( (opus_int32)inv_quant_step_size_Q6, res_Q10 ); ind_tmp = silk_RSHIFT( silk_SMULBB( inv_quant_step_size_Q6, res_Q10 ), 16 );
ind_tmp = silk_LIMIT( ind_tmp, -NLSF_QUANT_MAX_AMPLITUDE_EXT, NLSF_QUANT_MAX_AMPLITUDE_EXT-1 ); ind_tmp = silk_LIMIT( ind_tmp, -NLSF_QUANT_MAX_AMPLITUDE_EXT, NLSF_QUANT_MAX_AMPLITUDE_EXT-1 );
ind[ j ][ i ] = (opus_int8)ind_tmp; ind[ j ][ i ] = (opus_int8)ind_tmp;

View file

@ -46,7 +46,7 @@ opus_int32 silk_NLSF_encode( /* O Returns
) )
{ {
opus_int i, s, ind1, bestIndex, prob_Q8, bits_q7; opus_int i, s, ind1, bestIndex, prob_Q8, bits_q7;
opus_int32 W_tmp_Q9; opus_int32 W_tmp_Q9, ret;
VARDECL( opus_int32, err_Q26 ); VARDECL( opus_int32, err_Q26 );
VARDECL( opus_int32, RD_Q25 ); VARDECL( opus_int32, RD_Q25 );
VARDECL( opus_int, tempIndices1 ); VARDECL( opus_int, tempIndices1 );
@ -131,6 +131,7 @@ opus_int32 silk_NLSF_encode( /* O Returns
/* Decode */ /* Decode */
silk_NLSF_decode( pNLSF_Q15, NLSFIndices, psNLSF_CB ); silk_NLSF_decode( pNLSF_Q15, NLSFIndices, psNLSF_CB );
ret = RD_Q25[ 0 ];
RESTORE_STACK; RESTORE_STACK;
return RD_Q25[ 0 ]; return ret;
} }

View file

@ -31,6 +31,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include "main.h" #include "main.h"
#include "stack_alloc.h" #include "stack_alloc.h"
#include "NSQ.h"
static OPUS_INLINE void silk_nsq_scale_states( static OPUS_INLINE void silk_nsq_scale_states(
const silk_encoder_state *psEncC, /* I Encoder State */ const silk_encoder_state *psEncC, /* I Encoder State */
@ -66,7 +68,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer(
opus_int offset_Q10, /* I */ opus_int offset_Q10, /* I */
opus_int length, /* I Input length */ opus_int length, /* I Input length */
opus_int shapingLPCOrder, /* I Noise shaping AR filter order */ opus_int shapingLPCOrder, /* I Noise shaping AR filter order */
opus_int predictLPCOrder /* I Prediction filter order */ opus_int predictLPCOrder, /* I Prediction filter order */
int arch /* I Architecture */
); );
#endif #endif
@ -155,7 +158,7 @@ void silk_NSQ_c
silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14, silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder ); offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch );
x_Q3 += psEncC->subfr_length; x_Q3 += psEncC->subfr_length;
pulses += psEncC->subfr_length; pulses += psEncC->subfr_length;
@ -198,15 +201,19 @@ void silk_noise_shape_quantizer(
opus_int offset_Q10, /* I */ opus_int offset_Q10, /* I */
opus_int length, /* I Input length */ opus_int length, /* I Input length */
opus_int shapingLPCOrder, /* I Noise shaping AR filter order */ opus_int shapingLPCOrder, /* I Noise shaping AR filter order */
opus_int predictLPCOrder /* I Prediction filter order */ opus_int predictLPCOrder, /* I Prediction filter order */
int arch /* I Architecture */
) )
{ {
opus_int i, j; opus_int i;
opus_int32 LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13; opus_int32 LTP_pred_Q13, LPC_pred_Q10, n_AR_Q12, n_LTP_Q13;
opus_int32 n_LF_Q12, r_Q10, rr_Q10, q1_Q0, q1_Q10, q2_Q10, rd1_Q20, rd2_Q20; opus_int32 n_LF_Q12, r_Q10, rr_Q10, q1_Q0, q1_Q10, q2_Q10, rd1_Q20, rd2_Q20;
opus_int32 exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; opus_int32 exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; opus_int32 tmp1, tmp2, sLF_AR_shp_Q14;
opus_int32 *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr; opus_int32 *psLPC_Q14, *shp_lag_ptr, *pred_lag_ptr;
#ifdef silk_short_prediction_create_arch_coef
opus_int32 a_Q12_arch[MAX_LPC_ORDER];
#endif
shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ]; shp_lag_ptr = &NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - lag + HARM_SHAPE_FIR_TAPS / 2 ];
pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
@ -215,32 +222,16 @@ void silk_noise_shape_quantizer(
/* Set up short term AR state */ /* Set up short term AR state */
psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ]; psLPC_Q14 = &NSQ->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 ];
#ifdef silk_short_prediction_create_arch_coef
silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder);
#endif
for( i = 0; i < length; i++ ) { for( i = 0; i < length; i++ ) {
/* Generate dither */ /* Generate dither */
NSQ->rand_seed = silk_RAND( NSQ->rand_seed ); NSQ->rand_seed = silk_RAND( NSQ->rand_seed );
/* Short-term prediction */ /* Short-term prediction */
silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); LPC_pred_Q10 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch);
/* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
LPC_pred_Q10 = silk_RSHIFT( predictLPCOrder, 1 );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ 0 ], a_Q12[ 0 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
if( predictLPCOrder == 16 ) {
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
LPC_pred_Q10 = silk_SMLAWB( LPC_pred_Q10, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
}
/* Long-term prediction */ /* Long-term prediction */
if( signalType == TYPE_VOICED ) { if( signalType == TYPE_VOICED ) {
@ -259,23 +250,8 @@ void silk_noise_shape_quantizer(
/* Noise shape feedback */ /* Noise shape feedback */
silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */ silk_assert( ( shapingLPCOrder & 1 ) == 0 ); /* check that order is even */
tmp2 = psLPC_Q14[ 0 ]; n_AR_Q12 = silk_NSQ_noise_shape_feedback_loop(psLPC_Q14, NSQ->sAR2_Q14, AR_shp_Q13, shapingLPCOrder, arch);
tmp1 = NSQ->sAR2_Q14[ 0 ];
NSQ->sAR2_Q14[ 0 ] = tmp2;
n_AR_Q12 = silk_RSHIFT( shapingLPCOrder, 1 );
n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ 0 ] );
for( j = 2; j < shapingLPCOrder; j += 2 ) {
tmp2 = NSQ->sAR2_Q14[ j - 1 ];
NSQ->sAR2_Q14[ j - 1 ] = tmp1;
n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ j - 1 ] );
tmp1 = NSQ->sAR2_Q14[ j + 0 ];
NSQ->sAR2_Q14[ j + 0 ] = tmp2;
n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp2, AR_shp_Q13[ j ] );
}
NSQ->sAR2_Q14[ shapingLPCOrder - 1 ] = tmp1;
n_AR_Q12 = silk_SMLAWB( n_AR_Q12, tmp1, AR_shp_Q13[ shapingLPCOrder - 1 ] );
n_AR_Q12 = silk_LSHIFT32( n_AR_Q12, 1 ); /* Q11 -> Q12 */
n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 ); n_AR_Q12 = silk_SMLAWB( n_AR_Q12, NSQ->sLF_AR_shp_Q14, Tilt_Q14 );
n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 ); n_LF_Q12 = silk_SMULWB( NSQ->sLTP_shp_Q14[ NSQ->sLTP_shp_buf_idx - 1 ], LF_shp_Q14 );

View file

@ -31,6 +31,8 @@ POSSIBILITY OF SUCH DAMAGE.
#include "main.h" #include "main.h"
#include "stack_alloc.h" #include "stack_alloc.h"
#include "NSQ.h"
typedef struct { typedef struct {
opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ]; opus_int32 sLPC_Q14[ MAX_SUB_FRAME_LENGTH + NSQ_LPC_BUF_LENGTH ];
@ -106,7 +108,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
opus_int warping_Q16, /* I */ opus_int warping_Q16, /* I */
opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ opus_int nStatesDelayedDecision, /* I Number of states in decision tree */
opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */
opus_int decisionDelay /* I */ opus_int decisionDelay, /* I */
int arch /* I */
); );
void silk_NSQ_del_dec_c( void silk_NSQ_del_dec_c(
@ -260,7 +263,7 @@ void silk_NSQ_del_dec_c(
silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, silk_noise_shape_quantizer_del_dec( NSQ, psDelDec, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15,
delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], delayedGain_Q10, A_Q12, B_Q14, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ],
Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder, Gains_Q16[ k ], Lambda_Q10, offset_Q10, psEncC->subfr_length, subfr++, psEncC->shapingLPCOrder,
psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay ); psEncC->predictLPCOrder, psEncC->warping_Q16, psEncC->nStatesDelayedDecision, &smpl_buf_idx, decisionDelay, psEncC->arch );
x_Q3 += psEncC->subfr_length; x_Q3 += psEncC->subfr_length;
pulses += psEncC->subfr_length; pulses += psEncC->subfr_length;
@ -333,7 +336,8 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
opus_int warping_Q16, /* I */ opus_int warping_Q16, /* I */
opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ opus_int nStatesDelayedDecision, /* I Number of states in decision tree */
opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */
opus_int decisionDelay /* I */ opus_int decisionDelay, /* I */
int arch /* I */
) )
{ {
opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
@ -343,6 +347,10 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10; opus_int32 q1_Q0, q1_Q10, q2_Q10, exc_Q14, LPC_exc_Q14, xq_Q14, Gain_Q10;
opus_int32 tmp1, tmp2, sLF_AR_shp_Q14; opus_int32 tmp1, tmp2, sLF_AR_shp_Q14;
opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14; opus_int32 *pred_lag_ptr, *shp_lag_ptr, *psLPC_Q14;
#ifdef silk_short_prediction_create_arch_coef
opus_int32 a_Q12_arch[MAX_LPC_ORDER];
#endif
VARDECL( NSQ_sample_pair, psSampleState ); VARDECL( NSQ_sample_pair, psSampleState );
NSQ_del_dec_struct *psDD; NSQ_del_dec_struct *psDD;
NSQ_sample_struct *psSS; NSQ_sample_struct *psSS;
@ -355,6 +363,10 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ]; pred_lag_ptr = &sLTP_Q15[ NSQ->sLTP_buf_idx - lag + LTP_ORDER / 2 ];
Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 ); Gain_Q10 = silk_RSHIFT( Gain_Q16, 6 );
#ifdef silk_short_prediction_create_arch_coef
silk_short_prediction_create_arch_coef(a_Q12_arch, a_Q12, predictLPCOrder);
#endif
for( i = 0; i < length; i++ ) { for( i = 0; i < length; i++ ) {
/* Perform common calculations used in all states */ /* Perform common calculations used in all states */
@ -398,27 +410,7 @@ static OPUS_INLINE void silk_noise_shape_quantizer_del_dec(
/* Pointer used in short term prediction and shaping */ /* Pointer used in short term prediction and shaping */
psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ]; psLPC_Q14 = &psDD->sLPC_Q14[ NSQ_LPC_BUF_LENGTH - 1 + i ];
/* Short-term prediction */ /* Short-term prediction */
silk_assert( predictLPCOrder == 10 || predictLPCOrder == 16 ); LPC_pred_Q14 = silk_noise_shape_quantizer_short_prediction(psLPC_Q14, a_Q12, a_Q12_arch, predictLPCOrder, arch);
/* Avoids introducing a bias because silk_SMLAWB() always rounds to -inf */
LPC_pred_Q14 = silk_RSHIFT( predictLPCOrder, 1 );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ 0 ], a_Q12[ 0 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -1 ], a_Q12[ 1 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -2 ], a_Q12[ 2 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -3 ], a_Q12[ 3 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -4 ], a_Q12[ 4 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -5 ], a_Q12[ 5 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -6 ], a_Q12[ 6 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -7 ], a_Q12[ 7 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -8 ], a_Q12[ 8 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -9 ], a_Q12[ 9 ] );
if( predictLPCOrder == 16 ) {
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -10 ], a_Q12[ 10 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -11 ], a_Q12[ 11 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -12 ], a_Q12[ 12 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -13 ], a_Q12[ 13 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -14 ], a_Q12[ 14 ] );
LPC_pred_Q14 = silk_SMLAWB( LPC_pred_Q14, psLPC_Q14[ -15 ], a_Q12[ 15 ] );
}
LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */ LPC_pred_Q14 = silk_LSHIFT( LPC_pred_Q14, 4 ); /* Q10 -> Q14 */
/* Noise shape feedback */ /* Noise shape feedback */

View file

@ -365,7 +365,8 @@ static OPUS_INLINE void silk_PLC_conceal(
} }
/* Add prediction to LPC excitation */ /* Add prediction to LPC excitation */
sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], LPC_pred_Q10, 4 ); sLPC_Q14_ptr[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ],
silk_LSHIFT_SAT32( LPC_pred_Q10, 4 ));
/* Scale with Gain */ /* Scale with Gain */
frame[ i ] = (opus_int16)silk_SAT16( silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], prevGain_Q10[ 1 ] ), 8 ) ) ); frame[ i ] = (opus_int16)silk_SAT16( silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14_ptr[ MAX_LPC_ORDER + i ], prevGain_Q10[ 1 ] ), 8 ) ) );

View file

@ -219,7 +219,7 @@ void silk_decode_core(
} }
/* Add prediction to LPC excitation */ /* Add prediction to LPC excitation */
sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_LSHIFT32( pres_Q14[ i ], LPC_pred_Q10, 4 ); sLPC_Q14[ MAX_LPC_ORDER + i ] = silk_ADD_SAT32( pres_Q14[ i ], silk_LSHIFT_SAT32( LPC_pred_Q10, 4 ) );
/* Scale with gain */ /* Scale with gain */
pxq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14[ MAX_LPC_ORDER + i ], Gain_Q10 ), 8 ) ); pxq[ i ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( silk_SMULWW( sLPC_Q14[ MAX_LPC_ORDER + i ], Gain_Q10 ), 8 ) );

View file

@ -150,8 +150,11 @@ void silk_burg_modified_c(
C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */ C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] ); /* Q( -rshifts ) */
C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */ C_last_row[ k ] = silk_MLA( C_last_row[ k ], x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */ Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 ); /* Q17 */
tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */ /* We sometimes get overflows in the multiplications (even beyond +/- 2^32),
tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */ but they cancel each other and the real result seems to always fit in a 32-bit
signed integer. This was determined experimentally, not theoretically (unfortunately). */
tmp1 = silk_MLA_ovflw( tmp1, x_ptr[ n - k - 1 ], Atmp1 ); /* Q17 */
tmp2 = silk_MLA_ovflw( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 ); /* Q17 */
} }
tmp1 = -tmp1; /* Q17 */ tmp1 = -tmp1; /* Q17 */
tmp2 = -tmp2; /* Q17 */ tmp2 = -tmp2; /* Q17 */
@ -200,12 +203,14 @@ void silk_burg_modified_c(
/* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */ /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */ tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */
rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */ rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */
/* Newton-Raphson iteration */ if( rc_Q31 > 0 ) {
rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */ /* Newton-Raphson iteration */
rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */ rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */
if( num < 0 ) { rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */
/* Ensure adjusted reflection coefficients has the original sign */ if( num < 0 ) {
rc_Q31 = -rc_Q31; /* Ensure adjusted reflection coefficients has the original sign */
rc_Q31 = -rc_Q31;
}
} }
invGain_Q30 = minInvGain_Q30; invGain_Q30 = minInvGain_Q30;
reached_max_gain = 1; reached_max_gain = 1;

View file

@ -300,12 +300,14 @@ void silk_burg_modified_sse4_1(
/* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */ /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */ tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 ); /* Q30 */
rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */ rc_Q31 = silk_SQRT_APPROX( tmp2 ); /* Q15 */
/* Newton-Raphson iteration */ if( rc_Q31 > 0 ) {
rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */ /* Newton-Raphson iteration */
rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */ rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 ); /* Q15 */
if( num < 0 ) { rc_Q31 = silk_LSHIFT32( rc_Q31, 16 ); /* Q31 */
/* Ensure adjusted reflection coefficients has the original sign */ if( num < 0 ) {
rc_Q31 = -rc_Q31; /* Ensure adjusted reflection coefficients has the original sign */
rc_Q31 = -rc_Q31;
}
} }
invGain_Q30 = minInvGain_Q30; invGain_Q30 = minInvGain_Q30;
reached_max_gain = 1; reached_max_gain = 1;

View file

@ -34,6 +34,7 @@ POSSIBILITY OF SUCH DAMAGE.
#include "opus_types.h" #include "opus_types.h"
#include "opus_defines.h" #include "opus_defines.h"
#include "arch.h"
#if OPUS_GNUC_PREREQ(3, 0) #if OPUS_GNUC_PREREQ(3, 0)
#define opus_likely(x) (__builtin_expect(!!(x), 1)) #define opus_likely(x) (__builtin_expect(!!(x), 1))
@ -43,31 +44,32 @@ POSSIBILITY OF SUCH DAMAGE.
#define opus_unlikely(x) (!!(x)) #define opus_unlikely(x) (!!(x))
#endif #endif
/* Set this if opus_int64 is a native type of the CPU. */
#define OPUS_FAST_INT64 (defined(__x86_64__) || defined(__LP64__) || defined(_WIN64))
/* This is an OPUS_INLINE header file for general platform. */ /* This is an OPUS_INLINE header file for general platform. */
/* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output has to be a 32-bit int */ /* (a32 * (opus_int32)((opus_int16)(b32))) >> 16 output has to be a 32-bit int */
#if OPUS_FAST_INT64 #if OPUS_FAST_INT64
#define silk_SMULWB(a32, b32) (((a32) * (opus_int64)((opus_int16)(b32))) >> 16) #define silk_SMULWB(a32, b32) ((opus_int32)(((a32) * (opus_int64)((opus_int16)(b32))) >> 16))
#else #else
#define silk_SMULWB(a32, b32) ((((a32) >> 16) * (opus_int32)((opus_int16)(b32))) + ((((a32) & 0x0000FFFF) * (opus_int32)((opus_int16)(b32))) >> 16)) #define silk_SMULWB(a32, b32) ((((a32) >> 16) * (opus_int32)((opus_int16)(b32))) + ((((a32) & 0x0000FFFF) * (opus_int32)((opus_int16)(b32))) >> 16))
#endif #endif
/* a32 + ((b32 * (opus_int32)((opus_int16)(c32))) >> 16); output has to be a 32-bit int */ /* a32 + ((b32 * (opus_int32)((opus_int16)(c32))) >> 16); output has to be a 32-bit int */
#if OPUS_FAST_INT64 #if OPUS_FAST_INT64
#define silk_SMLAWB(a32, b32, c32) ((a32) + (((b32) * (opus_int64)((opus_int16)(c32))) >> 16)) #define silk_SMLAWB(a32, b32, c32) ((opus_int32)((a32) + (((b32) * (opus_int64)((opus_int16)(c32))) >> 16)))
#else #else
#define silk_SMLAWB(a32, b32, c32) ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16))) #define silk_SMLAWB(a32, b32, c32) ((a32) + ((((b32) >> 16) * (opus_int32)((opus_int16)(c32))) + ((((b32) & 0x0000FFFF) * (opus_int32)((opus_int16)(c32))) >> 16)))
#endif #endif
/* (a32 * (b32 >> 16)) >> 16 */ /* (a32 * (b32 >> 16)) >> 16 */
#if OPUS_FAST_INT64
#define silk_SMULWT(a32, b32) ((opus_int32)(((a32) * (opus_int64)((b32) >> 16)) >> 16))
#else
#define silk_SMULWT(a32, b32) (((a32) >> 16) * ((b32) >> 16) + ((((a32) & 0x0000FFFF) * ((b32) >> 16)) >> 16)) #define silk_SMULWT(a32, b32) (((a32) >> 16) * ((b32) >> 16) + ((((a32) & 0x0000FFFF) * ((b32) >> 16)) >> 16))
#endif
/* a32 + ((b32 * (c32 >> 16)) >> 16) */ /* a32 + ((b32 * (c32 >> 16)) >> 16) */
#if OPUS_FAST_INT64 #if OPUS_FAST_INT64
#define silk_SMLAWT(a32, b32, c32) ((a32) + (((b32) * ((opus_int64)(c32) >> 16)) >> 16)) #define silk_SMLAWT(a32, b32, c32) ((opus_int32)((a32) + (((b32) * ((opus_int64)(c32) >> 16)) >> 16)))
#else #else
#define silk_SMLAWT(a32, b32, c32) ((a32) + (((b32) >> 16) * ((c32) >> 16)) + ((((b32) & 0x0000FFFF) * ((c32) >> 16)) >> 16)) #define silk_SMLAWT(a32, b32, c32) ((a32) + (((b32) >> 16) * ((c32) >> 16)) + ((((b32) & 0x0000FFFF) * ((c32) >> 16)) >> 16))
#endif #endif
@ -89,14 +91,14 @@ POSSIBILITY OF SUCH DAMAGE.
/* (a32 * b32) >> 16 */ /* (a32 * b32) >> 16 */
#if OPUS_FAST_INT64 #if OPUS_FAST_INT64
#define silk_SMULWW(a32, b32) (((opus_int64)(a32) * (b32)) >> 16) #define silk_SMULWW(a32, b32) ((opus_int32)(((opus_int64)(a32) * (b32)) >> 16))
#else #else
#define silk_SMULWW(a32, b32) silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16)) #define silk_SMULWW(a32, b32) silk_MLA(silk_SMULWB((a32), (b32)), (a32), silk_RSHIFT_ROUND((b32), 16))
#endif #endif
/* a32 + ((b32 * c32) >> 16) */ /* a32 + ((b32 * c32) >> 16) */
#if OPUS_FAST_INT64 #if OPUS_FAST_INT64
#define silk_SMLAWW(a32, b32, c32) ((a32) + (((opus_int64)(b32) * (c32)) >> 16)) #define silk_SMLAWW(a32, b32, c32) ((opus_int32)((a32) + (((opus_int64)(b32) * (c32)) >> 16)))
#else #else
#define silk_SMLAWW(a32, b32, c32) silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16)) #define silk_SMLAWW(a32, b32, c32) silk_MLA(silk_SMLAWB((a32), (b32), (c32)), (b32), silk_RSHIFT_ROUND((c32), 16))
#endif #endif
@ -149,5 +151,9 @@ static OPUS_INLINE opus_int32 silk_CLZ32(opus_int32 in32)
#include "arm/macros_armv5e.h" #include "arm/macros_armv5e.h"
#endif #endif
#ifdef OPUS_ARM_PRESUME_AARCH64_NEON_INTR
#include "arm/macros_arm64.h"
#endif
#endif /* SILK_MACROS_H */ #endif /* SILK_MACROS_H */

View file

@ -62,7 +62,8 @@ static inline void silk_noise_shape_quantizer_del_dec(
opus_int warping_Q16, /* I */ opus_int warping_Q16, /* I */
opus_int nStatesDelayedDecision, /* I Number of states in decision tree */ opus_int nStatesDelayedDecision, /* I Number of states in decision tree */
opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */ opus_int *smpl_buf_idx, /* I Index to newest samples in buffers */
opus_int decisionDelay /* I */ opus_int decisionDelay, /* I */
int arch /* I */
) )
{ {
opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx; opus_int i, j, k, Winner_ind, RDmin_ind, RDmax_ind, last_smple_idx;
@ -82,6 +83,9 @@ static inline void silk_noise_shape_quantizer_del_dec(
opus_int32 cur, prev, next; opus_int32 cur, prev, next;
/*Unused.*/
(void)arch;
//Initialize b_Q14 variables //Initialize b_Q14 variables
b_Q14_0 = b_Q14[ 0 ]; b_Q14_0 = b_Q14[ 0 ];
b_Q14_1 = b_Q14[ 1 ]; b_Q14_1 = b_Q14[ 1 ];

View file

@ -41,7 +41,7 @@ void silk_process_NLSFs(
{ {
opus_int i, doInterpolate; opus_int i, doInterpolate;
opus_int NLSF_mu_Q20; opus_int NLSF_mu_Q20;
opus_int32 i_sqr_Q15; opus_int16 i_sqr_Q15;
opus_int16 pNLSF0_temp_Q15[ MAX_LPC_ORDER ]; opus_int16 pNLSF0_temp_Q15[ MAX_LPC_ORDER ];
opus_int16 pNLSFW_QW[ MAX_LPC_ORDER ]; opus_int16 pNLSFW_QW[ MAX_LPC_ORDER ];
opus_int16 pNLSFW0_temp_QW[ MAX_LPC_ORDER ]; opus_int16 pNLSFW0_temp_QW[ MAX_LPC_ORDER ];
@ -79,7 +79,8 @@ void silk_process_NLSFs(
/* Update NLSF weights with contribution from first half */ /* Update NLSF weights with contribution from first half */
i_sqr_Q15 = silk_LSHIFT( silk_SMULBB( psEncC->indices.NLSFInterpCoef_Q2, psEncC->indices.NLSFInterpCoef_Q2 ), 11 ); i_sqr_Q15 = silk_LSHIFT( silk_SMULBB( psEncC->indices.NLSFInterpCoef_Q2, psEncC->indices.NLSFInterpCoef_Q2 ), 11 );
for( i = 0; i < psEncC->predictLPCOrder; i++ ) { for( i = 0; i < psEncC->predictLPCOrder; i++ ) {
pNLSFW_QW[ i ] = silk_SMLAWB( silk_RSHIFT( pNLSFW_QW[ i ], 1 ), (opus_int32)pNLSFW0_temp_QW[ i ], i_sqr_Q15 ); pNLSFW_QW[ i ] = silk_ADD16( silk_RSHIFT( pNLSFW_QW[ i ], 1 ), silk_RSHIFT(
silk_SMULBB( pNLSFW0_temp_QW[ i ], i_sqr_Q15 ), 16) );
silk_assert( pNLSFW_QW[ i ] >= 1 ); silk_assert( pNLSFW_QW[ i ] >= 1 );
} }
} }
@ -100,6 +101,7 @@ void silk_process_NLSFs(
} else { } else {
/* Copy LPC coefficients for first half from second half */ /* Copy LPC coefficients for first half from second half */
silk_assert( psEncC->predictLPCOrder <= MAX_LPC_ORDER );
silk_memcpy( PredCoef_Q12[ 0 ], PredCoef_Q12[ 1 ], psEncC->predictLPCOrder * sizeof( opus_int16 ) ); silk_memcpy( PredCoef_Q12[ 0 ], PredCoef_Q12[ 1 ], psEncC->predictLPCOrder * sizeof( opus_int16 ) );
} }
} }

View file

@ -33,7 +33,7 @@ POSSIBILITY OF SUCH DAMAGE.
/* Best case: O(n) for an already sorted array */ /* Best case: O(n) for an already sorted array */
/* Worst case: O(n^2) for an inversely sorted array */ /* Worst case: O(n^2) for an inversely sorted array */
/* */ /* */
/* Shell sort: http://en.wikipedia.org/wiki/Shell_sort */ /* Shell sort: https://en.wikipedia.org/wiki/Shell_sort */
#include "SigProc_FIX.h" #include "SigProc_FIX.h"

View file

@ -77,7 +77,7 @@ void silk_stereo_LR_to_MS(
ALLOC( LP_mid, frame_length, opus_int16 ); ALLOC( LP_mid, frame_length, opus_int16 );
ALLOC( HP_mid, frame_length, opus_int16 ); ALLOC( HP_mid, frame_length, opus_int16 );
for( n = 0; n < frame_length; n++ ) { for( n = 0; n < frame_length; n++ ) {
sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 ); sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 2 );
LP_mid[ n ] = sum; LP_mid[ n ] = sum;
HP_mid[ n ] = mid[ n + 1 ] - sum; HP_mid[ n ] = mid[ n + 1 ] - sum;
} }
@ -86,7 +86,7 @@ void silk_stereo_LR_to_MS(
ALLOC( LP_side, frame_length, opus_int16 ); ALLOC( LP_side, frame_length, opus_int16 );
ALLOC( HP_side, frame_length, opus_int16 ); ALLOC( HP_side, frame_length, opus_int16 );
for( n = 0; n < frame_length; n++ ) { for( n = 0; n < frame_length; n++ ) {
sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + side[ n + 2 ], side[ n + 1 ], 1 ), 2 ); sum = silk_RSHIFT_ROUND( silk_ADD_LSHIFT( side[ n ] + (opus_int32)side[ n + 2 ], side[ n + 1 ], 1 ), 2 );
LP_side[ n ] = sum; LP_side[ n ] = sum;
HP_side[ n ] = side[ n + 1 ] - sum; HP_side[ n ] = side[ n + 1 ] - sum;
} }
@ -207,7 +207,7 @@ void silk_stereo_LR_to_MS(
pred0_Q13 += delta0_Q13; pred0_Q13 += delta0_Q13;
pred1_Q13 += delta1_Q13; pred1_Q13 += delta1_Q13;
w_Q24 += deltaw_Q24; w_Q24 += deltaw_Q24;
sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */ sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */
sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */ sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */
sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */
x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );
@ -217,7 +217,7 @@ void silk_stereo_LR_to_MS(
pred1_Q13 = -pred_Q13[ 1 ]; pred1_Q13 = -pred_Q13[ 1 ];
w_Q24 = silk_LSHIFT( width_Q14, 10 ); w_Q24 = silk_LSHIFT( width_Q14, 10 );
for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) { for( n = STEREO_INTERP_LEN_MS * fs_kHz; n < frame_length; n++ ) {
sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */ sum = silk_LSHIFT( silk_ADD_LSHIFT( mid[ n ] + (opus_int32)mid[ n + 2 ], mid[ n + 1 ], 1 ), 9 ); /* Q11 */
sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */ sum = silk_SMLAWB( silk_SMULWB( w_Q24, side[ n + 1 ] ), sum, pred0_Q13 ); /* Q8 */
sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */ sum = silk_SMLAWB( sum, silk_LSHIFT( (opus_int32)mid[ n + 1 ], 11 ), pred1_Q13 ); /* Q8 */
x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) ); x2[ n - 1 ] = (opus_int16)silk_SAT16( silk_RSHIFT_ROUND( sum, 8 ) );

View file

@ -221,7 +221,7 @@ void silk_NSQ_sse4_1(
{ {
silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14, silk_noise_shape_quantizer( NSQ, psIndices->signalType, x_sc_Q10, pulses, pxq, sLTP_Q15, A_Q12, B_Q14,
AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10, AR_shp_Q13, lag, HarmShapeFIRPacked_Q14, Tilt_Q14[ k ], LF_shp_Q14[ k ], Gains_Q16[ k ], Lambda_Q10,
offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder ); offset_Q10, psEncC->subfr_length, psEncC->shapingLPCOrder, psEncC->predictLPCOrder, psEncC->arch );
} }
x_Q3 += psEncC->subfr_length; x_Q3 += psEncC->subfr_length;

View file

@ -207,7 +207,8 @@ void silk_noise_shape_quantizer(
opus_int offset_Q10, /* I */ opus_int offset_Q10, /* I */
opus_int length, /* I Input length */ opus_int length, /* I Input length */
opus_int shapingLPCOrder, /* I Noise shaping AR filter order */ opus_int shapingLPCOrder, /* I Noise shaping AR filter order */
opus_int predictLPCOrder /* I Prediction filter order */ opus_int predictLPCOrder, /* I Prediction filter order */
int arch /* I Architecture */
); );
/**************************/ /**************************/