2010-03-14 19:59:47 +00:00
|
|
|
/*
|
|
|
|
|
* Header file for hardcoded AAC cube-root table
|
|
|
|
|
*
|
|
|
|
|
* Copyright (c) 2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
|
|
|
|
|
*
|
|
|
|
|
* This file is part of FFmpeg.
|
|
|
|
|
*
|
|
|
|
|
* FFmpeg is free software; you can redistribute it and/or
|
|
|
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
|
|
|
* License as published by the Free Software Foundation; either
|
|
|
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
|
|
|
*
|
|
|
|
|
* FFmpeg is distributed in the hope that it will be useful,
|
|
|
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
|
* Lesser General Public License for more details.
|
|
|
|
|
*
|
|
|
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
|
|
|
* License along with FFmpeg; if not, write to the Free Software
|
|
|
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
|
|
|
|
|
*/
|
|
|
|
|
|
2011-05-17 16:58:04 +02:00
|
|
|
#ifndef AVCODEC_CBRT_TABLEGEN_H
|
|
|
|
|
#define AVCODEC_CBRT_TABLEGEN_H
|
2010-03-14 19:59:47 +00:00
|
|
|
|
|
|
|
|
#include <stdint.h>
|
|
|
|
|
#include <math.h>
|
2014-08-30 17:35:18 +02:00
|
|
|
#include "libavutil/attributes.h"
|
lavc/cbrt_tablegen: speed up tablegen
This exploits an approach based on the sieve of Eratosthenes, a popular
method for generating prime numbers.
Tables are identical to previous ones.
Tested with FATE with/without --enable-hardcoded-tables.
Sample benchmark (Haswell, GNU/Linux+gcc):
prev:
7860100 decicycles in cbrt_tableinit, 1 runs, 0 skips
7777490 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
7582339 decicycles in cbrt_tableinit, 256 runs, 0 skips
7563556 decicycles in cbrt_tableinit, 512 runs, 0 skips
new:
2099480 decicycles in cbrt_tableinit, 1 runs, 0 skips
2044470 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
1796544 decicycles in cbrt_tableinit, 256 runs, 0 skips
1791631 decicycles in cbrt_tableinit, 512 runs, 0 skips
Both small and large run count given as this is called once so small run
count may give a better picture, small numbers are fairly consistent,
and there is a consistent downward trend from small to large runs,
at which point it stabilizes to a new value.
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
2016-01-11 17:09:44 -05:00
|
|
|
#include "libavutil/intfloat.h"
|
2015-07-20 13:36:20 +02:00
|
|
|
#include "libavcodec/aac_defines.h"
|
2010-03-14 19:59:47 +00:00
|
|
|
|
2015-06-30 11:53:04 +02:00
|
|
|
#if USE_FIXED
|
lavc/cbrt_tablegen: speed up tablegen
This exploits an approach based on the sieve of Eratosthenes, a popular
method for generating prime numbers.
Tables are identical to previous ones.
Tested with FATE with/without --enable-hardcoded-tables.
Sample benchmark (Haswell, GNU/Linux+gcc):
prev:
7860100 decicycles in cbrt_tableinit, 1 runs, 0 skips
7777490 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
7582339 decicycles in cbrt_tableinit, 256 runs, 0 skips
7563556 decicycles in cbrt_tableinit, 512 runs, 0 skips
new:
2099480 decicycles in cbrt_tableinit, 1 runs, 0 skips
2044470 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
1796544 decicycles in cbrt_tableinit, 256 runs, 0 skips
1791631 decicycles in cbrt_tableinit, 512 runs, 0 skips
Both small and large run count given as this is called once so small run
count may give a better picture, small numbers are fairly consistent,
and there is a consistent downward trend from small to large runs,
at which point it stabilizes to a new value.
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
2016-01-11 17:09:44 -05:00
|
|
|
#define CBRT(x) lrint((x) * 8192)
|
2015-06-30 11:53:04 +02:00
|
|
|
#else
|
lavc/cbrt_tablegen: speed up tablegen
This exploits an approach based on the sieve of Eratosthenes, a popular
method for generating prime numbers.
Tables are identical to previous ones.
Tested with FATE with/without --enable-hardcoded-tables.
Sample benchmark (Haswell, GNU/Linux+gcc):
prev:
7860100 decicycles in cbrt_tableinit, 1 runs, 0 skips
7777490 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
7582339 decicycles in cbrt_tableinit, 256 runs, 0 skips
7563556 decicycles in cbrt_tableinit, 512 runs, 0 skips
new:
2099480 decicycles in cbrt_tableinit, 1 runs, 0 skips
2044470 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
1796544 decicycles in cbrt_tableinit, 256 runs, 0 skips
1791631 decicycles in cbrt_tableinit, 512 runs, 0 skips
Both small and large run count given as this is called once so small run
count may give a better picture, small numbers are fairly consistent,
and there is a consistent downward trend from small to large runs,
at which point it stabilizes to a new value.
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
2016-01-11 17:09:44 -05:00
|
|
|
#define CBRT(x) av_float2int((float)(x))
|
2015-06-30 11:53:04 +02:00
|
|
|
#endif
|
|
|
|
|
|
2025-09-02 20:47:21 +02:00
|
|
|
#define LUT_SIZE (1 << 13)
|
|
|
|
|
#define TMP_LUT_SIZE (LUT_SIZE/2)
|
|
|
|
|
|
|
|
|
|
uint32_t AAC_RENAME(ff_cbrt_tab)[LUT_SIZE];
|
2010-03-14 19:59:47 +00:00
|
|
|
|
2016-03-12 19:08:21 +01:00
|
|
|
av_cold void AAC_RENAME(ff_cbrt_tableinit)(void)
|
2010-03-14 19:59:47 +00:00
|
|
|
{
|
2025-09-02 20:47:21 +02:00
|
|
|
// LUT of k^{4/3} for odd k; element idx corresponds to 2 * idx + 1.
|
|
|
|
|
static double cbrt_tab_dbl[TMP_LUT_SIZE];
|
lavc/cbrt_tablegen: speed up tablegen
This exploits an approach based on the sieve of Eratosthenes, a popular
method for generating prime numbers.
Tables are identical to previous ones.
Tested with FATE with/without --enable-hardcoded-tables.
Sample benchmark (Haswell, GNU/Linux+gcc):
prev:
7860100 decicycles in cbrt_tableinit, 1 runs, 0 skips
7777490 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
7582339 decicycles in cbrt_tableinit, 256 runs, 0 skips
7563556 decicycles in cbrt_tableinit, 512 runs, 0 skips
new:
2099480 decicycles in cbrt_tableinit, 1 runs, 0 skips
2044470 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
1796544 decicycles in cbrt_tableinit, 256 runs, 0 skips
1791631 decicycles in cbrt_tableinit, 512 runs, 0 skips
Both small and large run count given as this is called once so small run
count may give a better picture, small numbers are fairly consistent,
and there is a consistent downward trend from small to large runs,
at which point it stabilizes to a new value.
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
2016-01-11 17:09:44 -05:00
|
|
|
|
2025-09-02 20:47:21 +02:00
|
|
|
for (int idx = 0; idx < TMP_LUT_SIZE; ++idx)
|
|
|
|
|
cbrt_tab_dbl[idx] = 1;
|
lavc/cbrt_tablegen: speed up tablegen
This exploits an approach based on the sieve of Eratosthenes, a popular
method for generating prime numbers.
Tables are identical to previous ones.
Tested with FATE with/without --enable-hardcoded-tables.
Sample benchmark (Haswell, GNU/Linux+gcc):
prev:
7860100 decicycles in cbrt_tableinit, 1 runs, 0 skips
7777490 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
7582339 decicycles in cbrt_tableinit, 256 runs, 0 skips
7563556 decicycles in cbrt_tableinit, 512 runs, 0 skips
new:
2099480 decicycles in cbrt_tableinit, 1 runs, 0 skips
2044470 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
1796544 decicycles in cbrt_tableinit, 256 runs, 0 skips
1791631 decicycles in cbrt_tableinit, 512 runs, 0 skips
Both small and large run count given as this is called once so small run
count may give a better picture, small numbers are fairly consistent,
and there is a consistent downward trend from small to large runs,
at which point it stabilizes to a new value.
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
2016-01-11 17:09:44 -05:00
|
|
|
|
2025-09-02 20:47:21 +02:00
|
|
|
/* have to take care of non-squarefree numbers; notice that sqrt(LUT_SIZE) = 90;
|
|
|
|
|
* idx == 44 corresponds to 89. */
|
|
|
|
|
for (int idx = 1; idx < 45; ++idx) {
|
|
|
|
|
if (cbrt_tab_dbl[idx] == 1) {
|
|
|
|
|
int i = 2 * idx + 1;
|
|
|
|
|
double cbrt_val = i * cbrt(i);
|
|
|
|
|
for (int k = i; k < LUT_SIZE; k *= i) {
|
|
|
|
|
// We only have to handle k, 3 * k, 5 * k,...,
|
|
|
|
|
// because only these are odd. The corresponding indices are
|
|
|
|
|
// k >> 1, (k >> 1) + k, (k >> 1) + 2 * k,...
|
|
|
|
|
for (int idx2 = k >> 1; idx2 < TMP_LUT_SIZE; idx2 += k)
|
|
|
|
|
cbrt_tab_dbl[idx2] *= cbrt_val;
|
lavc/cbrt_tablegen: speed up tablegen
This exploits an approach based on the sieve of Eratosthenes, a popular
method for generating prime numbers.
Tables are identical to previous ones.
Tested with FATE with/without --enable-hardcoded-tables.
Sample benchmark (Haswell, GNU/Linux+gcc):
prev:
7860100 decicycles in cbrt_tableinit, 1 runs, 0 skips
7777490 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
7582339 decicycles in cbrt_tableinit, 256 runs, 0 skips
7563556 decicycles in cbrt_tableinit, 512 runs, 0 skips
new:
2099480 decicycles in cbrt_tableinit, 1 runs, 0 skips
2044470 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
1796544 decicycles in cbrt_tableinit, 256 runs, 0 skips
1791631 decicycles in cbrt_tableinit, 512 runs, 0 skips
Both small and large run count given as this is called once so small run
count may give a better picture, small numbers are fairly consistent,
and there is a consistent downward trend from small to large runs,
at which point it stabilizes to a new value.
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
2016-01-11 17:09:44 -05:00
|
|
|
}
|
2010-03-14 19:59:47 +00:00
|
|
|
}
|
2025-09-02 20:47:21 +02:00
|
|
|
}
|
lavc/cbrt_tablegen: speed up tablegen
This exploits an approach based on the sieve of Eratosthenes, a popular
method for generating prime numbers.
Tables are identical to previous ones.
Tested with FATE with/without --enable-hardcoded-tables.
Sample benchmark (Haswell, GNU/Linux+gcc):
prev:
7860100 decicycles in cbrt_tableinit, 1 runs, 0 skips
7777490 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
7582339 decicycles in cbrt_tableinit, 256 runs, 0 skips
7563556 decicycles in cbrt_tableinit, 512 runs, 0 skips
new:
2099480 decicycles in cbrt_tableinit, 1 runs, 0 skips
2044470 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
1796544 decicycles in cbrt_tableinit, 256 runs, 0 skips
1791631 decicycles in cbrt_tableinit, 512 runs, 0 skips
Both small and large run count given as this is called once so small run
count may give a better picture, small numbers are fairly consistent,
and there is a consistent downward trend from small to large runs,
at which point it stabilizes to a new value.
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
2016-01-11 17:09:44 -05:00
|
|
|
|
2025-09-02 20:47:21 +02:00
|
|
|
for (int idx = 45; idx < TMP_LUT_SIZE; ++idx) {
|
|
|
|
|
if (cbrt_tab_dbl[idx] == 1) {
|
|
|
|
|
int i = 2 * idx + 1;
|
|
|
|
|
double cbrt_val = i * cbrt(i);
|
|
|
|
|
for (int idx2 = idx; idx2 < TMP_LUT_SIZE; idx2 += i)
|
|
|
|
|
cbrt_tab_dbl[idx2] *= cbrt_val;
|
lavc/cbrt_tablegen: speed up tablegen
This exploits an approach based on the sieve of Eratosthenes, a popular
method for generating prime numbers.
Tables are identical to previous ones.
Tested with FATE with/without --enable-hardcoded-tables.
Sample benchmark (Haswell, GNU/Linux+gcc):
prev:
7860100 decicycles in cbrt_tableinit, 1 runs, 0 skips
7777490 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
7582339 decicycles in cbrt_tableinit, 256 runs, 0 skips
7563556 decicycles in cbrt_tableinit, 512 runs, 0 skips
new:
2099480 decicycles in cbrt_tableinit, 1 runs, 0 skips
2044470 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
1796544 decicycles in cbrt_tableinit, 256 runs, 0 skips
1791631 decicycles in cbrt_tableinit, 512 runs, 0 skips
Both small and large run count given as this is called once so small run
count may give a better picture, small numbers are fairly consistent,
and there is a consistent downward trend from small to large runs,
at which point it stabilizes to a new value.
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
2016-01-11 17:09:44 -05:00
|
|
|
}
|
2025-09-02 20:47:21 +02:00
|
|
|
}
|
lavc/cbrt_tablegen: speed up tablegen
This exploits an approach based on the sieve of Eratosthenes, a popular
method for generating prime numbers.
Tables are identical to previous ones.
Tested with FATE with/without --enable-hardcoded-tables.
Sample benchmark (Haswell, GNU/Linux+gcc):
prev:
7860100 decicycles in cbrt_tableinit, 1 runs, 0 skips
7777490 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
7582339 decicycles in cbrt_tableinit, 256 runs, 0 skips
7563556 decicycles in cbrt_tableinit, 512 runs, 0 skips
new:
2099480 decicycles in cbrt_tableinit, 1 runs, 0 skips
2044470 decicycles in cbrt_tableinit, 2 runs, 0 skips
[...]
1796544 decicycles in cbrt_tableinit, 256 runs, 0 skips
1791631 decicycles in cbrt_tableinit, 512 runs, 0 skips
Both small and large run count given as this is called once so small run
count may give a better picture, small numbers are fairly consistent,
and there is a consistent downward trend from small to large runs,
at which point it stabilizes to a new value.
Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Ganesh Ajjanagadde <gajjanagadde@gmail.com>
2016-01-11 17:09:44 -05:00
|
|
|
|
2025-09-02 20:47:21 +02:00
|
|
|
double cbrt_2 = 2 * cbrt(2);
|
|
|
|
|
for (int idx = 0; idx < TMP_LUT_SIZE; ++idx) {
|
|
|
|
|
double cbrt_val = cbrt_tab_dbl[idx];
|
|
|
|
|
for (int i = 2 * idx + 1; i < LUT_SIZE; i *= 2) {
|
|
|
|
|
AAC_RENAME(ff_cbrt_tab)[i] = CBRT(cbrt_val);
|
|
|
|
|
cbrt_val *= cbrt_2;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
AAC_RENAME(ff_cbrt_tab)[0] = CBRT(0);
|
2010-03-14 19:59:47 +00:00
|
|
|
}
|
|
|
|
|
|
2011-05-17 16:58:04 +02:00
|
|
|
#endif /* AVCODEC_CBRT_TABLEGEN_H */
|