Don't compile x86 cpu detection without RTCD.

Also emit an #error if RTCD is enabled without a CPU detection method, as the Arm code does.
A number of SILK functions also still used the lookup tables, even
when RTCD was disabled.
Fix those, too.
This commit is contained in:
Timothy B. Terriberry 2022-07-06 15:21:16 -07:00
parent affb551e47
commit 71fb707875
No known key found for this signature in database
GPG Key ID: 5A1149C19C699E4F
6 changed files with 36 additions and 24 deletions

View File

@ -43,10 +43,11 @@
*/
#define OPUS_ARCHMASK 3
#elif (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
#elif defined(OPUS_HAVE_RTCD) && \
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
#include "x86/x86cpu.h"
/* We currently support 5 x86 variants:

View File

@ -35,11 +35,11 @@
#include "pitch.h"
#include "x86cpu.h"
#if (defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
#if defined(OPUS_HAVE_RTCD) && \
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX))
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
#if defined(_MSC_VER)
@ -91,6 +91,9 @@ static void cpuid(unsigned int CPUInfo[4], unsigned int InfoType)
what we want on CPUs that don't support CPUID. */
CPUInfo[3] = CPUInfo[2] = CPUInfo[1] = CPUInfo[0] = 0;
}
#else
# error "Configured to use x86 RTCD, but no CPU detection method available. " \
"Reconfigure with --disable-rtcd (or send patches)."
#endif
}

View File

@ -609,10 +609,12 @@ static OPUS_INLINE opus_int64 silk_max_64(opus_int64 a, opus_int64 b)
/* the following seems faster on x86 */
#define silk_SMMUL(a32, b32) (opus_int32)silk_RSHIFT64(silk_SMULL((a32), (b32)), 32)
#if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
#if !defined(OVERRIDE_silk_burg_modified)
#define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
((void)(arch), silk_burg_modified_c(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
#endif
#if !defined(OVERRIDE_silk_inner_prod16)
#define silk_inner_prod16(inVec1, inVec2, len, arch) \
((void)(arch),silk_inner_prod16_c(inVec1, inVec2, len))
#endif

View File

@ -46,10 +46,12 @@ void silk_burg_modified_sse4_1(
);
# if defined(OPUS_X86_PRESUME_SSE4_1)
# define OVERRIDE_silk_burg_modified
# define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
((void)(arch), silk_burg_modified_sse4_1(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
# else
# elif defined(OPUS_HAVE_RTCD)
extern void (*const SILK_BURG_MODIFIED_IMPL[OPUS_ARCHMASK + 1])(
opus_int32 *res_nrg, /* O Residual energy */
@ -62,6 +64,7 @@ extern void (*const SILK_BURG_MODIFIED_IMPL[OPUS_ARCHMASK + 1])(
const opus_int D, /* I Order */
int arch /* I Run-time architecture */);
# define OVERRIDE_silk_burg_modified
# define silk_burg_modified(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch) \
((*SILK_BURG_MODIFIED_IMPL[(arch) & OPUS_ARCHMASK])(res_nrg, res_nrg_Q, A_Q16, x, minInvGain_Q30, subfr_length, nb_subfr, D, arch))
@ -76,16 +79,18 @@ opus_int64 silk_inner_prod16_sse4_1(
# if defined(OPUS_X86_PRESUME_SSE4_1)
# define OVERRIDE_silk_inner_prod16
# define silk_inner_prod16(inVec1, inVec2, len, arch) \
((void)(arch),silk_inner_prod16_sse4_1(inVec1, inVec2, len))
# else
# elif defined(OPUS_HAVE_RTCD)
extern opus_int64 (*const SILK_INNER_PROD16_IMPL[OPUS_ARCHMASK + 1])(
const opus_int16 *inVec1,
const opus_int16 *inVec2,
const opus_int len);
# define OVERRIDE_silk_inner_prod16
# define silk_inner_prod16(inVec1, inVec2, len, arch) \
((*SILK_INNER_PROD16_IMPL[(arch) & OPUS_ARCHMASK])(inVec1, inVec2, len))

View File

@ -34,8 +34,6 @@
# if defined(OPUS_X86_MAY_HAVE_SSE4_1)
# define OVERRIDE_silk_VQ_WMat_EC
void silk_VQ_WMat_EC_sse4_1(
opus_int8 *ind, /* O index of best codebook vector */
opus_int32 *res_nrg_Q15, /* O best residual energy */
@ -53,12 +51,13 @@ void silk_VQ_WMat_EC_sse4_1(
# if defined OPUS_X86_PRESUME_SSE4_1
# define OVERRIDE_silk_VQ_WMat_EC
# define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
subfr_len, max_gain_Q7, L, arch) \
((void)(arch),silk_VQ_WMat_EC_sse4_1(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
subfr_len, max_gain_Q7, L))
# else
# elif defined(OPUS_HAVE_RTCD)
extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
opus_int8 *ind, /* O index of best codebook vector */
@ -75,6 +74,7 @@ extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
const opus_int L /* I number of vectors in codebook */
);
# define OVERRIDE_silk_VQ_WMat_EC
# define silk_VQ_WMat_EC(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
subfr_len, max_gain_Q7, L, arch) \
((*SILK_VQ_WMAT_EC_IMPL[(arch) & OPUS_ARCHMASK])(ind, res_nrg_Q15, rate_dist_Q8, gain_Q7, XX_Q17, xX_Q17, cb_Q7, cb_gain_Q7, cl_Q5, \
@ -82,8 +82,6 @@ extern void (*const SILK_VQ_WMAT_EC_IMPL[OPUS_ARCHMASK + 1])(
# endif
# define OVERRIDE_silk_NSQ
void silk_NSQ_sse4_1(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
@ -104,12 +102,13 @@ void silk_NSQ_sse4_1(
# if defined OPUS_X86_PRESUME_SSE4_1
# define OVERRIDE_silk_NSQ
# define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
((void)(arch),silk_NSQ_sse4_1(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
# else
# elif defined(OPUS_HAVE_RTCD)
extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
const silk_encoder_state *psEncC, /* I Encoder State */
@ -129,6 +128,7 @@ extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
const opus_int LTP_scale_Q14 /* I LTP state scaling */
);
# define OVERRIDE_silk_NSQ
# define silk_NSQ(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
((*SILK_NSQ_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x_Q3, pulses, PredCoef_Q12, LTPCoef_Q14, AR2_Q13, \
@ -136,8 +136,6 @@ extern void (*const SILK_NSQ_IMPL[OPUS_ARCHMASK + 1])(
# endif
# define OVERRIDE_silk_NSQ_del_dec
void silk_NSQ_del_dec_sse4_1(
const silk_encoder_state *psEncC, /* I Encoder State */
silk_nsq_state *NSQ, /* I/O NSQ state */
@ -158,12 +156,13 @@ void silk_NSQ_del_dec_sse4_1(
# if defined OPUS_X86_PRESUME_SSE4_1
# define OVERRIDE_silk_NSQ_del_dec
# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
((void)(arch),silk_NSQ_del_dec_sse4_1(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14))
# else
# elif defined(OPUS_HAVE_RTCD)
extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
const silk_encoder_state *psEncC, /* I Encoder State */
@ -183,6 +182,7 @@ extern void (*const SILK_NSQ_DEL_DEC_IMPL[OPUS_ARCHMASK + 1])(
const opus_int LTP_scale_Q14 /* I LTP state scaling */
);
# define OVERRIDE_silk_NSQ_del_dec
# define silk_NSQ_del_dec(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, pitchL, Lambda_Q10, LTP_scale_Q14, arch) \
((*SILK_NSQ_DEL_DEC_IMPL[(arch) & OPUS_ARCHMASK])(psEncC, NSQ, psIndices, x16, pulses, PredCoef_Q12, LTPCoef_Q14, AR_Q13, \
@ -221,25 +221,26 @@ void silk_VAD_GetNoiseLevels(
silk_VAD_state *psSilk_VAD /* I/O Pointer to Silk VAD state */
);
# define OVERRIDE_silk_VAD_GetSA_Q8
opus_int silk_VAD_GetSA_Q8_sse4_1(
silk_encoder_state *psEnC,
const opus_int16 pIn[]
);
# if defined(OPUS_X86_PRESUME_SSE4_1)
# define OVERRIDE_silk_VAD_GetSA_Q8
# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) ((void)(arch),silk_VAD_GetSA_Q8_sse4_1(psEnC, pIn))
# else
# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \
((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn))
# elif defined(OPUS_HAVE_RTCD)
extern opus_int (*const SILK_VAD_GETSA_Q8_IMPL[OPUS_ARCHMASK + 1])(
silk_encoder_state *psEnC,
const opus_int16 pIn[]);
# define OVERRIDE_silk_VAD_GetSA_Q8
# define silk_VAD_GetSA_Q8(psEnC, pIn, arch) \
((*SILK_VAD_GETSA_Q8_IMPL[(arch) & OPUS_ARCHMASK])(psEnC, pIn))
# endif
# endif

View File

@ -35,7 +35,7 @@
#include "pitch.h"
#include "main.h"
#if !defined(OPUS_X86_PRESUME_SSE4_1)
#if defined(OPUS_HAVE_RTCD) && !defined(OPUS_X86_PRESUME_SSE4_1)
#if defined(FIXED_POINT)