Compare commits

...

997 Commits

Author SHA1 Message Date
Jean-Marc Valin
23c376e45b
Delaying new DRED data when just out of silence
Some checks failed
CMake / CMake/${{ matrix.config.name }} (map[args:-DBUILD_FRAMEWORK=ON -DCMAKE_INSTALL_PREFIX=install config:Release name:MacOSX/Framework/X64/Release os:macos-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DBUILD_SHARED_LIBS=ON config:Release name:Linux/So/X64/Release os:ubuntu-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DBUILD_SHARED_LIBS=ON config:Release name:MacOSX/So/X64/Release os:macos-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DBUILD_SHARED_LIBS=ON config:Release name:Android/So/ARMv8/Release os:ubuntu-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a config:Release name:Android/Lib/ARMv8/Release os:ubuntu-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86 -DBUILD_SHARED_LIBS=ON config:Release name:Android/So/X86/Release os:ubuntu-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86 config:Release name:Android/Lib/X86/Release os:ubuntu-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86_64 -DBUILD_SHARED_LIBS=ON config:Release name:Android/So/X64/Release os:ubuntu-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86_64 config:Release name:Android/Lib/X64/Release os:ubuntu-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DOPUS_ASSERTIONS=ON -DOPUS_FUZZING=ON config:Release name:AssertionsFuzz/Linux/Lib/X64/Release os:ubuntu-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DOPUS_ASSERTIONS=ON -DOPUS_FUZZING=ON config:Release name:AssertionsFuzz/MacOSX/Lib/X64/Release os:macos-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DOPUS_CUSTOM_MODES=ON config:Release name:CustomModes/Linux/Lib/X64/Release os:ubuntu-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Unix Makefiles" -DBUILD_FRAMEWORK=ON -DCMAKE_INSTALL_PREFIX=install -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_OSX_ARCHITECTURES=arm64 config:Release name:iOS/Framework/arm64/Release os:macos-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Unix Makefiles" -DBUILD_SHARED_LIBS=ON -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_OSX_ARCHITECTURES=arm64 config:Release name:iOS/Dll/arm64/Release os:macos-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Unix Makefiles" -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_OSX_ARCHITECTURES=arm64 config:Release name:iOS/Lib/arm64/Release os:macos-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Visual Studio 17 2022" -A ARM64 -DBUILD_SHARED_LIBS=ON config:Release name:Windows/Dll/ARMv8/Release os:windows-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Visual Studio 17 2022" -A ARM64 config:Release name:Windows/Lib/armv8/Release os:windows-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Visual Studio 17 2022" -A Win32 -DBUILD_SHARED_LIBS=ON config:Release name:Windows/Dll/X64/Release os:windows-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Visual Studio 17 2022" -A Win32 -DBUILD_SHARED_LIBS=ON config:Release name:Windows/Dll/X86/Release os:windows-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Visual Studio 17 2022" -A Win32 -DOPUS_ASSERTIONS=ON -DOPUS_FUZZING=ON config:Release name:AssertionsFuzz/Windows/Lib/X64/Release os:windows-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Visual Studio 17 2022" -A Win32 config:Release name:Windows/Lib/X64/Release os:windows-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Visual Studio 17 2022" -A Win32 config:Release name:Windows/Lib/X86/Release os:windows-latest]) (push) Has been cancelled
DRED / CMake/${{ matrix.config.name }} (map[args:-DOPUS_X86_PRESUME_AVX2=OFF config:Release name:MacOSX/Lib/X64/Release os:macos-latest]) (push) Has been cancelled
DRED / CMake/${{ matrix.config.name }} (map[args:-DOPUS_X86_PRESUME_AVX2=ON config:Release name:Linux/Lib/X64/Release os:ubuntu-latest]) (push) Has been cancelled
DRED / CMake/${{ matrix.config.name }} (map[args:-G "Unix Makefiles" -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_OSX_ARCHITECTURES=arm64 config:Release name:iOS/Lib/arm64/Release os:macos-latest]) (push) Has been cancelled
DRED / CMake/${{ matrix.config.name }} (map[args:-G "Visual Studio 17 2022" -A ARM64 config:Release name:Windows/Lib/armv8/Release os:windows-latest]) (push) Has been cancelled
DRED / CMake/${{ matrix.config.name }} (map[args:-G "Visual Studio 17 2022" -DOPUS_X86_PRESUME_AVX2=ON config:Release name:Windows/Lib/X64/Release os:windows-latest]) (push) Has been cancelled
DRED / AutoTools/${{ matrix.config.name }} (map[automakeconfig:<nil> compiler:clang name:Linux/Clang os:ubuntu-latest]) (push) Has been cancelled
DRED / AutoTools/${{ matrix.config.name }} (map[automakeconfig:<nil> compiler:gcc name:Linux/GCC os:ubuntu-latest]) (push) Has been cancelled
Repository / Check trailing white spaces (push) Has been cancelled
We don't need redundancy for the first active frame
since we already have the main Opus payload.
2024-02-06 22:13:48 -05:00
Jean-Marc Valin
c45120217b
Add dred_end return value to opus_dred_parse() 2024-02-06 20:53:51 -05:00
Jean-Marc Valin
bf7440b8ef
Support for extra offset
Allows us to exclude the most recent silence from DRED
2024-02-06 20:53:50 -05:00
Jean-Marc Valin
57a7306b92
Refactoring: store all states 2024-02-06 20:53:50 -05:00
Jean-Marc Valin
6a66cd143f
Chopping the oldest silence in a DRED payload 2024-02-06 20:53:50 -05:00
Jean-Marc Valin
65b131ec09
Add basic testing for Deep PLC, DRED, and OSCE
Still need more targeted tests, DRED decoding
2024-02-06 20:52:21 -05:00
Jean-Marc Valin
7070dfec4f
Make opus_packet_unpad() discard extensions too
Same for opus_multistream_packet_unpad()
2024-02-06 19:48:29 -05:00
Jean-Marc Valin
17922c2a28
Fix internal error on DRED
Forgot to account for padding length bytes when DRED payload is large.
2024-02-06 15:48:21 -05:00
Jean-Marc Valin
562587e91b
Avoid size-zero OPUS_COPY() with NULL pointer
Fails ubsan because memcpy declares args as non-null
2024-02-06 15:38:50 -05:00
Jean-Marc Valin
2582ca9259
Allow wrap-around in silk_LPC_analysis_filter_avx2()
Matches the C version (see 4a7027b)
2024-02-02 18:32:55 -05:00
Jean-Marc Valin
e12c7f584a
Fix log(0) on silence for fixed-point 2024-02-02 15:07:01 -05:00
Jean-Marc Valin
0e2d56d675
Add missing NULL pointer check 2024-02-02 14:46:51 -05:00
luzpaz
009d7412e1
Fix various typos
Found using `codespell -q 3 -L caf,highe,inlin,nd,ordert,shft`

Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
2024-02-02 11:03:50 -05:00
Jean-Marc Valin
f20575dd86
Fix OSCE using uninitialized range coder for PLC
Some checks failed
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Visual Studio 17 2022" -A Win32 config:Release name:Windows/Lib/X86/Release os:windows-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DBUILD_FRAMEWORK=ON -DCMAKE_INSTALL_PREFIX=install config:Release name:MacOSX/Framework/X64/Release os:macos-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DBUILD_SHARED_LIBS=ON config:Release name:Linux/So/X64/Release os:ubuntu-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DBUILD_SHARED_LIBS=ON config:Release name:MacOSX/So/X64/Release os:macos-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DBUILD_SHARED_LIBS=ON config:Release name:Android/So/ARMv8/Release os:ubuntu-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a config:Release name:Android/Lib/ARMv8/Release os:ubuntu-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86 -DBUILD_SHARED_LIBS=ON config:Release name:Android/So/X86/Release os:ubuntu-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86 config:Release name:Android/Lib/X86/Release os:ubuntu-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86_64 -DBUILD_SHARED_LIBS=ON config:Release name:Android/So/X64/Release os:ubuntu-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86_64 config:Release name:Android/Lib/X64/Release os:ubuntu-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DOPUS_ASSERTIONS=ON -DOPUS_FUZZING=ON config:Release name:AssertionsFuzz/Linux/Lib/X64/Release os:ubuntu-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DOPUS_ASSERTIONS=ON -DOPUS_FUZZING=ON config:Release name:AssertionsFuzz/MacOSX/Lib/X64/Release os:macos-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-DOPUS_CUSTOM_MODES=ON config:Release name:CustomModes/Linux/Lib/X64/Release os:ubuntu-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Unix Makefiles" -DBUILD_FRAMEWORK=ON -DCMAKE_INSTALL_PREFIX=install -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_OSX_ARCHITECTURES=arm64 config:Release name:iOS/Framework/arm64/Release os:macos-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Unix Makefiles" -DBUILD_SHARED_LIBS=ON -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_OSX_ARCHITECTURES=arm64 config:Release name:iOS/Dll/arm64/Release os:macos-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Unix Makefiles" -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_OSX_ARCHITECTURES=arm64 config:Release name:iOS/Lib/arm64/Release os:macos-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Visual Studio 17 2022" -A ARM64 -DBUILD_SHARED_LIBS=ON config:Release name:Windows/Dll/ARMv8/Release os:windows-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Visual Studio 17 2022" -A ARM64 config:Release name:Windows/Lib/armv8/Release os:windows-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Visual Studio 17 2022" -A Win32 -DBUILD_SHARED_LIBS=ON config:Release name:Windows/Dll/X64/Release os:windows-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Visual Studio 17 2022" -A Win32 -DBUILD_SHARED_LIBS=ON config:Release name:Windows/Dll/X86/Release os:windows-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Visual Studio 17 2022" -A Win32 -DOPUS_ASSERTIONS=ON -DOPUS_FUZZING=ON config:Release name:AssertionsFuzz/Windows/Lib/X64/Release os:windows-latest]) (push) Has been cancelled
CMake / CMake/${{ matrix.config.name }} (map[args:-G "Visual Studio 17 2022" -A Win32 config:Release name:Windows/Lib/X64/Release os:windows-latest]) (push) Has been cancelled
DRED / CMake/${{ matrix.config.name }} (map[args:-DOPUS_X86_PRESUME_AVX2=OFF config:Release name:MacOSX/Lib/X64/Release os:macos-latest]) (push) Has been cancelled
DRED / CMake/${{ matrix.config.name }} (map[args:-DOPUS_X86_PRESUME_AVX2=ON config:Release name:Linux/Lib/X64/Release os:ubuntu-latest]) (push) Has been cancelled
DRED / CMake/${{ matrix.config.name }} (map[args:-G "Unix Makefiles" -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_OSX_ARCHITECTURES=arm64 config:Release name:iOS/Lib/arm64/Release os:macos-latest]) (push) Has been cancelled
DRED / CMake/${{ matrix.config.name }} (map[args:-G "Visual Studio 17 2022" -A ARM64 config:Release name:Windows/Lib/armv8/Release os:windows-latest]) (push) Has been cancelled
DRED / CMake/${{ matrix.config.name }} (map[args:-G "Visual Studio 17 2022" -DOPUS_X86_PRESUME_AVX2=ON config:Release name:Windows/Lib/X64/Release os:windows-latest]) (push) Has been cancelled
DRED / AutoTools/${{ matrix.config.name }} (map[automakeconfig:<nil> compiler:clang name:Linux/Clang os:ubuntu-latest]) (push) Has been cancelled
DRED / AutoTools/${{ matrix.config.name }} (map[automakeconfig:<nil> compiler:gcc name:Linux/GCC os:ubuntu-latest]) (push) Has been cancelled
Repository / Check trailing white spaces (push) Has been cancelled
2024-01-31 21:52:08 -05:00
Jean-Marc Valin
53c2313c58
Fix lossgen shared build 2024-01-31 18:30:28 -05:00
Jean-Marc Valin
6c8acc21dd
Avoid padding multi-frame DTX packets 2024-01-31 13:08:37 -05:00
Jean-Marc Valin
648a9f24b4
Allow for DRED in DTX refresh packets 2024-01-31 13:07:51 -05:00
Jean-Marc Valin
4350819785
Handle the offset from the DRED frame id 2024-01-31 12:59:08 -05:00
Jean-Marc Valin
f4ee2925f6
Fix frame separator parsing 2024-01-31 00:02:57 -05:00
Jean-Marc Valin
0fed741a87
Fix c90 build 2024-01-30 21:53:58 -05:00
Jean-Marc Valin
468a693dd4
Cleanup previous commits
Rename, reindent, change arg order
2024-01-25 02:19:37 -05:00
Jean-Marc Valin
b778271d53
divide max payload too 2024-01-25 02:19:36 -05:00
Jean-Marc Valin
073bec9160
First shot at multi-frame CBR with DRED 2024-01-25 02:19:36 -05:00
Jean-Marc Valin
fe86db66f4
More activity handling to opus_encode_native_process() 2024-01-25 02:19:36 -05:00
Jean-Marc Valin
452abeeac9
Handle rangeFinal, delay_compensation 2024-01-25 02:19:36 -05:00
Jean-Marc Valin
fd88e22391
Refactor multi-frame encoding to be non-recursive 2024-01-25 02:19:35 -05:00
Jean-Marc Valin
f44069f58b
Splitting opus_encode_native() 2024-01-25 02:19:35 -05:00
Jean-Marc Valin
231caa3720
Fix Hybrid CBR with DRED and CELT->SILK redundancy
Need to move the redundant frame even in CBR because the hybrid
frame now gets encoded as VBR, with DRED picking up the rest.
Fixes an issue introduced in 4600e77.
2024-01-25 02:19:35 -05:00
Jean-Marc Valin
b63e22cff9
Fix desync for CBR DRED
The encoder wouldn't reserve enough bits for CELT, causing it
to not have enough bits to code the switching redundancy flag
when it should have.
2024-01-25 02:19:35 -05:00
Jean-Marc Valin
7b73c9bc7f
More DRED tuning 2024-01-25 02:16:28 -05:00
Jean-Marc Valin
19dd96b3fa
Initial DRED tuning
Adjust q0, qD and duration based on bitrate and loss.
2024-01-25 02:15:50 -05:00
Jan Buethe
7df2c67be1
fixes in osce python code 2024-01-23 17:10:34 +01:00
Jan Buethe
3499d0aac7
switched to smaller NoLACE model 2024-01-22 15:23:09 +01:00
Jan Buethe
ec04a94eb2
bugfix in SilkFeatureNetPL 2024-01-22 15:12:52 +01:00
Jan Buethe
5f8201c71e
OSCE_MAX_RNN_UNITS now derived from osce model parameters 2024-01-22 15:12:26 +01:00
Jean-Marc Valin
6a9831a6b0
Remove run-time code for old TF2 models
No longer needed now that PLC is trained with PyTorch stack
2024-01-21 02:11:50 -05:00
Jean-Marc Valin
1ddfcfd48c
Using PyTorch model (same architecture for now) 2024-01-21 02:11:50 -05:00
Jean-Marc Valin
e699263660
Improving PLC
Should handle the history in a more consistent way.
Slightly increase the model size and re-enable biased band loss in
training.
2024-01-21 02:05:26 -05:00
Jan Buethe
299e38cab7
Updated LACE and NoLACE models to version 2 2024-01-20 14:44:22 +01:00
Jean-Marc Valin
4f311a1ad4
PLC export script
mostly untested
2024-01-17 02:26:48 -05:00
Jean-Marc Valin
26ddfd7135
PyTorch code for training the PLC model
Should match the TF2 code, but mostly untested
2024-01-15 18:11:47 -05:00
Jean-Marc Valin
6ad03ae03e
Prevent overshoots from CELT PLC with prediction
Constrains the energy prediction to something safe.
2023-12-22 20:14:10 -05:00
Jean-Marc Valin
bd2e9a34fb
Add simulated loss to opus_demo 2023-12-22 03:42:16 -05:00
Jean-Marc Valin
caca188b5a
Make loss simulator standalone 2023-12-21 23:05:40 -05:00
Jean-Marc Valin
bd710e97f3
C code for packet loss simulator 2023-12-21 21:30:53 -05:00
Jean-Marc Valin
b923fd1e28
lossgen: better training, README.md 2023-12-21 18:01:57 -05:00
Jean-Marc Valin
c40add59af
lossgen: can now dump weights 2023-12-21 16:57:35 -05:00
Jean-Marc Valin
627aa7f5b3
Packet loss generation model 2023-12-21 15:34:33 -05:00
Jan Buethe
7d328f5bfa
Merge LACE/NoLACE under OSCE framework 2023-12-20 03:42:44 -05:00
Jean-Marc Valin
591c8bad70
Initialize padding pointers to zero
Avoids valgrind complaining about use of uninitialized memory
2023-12-16 22:04:47 -05:00
Michael Klingbeil
12fbd8111a
use opus_(re)alloc and opus_free for dnn and DRED related functions 2023-12-15 15:48:58 -05:00
Michael Klingbeil
f5a1efdc17
handle extensions in opus_repacketizer_out_range_impl 2023-12-13 21:49:43 -05:00
Michael Klingbeil
6d7ae213ce
add extensions of the first frame of a multiframe packet 2023-12-06 12:15:32 -05:00
Jean-Marc Valin
f27798da7b
Fix RESYNTH bit rot 2023-12-05 16:58:45 -05:00
Michael Klingbeil
c7bfc72d07 use vec_avx.h for MSVC builds 2023-11-29 21:40:21 -05:00
Michael Klingbeil
8090aaca9f don't redefine _mm_loadu_si32 on MSVC 2023-11-29 20:11:10 -05:00
Jean-Marc Valin
88fc293799
Defining __SSEx__ macros when needed for MSVC 2023-11-29 18:19:19 -05:00
Michael Klingbeil
f126bfc531 fix autogen.bat model download 2023-11-29 13:38:45 -05:00
Jean-Marc Valin
0d823c137c
Add a script to shrink the DNN models
Removes float debug weights, as well as useless spaces
2023-11-29 02:50:24 -05:00
Jean-Marc Valin
443510c2d1
Fix Windows path 2023-11-28 23:49:24 -05:00
Jean-Marc Valin
5578824258
Fix model download path for windows 2023-11-28 23:39:48 -05:00
Jean-Marc Valin
ddfa48046b
Opus github ci files
Use OPUS_DRED instead of NEURAL_FEC
2023-11-28 23:34:06 -05:00
Jean-Marc Valin
08eefed7cc
Add dotprod support to meson
Also default to disabling dnn float debugging
2023-11-28 23:18:50 -05:00
Jean-Marc Valin
c28b0f10bc
Trying to fix/update meson build
Still don't quite know what I'm doing
2023-11-28 19:14:27 -05:00
Jean-Marc Valin
147b72293f
Oops, fix the fixed-point build 2023-11-28 15:34:29 -05:00
Jean-Marc Valin
db26e381a4
Trying to use fma instructions when possible
Compilers sometimes replace vmlaq*() with fmul+fadd instead of fmla.
Trying to use vfmaq*() instead when possible.
2023-11-28 14:16:57 -05:00
Jean-Marc Valin
72cc88dfdd
FARGAN model update
Finished adversarial training on 800k model. Also, move weights to a new
location.
2023-11-28 04:11:53 -05:00
Jean-Marc Valin
df637713aa
Fixes for ARMv7/AArch32
1) Enable asm/intrinsics even for floating-point
2) Make sure ARMv8 asimd enables EDSP/MEDIA/Neon
3) Add dotp architecture to rtcd table since AArch *can* have dotp
2023-11-27 23:08:56 -05:00
Jean-Marc Valin
c143b72c4c
Enabling DNN optimizations for ARMv7
Adds RTCD tables for compute_activation() and compute_conv2d()
2023-11-27 18:08:20 -05:00
Jean-Marc Valin
ee1bb69f2d
Only force auto-vectorization for GCC >= 5.1 2023-11-27 17:55:27 -05:00
Jean-Marc Valin
7cc30ec681
Force vectorization for DNN primitives
Avoids having to write intrinsics for simple loops
2023-11-27 16:44:11 -05:00
Jean-Marc Valin
d4506af5a9
Enable floating-point approximations by default
Enabling only on platforms that have been tested just in case we
run into a non-IEEE754 platform where they would break.
2023-11-27 15:41:41 -05:00
Jean-Marc Valin
db6dad446c
Fix ARMv7 optimizations for DNN code 2023-11-26 22:21:29 -05:00
Jean-Marc Valin
cc11c078cd
First step towards DNN optimization for ARMv7 Neon
Still missing some intrinsics
2023-11-26 03:36:46 -05:00
Jean-Marc Valin
c9af8f80f7
Fix potential read out of bounds in fargan 2023-11-26 03:16:34 -05:00
Jean-Marc Valin
5c3795b287
Adding dotprod instruction to ARM rtcd
Used for DNN matrix multiplies
2023-11-25 03:15:51 -05:00
Jean-Marc Valin
984f35b313
Speed up cross-correlation normalization 2023-11-24 18:28:08 -05:00
Jean-Marc Valin
d65b7de3c5
Use arch-specific celt_inner_prod() for features 2023-11-24 18:08:01 -05:00
Jean-Marc Valin
ddbdbec444
Optimize biquad() to reduce dependency chains 2023-11-24 18:02:35 -05:00
Jean-Marc Valin
176507e4fc
Remove process_single_frame()
Code moved to compute_frame_features()
2023-11-24 13:33:04 -05:00
Jean-Marc Valin
9d0425d88b
Remove feature writing (fwrite()) from libopus 2023-11-24 13:23:52 -05:00
Jean-Marc Valin
f5821193e6
Using the same condition for enabling rtcd
for cmake, force PRESUME_SSE4_1 on PRESUME_AVX2
2023-11-21 16:59:28 -05:00
Jean-Marc Valin
3e18d96759
Trying to fix CMake build
aka banging on it until it builds on my machine.
Further improvements welcome
2023-11-21 16:13:20 -05:00
Jean-Marc Valin
239d223d84
Add rtcd for silk_inner_product_FLP() 2023-11-21 02:56:04 -05:00
Jean-Marc Valin
b93e4a149c
Start enabling AVX2 silk_inner_product_FLP()
Not yet with rtcd
2023-11-21 02:13:06 -05:00
Jean-Marc Valin
ed90060389
Avoids AVX2 optimizations being disabled 2023-11-21 01:26:40 -05:00
Jean-Marc Valin
c066af1bf1
Use SILK VBR when using CBR with DRED
DRED will absorb the bitrate variation
2023-11-21 01:13:30 -05:00
Jean-Marc Valin
6f99a3382d
Misc fixes on previous patch
Fixes warnings, undefined behaviour, and check-asm failure
2023-11-20 17:58:54 -05:00
Victor Ding
735c40706f
Optimize NSQ_del_dec() for AVX2
The optimization is bit-exact with the C function.

This optimization speeds up the SILK encoder (floating point) as follows:

AMD Zen:
Complexity 0-5 :      0%
Complexity 6-7 : 3 -  7%
Complexity 8-10: 8 - 15%

Intel Skylake:
Complexity 0-5 :       0%
Complexity 6-7 : 14 - 18%
Complexity 8-10: 17 - 22%

Adapted by Jean-Marc Valin
2023-11-20 17:55:37 -05:00
Jean-Marc Valin
452aa95211
AVX2 version of silk_inner_product_FLP()
Not hooked up
2023-11-20 17:53:18 -05:00
Jean-Marc Valin
1085126049
Remove AVX pitch code for fixed-point 2023-11-20 16:18:13 -05:00
Jean-Marc Valin
161358d6c4
Speeding up transient_analysis()
Reducing dependency chains
2023-11-20 16:12:46 -05:00
Jean-Marc Valin
f42940bef9
Make sure weights files are marked as modified 2023-11-20 14:13:23 -05:00
Jean-Marc Valin
d4b04d3275
Speed up silk_warped_autocorrelation_FLP()
Reducing the dependency chain between tmp1 and tmp2 at the
cost of an extra multiply.
2023-11-17 23:10:59 -05:00
Jean-Marc Valin
b2cfd87783
Add rtcd support for celt_pitch_xcorr_avx2() 2023-11-17 19:36:19 -05:00
Jean-Marc Valin
029385467d
Fix non-RTCD case when SSE is not assumed present
Should never occur on amd64, but it could on 32-bit x86
2023-11-17 18:08:10 -05:00
Jean-Marc Valin
7423ce59e5
Use celt_pitch_xcorr_avx2() when guaranteed
No RTCD yet
2023-11-17 16:59:45 -05:00
Jean-Marc Valin
a93b09e241
Adding RTCD for compute_conv2d() 2023-11-17 14:20:09 -05:00
Jean-Marc Valin
91d1f7539e
FARGAN model update 2023-11-16 12:45:50 -05:00
Jean-Marc Valin
7f7b2a1c66
Smaller version of fargan
800k parameters, 600 MFLOPS, with a receptive field of 3 feature vectors
2023-11-16 02:06:14 -05:00
Jean-Marc Valin
19a5d6ec03
Remove C99 comment 2023-11-16 01:27:40 -05:00
Jean-Marc Valin
4bfc0f8555
Adding RTCD for compute_activation() 2023-11-15 23:46:01 -05:00
Jean-Marc Valin
2e034f6f31
Adding RTCD for DNN code
Starting with compute_linear()
2023-11-15 23:45:32 -05:00
Jean-Marc Valin
b0620c0bf9
Using sparse GRUs in DRED decoder
Saves ~270 kB of weights in the decoder
2023-11-15 04:08:50 -05:00
Jean-Marc Valin
58923f61c2
Fix non-AVX builds 2023-11-11 03:24:21 -05:00
Jean-Marc Valin
77594bf158
Dumping RDOVAE stats from XML 2023-11-08 17:32:43 -05:00
Jean-Marc Valin
222662dac8
DRED: quantize scale and dead zone to 8 bits 2023-11-07 18:10:50 -05:00
Jan Buethe
4e104555e9
added weight export script for LACE/NoLACE 2023-11-07 15:12:12 +01:00
Jan Buethe
8af5c6b4a1
added transposed 1d convolutions to wexchange 2023-11-07 11:54:22 +01:00
Jean-Marc Valin
b6095cf22d
DRED code cleanup
Removing some indirections
2023-11-07 02:52:40 -05:00
Jean-Marc Valin
0ab0640d4a
Split stats in two and remove useless dimensions 2023-11-07 00:07:14 -05:00
Jan Buethe
2386a60ec6
updated moc to match results in ietf118 presentation 2023-11-06 17:50:48 +01:00
Jean-Marc Valin
544b3e576c
DRED: quantize r and p0 parameters with 8 bits
Only code non-degenerate symbols, which makes the encoder faster
2023-11-06 03:16:43 -05:00
Jean-Marc Valin
98b8be09d5
Vectorize DRED quantization 2023-11-06 03:12:00 -05:00
Felicia Lim
fa5e960cb1
Match silenced overflow checks in the sse4.1 version
Update silk/x86/NSQ_del_dec_sse4_1.c to match the remaining
silk/NSQ_del_dec.c changes made in
https://gitlab.xiph.org/xiph/opus/-/commit/c913dc38

Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
2023-11-03 23:38:24 -04:00
Felicia Lim
cfeddc49f9
Silence some overflow checks
Co-authored-by: James Zern <jzern@google.com>
2023-11-03 02:50:17 -04:00
Jean-Marc Valin
1ada7d4d6f
Vectorizing sgemv for multiples of 4 with SSE 2023-11-03 02:48:38 -04:00
Jean-Marc Valin
166a6c8e49
Fix silly bug in CELT Deep PLC 2023-11-03 02:48:38 -04:00
Jean-Marc Valin
74c67a8df5
Fix CELT PLC for single packet between losses
Avoids switching to CNG unless we just have a "refresh packet"
2023-11-03 02:48:37 -04:00
Jan Buethe
da60266f6e
updated moc method 2023-11-02 16:52:50 +01:00
Jean-Marc Valin
feb3282887
Don't try to use models that aren't loaded 2023-10-30 14:08:07 -04:00
Jean-Marc Valin
62b546436f
Speed up general case for float matrix multiply 2023-10-30 00:08:53 -04:00
Jean-Marc Valin
61fb3b1689
Don't use reserved identifiers for include guards 2023-10-29 21:19:51 -04:00
Jean-Marc Valin
d53531d0bd
Update blob loading code 2023-10-29 18:06:18 -04:00
Jean-Marc Valin
0b75501270
Use log approximation when possible 2023-10-29 02:38:21 -04:00
Jean-Marc Valin
4259d354df
Reusing already-optimized celt_fir() 2023-10-29 02:20:35 -04:00
Jean-Marc Valin
b22b11a412
Silence some warnings
Including removing useless code
2023-10-29 00:12:58 -04:00
Jean-Marc Valin
ddd5669e79
Pitch and fargan model updates
Removing one of the 2d conv layers for pitch estimation reduces
complexity without noticeable degradation. FARGAN model has more
adversarial training.
Also, no need for the double precision in the low-pass filter.
2023-10-28 23:33:47 -04:00
Jean-Marc Valin
c99054dad9
Fix CELT deep PLC bugs
The sinc filter offset was incorrectly handled. Since it perfectly compensates
for the analysis offset, nothing has to be done.
Also, the preemphasis memory was never initialized.
2023-10-27 01:29:56 -04:00
Jean-Marc Valin
ccb244a732
cleanup 2023-10-24 09:27:31 -04:00
Jean-Marc Valin
bc102f5fab
Slightly more continuous analysis 2023-10-24 09:19:51 -04:00
Jean-Marc Valin
64236e5201
Removing more useless code 2023-10-21 02:26:44 -04:00
Jean-Marc Valin
ef8115bd9a
Stop using tansig_table.h (both copies) 2023-10-20 22:07:58 -04:00
Jean-Marc Valin
a30c96aa8a
Cleanup 2023-10-20 21:31:19 -04:00
Jean-Marc Valin
88c58cfaf3
nnet.h no longer needs to #include "vec.h" 2023-10-20 17:25:27 -04:00
Jean-Marc Valin
1032e47d3f
more cleanup 2023-10-20 15:13:43 -04:00
Jean-Marc Valin
7f0d456c4b
Remove unneeded functions in nnet.c 2023-10-20 15:05:14 -04:00
Jean-Marc Valin
4598fe5409
Quantizing pitchdnn and rdovae weights 2023-10-20 12:54:13 -04:00
Jan Buethe
290be25b98
added 16kHz version of opus_compare in python 2023-10-20 14:24:27 +02:00
Jan Buethe
1accd2472e
finalized quantization option in export_rdovae_weights.py 2023-10-20 14:14:31 +02:00
Jean-Marc Valin
88c8b30785
Doing some unrolling on ARM/Neon 2023-10-20 03:28:17 -04:00
Jean-Marc Valin
f512c9206b
Unroll the 3x3 convolution case
Gets us about 2x speedup on x86
2023-10-20 01:33:49 -04:00
Jean-Marc Valin
d720955d61
Marking RDOVAE layers to quantize 2023-10-19 16:06:52 -04:00
Jan Buethe
60ac1c6c99
prepared quantization implementation for DRED 2023-10-19 21:54:39 +02:00
Jan Buethe
2192e85b91
restructured osce readme 2023-10-19 21:45:45 +02:00
Jan Buethe
055c683018
added LACE/NoLACE checkpoint URL 2023-10-19 21:34:13 +02:00
Jean-Marc Valin
8d43b185b2
Support OPUS_SET_COMPLEXITY() on decoder side
Controls whether deep PLC is enabled
2023-10-18 17:44:00 -04:00
Jean-Marc Valin
000af0340a
Avoiding work on the PLC update side
Shift computation to concealment
2023-10-18 17:44:00 -04:00
Jean-Marc Valin
d1309dd2b6
Simplifying the DRED/PLC code 2023-10-18 17:43:27 -04:00
Jean-Marc Valin
8c7c03e568
Don't call the libm tanh() 2023-10-18 01:44:50 -04:00
Jean-Marc Valin
0e397a7241
Making the build possible without the models
No dependency on the data files if no DNN code enabled
2023-10-18 00:43:03 -04:00
Jean-Marc Valin
aca04ce269
Default to int8 matrix multiplies when available 2023-10-17 01:52:32 -04:00
Jean-Marc Valin
f82f9d1ebb
oops 2023-10-16 23:10:23 -04:00
Jean-Marc Valin
e51d3da901
Fix tests 2023-10-16 23:01:17 -04:00
Jean-Marc Valin
828f2553d6
Remove references to kiss99 2023-10-16 22:13:19 -04:00
Jean-Marc Valin
e7c9bfbbe2
Finish removing LPCNet
And references to nnet_data.h
2023-10-16 22:01:09 -04:00
Jean-Marc Valin
ca035ef1d2
Force Deep PLC on when enabling DRED 2023-10-16 16:57:21 -04:00
Jean-Marc Valin
6471f8013d
update weight blob code 2023-10-15 03:53:49 -04:00
Jean-Marc Valin
da2121abff
Default Deep PLC/DRED to off 2023-10-15 03:43:42 -04:00
Jean-Marc Valin
6ea9312a93
Only compile PLC/DRED conditionally 2023-10-15 03:39:40 -04:00
Jean-Marc Valin
9ed3c7c982
Rename ENABLE_NEURAL_FEC to ENABLE_DRED
Signed-off-by: Jean-Marc Valin <jmvalin@amazon.com>
2023-10-15 02:55:01 -04:00
Jean-Marc Valin
5c24975c3a
Rename NEURAL_PLC to ENABLE_DEEP_PLC
Signed-off-by: Jean-Marc Valin <jmvalin@amazon.com>
2023-10-15 02:54:55 -04:00
Jean-Marc Valin
98726c4ca6
Fix PLC in opus_demo when DRED is not present 2023-10-14 23:19:28 -04:00
Jean-Marc Valin
cbd3a80552
minor tweaks 2023-10-14 21:19:46 -04:00
Jean-Marc Valin
0b7c02caf4
Remove LPCNet from the build 2023-10-14 17:21:30 -04:00
Jean-Marc Valin
c0f9436623
Remove FWGAN from the build for now 2023-10-14 17:17:40 -04:00
Jean-Marc Valin
60f151b87d
Use FARGAN instead of LPCNet in DRED/PLC 2023-10-14 16:42:34 -04:00
Jean-Marc Valin
35cb8d7f66
C implementation of FARGAN 2023-10-14 16:42:33 -04:00
Jean-Marc Valin
9e76a7bfb8
update fargan to match version 45 2023-10-10 00:51:57 -04:00
Jean-Marc Valin
d1c5b32add
Fix warning from casting between 1D and 2D arrays 2023-10-07 18:52:22 -04:00
Jean-Marc Valin
58f3647a04
Fix misc warnings 2023-10-07 17:54:17 -04:00
Jean-Marc Valin
81624caf9c
Silencing alignment warnings on x86 intrinsics
Those intrinsics don't actually require alignment so we're OK
2023-10-07 17:45:39 -04:00
Jan Buethe
0563d71b25
updated osce readme 2023-10-07 18:52:38 +02:00
Jean-Marc Valin
8f9a7e23c8
New model with wider range of bitrates
Using a max lambda of 0.04
2023-10-06 03:50:46 -04:00
Jean-Marc Valin
f0ec990dba
Switching to neural pitch estimator
Remove old pitch estimator and retrain all models
2023-10-06 03:14:56 -04:00
Jean-Marc Valin
da7f4c6c99
update model 2023-10-02 01:47:46 -04:00
Jean-Marc Valin
27663d3641
Using a DenseNet for DRED 2023-10-02 01:43:44 -04:00
Jean-Marc Valin
8e8edf71bd
Remove unneeded (I think) tanh at the end 2023-10-01 21:34:58 -04:00
Jean-Marc Valin
33adba02c7
First version of pitch DNN C code
Totally untested -- most likely doesn't work
2023-10-01 03:59:17 -04:00
Jean-Marc Valin
966a2d22eb
Code for 2D convolution
Untested
2023-09-30 23:43:51 -04:00
Jean-Marc Valin
f3b86f9414
Fix model saving 2023-09-30 02:48:26 -04:00
Jan Buethe
0459a572f5
updated PitchDNN export script 2023-09-29 15:34:59 +02:00
Jan Buethe
ce28695844
refactoring and cleanup 2023-09-29 15:31:45 +02:00
Jan Buethe
4901445490
fixed type in error message 2023-09-29 14:34:11 +02:00
Jan Buethe
c5c214df1b
added rudimentary support for dumping nn.Conv2d layers 2023-09-29 14:25:26 +02:00
Jean-Marc Valin
25c65a0c0b
Fix stats indexing for state 2023-09-27 19:46:40 -04:00
Jean-Marc Valin
9a7bb764d4
No features skip needed to align pitch features 2023-09-27 13:01:21 -04:00
Jean-Marc Valin
a6b4fe375a
Script to compute the groundtruth data using CREPE 2023-09-27 13:00:12 -04:00
Jean-Marc Valin
217c40d4ac
dump cleanup, change alignment
Remove already-disabled silence chopping code and make time alignment
the same as lpcnet_demo -features
2023-09-27 12:57:08 -04:00
Jan Buethe
ae0a140c40
updated default values in adv_train_fargan.py 2023-09-26 21:42:01 +02:00
Jean-Marc Valin
a71ba10f89
Fixes stack overflow for some custom modes
This only affects custom modes (builds with --enable-custom-modes) with frame
sizes 2.5, 5, 10, and 20ms and sampling rates below 40 kHz. The problem does
not affect normal use of Opus (using OpusEncoder/OpusDecoder) even when built
with custom modes enabled, but only special applications that use
OpusCustomEncoder/OpusCustomDecoder.
2023-09-26 13:57:04 -04:00
Jan Buethe
41a4c9515d
changed checkpoint format 2023-09-26 12:12:47 -04:00
Jean-Marc Valin
733a095ba2
Adapting to new data format/model 2023-09-26 12:12:47 -04:00
Krishna Subramani
f38b4a317f
Python code for neural pitch 2023-09-26 12:12:47 -04:00
Jean-Marc Valin
d88dd89358
Add noise augmentation to pitch dumping 2023-09-26 12:12:46 -04:00
Jean-Marc Valin
0100cd95de
Add pitch feature computation 2023-09-26 12:12:46 -04:00
Jean-Marc Valin
96d89e99d8
Band-limiting the voicing parameter 2023-09-26 12:12:46 -04:00
Jean-Marc Valin
c4b83ae62d
RDOVAE model update 2023-09-26 12:11:25 -04:00
Jean-Marc Valin
574c766c0c
Infinite loops are bad 2023-09-23 17:48:12 -04:00
Jan Buethe
00580a63aa
bugfix 2023-09-22 11:39:22 +02:00
Jan Buethe
aad74fdfd6
pulse-related bugfix 2023-09-22 11:18:41 +02:00
Jan Buethe
0a92bc5eaa
more lavoce stuff 2023-09-21 15:01:11 +02:00
Jean-Marc Valin
52c15629ee
Handle the case where the initial state didn't fit 2023-09-20 18:04:08 -04:00
Jean-Marc Valin
71da9781eb
updated model 2023-09-20 18:04:08 -04:00
Jean-Marc Valin
b88644b9c7
Quantizing initial state with rdovae too
More efficient than PVQ
2023-09-20 18:04:08 -04:00
Jan Buethe
2ec31cc5cc
added FARGAN adversarial training script 2023-09-20 19:06:10 +02:00
Jan Buethe
82f48d368b
removed trailing whitespace in fargan
Signed-off-by: Jan Buethe <jbuethe@amazon.de>
2023-09-13 16:57:28 +02:00
Jan Buethe
e7beaec3fb
integrated JM's FFT ada conv
Signed-off-by: Jan Buethe <jbuethe@amazon.de>
2023-09-13 16:31:29 +02:00
Jean-Marc Valin
b24c7b433a
Remove --has-gain and --passthrough-size args
Couldn't be used anymore
2023-09-12 22:58:34 -04:00
Jean-Marc Valin
2f8b36d691
Add conditioning interpolation, fwconv layer 2023-09-12 22:50:48 -04:00
Jean-Marc Valin
72c5ea4129
Only use one frame of pre-loading 2023-09-12 22:50:48 -04:00
Jean-Marc Valin
108b75c4b1
Randomly double the training sequence length
Helps with stability with little cost in training speed
2023-09-12 22:50:48 -04:00
Jean-Marc Valin
d54b9fb49a
Adds skip connections 2023-09-12 22:50:47 -04:00
Jean-Marc Valin
fb570ed8bb
5-tap pitch predictor 2023-09-12 22:50:47 -04:00
Jean-Marc Valin
2e0c1ad3ae
Also use previous frame 2023-09-12 22:50:47 -04:00
Jean-Marc Valin
4f63743f8f
explicit signal gain, explicit pitch predictor 2023-09-12 22:50:46 -04:00
Jean-Marc Valin
1b13f6313e
FARGAN initial commit in Opus
Copied/adapted from LPCNet repo
2023-09-12 22:50:46 -04:00
Jan Buethe
4f4b624209
fix
Signed-off-by: Jan Buethe <jbuethe@amazon.de>
2023-09-12 16:28:58 +02:00
Jan Buethe
5467539c15
added requirements.txt to osce
Signed-off-by: Jan Buethe <jbuethe@amazon.de>
2023-09-12 16:22:49 +02:00
Jan Buethe
2f290d32ed
added more enhancement stuff
Signed-off-by: Jan Buethe <jbuethe@amazon.de>
2023-09-12 14:50:24 +02:00
Jan Buethe
7b8ba143f1
added copyright headers
Signed-off-by: Jan Buethe <jbuethe@amazon.de>
2023-09-05 22:31:19 +02:00
Jan Buethe
35ee397e06
added LPCNet torch implementation
Signed-off-by: Jan Buethe <jbuethe@amazon.de>
2023-09-05 12:29:38 +02:00
Jan Buethe
90a171c1c2
brought NoLACE up to date
Signed-off-by: Jan Buethe <jbuethe@amazon.de>
2023-09-05 12:14:17 +02:00
Michael Klingbeil
d431c321f1
Fixes vnni macro redefinition with clang 2023-09-01 23:18:21 -04:00
Jan Buethe
4a47b1a15b
renamed ShapeNet to NoLACE
Signed-off-by: Jan Buethe <jbuethe@amazon.de>
2023-09-01 16:54:49 +02:00
Jean-Marc Valin
1fbdec8755
Changing DRED exp. ID so we can reserve 127
There was a suggestion to use 127 for extending the extension ID space
2023-08-09 18:28:02 -04:00
Jean-Marc Valin
bbe4dcc443
Don't allocate DRED bits if we don't have enough 2023-08-09 18:25:52 -04:00
Jean-Marc Valin
0886828eed
Making it easier to remove DRED experimental ID
When ready, change DRED_EXTENSION_ID to the final ID, remove
DRED_EXPERIMENTAL_VERSION completely, and change DRED_EXPERIMENTAL_BYTES
to zero (eventually remove it).
2023-08-09 18:25:52 -04:00
Jan Buethe
5160d7fdfa
improved auto-scaling in wexchange 2023-08-08 10:46:11 +02:00
Jean-Marc Valin
6cba42f999
Add fwgan_load_model() 2023-08-04 16:16:58 -04:00
Jean-Marc Valin
022f2b7ebc
int version for convenience 2023-08-04 14:34:09 -04:00
Jean-Marc Valin
f7ee713742
Adding some comments 2023-08-04 01:32:15 -04:00
Jean-Marc Valin
bd23d9115b
Continuation now working
Added required filtering/delay/memory to get continuation
2023-08-04 01:20:21 -04:00
Jean-Marc Valin
6a184fc764
More scaling/filtering to run_fwgan_subframe() 2023-08-03 16:42:59 -04:00
Jean-Marc Valin
b1a601fb0c
More FWGAN refactoring 2023-08-03 16:14:04 -04:00
Jean-Marc Valin
5179896be9
FWGAN refactoring 2023-08-03 15:13:44 -04:00
Jean-Marc Valin
b0e1a2eb95
Applying continuation after first subframe
Continuation matches Python code now
2023-08-03 02:06:15 -04:00
Jean-Marc Valin
3eac8c12e4
Avoid sin()/cos() calls for pitch reference 2023-08-01 23:48:34 -04:00
Jean-Marc Valin
83e95a5ce6
Don't compute linear activation in-place
saves a few cycles
2023-08-01 21:26:16 -04:00
Jean-Marc Valin
bf5eb5bf8d
Add FWGAN to lpcnet_demo 2023-08-01 19:19:14 -04:00
Jean-Marc Valin
e62fd5c5c9
C implementation of FWGAN 2023-08-01 19:19:13 -04:00
Jean-Marc Valin
155367d280
Skeleton for FWGAN code 2023-08-01 19:16:27 -04:00
Jean-Marc Valin
e9f8402a71
Handle float matrices with multiple of 8 rows 2023-08-01 19:16:27 -04:00
Jean-Marc Valin
5eaa4a504f
Add Gated Linear Unit (GLU) 2023-08-01 17:52:49 -04:00
Jan Buethe
5e04540573
updated dump_model_weights.py 2023-08-01 21:58:08 +02:00
Jan Buethe
902d763622
added FWGAN weight dumping code 2023-08-01 18:18:28 +02:00
Jan Buethe
9691440a5f
updated wexchange version number 2023-08-01 10:42:55 +02:00
Jan Buethe
e916cf426d
added .copy() to weights in wexchange 2023-08-01 10:35:29 +02:00
Jan Buethe
1fbc5fdd4e
added auto-scaling to wexchange 2023-08-01 08:28:25 +02:00
Jan Buethe
aca390df18
fixed wexchange for GRUs without bias 2023-07-28 17:20:23 -07:00
Jean-Marc Valin
b50ddccf0e
Fixes weights parsing with DISABLE_DEBUG_FLOAT
It's the name that's NULL when an array isn't found
2023-07-28 18:29:33 -04:00
Jean-Marc Valin
8cc769ea3a
Switch RDO-VAE decoder to LinearLayer 2023-07-28 02:21:12 -04:00
Jean-Marc Valin
ad057305f7
Make RDOVAE encoder use LinearLayer directly 2023-07-27 20:16:02 -04:00
Jan Buethe
eb72d29a15
Support for dumping LinearLayer in weight-exchange 2023-07-27 19:55:17 -04:00
Jean-Marc Valin
b075eb535a
oops, fix linear_init() 2023-07-27 19:54:10 -04:00
Jean-Marc Valin
b1f94b1e92
Add compute_generic_dense()
And missing prototypes
2023-07-27 19:54:10 -04:00
Jean-Marc Valin
60d67b1112
New compute_generic_conv1d() 2023-07-27 19:54:10 -04:00
Jean-Marc Valin
4171532c80
Add int8 type 2023-07-27 19:54:09 -04:00
Jan Buethe
101fd2411a
added dataset for SILK to LPCNet feature conversion 2023-07-24 14:06:07 -07:00
Jean-Marc Valin
8e7080903d
Make float_weights optional 2023-07-23 18:25:14 -04:00
Jean-Marc Valin
d15be43af4
Make bias/subias/diag/scale optional 2023-07-23 18:11:15 -04:00
Jean-Marc Valin
9d40e5cb08
Add loading for LinearLayer
Untested
2023-07-23 14:49:13 -04:00
Jan Buethe
587c1020fe
clean-up 2023-07-22 15:16:23 -07:00
Jan Buethe
7487168d52
added copyright headers 2023-07-22 14:55:41 -07:00
Jan Buethe
4f3761b019
added verbose option to run_test.py 2023-07-22 14:47:49 -07:00
Jan Buethe
0763a8f785
replaces multiprocessing module by multiprocess module in testsuite 2023-07-22 14:45:43 -07:00
Jan Buethe
81fe5f0261
added matplotlib to testsuite requirements (needed by librosa) 2023-07-22 14:13:08 -07:00
Jan Buethe
f9aee675dc
added ShapeNet and ShapeUp48 models 2023-07-22 13:31:22 -07:00
Jan Buethe
57ab4949a8
removed whitespace 2023-07-22 13:19:37 -07:00
Jan Buethe
ba44bac435
added testsuite 2023-07-22 13:10:54 -07:00
Jan Buethe
0e5c103d1a
added weight-exchange library 2023-07-22 13:01:06 -07:00
Jean-Marc Valin
8f7c72a662
Always define USE_SU_BIAS in vec_avx.h 2023-07-22 14:56:05 -04:00
Jean-Marc Valin
4710bdf712
Add SSE2 support
Not so much for old machines, as for getting decent performance
when not setting -march= (SSE2 is part of the amd64 ABI).
2023-07-22 14:56:05 -04:00
Jean-Marc Valin
9261eb5c37
Refactoring to make VNNI and SSE2 easier 2023-07-22 14:56:04 -04:00
Jan Buethe
be5f58d679
neon related alignment requirement warning fix 2023-07-22 11:11:49 -07:00
Jean-Marc Valin
cfc118d52e
Merge branch 'dred_bitrate4' into opus-ng 2023-07-20 01:42:07 -04:00
Jean-Marc Valin
62cd1c963b
Transition to LinearLayer and remove unused code 2023-07-20 01:01:34 -04:00
Jean-Marc Valin
f5a68a41b0
Add generic linear layer
Should be able to handle all previous GRU variants and more.
2023-07-20 01:01:32 -04:00
Jean-Marc Valin
8423ef1de2
Remove unused code 2023-07-20 01:01:29 -04:00
Jean-Marc Valin
4600e77583
Implement allocation of the DRED bits
Should work for both CBR and VBR. In the CBR case, we can make
CELT VBR and use DRED to fill the rest.
2023-07-12 17:06:59 -04:00
Jean-Marc Valin
9fdc489c35
Fix a CELT encoder CBR corner case
If configuring CELT for CBR but controlling the bitrate with
OPUS_SET_BITRATE rather than nbCompressedBytes, then the range
coder buffer would never get resized. AFAICT this could never
be triggered in Opus because CBR was also controlled by
nbCompressedBytes.
2023-07-12 17:06:59 -04:00
Jean-Marc Valin
1736ae3f5e
Properly account for SILK bits in CELT CBR code
CELT encoding would just fail when setting CELT to CBR in hybrid mode.
It was never a problem because hybrid CBR was always used with
OPUS_BITRATE_MAX.
2023-07-12 17:06:59 -04:00
Jean-Marc Valin
aa8b99cbc3
Refactoring: simplifying CELT encoder settings
Avoids interactions with redundancy settings
2023-07-12 17:06:58 -04:00
Jean-Marc Valin
650e290103
Reserve some bits for DRED
SET_BITRATE is now the total bitrate again
2023-07-12 17:06:58 -04:00
Jean-Marc Valin
363275b5b3
Fix DRED failure
We weren't reserving enough bytes for the DRED extension
2023-07-12 17:06:58 -04:00
Jean-Marc Valin
ec249d25f9
Make hybrid CBR use VBR SILK 2023-07-12 17:06:57 -04:00
Jean-Marc Valin
fe84c3bcee
Make "VBR with cap" less aggressive
The bits we don't use won't be wasted, so it's less important to
get exactly the optimal number of bits below the cap.
2023-07-12 17:06:57 -04:00
Jean-Marc Valin
eb1759736f
Some general SILK CBR tuning
The gain*2 when overshooting was too aggressive and the undershoot
case wasn't aggressive enough. This now seems to work reasonably well.
2023-07-12 17:06:47 -04:00
Jean-Marc Valin
bbb4cf554c
Fix DRED/neural PLC for SILK stereo
Don't attempt to run the neural PLC on the side channel since
we only have one state.
2023-07-12 03:13:58 -04:00
Jean-Marc Valin
3510404ad5
Properly compute and use the DRED offset field
Also, don't code DRED that's redundant with the main packet
2023-07-03 02:15:40 -04:00
Michael Klingbeil
17bb81934b
add undefs in mathops.h and remove OPUS_INLINE in vec_neon.h 2023-06-30 22:54:09 -04:00
Jan Buethe
105e1d83fa Opus ng lace 2023-06-30 21:15:56 +00:00
Jean-Marc Valin
178672ed18
Silencing some warnings 2023-06-28 17:16:53 -04:00
Jean-Marc Valin
2824bd1f66
Adjusting offsets to synthesize 10 ms at a time
Should make synthesis easier in the future
2023-06-28 17:01:17 -04:00
Jean-Marc Valin
f9f35904f4
No longer need to #include "common.h" 2023-06-27 17:13:06 -04:00
Jean-Marc Valin
f94bd54302
Handle the sign of the DRED offset 2023-06-27 16:43:39 -04:00
Marcus Asteborg
115edd9c06
Move build and tests for x86 to avx2 machines. 2023-06-27 13:32:55 -07:00
Jean-Marc Valin
9f4fc8bbfa
Replacing RNN_ macros with existing OPUS_ ones 2023-06-23 00:02:12 -04:00
Jean-Marc Valin
5af9e9524a
no longer need that hack 2023-06-22 18:12:29 -04:00
Jean-Marc Valin
b64a89feca
Using opus_int16 instead of short in LPCNet code 2023-06-22 18:07:26 -04:00
Jean-Marc Valin
abe817c3fc
Remove pcount that's no longer useful
We're back to processing 10 ms at a time and have no need for 40-ms
"superframes".
2023-06-22 18:07:25 -04:00
Marcus Asteborg
f36685fc97
Remove trailing whitespace in dnn 2023-06-22 13:58:37 -07:00
Marcus Asteborg
26ab10d0c8
Remove submodule init 2023-06-22 13:58:33 -07:00
Jean-Marc Valin
f12371bcee
Remove the duplicated pitch.h
Add arch to the LPCNet states, but right now it's always set to zero
2023-06-22 15:41:28 -04:00
Jean-Marc Valin
bde43ed7cf
misc fixes 2023-06-22 13:55:04 -04:00
Jean-Marc Valin
41700b9ac6
oops, forgot to update that 2023-06-22 13:05:59 -04:00
Jean-Marc Valin
abf60c33f7
remove LPCNET_EXPORT 2023-06-22 03:41:30 -04:00
Jean-Marc Valin
3c9ada30ef
Move LPCNet headers 2023-06-22 03:38:24 -04:00
Jean-Marc Valin
42a0972a11
Remove more useless code 2023-06-22 03:31:33 -04:00
Jean-Marc Valin
db48088e9f
Should fix some missing #include problems 2023-06-22 03:22:20 -04:00
Jean-Marc Valin
254b5ee7b2
Remove more LPCNet useless files (e.g. build) 2023-06-22 01:54:26 -04:00
Jean-Marc Valin
ab6f93ad30
Remove LPCNet's copy of arch.h 2023-06-22 01:23:02 -04:00
Jean-Marc Valin
bf0eaada0a
Remove useless LPCNet files 2023-06-22 01:14:08 -04:00
Jean-Marc Valin
07bb3f01b4
cleanup: get rid of non-causal PLC and DC handling 2023-06-22 00:58:25 -04:00
Jean-Marc Valin
247e6a587c
Remove support for LPCNet quantization 2023-06-22 00:58:24 -04:00
Jean-Marc Valin
bfa01f1a1c
Update build for LPCNet merge 2023-06-21 17:33:53 -04:00
Jean-Marc Valin
7458c1bdc6
Merge LPCNet repo into Opus
Repo was filtered to move everything to the dnn/ directory
2023-06-21 13:08:08 -04:00
Marcus Asteborg
7dae7b178c
Add missing include paths for lpcnet for test binaries 2023-06-21 05:59:39 -07:00
Marcus Asteborg
10fd05d999
Retire Visual Studio solution, use CMake or Meson as replacement 2023-06-21 05:59:38 -07:00
Marcus Asteborg
a8de4d8f91
Remove Appveyor CI, replaced with Github actions 2023-06-21 05:59:38 -07:00
Marcus Asteborg
763bd34549
Remove Travis CI, replaced with Github actions 2023-06-21 05:59:38 -07:00
Marcus Asteborg
ca62cca926
Add note about other build systems in README 2023-06-21 05:59:37 -07:00
Marcus Asteborg
62fcd556c8
Repo CI for Github actions 2023-06-21 05:59:37 -07:00
Marcus Asteborg
8760490d64
Autotools CI for Github actions 2023-06-21 05:59:37 -07:00
Marcus Asteborg
acb67a873f
CMake CI for Github actions 2023-06-21 05:59:37 -07:00
Marcus Asteborg
f730f47a11
Rename DRED CI 2023-06-21 05:59:33 -07:00
Jean-Marc Valin
0f6b202312
Fix segfault 2023-06-20 14:08:24 -04:00
Jean-Marc Valin
cbb9f535c2
Add support for DRED in CELT 2023-06-20 03:54:58 -04:00
Jean-Marc Valin
3ea6a6dc60
Move PLC prefilter compensation
Makes it possible to run the pre-emphasis with the correct period/coeffs
2023-06-20 03:36:22 -04:00
Jean-Marc Valin
a08c5cef55
Always use at least one DRED feature vector 2023-06-20 01:02:35 -04:00
Jean-Marc Valin
152d57de5c
Rename LPC_ORDER to CELT_LPC_ORDER to avoid clash 2023-06-19 15:55:39 -04:00
Jean-Marc Valin
dd3ec4fab3
No longer need to force custom modes 2023-06-19 14:01:38 -04:00
Marcus Asteborg
44ed6bc3f7
Add missing dependency wget 2023-06-18 18:56:25 -07:00
Marcus Asteborg
7e78f34d96
Add Meson build documentation 2023-06-18 18:56:25 -07:00
Marcus Asteborg
07f201a973
Add dependencies for model download in Gitlab CI 2023-06-16 20:13:35 -07:00
Jean-Marc Valin
93d0ef5b2e
Making DRED encoder run for CELT too 2023-06-16 13:49:15 -04:00
Marcus Asteborg
3b147202ee
Github actions for neural fec 2023-06-16 13:02:30 -04:00
Marcus Asteborg
d819cde563
Autogen.bat for windows to download models 2023-06-16 13:02:29 -04:00
Marcus Asteborg
84484a6790
CMake - change avx to avx2 and fma 2023-06-16 13:02:29 -04:00
Jean-Marc Valin
e9dc5d1793
Make AVX2 test actually include AVX2 and FMA 2023-06-16 13:02:28 -04:00
Jean-Marc Valin
87427377cd
fix AVX2 compile option 2023-06-16 13:02:28 -04:00
Jean-Marc Valin
124be9ea52
lpcnet update (optimizations) 2023-06-16 13:02:28 -04:00
Jean-Marc Valin
9a2c0e34ca
Detect AVX/AVX2/FMA instead of just AVX 2023-06-16 13:02:27 -04:00
Jean-Marc Valin
31a8028e97
AVX version of celt_pitch_xcorr()
Not used by anything yet
2023-06-16 13:02:27 -04:00
Jean-Marc Valin
02f352c75e
Fix non-DRED build 2023-06-16 13:02:27 -04:00
Jean-Marc Valin
3e6a736cbb
LPCNet update (symbols) 2023-06-16 13:02:26 -04:00
Jean-Marc Valin
27b1286917
Remove LPCNet VQ codebooks from build 2023-06-16 13:02:26 -04:00
Jean-Marc Valin
a8cb719d05
Add blob loading for DRED encoder and decoder 2023-06-16 13:02:26 -04:00
Jean-Marc Valin
0dad5e06ab
Add resampling/downmix support to DRED encoder
8k, 12k and stereo are mostly untested
2023-06-16 13:02:25 -04:00
Jean-Marc Valin
8b42b00647
Fix DRED for 10-ms frames 2023-06-16 13:02:25 -04:00
Jean-Marc Valin
28503d92e8
Fix units for dred_offset in API 2023-06-16 13:02:25 -04:00
Jean-Marc Valin
0f70854396
Handle DRED frames != 20 ms 2023-06-16 13:02:24 -04:00
Jean-Marc Valin
5b547e0d2e
Pull DRED encoding up to Opus layer 2023-06-16 13:02:24 -04:00
Jean-Marc Valin
114c8aa251
Update RDOVAE model to blob format 2023-06-16 13:02:24 -04:00
Marcus Asteborg
7db2f0c6cf
add neural fec to cmake in gitlab ci
Signed-off-by: Marcus Asteborg <maastebo@microsoft.com>
Signed-off-by: Jean-Marc Valin <jmvalin@amazon.com>
2023-06-16 13:02:23 -04:00
Marcus Asteborg
75872236da
add option for neural fec and add dred test
Signed-off-by: Marcus Asteborg <maastebo@microsoft.com>
Signed-off-by: Jean-Marc Valin <jmvalin@amazon.com>
2023-06-16 13:02:23 -04:00
Marcus Asteborg
4d8b52b804
change comment to dred
Signed-off-by: Marcus Asteborg <maastebo@microsoft.com>
Signed-off-by: Jean-Marc Valin <jmvalin@amazon.com>
2023-06-16 13:02:23 -04:00
Jean-Marc Valin
6fb930956f
Add blob loading support to decoder 2023-06-16 13:02:22 -04:00
Jean-Marc Valin
d43eb241e3
Reset PLC instead of reinitializing 2023-06-16 13:02:22 -04:00
Jean-Marc Valin
e57dfb824c
Move LPCNet PLC state to top-level decoder
So it can be used outside of the SILK PLC
2023-06-16 13:02:22 -04:00
Jean-Marc Valin
ef4f459ec3
Cleanup (no change in behaviour) 2023-06-16 13:02:21 -04:00
Marcus Asteborg
40b944b0ba
Actually fix timeout for extension test
Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
2023-06-16 13:02:21 -04:00
xnorpx
284471a6da
increase timeout for extension test
Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
2023-06-16 13:02:21 -04:00
Jean-Marc Valin
c46d2d2762
LPCNet update 2023-06-16 13:02:20 -04:00
Jean-Marc Valin
38d226383a
Fix LPCNet warnings 2023-06-16 13:02:20 -04:00
Jean-Marc Valin
e52d240e88
Add blob writing 2023-06-16 13:02:20 -04:00
Jean-Marc Valin
9c137d4933
Add offset and quantization to DRED bitstream
Brings the DRED implementation in line with the -00 draft
2023-06-16 13:02:19 -04:00
Jean-Marc Valin
4fbf6dcaf4
Adding DRED garbage decoding test 2023-06-16 13:02:19 -04:00
Jean-Marc Valin
5eefa61adc
Making the extension test a bit shorter 2023-06-16 13:02:19 -04:00
Jean-Marc Valin
b2faa89507
Test parsing of random extensions 2023-06-16 13:02:18 -04:00
Jean-Marc Valin
e4ca78928c
LPCNet update 2023-06-16 13:02:18 -04:00
Michael Klingbeil
fccd7272b9
fix bug in extensions test
Signed-off-by: Jean-Marc Valin <jmvalin@amazon.com>
2023-06-16 13:02:18 -04:00
Jean-Marc Valin
b1394c5811
Add missing include guards and config.h 2023-06-16 13:02:17 -04:00
Jean-Marc Valin
8dc345fe59
Fix whitespace errors 2023-06-16 13:02:17 -04:00
Jean-Marc Valin
9ec1683f32
A non-bit-exact C90 fix 2023-06-16 13:02:17 -04:00
Jean-Marc Valin
47b78667a1
C90 fixes 2023-06-16 13:02:16 -04:00
Jean-Marc Valin
24d05a1b92
Misc LPCNet windows fixes 2023-06-16 13:02:16 -04:00
Jean-Marc Valin
c64b321e7a
DRED API update
output() renamed to decode(), dred objects using alloc() and free(),
OpusDRED now passed as const for decoding.
2023-06-16 13:02:16 -04:00
Tim-Philipp Müller
39f68ce356
meson: fix build for lpcnet additions 2023-06-16 13:02:15 -04:00
Jean-Marc Valin
82b945ea38
update to PLC blob 2023-06-16 13:01:42 -04:00
Jean-Marc Valin
8c7de54ed1
Oops, update the model link 2023-06-16 13:01:41 -04:00
Jean-Marc Valin
fa86a030e7
Add blob validation (model update required) 2023-06-16 13:01:41 -04:00
Jean-Marc Valin
5b2869ee9d
Fix copy-paste error 2023-06-16 13:01:41 -04:00
Jean-Marc Valin
d8290d3fc2
Deferred updates in PLC 2023-06-16 13:01:40 -04:00
Jean-Marc Valin
26d3a3593e
Oops, fixed build with DRED disabled 2023-06-16 13:01:40 -04:00
Jean-Marc Valin
d7ce091b1c
Don't run the DRED encoder unless DRED is enabled 2023-06-16 13:01:40 -04:00
Jean-Marc Valin
e0c6eae8cc
Update the API to add an OpusDREDDecoder 2023-06-16 13:01:39 -04:00
Jean-Marc Valin
a561f120c9
Fixing model download 2023-06-16 13:01:39 -04:00
Jean-Marc Valin
cb7f7056de
Update LPCNet for blob 2023-06-16 13:01:39 -04:00
Jean-Marc Valin
0fc69e5feb
Remove assert in opus_packet_extensions_generate()
Makes it possible to do a "dry run" generation for in-place padding
2023-06-16 13:01:38 -04:00
Jean-Marc Valin
90d174f5f5
Fix test_opus_extensions.c
can't link with libopus since it's already #including C files
2023-06-16 13:01:38 -04:00
Michael Klingbeil
4b9c620a8d
Fixup extensions implementation and add unit tests
Add unit tests for opus_packet_extensions_count, opus_packet_extensions_parse,
and opus_packet_extensions_generate. Add various assertions for NULL buffers or
negative length arguments. Add explicit check for invalid length in
opus_packet_extensions_count. Check for extension id >127 in
opus_packet_extensions_generate. Check for invalid length input in
opus_packet_extensions_generate.

Signed-off-by: Jean-Marc Valin <jmvalin@amazon.com>
2023-06-16 13:01:38 -04:00
Jean-Marc Valin
df2f98f809
Add LPCNet headers to make dist 2023-06-16 13:01:37 -04:00
Jean-Marc Valin
3c74ef1d73
Increase state size limit for API test 2023-06-16 13:01:37 -04:00
Jean-Marc Valin
bb225aa2a8
Bring in LPCNet constant tables 2023-06-16 13:01:37 -04:00
Jean-Marc Valin
aedab6a538
Avoid warnings when not compiling with DRED 2023-06-16 13:01:36 -04:00
Jean-Marc Valin
3d56939193
Fix dred object leak in opus_demo.c 2023-06-16 13:01:36 -04:00
Jean-Marc Valin
c8a3659265
No longer include pointers in the SILK structs
Everything can now be shallow-copied again
2023-06-16 13:01:35 -04:00
Jean-Marc Valin
71d5edcffb
Directly include LPCNet state in SILK structs
Makes shallow copy of decoder possible again
2023-06-16 13:01:35 -04:00
Jean-Marc Valin
112b160a28
Rename Opus DNN functions
Avoids conflict with LPCNet, so that LPCNet headers can be included in
Opus
2023-06-16 13:01:35 -04:00
Jean-Marc Valin
322e5bb615
Update LPCNet 2023-06-16 13:01:34 -04:00
Jean-Marc Valin
b14cbcc71f
More cleanup 2023-06-16 13:01:34 -04:00
Jean-Marc Valin
4402b00fd3
Add API doc, change DRED offsets to be samples 2023-06-16 13:01:34 -04:00
Jean-Marc Valin
75f0dbcad0
Cleanup, remove "old" DRED API 2023-06-16 13:01:33 -04:00
Jean-Marc Valin
5e2440fafc
Complete switching to DRED object 2023-06-16 13:01:33 -04:00
Jean-Marc Valin
ebfa29c7e1
Remove opus_dred_init() 2023-06-16 13:01:33 -04:00
Jean-Marc Valin
1312642f08
More DRED refactoring
progressive decode, avoid storing DRED decoder state
2023-06-16 13:01:32 -04:00
Jean-Marc Valin
906ee4b236
DRED refactoring/renaming 2023-06-16 13:01:32 -04:00
Jean-Marc Valin
34a4ba0d4f
LPCNet update 2023-06-16 13:01:32 -04:00
Jean-Marc Valin
e4f0734374
merge LPCNet fix 2023-06-16 13:01:31 -04:00
Jean-Marc Valin
7d4aeb756f
Fix DRED PVQ for case when K=0 on a split 2023-06-16 13:01:31 -04:00
Jean-Marc Valin
d7e1bd507f
Properly check to see if there's room for DRED 2023-06-16 13:01:30 -04:00
Jean-Marc Valin
b446e96ad1
Fix extensions padding for CBR 2023-06-16 13:01:30 -04:00
Jean-Marc Valin
41bf2a0ac6
Fixes corruption when using extensions
Now generating the extension in place once all the data is already
in the right place.
2023-06-16 13:01:30 -04:00
Jean-Marc Valin
cd5b268879
DRED versioning in bitstream
Adding a 'D' byte to signal the DRED experiment, along with a version
number byte. This entire commit will be reverted once DRED is finalized
and given a non-experimental extension number.
2023-06-16 13:01:29 -04:00
Jean-Marc Valin
0f0ee98386
LPCNet update 2023-06-16 13:01:29 -04:00
Jean-Marc Valin
2c74526b65
Fix encoder infinite loop when decay is too small 2023-06-16 13:01:29 -04:00
Jean-Marc Valin
05942e77a2
Fix padding overwriting the packet content 2023-06-16 13:01:28 -04:00
Jean-Marc Valin
f1db6b9d75
Only decode the DRED frames we need 2023-06-16 13:01:28 -04:00
Jean-Marc Valin
094eaf8bf8
Reenable dot product instructions 2023-06-16 13:01:28 -04:00
Jean-Marc Valin
26c0becdd9
Fixes build when ENABLE_NEURAL_FEC is off 2023-06-16 13:01:27 -04:00
Jean-Marc Valin
ee44943c95
Making sure we don't bust the DRED buffer 2023-06-16 13:01:27 -04:00
Jean-Marc Valin
d0e7a7af0c
Don't encode empty DRED packets 2023-06-16 13:01:27 -04:00
Jean-Marc Valin
8bbfb2ec47
Controlling DRED on the encode side 2023-06-16 13:01:26 -04:00
Jean-Marc Valin
6b12431488
Splitting up DRED encoding 2023-06-16 13:01:26 -04:00
Jean-Marc Valin
579c9d6b0e
Clear FEC buffer on new dred packet
Fixes "FEC buffer full" issue
2023-06-16 13:01:26 -04:00
Jean-Marc Valin
68eea61cb0
Fix DRED segfault
Properly re-initialize DRED (only) when needed
2023-06-16 13:01:25 -04:00
Jean-Marc Valin
72c93e0523
reenable DRED 2023-06-16 13:01:25 -04:00
Jean-Marc Valin
2d98cedd0e
Should handle mixes of PLC and DRED 2023-06-16 13:01:25 -04:00
Jean-Marc Valin
37a4e2e3d0
Fix the normal PLC case 2023-06-16 13:01:24 -04:00
Jean-Marc Valin
9a3f87391a
Adds -lossfile option to opus_demo 2023-06-16 13:01:24 -04:00
Jean-Marc Valin
54ea26241c
DRED: First version that (kinda) works
Probably still has many bugs
2023-06-16 13:01:24 -04:00
Jean-Marc Valin
885d7038d6
Make FEC handling in opus_demo more general
Now only running the decoder on "received" packets
2023-06-16 13:01:23 -04:00
Jean-Marc Valin
d1b48b716a
DRED: better naming 2023-06-16 13:01:23 -04:00
jbuethe
2d25ea19d5
updated lpcnet: qframes decoded by dred_decoder are now in reverse order 2023-06-16 13:01:22 -04:00
jbuethe
078166bbaa
removed debug code 2023-06-16 13:01:22 -04:00
Jean-Marc Valin
c4cb071f75
DRED: Decode variable number of frames 2023-06-16 13:01:22 -04:00
Jean-Marc Valin
634defacdc
DRED cleanup, support for variable number of frames 2023-06-16 13:01:21 -04:00
Jean-Marc Valin
8623012b30
DRED integration work in progress 2023-06-16 13:01:21 -04:00
Jean-Marc Valin
0edb3954e8
Minor cleanup 2023-06-16 13:01:21 -04:00
Jean-Marc Valin
6888403bde
DRED: Fix infinite loop on "impossible" symbols 2023-06-16 13:01:20 -04:00
Jean-Marc Valin
e12a7a445b
Add missing #include "config.h" 2023-06-16 13:01:20 -04:00
Jean-Marc Valin
5ad2aebd90
Code for inserting/extracting DRED in/from packets 2023-06-16 13:01:20 -04:00
Jean-Marc Valin
3dc9c6eda6
Experimenting with padding extensions 2023-06-16 13:01:19 -04:00
Jan Buethe
20be7cd2a8
added missing header file 2023-06-16 13:01:19 -04:00
Jan Buethe
c6f2411f91
lpcnet update 2023-06-16 13:01:19 -04:00
Jan Buethe
034a9c3c4e
clean-up 2023-06-16 13:01:18 -04:00
Jan Buethe
0ba6458ba1
added --enable-neural-fec option to configure 2023-06-16 13:01:18 -04:00
Jan Buethe
e04826d422
implemented DRED packet decoder 2023-06-16 13:01:18 -04:00
Jan Buethe
2767ac303a
changed data types for r, dead_zone, quant_scale and p0 to opus_uint16 2023-06-16 13:01:17 -04:00
Jan Buethe
5a1a676a28
added copyright headers to new files 2023-06-16 13:01:17 -04:00
Jan Buethe
972c02c5eb
reduced buffer size for neural FEC packet encoding 2023-06-16 13:01:17 -04:00
Jan Buethe
8c3b4de74f
lpcnet updates 2023-06-16 13:01:16 -04:00
Jan Buethe
a2a4662f44
addressed compiler warnings 2023-06-16 13:01:16 -04:00
Jan Buethe
b7f2a3439c
fixed debug code 2023-06-16 13:01:16 -04:00
Jan Buethe
5ad80f37eb
updated lpcnet repo 2023-06-16 13:01:15 -04:00
Jan Buethe
ba1a3ecb1d
new lpcnet version 2023-06-16 13:01:15 -04:00
Jan Buethe
a113011b9e
added some prototypes to header file 2023-06-16 13:01:15 -04:00
Jan Buethe
423c673a3f
updated source file list 2023-06-16 13:01:14 -04:00
Jan Buethe
fea85b89f5
finished encoder implementation 2023-06-16 13:01:14 -04:00
Jan Buethe
2df55d3583
added dred encoder to silk encoder 2023-06-16 13:01:14 -04:00
Jean-Marc Valin
59dce643b1
More general Laplace encoder 2023-06-16 13:01:13 -04:00
Jean-Marc Valin
4414db08f9
Update build instructions 2023-06-16 13:01:13 -04:00
Jean-Marc Valin
a674f84a7c
enable neural PLC by default 2023-06-16 13:01:13 -04:00
Jean-Marc Valin
14539a7ef2
Add LPCNet submodule 2023-06-16 13:01:12 -04:00
Jean-Marc Valin
c943d4b145
update 2023-06-16 13:01:12 -04:00
Jean-Marc Valin
4086a691af
Add new PLC files 2023-06-16 13:01:12 -04:00
Jean-Marc Valin
d0f6df5949
WIP: Using LPCNet for PLC 2023-06-16 13:01:06 -04:00
Jean-Marc Valin
3bd86da9a9 Make the pitch search less inefficient 2023-06-14 17:45:57 -04:00
Jean-Marc Valin
5282f0260b Avoiding potential for clashing symbols
also leading underscore in _lpcnet_lpc() wasn't a good idea
2023-06-12 18:57:20 -04:00
Jean-Marc Valin
47bcd4a7f5 Don't use the VQ codebooks when building Opus 2023-06-06 23:01:00 -04:00
Jean-Marc Valin
d749351ae5 Add lpcnet_compute_single_frame_features_float() 2023-06-05 14:13:37 -04:00
Jean-Marc Valin
8e2b539338 Model update 2023-06-01 23:35:51 -04:00
Jean-Marc Valin
f867f61e8b Convert RDOVAE to blob format 2023-06-01 23:33:38 -04:00
Jean-Marc Valin
ba5dde539a Add reset functions that don't clear the model 2023-06-01 04:09:57 -04:00
Jean-Marc Valin
ec6e42ba3d misc fixes 2023-05-31 15:58:11 -04:00
Jean-Marc Valin
c708e68bc6 another missing #include 2023-05-29 20:16:47 -04:00
Jean-Marc Valin
887346674f Fix warnings about undeclared memcpy() 2023-05-29 19:13:34 -04:00
Jean-Marc Valin
fa7b432eed Initial blob loading support 2023-05-28 01:57:30 -04:00
Jean-Marc Valin
d98c59fb9a Add missing #include 2023-05-25 17:30:19 -04:00
Jean-Marc Valin
c9b9570970 Add missing include guards 2023-05-24 12:58:05 -04:00
Jean-Marc Valin
c6db01d2a6 Misc fixes 2023-05-24 01:24:12 -04:00
Jean-Marc Valin
93bc94ba79 Merge branch 'exp_rdovae6' 2023-05-24 00:57:20 -04:00
Jean-Marc Valin
886d647bb1 Link to DRED paper 2023-05-24 00:56:11 -04:00
xnorpx
7122abde59 Rename celt_exp to lpcnet_exp
Depending on what defines are set there are collisions with the ones
in Opus. To avoid these errors we rename the exp functions and
macros.

Signed-off-by: Jean-Marc Valin <jmvalin@amazon.com>
2023-05-24 00:46:20 -04:00
Jean-Marc Valin
85750ce868 Rest of double-to-float patch
That part was not bit-exact
2023-05-24 00:35:49 -04:00
xnorpx
879084f6f0 Fix some of C4244 double to float warnings 2023-05-24 00:30:19 -04:00
xnorpx
919b7a1f58 Add download model script for windows
Signed-off-by: Jean-Marc Valin <jmvalin@amazon.com>
2023-05-23 20:00:43 -04:00
xnorpx
702fffb70a Include math.h to make header self-contained.
Signed-off-by: Jean-Marc Valin <jmvalin@amazon.com>
2023-05-23 11:24:35 -04:00
xnorpx
5b96946277 Use pragma message instead of warning on MSVC
Signed-off-by: Jean-Marc Valin <jmvalin@amazon.com>
2023-05-23 02:31:09 -04:00
Jean-Marc Valin
a9564f37d4 Dump PLC blob next to the LPCNet one 2023-05-23 02:23:01 -04:00
Jean-Marc Valin
ec5cfdbe7a update model 2023-05-22 03:35:37 -04:00
Jean-Marc Valin
98da335009 Convert PLC weights to blob format 2023-05-22 03:32:53 -04:00
Jean-Marc Valin
c7b6935bf2 Add validation for weights blob 2023-05-20 14:23:02 -04:00
Jean-Marc Valin
0098fe70ac Defer calls to run_frame_network() to save CPU
Calls are deferred to the actual loss and we only process the minimum
required.
2023-05-19 18:12:18 -04:00
Jean-Marc Valin
87f9fbc50c Don't preserve owner for model 2023-05-18 20:40:16 -04:00
Jean-Marc Valin
76c090dc25 Update model for blob 2023-05-18 17:34:07 -04:00
Jean-Marc Valin
b9ea868380 Define M_PI when needed 2023-05-18 00:41:14 -04:00
Jean-Marc Valin
3cd7588dae Avoid potential integer wrap-around 2023-05-18 00:04:11 -04:00
Jean-Marc Valin
580614f062 Make the code C90-compliant 2023-05-17 04:09:04 -04:00
Jean-Marc Valin
ebbf5721a2 Use new model struct 2023-05-17 01:08:28 -04:00
Jean-Marc Valin
71c8a23fc1 Code for building a model struct 2023-05-16 23:15:49 -04:00
Jean-Marc Valin
cc714cc5b0 binary weights work in progress 2023-05-16 16:47:12 -04:00
Jean-Marc Valin
1074e5f03b Properly handle constant tables
LPCNet code should now be fully reentrant
2023-05-16 01:07:44 -04:00
Jean-Marc Valin
2fc6c71d09 Avoid opus_alloc() conflicting with Opus 2023-05-15 15:00:56 -04:00
Jean-Marc Valin
0c6c569cab Decode RDO-VAE from stack-allocated state 2023-05-15 02:40:56 -04:00
Michael Klingbeil
a81a620d99 Remove unused variable that was causing warnings 2023-05-10 03:09:57 -04:00
Jan Buethe
ea722db29e added -addlpc option to lpcnet_demo 2023-02-21 20:22:03 +01:00
Jean-Marc Valin
5fbc037fa0 Skip sample rate update on received packets 2023-01-27 18:14:49 -05:00
Jean-Marc Valin
1bfa0e2bd0 update model 2023-01-13 15:50:23 -05:00
jbuethe
aa474553b5 updated torch framework to include quantization 2023-01-13 11:48:04 +00:00
jbuethe
a8673d0e25 gru2 -> gruB and dotp included in dump_rdovae 2023-01-12 14:15:39 +00:00
Jean-Marc Valin
5b9b4381eb Add call to clear FEC buffer 2022-12-20 21:55:28 -05:00
Jean-Marc Valin
70ce2bd71b Cleanup, simplifying the no-crossfade PLC case 2022-12-19 17:11:33 -05:00
Jean-Marc Valin
c0cfcbcfbd update model
New model that can handle absent Burg features
2022-12-19 17:11:33 -05:00
Jean-Marc Valin
e7f2360936 Making it possible to mix DRED with PLC 2022-12-16 12:53:26 -05:00
jbuethe
f178da1100 changed ordering of qframe to reverse in dred_rdovae_decode_qframe 2022-12-07 11:29:52 +00:00
jbuethe
fdb04d0eef added pytorch implementation of RDOVAE 2022-11-23 11:02:29 +00:00
jbuethe
a13aa3a077 fixed use of quant_levels argument in fec_encoder 2022-11-10 15:04:28 +00:00
jbuethe
861f6739a4 added import script for exchange format 2022-11-09 11:41:28 +00:00
jbuethe
ecb5cbcf30 added quant-levels argument to dump_rdovae and rdovae_exchange 2022-11-08 11:35:38 +00:00
jbuethe
9859d68bb0 changed distortion loss weighting back to 0.5, 0.5 2022-11-07 16:14:11 +00:00
jbuethe
0e5a38fac6 removed deprecated lambda from fec_encoder 2022-11-07 16:13:48 +00:00
jbuethe
2607386198 fixed scaling/quantization order 2022-11-04 16:52:04 +00:00
Jan Buethe
a8170986ec updated rdovae_exchange 2022-10-31 15:21:12 +01:00
Jan Buethe
eab9472d0d added script for exporting RDOVAE weights (external dependency not added yet) 2022-10-31 12:49:20 +01:00
Jan Buethe
a223122b89 clean-up 2022-10-26 12:42:34 +00:00
Jan Buethe
585de8e467 changed data types for r, dead_zone, quant_scale and p0 to opus_uint16 2022-10-26 10:15:39 +00:00
Jan Buethe
d58faea390 added copyright headers to new files 2022-10-26 09:48:55 +00:00
Jan Buethe
1f7c39d7c3 added config include to all new C files 2022-10-25 13:28:25 +00:00
Jan Buethe
159da40890 fixed calculation of p0 2022-10-25 12:59:17 +00:00
Jan Buethe
818a0496d5 added re-ordering to dred_rdovae_decode_qframe (frames now in correct order) 2022-10-25 12:15:25 +00:00
Jan Buethe
23bb11dd48 added destroy functions for encoder/decoder states 2022-10-25 12:14:31 +00:00
Jan Buethe
7cac35e699 fixed theta and r values 2022-10-25 12:13:11 +00:00
Jan Buethe
80383b6377 Merge branch 'exp_rdovae6' of https://github.com/xiph/LPCNet into exp_rdovae6 2022-10-24 11:00:50 +00:00
Jan Buethe
79d4f12677 added api functions for retrieving pointers to statistical model parameters 2022-10-24 11:00:02 +00:00
Jan Buethe
0f04488116 fixed type error for statistical model 2022-10-24 10:59:39 +00:00
Jan Buethe
8accadcc85 more fixes 2022-10-21 19:00:30 +00:00
Jan Buethe
d0b1cd886a bugfixes 2022-10-21 16:35:43 +00:00
Jan Buethe
5b49421648 bugfix in dred_rdovae.c 2022-10-21 15:58:58 +00:00
Jan Buethe
f74bff74f6 added api for DRED/RDOVAE to circumvent conflicts between Opus and LPCNet repo 2022-10-21 15:37:23 +00:00
Jan Buethe
d80f99f78b added void to shut up missing prototype warning 2022-10-21 15:33:41 +00:00
Jan Buethe
e6390e34c7 removed compute_dense function (conflict with opus mlp) 2022-10-21 12:33:34 +00:00
Jan Buethe
1978cc6094 refactoring 2022-10-21 12:13:38 +00:00
Jan Buethe
d1646a680a added NFEC decoder C implementation 2022-10-20 17:27:39 +00:00
Jan Buethe
ea4d8f54c3 added statistical model to dump_nfec_model 2022-10-19 17:18:25 +00:00
Jan Buethe
50966eecc5 bugfixes in nfec encoder 2022-10-19 14:43:12 +00:00
Jan Buethe
2112f3dd76 some fixes 2022-10-19 10:58:24 +02:00
Jan Buethe
c1b357ed47 first attempt of C implementation of fec encoder (not tested yet due to NEON/DOT_PROD not being separable) 2022-10-18 19:30:23 +02:00
Jean-Marc Valin
9629ea6a70 Fine-tuning the scripts 2022-10-14 01:01:39 -04:00
Jean-Marc Valin
0f7fe64d5a Compute FEC features based on loss pattern 2022-10-05 01:56:53 -04:00
Jean-Marc Valin
89db314efb Updating fec_encoder.py for recent changes 2022-10-04 15:46:13 -04:00
Jean-Marc Valin
61459c24e0 Change decoder architecture to be like the encoder 2022-10-04 00:27:36 -04:00
Jean-Marc Valin
79d1a916d0 Weighting loss by 1/sqrt(lambda) 2022-10-02 12:34:42 -04:00
Jean-Marc Valin
0b01863732 Larger range of quantizers 2022-09-30 22:21:30 -04:00
Jan Buethe
524f84800f removed dump_data delay from total_delay in fec_encoder.py 2022-09-30 16:00:02 +02:00
Jean-Marc Valin
4c82d3b419 Completely move quantization out of encoder and decoder 2022-09-30 03:53:40 -04:00
Jan Buethe
cd0993fd8c clarifications in help printout 2022-09-29 21:14:34 +02:00
Jan Buethe
97ffa94d5c fixed decoder bug (non-quantized input) 2022-09-29 21:13:30 +02:00
jbuethe
589e674116 alignment fix in fec_encoder 2022-09-29 17:54:34 +02:00
jbuethe
a866abe3c3 ported debug changes from fec_encoder_torch.py 2022-09-29 14:18:41 +02:00
Jean-Marc Valin
0a2d6dfcb6 Use the encoder state as decoder initial state
Helps reduce the error on the most recent frames
2022-09-28 15:34:02 -04:00
Jean-Marc Valin
38dda0f950 Oops, forgot to run PVQ quantization for the state 2022-09-28 15:33:20 -04:00
jbuethe
b43f077ba8 corrected offset between decoder output on even/odd latent frames 2022-09-27 16:37:46 +00:00
jbuethe
e4e5958a14 Merge branch 'exp_rdovae6' of https://github.com/xiph/LPCNet into exp_rdovae6 2022-09-27 16:32:46 +00:00
jbuethe
01baf1a0fc added missing dead-zone to encode_rdovae.py 2022-09-27 16:31:04 +00:00
jbuethe
be42c3b514 added fec_encoder.py and corresponding fec_packets.[chpy] 2022-09-27 16:29:13 +00:00
jbuethe
2de335d83c added fec_encoder.py and fec_packets.[chpy] 2022-09-27 16:22:36 +00:00
Jean-Marc Valin
ef12c29f14 Update encoder/decoder 2022-09-27 02:28:13 -04:00
Jean-Marc Valin
405aa7cf69 WIP: training with different alignment 2022-09-27 02:28:12 -04:00
Jean-Marc Valin
981d06eefd Refactoring towards multiple offset decoding 2022-09-27 02:28:12 -04:00
Jean-Marc Valin
a4f7c157cf Stop decimating in the encoder 2022-09-27 02:28:12 -04:00
Jean-Marc Valin
fdd51eb760 RepeatVector no longer likes tensor input 2022-09-27 02:28:11 -04:00
Jean-Marc Valin
8569121f6c RDO-VAE work in progress 2022-09-27 02:28:11 -04:00
Jean-Marc Valin
b6ac1c78bb FEC hooks in the PLC code
Can now inject FEC features to be used by the PLC when available
2022-09-27 02:24:21 -04:00
Jean-Marc Valin
4befd8bb39 Decreasing look-ahead of default model to 1 frame 2022-09-24 03:25:33 -04:00
Jean-Marc Valin
d45ab6fcb6 Move back to tanh for frame rate network
Swish has lower loss, but doesn't seem to improve quality
2022-09-24 03:22:57 -04:00
Jean-Marc Valin
f5c251c5d5 Properly align LPC with lookahead in data loader 2022-09-24 03:21:36 -04:00
Jean-Marc Valin
dd114baf4d Fix causal PLC for models with non-zero lookahead 2022-09-16 01:44:53 -04:00
janpbuethe
f3c738d45f removed debug prints in dump_lpcnet.py 2022-09-07 09:10:19 +00:00
Jean-Marc Valin
60450472a6 Merge branch 'plc_challenge' into master 2022-09-07 00:38:55 -04:00
Jean-Marc Valin
340ab3089b model update 2022-09-06 23:31:31 -04:00
janpbuethe
920300c546 Add lpc weighting and model parameter handling
Model now stores LPC gamma, look-ahead, and end-to-end.
Parameters aren't quite reliable yet, YMMV
2022-09-06 23:14:39 -04:00
Jean-Marc Valin
c1da818f39 PLC instructions 2022-05-13 00:43:56 -04:00
Jean-Marc Valin
ee08ef0a9c Auto-download PLC model weights 2022-03-17 18:21:57 -04:00
Jean-Marc Valin
27348d1b88 Running new features twice on resync
Helps forget concealment features faster
2022-03-04 16:23:40 -05:00
Jean-Marc Valin
d2d847bcae Boosting pitch correlation at inference time 2022-03-04 16:20:56 -05:00
Jean-Marc Valin
099f0872f7 Compensating for DC filter offset during blending 2022-03-04 16:19:56 -05:00
Jean-Marc Valin
8ca2ccf17a fix another warning 2022-02-25 13:57:51 -05:00
Jean-Marc Valin
a814a6235a Silencing a warning
Variable cannot be uninitialized
2022-02-25 13:56:09 -05:00
Jean-Marc Valin
e034b1096b Biasing for overestimating the pitch correlation 2022-02-25 13:55:21 -05:00
Jean-Marc Valin
05f02aaa49 Adaptive bias
Bias the energy for vowels, but not for consonants
2022-02-24 13:03:44 -05:00
Jean-Marc Valin
3982144f8e Revert PLC state to use Burg in the causal overlap 2022-02-24 03:22:04 -05:00
Jean-Marc Valin
ea405cc684 Add delay-compensation for non-causal PLC 2022-02-21 22:52:39 -05:00
Jean-Marc Valin
a7b2420300 Setting PLC options at run-time
Causal vs non-causal, DC filter, and blending are now configurable
2022-02-21 21:36:08 -05:00
Jean-Marc Valin
0eb9f0bd38 Updating the DC tracker even during packet loss
Makes the offset signal more continuous
2022-02-21 02:34:44 -05:00
Jean-Marc Valin
3883f3d372 Synchronizing DC removal for noncausal PLC 2022-02-19 18:03:22 -05:00
Jean-Marc Valin
a11e68dd74 Evening out CPU load for non-causal PLC
Defer some of the extra resync work to the next packet
2022-02-19 17:28:28 -05:00
Jean-Marc Valin
022ddc31fb Enable pitch xcorr refining 2022-02-16 23:09:27 -05:00
Jean-Marc Valin
86bf0cb9d5 Remove pitch periodicity attenuation 2022-02-16 03:44:02 -05:00
Jean-Marc Valin
cb4dbe05d0 oops 2022-02-16 03:28:13 -05:00
Jean-Marc Valin
78e682950b Handle DC offset
Remove DC, conceal, add it back at the end
2022-02-16 02:14:59 -05:00
Jean-Marc Valin
527841d03e Avoiding discontinuities in the RNN update 2022-02-16 01:58:49 -05:00
Jean-Marc Valin
1ae958acf3 Make LPCNet frame rate network use swish 2022-02-16 01:01:51 -05:00
Jean-Marc Valin
d1a14ac5d4 Making state update more robust to discontinuities 2022-02-16 00:58:40 -05:00
Jean-Marc Valin
2a8bcf4c0f 3-part pitch loss function 2022-02-13 02:51:22 -05:00
Jean-Marc Valin
2e06c07893 more emphasis on pitch loss 2022-02-11 03:20:20 -05:00
Jean-Marc Valin
8e3ce6e839 Add no-blending version 2022-02-09 14:14:23 -05:00
Jean-Marc Valin
b2847687e0 Getting rid of prediction delay 2022-02-09 03:06:41 -05:00
Jean-Marc Valin
9297fe52ee Add -plc_file option 2022-02-09 00:51:44 -05:00
Jean-Marc Valin
7a7913f388 cleanup 2022-02-07 15:14:56 -05:00
Jean-Marc Valin
fd45fba905 Reduce look-ahead to 5 ms 2022-02-07 04:01:36 -05:00
Jean-Marc Valin
5d32ab8f3a optional bi-directional concealment 2022-02-06 01:14:03 -05:00
Jean-Marc Valin
dc539a9ce9 WIP non-causal PLC 2022-02-05 22:57:53 -05:00
Jean-Marc Valin
2e18f0d160 Using Burg cepstrum for feature prediction 2022-02-04 22:04:23 -05:00
Jean-Marc Valin
b93dbfc0bc Adding Burg spectral estimation code 2022-02-03 00:28:23 -05:00
Jean-Marc Valin
f3bc6bacd2 Avoiding tmp buffer overflows 2022-02-03 00:27:20 -05:00
Jean-Marc Valin
3e2198c6e1 Learning to predict time t+1 from time t
Instead of t from t
2022-02-02 15:00:24 -05:00
Jean-Marc Valin
d816477c58 Add decay 2022-02-02 03:54:05 -05:00
Jean-Marc Valin
cd2e568bb6 Using lost packet file instead of uniform random 2022-02-02 00:22:57 -05:00
Jean-Marc Valin
1db1946f77 Support for biased loss 2022-02-01 02:57:50 -05:00
Jean-Marc Valin
186fa61680 oops, fix initialization 2022-02-01 02:57:23 -05:00
Jean-Marc Valin
4866e632cd minus stupid bug 2022-02-01 01:31:49 -05:00
Jean-Marc Valin
32a63fd31d WIP: PLC prediction 2022-01-31 23:21:55 -05:00
Jean-Marc Valin
c45963d40a Code for testing PLC models 2022-01-31 18:49:19 -05:00
Jean-Marc Valin
e1181bcad0 oops, fix band loss 2022-01-30 17:29:33 -05:00
Jean-Marc Valin
c8cbfa7e9b Adding feature prediction (totally untested) 2022-01-29 02:54:48 -05:00
Jean-Marc Valin
42cbb9ed07 zero delay 2022-01-28 02:44:23 -05:00
Jean-Marc Valin
5ad75a54f5 Opus compat 2022-01-28 02:44:07 -05:00
Jean-Marc Valin
6b0705065f Implement lpcnet_plc_get_size() 2022-01-27 02:45:47 -05:00
Jean-Marc Valin
227537c994 Avoiding more symbol clashes with Opus 2022-01-25 00:08:27 -05:00
Jean-Marc Valin
2f5b51c94a Avoiding symbol clashes with Opus 2022-01-24 23:21:31 -05:00
Jean-Marc Valin
805fed733a Fix warnings 2022-01-24 16:33:32 -05:00
Jean-Marc Valin
57f5681987 Add swish activation support 2022-01-24 16:22:29 -05:00
Jean-Marc Valin
93d6c3975a Fix prototypes for funcs that take no args 2022-01-24 15:35:37 -05:00
Jean-Marc Valin
60a009b457 Making codebase C90-compliant 2022-01-19 18:10:44 -05:00
Jean-Marc Valin
0f2b8d4a09 Remove useless code carried over from Opus 2022-01-19 18:10:23 -05:00
Jean-Marc Valin
969bd7662f Adding API for PLC
Packet loss concealment based on LPCNet (work in progress)
2022-01-19 17:07:10 -05:00
Jean-Marc Valin
b2b2e226c3 Optional code for interpolating the pitch correlation 2022-01-18 03:52:14 -05:00
Jean-Marc Valin
976729dcd0 Fix minor bug in dynamic programming pitch search 2022-01-18 03:52:14 -05:00
Timothy B. Terriberry
48a7e25032 Abort download_model.sh on errors.
Otherwise, in addition to preventing autogen.sh from failing
 cleanly, the touch command at the end creates a spurious file
 named 'src/nnet_data.[ch]' (because the globbing fails if the
 files do not already exist).
2021-11-10 19:15:58 -05:00
Timothy B. Terriberry
5c3cc55614 Minor fixes to kiss99 2021-11-10 18:01:42 -05:00
Jean-Marc Valin
3a47548536 Using KISS99 (taken from Daala) as RNG 2021-11-10 17:58:51 -05:00
Jean-Marc Valin
81229a7412 Fix the "no lookahead" case 2021-10-30 02:40:52 -04:00
Jean-Marc Valin
71a2656768 Fix potential overflow in training data for large signals 2021-10-30 02:40:47 -04:00
Jean-Marc Valin
d490fbf50a model update 2021-10-25 16:03:55 -04:00
Jean-Marc Valin
ad374be052 more updates to the instructions 2021-10-25 16:01:00 -04:00
Jean-Marc Valin
153e43aa44 update instructions 2021-10-21 10:13:45 -04:00
Jean-Marc Valin
7fdca7f01d Minor cleanup 2021-10-20 23:35:59 -04:00
Jean-Marc Valin
0e523aa3f4 controllable look-ahead 2021-10-20 23:35:59 -04:00
Jean-Marc Valin
a9bf6cee8a Don't hardcode the number of bands 2021-10-20 23:35:59 -04:00
Jean-Marc Valin
b5b1d5013e Add noise before sample embedding
Simulates rounding
2021-10-20 23:35:59 -04:00
Jean-Marc Valin
3f7756c53f Bring test_lpcnet.py up-to-date 2021-10-20 23:35:59 -04:00
Jean-Marc Valin
144b7311bc Dumping 16-bit linear training data 2021-10-20 23:35:59 -04:00
Jean-Marc Valin
a3ef596822 auto-detect end-to-end models 2021-10-20 23:35:59 -04:00
Jean-Marc Valin
d5b6087f48 Add tensorboard logging 2021-10-20 23:35:59 -04:00
Jean-Marc Valin
b24e53fdfa Adding option to change frame rate network size 2021-10-20 23:35:59 -04:00
Jean-Marc Valin
fe7b54c0e8 Combine LAR+L1 regularization 2021-10-20 23:35:59 -04:00
Jean-Marc Valin
054d984bf3 Freeze LPCs when quantizing e2e models 2021-10-19 15:39:41 -04:00
Jean-Marc Valin
8cdc8081d8 Fix non-128 batch sizes
Avoid hardcoding the batch size in the model
2021-10-09 03:20:22 -04:00
Jean-Marc Valin
37c9bd8d28 Making sure we recompile when updating the model 2021-10-05 03:00:03 -04:00
Jean-Marc Valin
2a131ff6fe Add lpcnet_compute_single_frame_features() to header 2021-10-04 15:59:32 -04:00
Jean-Marc Valin
be4f70bc7e split model downloading away from autogen.sh 2021-10-04 15:51:06 -04:00
Jean-Marc Valin
82c31b4c63 update instructions 2021-10-04 03:24:44 -04:00
Jean-Marc Valin
2275853ac4 Merge branch 'exp_quant_grub2' 2021-10-04 03:14:06 -04:00
Jean-Marc Valin
444b4370d4 New model with quantized GRU B 2021-10-04 03:12:48 -04:00
Jean-Marc Valin
4a7ce81fb3 Re-enabling quantized feature dump 2021-10-04 03:04:36 -04:00
Jean-Marc Valin
c5a17a0716 Hard quantization for training
Also, using stateful GRU to randomize initialization
2021-10-04 02:53:46 -04:00
Jean-Marc Valin
3b8d64d746 single-frame features for -features option 2021-09-15 16:02:54 -04:00
Jean-Marc Valin
e4b4613d05 Fix signed-unsigned biases 2021-09-02 02:34:08 -04:00
Jean-Marc Valin
51ef273e06 Using 8-bit recurrent weights for GRU B 2021-09-02 02:33:55 -04:00
Jean-Marc Valin
8783ef0088 Same as 47, without the sign augmentation 2021-09-01 12:31:30 -04:00
Jean-Marc Valin
9776e8e828 Refactoring frame rate network 2021-08-17 18:21:55 -04:00
Jean-Marc Valin
2c9b847454 noise tuning, flip sign of speech signal 2021-08-17 18:17:25 -04:00
Jean-Marc Valin
80751bad70 frame-wise features 2021-08-14 02:06:55 -04:00
Jean-Marc Valin
4c6a5e0e60 oops, fix single-frame pitch 2021-08-13 15:36:32 -04:00
Jean-Marc Valin
6b4e3c56c8 WIP: single-frame inference 2021-08-13 12:43:44 -04:00
Jean-Marc Valin
66c29fb620 Remove some useless buffers 2021-08-13 00:20:19 -04:00
Jean-Marc Valin
adc50cab5b dump_lpcnet.py should work the same for end2end 2021-08-04 14:56:02 -04:00
Jean-Marc Valin
7331e17e92 Don't remove silence from training data 2021-08-04 14:03:44 -04:00
Jean-Marc Valin
c5364153a8 Add more training options 2021-08-04 14:02:59 -04:00
Jean-Marc Valin
ab9a09266f Sharing conditioning network with LPC 2021-08-02 19:30:22 -04:00
Krishna Subramani
c1532559a2 Adds end-to-end LPC training
Making LPC computation and prediction differentiable
2021-08-02 19:28:27 -04:00
Jean-Marc Valin
cba0ecd483 Fix warnings about ignoring fread() return value 2021-08-02 19:02:29 -04:00
Jean-Marc Valin
6ea726d401 Avoiding feature copies 2021-08-02 19:02:29 -04:00
Jean-Marc Valin
4f2caa35f0 Update model
Also providing links to suitable training data
2021-08-02 19:02:29 -04:00
Jean-Marc Valin
6585843237 Removing the unused features
Down to 20 features
2021-07-29 03:20:59 -04:00
Jean-Marc Valin
b90729b83b dump_lpcnet.py now checks the size of GRU B 2021-07-20 17:01:54 -04:00
Jean-Marc Valin
4322c16335 Oops, actually use the size of GRU B for training 2021-07-20 15:36:15 -04:00
Jean-Marc Valin
346a96fa81 Training options for sparse GRU B 2021-07-20 02:35:42 -04:00
Jean-Marc Valin
c76756e18a Adding sparse training for GRU B inputs 2021-07-18 02:24:21 -04:00
Jean-Marc Valin
8bdbbfa18d Support for sparse GRU B input matrices
Only on the C side, no sparse GRU B training yet
2021-07-16 03:07:26 -04:00
Jean-Marc Valin
4c0e224865 Model update
Weights are the same, but they are dumped to C differently.
2021-07-15 16:12:42 -04:00
Jean-Marc Valin
c74330e850 Pre-compute GRU B conditioning
Adapted from PR: https://github.com/mozilla/LPCNet/pull/134
by zhuxiaoxu <zhuxiaoxu@ainirobot.com>
but had to be reworked due to previous weight quantization changes.
2021-07-15 16:06:56 -04:00
Jean-Marc Valin
0d53fad50d Using np.memmap() to load the training data
Makes loading faster
2021-07-14 13:47:23 -04:00
Jean-Marc Valin
5a51e2eed1 Adding command-line options to training script 2021-07-13 03:09:04 -04:00
Jean-Marc Valin
1edf5d7986 README.md update 2021-07-11 03:46:25 -04:00
Jean-Marc Valin
4298f2f9e1 Adding support for SSE2 and SSSE3 2021-07-11 03:36:20 -04:00
Jean-Marc Valin
116bcb38fb Adding SSE 4.1 for older platforms
AVX without AVX2 should now work again too.
2021-07-10 14:08:01 -04:00
Jean-Marc Valin
3e223e6015 Fixes Python inference for the binary probability tree 2021-07-10 01:59:49 -04:00
Jean-Marc Valin
f8f12e7f3c NEON float->char conversion (same as the AVX2 version) 2021-07-10 01:59:49 -04:00
Jean-Marc Valin
a1079c2ce3 Again, same conversion as 3206cec, for NEON 2021-07-10 01:59:49 -04:00
Jean-Marc Valin
7d8b00f11d Sampling directly from the logit
Avoids having to compute a sigmoid
2021-07-10 01:59:49 -04:00
Jean-Marc Valin
e8f70128d5 same conversion cleanup as 3206cec for sgemv_accum8x4() 2021-07-10 01:59:49 -04:00
Jean-Marc Valin
7cef98ec8c Minor optimization: merging all 3 embeddings 2021-07-10 01:59:49 -04:00
Jean-Marc Valin
714380e71b More manual unrolling 2021-07-10 01:59:49 -04:00
Jean-Marc Valin
006556036a Cleaning up the sparse GRU
It no longer overwrites its input vector
2021-07-10 01:59:49 -04:00
Jean-Marc Valin
44fe055682 cleanup float<->int conversions 2021-07-10 01:59:49 -04:00
Jean-Marc Valin
60d6eab63d Doing a bit of unrolling to speed things up 2021-07-10 01:59:49 -04:00
Jean-Marc Valin
3e7ab9ff87 update model 2021-07-10 01:59:49 -04:00
Jean-Marc Valin
54abdb6f5d Sparse matrix indexing optimization
The 4* is now stored in the table to avoid computing it in the loop
2021-07-10 01:59:49 -04:00
Jean-Marc Valin
2681822c18 update model 2021-07-10 01:59:49 -04:00
Jean-Marc Valin
d332100808 Representing output pdf as binary probability tree
Saves on the MDense/softmax computation since we only need to compute
8 values instead of 256.
2021-07-10 01:59:49 -04:00
Jean-Marc Valin
c151fc1853 Merge branch 'exp_improved_simd2' 2021-06-30 18:56:04 -04:00
Jean-Marc Valin
f0ce43389a Update test_lpcnet.py, remove old TF1 code 2021-06-30 18:54:27 -04:00
Jean-Marc Valin
d428b0d32a Update model 2021-06-30 18:27:31 -04:00
Jean-Marc Valin
8c4b88cfab Using a bisection search for sampling 2021-06-30 18:14:12 -04:00
Jean-Marc Valin
e35441f2cc Faster activation functions for AVX
Using rational function approximation for tanh() and sigmoid.
2021-06-29 04:05:48 -04:00
Jean-Marc Valin
5571ef1b8e minor optimization: removing some copying 2021-06-26 01:27:03 -04:00
Jean-Marc Valin
d61f7e00f8 Fix missing transpose in the sparsity code
CuDNNGRU and GRU don't use the same weight format
2021-06-25 13:43:37 -04:00
Jean-Marc Valin
ca0a43bee9 Update README.md 2021-06-24 17:47:51 -04:00
Jean-Marc Valin
c1535c8ccf Adding option to disable int8 dot products 2021-06-24 17:31:05 -04:00
Jean-Marc Valin
0b9f6bab81 Remove unnecessary mask in exp() approximation
This isn't necessary since valid exponents can't flip the sign bit
2021-06-21 01:34:38 -04:00
Jean-Marc Valin
ae2ae5ead6 Remove useless multiply by one
See https://github.com/mozilla/LPCNet/commit/bffdcee95#commitcomment-46372726
2021-06-21 01:30:51 -04:00
Jean-Marc Valin
c7ba313a67 Adding extra constraint to avoid saturation for SSE/AVX2
When implementing using SSSE3 or AVX2, our dot products can saturate
if two adjacent weights sum to more than 127.
2021-06-18 17:39:35 -04:00
Jean-Marc Valin
237245f815 Support for multi-GPU training
Not sure why CuDNNGRU doesn't get used by default, but we need
to explicitly use it to get things to run fast.
2021-06-18 13:20:43 -04:00
Jean-Marc Valin
ebc9483b4c update model 2021-02-01 01:07:35 -05:00
Jean-Marc Valin
79980b2044 Minor update to training scripts 2021-01-18 02:13:52 -05:00
Jean-Marc Valin
20fea538c2 more reasonable noise
was increased too much in 713d53e8a
2021-01-17 21:39:42 -05:00
Jean-Marc Valin
b9c230b346 Add NEON intrinsics 2021-01-16 02:11:22 -05:00
Jean-Marc Valin
b214e684c1 Neon WIP: Compiles but very slow 2021-01-16 02:11:21 -05:00
Jean-Marc Valin
8c3fe6f31d Cleaning up float version 2021-01-16 02:11:21 -05:00
Jean-Marc Valin
40b9fd0a75 Fix some quantization issues 2021-01-16 02:11:21 -05:00
Jean-Marc Valin
83657d0e43 Dot product AVX2 code for non-sparse multiply 2021-01-16 02:11:21 -05:00
Jean-Marc Valin
1707b960de cleanup, add signed-unsigned biases 2021-01-16 02:11:21 -05:00
Jean-Marc Valin
40b309d92b WIP: 8-bit SIMD for GRU B 2021-01-16 02:11:21 -05:00
Jean-Marc Valin
e695355ba5 some cleanup 2021-01-16 02:11:20 -05:00
Jean-Marc Valin
06489b42dd oops, fix number of columns 2021-01-16 02:11:20 -05:00
Jean-Marc Valin
d87f974431 Vectorizing conversion 2021-01-16 02:11:20 -05:00
Jean-Marc Valin
6b582edbed WIP: remove scalar code from AVX2 code 2021-01-16 02:11:20 -05:00
Jean-Marc Valin
be392e3857 WIP: Got some AVX2 code working 2021-01-16 02:11:20 -05:00
Jean-Marc Valin
2b4652f9f6 WIP: cleanup 2021-01-16 02:11:20 -05:00
Jean-Marc Valin
bce779886d WIP: signed*unsigned arithmetic 2021-01-16 02:11:20 -05:00
Jean-Marc Valin
11736ca9e3 WIP: 8-bit mul 2021-01-16 02:11:19 -05:00
Jean-Marc Valin
1657bae024 WIP: Adding a constraint 2021-01-16 02:11:19 -05:00
Jean-Marc Valin
c045702e51 Add non-dot-product AVX code 2021-01-16 02:11:19 -05:00
Jean-Marc Valin
73a05f55c7 wip 8x4 2021-01-16 02:11:19 -05:00
Jean-Marc Valin
cc28518699 wip 8x4 sparseness 2021-01-16 02:11:19 -05:00
Jean-Marc Valin
8e405b44e0 Improve accuracy of AVX sigmoid
Reciprocal approximation could cause the sigmoid output to be
greater than 1.0.
2021-01-16 01:51:39 -05:00
Jean-Marc Valin
56d9f13efd Fix quantization bug where pitch can get too low
Would cause unused pitch embedding vectors to be used
2021-01-16 01:51:39 -05:00
Jean-Marc Valin
f0df3e82ec Update to model trained with Tensorflow 2
No change otherwise
2020-12-19 00:21:03 -05:00
Jean-Marc Valin
078d90cbdf Merge Tensorflow 2 code 2020-12-19 00:20:23 -05:00
Jean-Marc Valin
90fec91b12 Convert training code to Tensorflow 2 2020-08-19 14:27:07 -04:00
Marcus Asteborg
171b1ba0ce Initialize excitation memory value to unquantized 0 2020-08-17 13:39:24 -04:00
Jean-Marc Valin
88a7878fdb Fix pitch-related bugs
preventing the pitch from going above 255
2020-08-15 02:18:59 -04:00
Marcus Asteborg
9a7f3e4a3d Open files in binary mode to enable dump_data to work properly on Windows 2020-08-02 20:45:07 -04:00
Marcus Asteborg
eeb7615bd7 Add conda env file with working tensorflow and keras version for LPCNet 2020-07-31 17:25:29 -04:00
Jean-Marc Valin
14fb264a0f Fix sampling bug for 16-bit rand()
According to David Rowe, when rand() returns RAND_MAX (which is likely
for 16-bit output), we end up producing a click.
2020-06-20 23:47:04 -04:00
Jean-Marc Valin
8f887b62f0 Download the model 2019-04-14 03:20:17 -04:00
Jean-Marc Valin
60790784a4 training 2019-04-11 11:07:33 -04:00
Jean-Marc Valin
9b8d72ea87 more noise 2019-04-10 21:30:54 -04:00
Jean-Marc Valin
a3300d7b24 Fix bias in LPC spectrum 2019-04-10 21:13:44 -04:00
Jean-Marc Valin
fd1fc693aa adaptation flag to avoid training the sample rate network 2019-04-01 15:22:00 -04:00
Jean-Marc Valin
2a7a9fa085 Minor README.md tweak again 2019-03-28 11:33:38 -04:00
Jean-Marc Valin
49fc5f741a nothing to see here 2019-03-28 11:29:34 -04:00
Jean-Marc Valin
63a67d4654 Add README.md to tarball 2019-03-28 11:23:52 -04:00
Jean-Marc Valin
ee63653fd4 Update README.md with new paper 2019-03-28 11:19:05 -04:00
Jean-Marc Valin
1f45081548 Implement -feature option 2019-03-28 10:54:33 -04:00
Jean-Marc Valin
77d02dbd2f Using macros for sizes in the demo 2019-03-27 14:12:52 -04:00
Jean-Marc Valin
9f78e58392 Make param ordering consistent for lpcnet_synthesize() 2019-03-27 14:06:46 -04:00
Jean-Marc Valin
623ac9545c Adding API doc 2019-03-24 12:10:36 -04:00
Jean-Marc Valin
219fbff4e6 Making it easier to adapt (or not) a model 2019-03-24 03:48:26 -04:00
Jean-Marc Valin
edee9cd8f2 README.md update 2019-03-22 14:37:01 -04:00
Jean-Marc Valin
a8fb25f11c Remove NaN checks 2019-03-20 13:36:42 -04:00
Jean-Marc Valin
a09815925a Neon: Make gcc actually generate VMLA instructions for sparse mul
Otherwise it was splitting the mla into a mul and an add
2019-03-20 12:58:39 -04:00
Jean-Marc Valin
2bc20e65c7 Remove hack to match Python code 2019-03-20 03:21:03 -04:00
Jean-Marc Valin
492ef9b362 Neon implementation of the activation functions 2019-03-20 03:03:44 -04:00
Jean-Marc Valin
ddd38bd208 Merge branch 'autotools1' 2019-03-19 14:50:38 -04:00
Jean-Marc Valin
3d2b26b7ca Add dump_data 2019-03-19 14:42:23 -04:00
Jean-Marc Valin
df8a0ac3fb build lpcnet_demo in root dir 2019-03-19 14:22:39 -04:00
Jean-Marc Valin
a9871fe6b4 Add README 2019-03-19 04:08:12 -04:00
Jean-Marc Valin
30c7545a04 Fixing Makefile 2019-03-18 21:54:31 -04:00
Jean-Marc Valin
2c0e96796e Fixing dynamic libraries 2019-03-18 21:53:28 -04:00
Jean-Marc Valin
849f3abf32 Making autogen.sh download and unpack the model 2019-03-18 21:43:36 -04:00
Jean-Marc Valin
a0b0ece171 Add missing headers 2019-03-18 20:50:53 -04:00
Jean-Marc Valin
89b2e064a0 Fixes 2019-03-18 20:09:10 -04:00
Jean-Marc Valin
55a15a93e0 s/rnnoise/lpcnet/ (untested) 2019-03-18 20:05:14 -04:00
Jean-Marc Valin
f41c97767f Copied from RNNoise directly 2019-03-18 19:57:40 -04:00
Jean-Marc Valin
ba38c160eb Fix auto-download (even more of a hack) 2019-03-18 17:49:40 -04:00
Jean-Marc Valin
77eb006190 Auto-download data (still a hack) 2019-03-18 17:40:33 -04:00
Jean-Marc Valin
54bd6208a6 Adding lpcnet_demo 2019-03-18 16:50:39 -04:00
Jean-Marc Valin
54b057c9cd Add LPCNet decoder object 2019-03-18 14:13:07 -04:00
Jean-Marc Valin
fe608dfc51 Moving LPCNetState 2019-03-18 13:42:30 -04:00
Jean-Marc Valin
e63292bd56 Split off decoder code 2019-03-17 13:25:43 -04:00
Jean-Marc Valin
8dcccc8934 library encoder... 2019-03-15 13:52:24 -04:00
Jean-Marc Valin
7086dd73dc WIP: Splitting off the encoder 2019-03-15 02:44:56 -04:00
Jean-Marc Valin
e198d9beef renaming 2019-03-14 19:16:37 -04:00
Jean-Marc Valin
100ace67bb cleanup 2019-03-14 19:11:22 -04:00
Jean-Marc Valin
40ceb487c9 Now encoding to a binary bit-stream (64 bytes/packet) 2019-03-14 18:52:59 -04:00
Jean-Marc Valin
23859d74ff Add bitpacker (untested) 2019-03-14 18:31:26 -04:00
Jean-Marc Valin
accd7a2bd1 getting rid of the vq_mem global 2019-03-14 18:05:19 -04:00
Jean-Marc Valin
7cee743c4e minor update to training code 2019-03-12 14:43:13 -04:00
Jean-Marc Valin
7388486c35 Generating samples using quantized cepstrum LPC 2019-03-12 14:41:37 -04:00
Jean-Marc Valin
6b279094aa decrease resolution of C0 to fit 7 bits, add one bit to mid VQ 2019-03-12 11:56:34 -04:00
Jean-Marc Valin
b5ff531bf0 Optional quantization 2019-03-12 02:31:15 -04:00
Jean-Marc Valin
0077f4b872 add decoder 2019-03-11 23:56:49 -04:00
Jean-Marc Valin
c9b7efd274 dump as ASCII 2019-03-11 18:00:27 -04:00
Jean-Marc Valin
6f8db93929 Add M-best VQ search 2019-03-11 17:06:29 -04:00
Jean-Marc Valin
bfcf94de2a cleanup 2019-03-11 15:00:12 -04:00
Jean-Marc Valin
5f830b4578 3-bit interpolation 2019-03-11 12:04:36 -04:00
Jean-Marc Valin
fc4f594e25 Better quantization 2019-03-01 16:34:41 -05:00
Jean-Marc Valin
3fc183df55 adjusting quantization bitrate 2019-02-19 00:14:36 -05:00
Jean-Marc Valin
56820f0d10 quantize cepstrum 2019-02-18 20:40:22 -05:00
Jean-Marc Valin
29610751ac oops, include DC in difference codebook 2019-02-16 01:37:50 -05:00
Jean-Marc Valin
90d74bbbe9 Add bidirectional quantizer 2019-02-15 18:12:38 -05:00
Jean-Marc Valin
543ee94037 20-bit VQ 2019-02-15 15:13:14 -05:00
Jean-Marc Valin
5be0e59ff0 quantize period but not correlation yet 2019-02-14 17:34:42 -05:00
Jean-Marc Valin
a2d03c2880 more cleaning up 2019-02-01 03:15:42 -05:00
Jean-Marc Valin
8cb54041b0 reindent 2019-02-01 03:04:23 -05:00
Jean-Marc Valin
6ef718c474 splitting function 2019-02-01 02:59:48 -05:00
Jean-Marc Valin
6318467f74 Removing static variables 2019-02-01 02:44:33 -05:00
Jean-Marc Valin
e1741e3763 cleaning up 2019-02-01 02:36:34 -05:00
Jean-Marc Valin
ba2404a507 cleanup 2019-01-31 15:53:15 -05:00
Jean-Marc Valin
d3ec557540 Output unquantized pitch info 2019-01-31 15:49:52 -05:00
Jean-Marc Valin
785c4b2e57 tweak frame weighting 2019-01-31 15:14:59 -05:00
Jean-Marc Valin
b3198a09da Add frame weighting, doubling prevention 2019-01-31 14:55:43 -05:00
Jean-Marc Valin
e634718036 add backward pass 2019-01-30 22:17:04 -05:00
Jean-Marc Valin
3a6bae1782 Dynamic programming approach (wip) 2019-01-30 21:31:52 -05:00
Jean-Marc Valin
24595ea2da pitch tweaks 2019-01-30 14:17:08 -05:00
Jean-Marc Valin
823be85778 comments 2019-01-30 02:29:54 -05:00
Jean-Marc Valin
a15cc20165 cleanup 2019-01-30 02:25:49 -05:00
Jean-Marc Valin
cd2065f725 replace pitch features 2019-01-30 02:21:59 -05:00
Jean-Marc Valin
1d70c61aa9 dump 4 frames at a time 2019-01-29 17:53:05 -05:00
Jean-Marc Valin
71e6bbb520 more refactoring 2019-01-29 17:36:05 -05:00
Jean-Marc Valin
80dcc3f3b3 put back training code 2019-01-29 17:31:09 -05:00
Jean-Marc Valin
d5a01822d1 refactoring 2019-01-29 17:30:15 -05:00
Jean-Marc Valin
43ffd19bc1 pitch quantization 2019-01-29 17:08:53 -05:00
Jean-Marc Valin
48a8f973b5 remove earlier attempt 2019-01-29 14:35:10 -05:00
Jean-Marc Valin
6d88dfe3c1 misc improvements 2019-01-29 14:34:32 -05:00
Jean-Marc Valin
a1ab7c5f6b new pitch 2019-01-29 13:34:43 -05:00
Jean-Marc Valin
da456b09fa fix warning 2019-01-24 14:16:30 -05:00
Jean-Marc Valin
b2940ed212 Use real features at the chunk edges rather than zeros 2019-01-24 14:16:30 -05:00
Jean-Marc Valin
9fd3e45fd3 Avoid glitch at the beginning 2019-01-21 16:53:26 -05:00
Jean-Marc Valin
b84a06dd08 Use a single u-law embedding 2019-01-21 16:52:57 -05:00
Jean-Marc Valin
dc082d7c1c Making it easier to change the frame size 2019-01-21 15:16:11 -05:00
Jean-Marc Valin
38cd5cf08f Remove useless (and possibly hurtful) residual connection
I guess it's a bad idea to forward inputs directly
2019-01-17 23:17:42 -05:00
Jean-Marc Valin
4698b28345 Making dump_lpcnet.py a bit more robust
Avoid relying on the order of the layers
2019-01-17 17:03:48 -05:00
Jean-Marc Valin
d181139930 Cleanup
Remove the metric because it wasn't too useful and it's buggy in
Keras 2.2.4.
2019-01-09 16:52:26 -05:00
Jean-Marc Valin
a06e9a96ad doc update 2019-01-01 14:40:00 -05:00
Jean-Marc Valin
ba46bb93da Biasing noise std 2019-01-01 14:37:19 -05:00
Jean-Marc Valin
800a659cc9 Using log approximations 2019-01-01 14:37:19 -05:00
Jean-Marc Valin
677182fcaa Making the update and reset gate more sparse 2019-01-01 14:37:19 -05:00
Jean-Marc Valin
ea02ef7e02 Computing signals in C 2019-01-01 14:37:18 -05:00
Jean-Marc Valin
d75a4aec72 refactoring 2018-12-28 01:19:56 -05:00
Jean-Marc Valin
293f76ee33 README update 2018-12-16 01:15:38 -05:00
David
8bd917667b added concat.sh script 2018-12-16 09:31:50 +10:30
David
8f8ca9c9e3 updated README 2018-12-16 09:30:53 +10:30
David
611944682c added Makefile 2018-12-16 09:30:13 +10:30
Jean-Marc Valin
470a0a7e3e Properly delaying the pitch gain 2018-12-15 01:25:44 -05:00
Jean-Marc Valin
6367aa8a78 Controlling training offset 2018-12-13 18:25:21 -05:00
Jean-Marc Valin
088ee4ffa6 Should make dump_data able to handle partial overlap 2018-12-13 18:03:58 -05:00
Jean-Marc Valin
12f16df6b7 More work on making freq.[ch] more generic 2018-12-13 17:40:05 -05:00
Jean-Marc Valin
fb3112ade6 Removing most of the full-overlap assumptions from freq.[ch] 2018-12-13 15:42:11 -05:00
Jean-Marc Valin
e8c9621285 Missing #include 2018-12-13 14:37:04 -05:00
Jean-Marc Valin
06b9bfa8f4 Avoiding an infinite loop
Thanks to changeforan on Github
2018-12-12 11:00:33 -05:00
Jean-Marc Valin
7d9affc385 Moving the frame out of lpcnet.c and into test_lpcnet.c 2018-12-11 16:59:07 -05:00
Jean-Marc Valin
ca4a8d4f09 More cleanup 2018-12-11 16:53:22 -05:00
Jean-Marc Valin
242198ea66 Get rid of the TRAINING macro 2018-12-11 16:29:31 -05:00
Jean-Marc Valin
590e9ce41d Move the common functions from dump_data.c to freq.c 2018-12-11 16:28:50 -05:00
Jean-Marc Valin
9b581a13b0 Rename denoise.c to dump_data.c, rnnoise.h to freq.h 2018-12-11 16:10:57 -05:00
Jean-Marc Valin
3cb52ecef7 ... 2018-12-11 15:37:05 -05:00
Jean-Marc Valin
86c15a7a36 Remove more useless code 2018-12-11 13:31:04 -05:00
Jean-Marc Valin
4eb2d34465 Cleanup: Remove useless code (more to come) 2018-12-11 11:49:13 -05:00
Jean-Marc Valin
37ddc0a8b4 Add -test or -train option
-train does data augmentation, cuts silence, and outputs the preemphasis
-test only outputs the features for resynthesis
2018-12-11 02:00:51 -05:00
David Rowe
74b98437ba Vectorization testing code
Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
2018-12-11 01:41:27 -05:00
David Rowe
03dcb8195f Error messages
Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
2018-12-11 01:40:29 -05:00
David Rowe
7dc696b9a4 refactored for different machines, sgemv_accum16 using NEON intrisics
Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
2018-12-10 21:28:29 -05:00
David
bc108e9aa6 checks that files opened OK 2018-12-10 16:53:53 -05:00
Jean-Marc Valin
8c271d60c4 Controlling per-gate sparsity 2018-12-10 16:15:50 -05:00
Jean-Marc Valin
b9e0ea23e0 Fix flooring of the pitch period
Without the 0.1 bias, the rounding error could cause an offset of -1
2018-12-10 11:23:31 -05:00
Jean-Marc Valin
d533e4024d Fix DCT normalization 2018-12-10 11:22:13 -05:00
Jean-Marc Valin
a02d9c85e5 reduce memory use of training code 2018-12-09 22:48:46 -05:00
Jean-Marc Valin
2facc08bb4 Avoid rounding the prediction multiple times 2018-12-09 21:20:36 -05:00
Jean-Marc Valin
3e71248298 Fix compile 2018-12-09 16:00:49 -05:00
Jean-Marc Valin
0fb031a921 Produce at least ~14 hours of augmented speech 2018-12-09 15:48:12 -05:00
Jean-Marc Valin
3dcbb012a0 Add variable gain and response 2018-12-09 15:48:12 -05:00
Jean-Marc Valin
f933725677 Chopping silence from the training data 2018-12-09 15:48:11 -05:00
Jean-Marc Valin
407eec127c make dump_data output LPC as converted from features 2018-12-09 15:45:34 -05:00
Jean-Marc Valin
e0d2b105a2 Compute LPC from features 2018-12-07 18:16:19 -05:00
Jean-Marc Valin
1dcd57323f Do proper saturation 2018-12-06 16:27:08 -05:00
Jean-Marc Valin
771cc7868a Support for plain AVX with no FMA 2018-12-04 07:58:13 -05:00
Jean-Marc Valin
91d90676e1 Remove the need for useless exc and pred files 2018-12-01 12:05:23 -05:00
Jean-Marc Valin
b05f950e38 Using the right name: s/gemm/sgemv/ 2018-11-30 10:56:44 -05:00
Jean-Marc Valin
c395a68b7d moving code around 2018-11-30 10:46:32 -05:00
Jean-Marc Valin
05f4851dcd Making the code work even without AVX2/FMA 2018-11-30 10:32:04 -05:00
Jean-Marc Valin
1956467d79 Add AVX2/FMA to gcc options 2018-11-30 01:55:23 -05:00
Jean-Marc Valin
d7f0abcd19 Delaying the softmax() to avoid the pow()
Now at 5x real-time, with all the low-hanging fruit done.
2018-11-29 20:09:36 -05:00
Jean-Marc Valin
faf3fe3d24 gemm_accum16() doesn't need a multiple of 16 columns (just lines). 2018-11-29 19:50:09 -05:00
Jean-Marc Valin
7ee79b63df Add AXV versions of exp(), tanh() and sigmoid()
Now 3x faster than real-time
2018-11-29 19:43:59 -05:00
Jean-Marc Valin
d961d009a0 Managing to actually use sparse matrices
Now 2x real-time!
2018-11-28 20:20:17 -05:00
Jean-Marc Valin
4de3e53a73 Adding some sparse GRU support
Still need to properly dump as sparse.
2018-11-28 18:49:19 -05:00
Jean-Marc Valin
ec671ed90e Quick and dirty AVX2 implementation of gemm_accum
Brings us very close to real-time
2018-11-28 14:57:22 -05:00
Jean-Marc Valin
15fb1b3c77 Moving GRU_A's condition computation to the frame rate network
Completes optimizations from Section 3.6 of the LPCNet paper.
2018-11-28 14:13:59 -05:00
Jean-Marc Valin
732fce9ab2 Pre-computing GRU_A's input contribution. 2018-11-28 14:05:36 -05:00
Jean-Marc Valin
040aa437c3 Simper GRU implementation just for reset_after. 2018-11-28 12:37:18 -05:00
Jean-Marc Valin
6c2f7e58fd compiling synthesis 2018-11-27 15:08:04 -05:00
Jean-Marc Valin
36a0bf8c75 Wow, managed two bugs in a 25-character line 2018-11-27 14:50:38 -05:00
Jean-Marc Valin
c7b978b923 Fix reset_after GRU 2018-11-27 14:37:10 -05:00
Jean-Marc Valin
3c694db226 Better rounding 2018-11-27 13:11:41 -05:00
Jean-Marc Valin
0ddfdfc7c0 Add deemphasis 2018-11-27 12:34:39 -05:00
Jean-Marc Valin
5ac0ac7acc Add code for copying the LPC 2018-11-27 11:44:04 -05:00
Jean-Marc Valin
e25a585de8 Match Python boundary condition 2018-11-27 00:23:26 -05:00
Jean-Marc Valin
4ccfbdff04 Frame network seems to be working 2018-11-26 18:41:54 -05:00
Jean-Marc Valin
538f25565a Starting to actually test this -- fix a few OOB reads 2018-11-26 16:02:49 -05:00
Jean-Marc Valin
8d62ba067e ... 2018-11-26 15:39:06 -05:00
Jean-Marc Valin
c0e8f37c8b Cleaning up the API 2018-11-26 13:12:17 -05:00
Jean-Marc Valin
575d8d6fa4 Adding sampling 2018-11-26 11:04:41 -05:00
Jean-Marc Valin
91c9524af3 Getting there 2018-11-26 02:49:25 -05:00
Jean-Marc Valin
4cf177412b More plumbing 2018-11-26 02:33:49 -05:00
Jean-Marc Valin
7119eaf33b Plumbing for the frame rate network 2018-11-25 17:20:24 -05:00
Jean-Marc Valin
70fdf47471 Copyright headers 2018-11-25 13:15:19 -05:00
Jean-Marc Valin
60c97b9723 Proper ordering for MDense weights 2018-11-24 16:19:02 -05:00
Jean-Marc Valin
141830ce5a Fixing includes 2018-11-24 16:00:30 -05:00
Jean-Marc Valin
37fbcaee0b mdense max size 2018-11-24 15:51:08 -05:00
Jean-Marc Valin
94ac0841df Precomputing sizes 2018-11-24 15:47:48 -05:00
Jean-Marc Valin
c025744e34 Fix conv1d, default to size 384 2018-11-24 15:30:17 -05:00
Jean-Marc Valin
66486004ba Implement MDense 2018-11-24 12:23:11 -05:00
Jean-Marc Valin
d4046036a9 Dump Conv1D (didn't check weight ordering at all) 2018-11-24 11:32:01 -05:00
Jean-Marc Valin
477d08734d Dump embedding 2018-11-23 23:33:35 -05:00
Jean-Marc Valin
d93239e955 Using non-cudnn version of the GRU for the weights
Not sure how the layout of the CuDNN version is
2018-11-23 20:07:42 -05:00
Jean-Marc Valin
b0c61158f7 More meaningful names 2018-11-23 19:51:34 -05:00
Jean-Marc Valin
b9cd61be8b Work in progress translation to C 2018-11-23 19:43:58 -05:00
Jean-Marc Valin
8caaa5e917 Output directly to 16-bit (raw) PCM 2018-11-07 03:26:10 -05:00
Jean-Marc Valin
cf926d11e0 Project name is LPCNet 2018-10-25 19:19:06 -04:00
David Rowe
bf04b53a44 Cleanup
Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
2018-10-25 16:19:45 -04:00
Jean-Marc Valin
db7569c3da moving hyper-parameters to new_lpcnet_model() arguments
more cleaning up too
2018-10-24 14:57:34 -04:00
Jean-Marc Valin
5ff58fa117 remove import that no longer exists 2018-10-24 14:09:05 -04:00
Jean-Marc Valin
92281bbe31 s/CELPNet/LPCNet/ 2018-10-23 23:10:41 -04:00
Jean-Marc Valin
7c28191b60 Rename the current files to use the LPCNet name since they're no longer WaveNet 2018-10-22 13:40:54 -04:00
Jean-Marc Valin
97dcf52a01 Remove no longer used files (old wavenet and LPCNet implementations) 2018-10-22 13:40:11 -04:00
Jean-Marc Valin
3122b6b3bc most promising model for now 2018-10-21 02:45:22 -04:00
Jean-Marc Valin
f9fe6c0ed8 clear pitch features in testing too 2018-10-21 02:45:22 -04:00
Jean-Marc Valin
4e331f377f LPC generation from the cepstral coefficients 2018-10-21 02:45:21 -04:00
Jean-Marc Valin
fb1d4fdec2 ... 2018-10-21 02:45:21 -04:00
Jean-Marc Valin
fa1d2824fa Add diagonal to sparsity mask 2018-10-21 02:45:21 -04:00
Jean-Marc Valin
f13debcf65 Arbitrary 16x1 sparseness 2018-10-21 02:45:21 -04:00
Jean-Marc Valin
62f330eca3 better training params 2018-10-16 01:00:52 -04:00
David Rowe
a263f7c1f5 Adding comments 2018-10-14 12:36:56 -04:00
David Rowe
beaa370a7f Adding a README.md
Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
2018-10-14 12:33:44 -04:00
Jean-Marc Valin
d75b51b18a Reduce sampling temperature for voiced frames 2018-10-13 14:52:30 -04:00
Jean-Marc Valin
c74876bbc6 Adding some instructions 2018-10-13 03:54:58 -04:00
Jean-Marc Valin
f3eb616455 Cleaning up the synthesis code
Remove all kinds of useless code. Making it use all features continuously
and fixing a bug that skipped one every 15 frames.
2018-10-13 03:41:17 -04:00
Jean-Marc Valin
9756feefbd ... 2018-10-10 18:05:27 -04:00
Jean-Marc Valin
495f8ea5f3 second RNN 2018-10-10 18:05:27 -04:00
Jean-Marc Valin
3698977292 add license 2018-10-10 17:28:14 -04:00
Jean-Marc Valin
03fa20d532 remove unused/dead code 2018-10-09 12:27:02 -04:00
Jean-Marc Valin
a9835c4e5f more cleanup 2018-10-09 03:10:25 -04:00
Jean-Marc Valin
aba9af8bde mu-law code cleanup 2018-10-09 02:39:12 -04:00
Jean-Marc Valin
08211c279f missing script 2018-10-09 01:01:18 -04:00
Jean-Marc Valin
ea1391e174 deeper features 2018-10-03 22:30:44 -04:00
Jean-Marc Valin
639766b322 pitch embedding 2018-10-02 21:17:34 -04:00
Jean-Marc Valin
2d74d3189c ... 2018-10-02 18:26:42 -04:00
Jean-Marc Valin
c381db5688 Use excitation as input 2018-08-24 00:20:10 -04:00
Jean-Marc Valin
8f6e490ba2 clean excitation 2018-08-22 23:32:36 -04:00
Jean-Marc Valin
8a276fb44a predicting excitation 2018-08-21 19:17:27 -04:00
Jean-Marc Valin
a922f83cca Fix input noise 2018-08-21 13:02:26 -04:00
Jean-Marc Valin
08b5fe6cdc working decoder 2018-08-17 00:31:27 -04:00
Jean-Marc Valin
87cd75f6f4 Training seems to work 2018-08-16 22:43:13 -04:00
Jean-Marc Valin
785a2b2e84 Predicting pre-emphasized audio 2018-08-16 13:58:33 -04:00
Jean-Marc Valin
3d20cdaed4 Add prediction 2018-08-14 18:40:32 -04:00
Jean-Marc Valin
4fec1144f3 more pcm outputs 2018-08-03 01:59:29 -04:00
Jean-Marc Valin
70789e6f43 audio-domain synthesis 2018-07-31 18:37:27 -04:00
Jean-Marc Valin
4cf2b2705a fix ulaw2lin() 2018-07-29 01:41:18 -04:00
Jean-Marc Valin
2aba2a9c49 Add input embedding 2018-07-27 16:33:01 -04:00
Jean-Marc Valin
1837dad072 audio-domain version 2018-07-24 17:52:33 -04:00
Jean-Marc Valin
b6af21f31c wip... 2018-07-23 17:05:21 -04:00
Jean-Marc Valin
211435f5d3 Gated convolution 2018-07-13 17:10:03 -04:00
Jean-Marc Valin
0fa7150454 Implement FFTNet too 2018-07-13 14:19:27 -04:00
Jean-Marc Valin
f50058f3e3 first wavenet implementation 2018-07-13 02:44:43 -04:00
Jean-Marc Valin
374ba430c4 stashing stuff here 2018-07-12 18:20:25 -04:00
Jean-Marc Valin
679dfbab58 Fix NaN issue 2018-07-11 17:41:35 -04:00
Jean-Marc Valin
5d8a1313d6 decodes something... 2018-07-11 01:30:30 -04:00
Jean-Marc Valin
638252a965 wip 2018-07-10 13:56:59 -04:00
Jean-Marc Valin
824dbecaec decoder wip 2018-07-09 18:20:52 -04:00
Jean-Marc Valin
06511ba5a4 Add convolution 2018-06-26 22:52:24 -04:00
Jean-Marc Valin
f884d7bf18 add convolution 2018-06-26 17:14:29 -04:00
Jean-Marc Valin
fd9002e98e Adding pitch 2018-06-26 16:40:55 -04:00
Jean-Marc Valin
617e462be3 using features (except pitch gain which has NaNs for now) 2018-06-26 01:31:44 -04:00
Jean-Marc Valin
b65031ef64 excitation model 2018-06-25 16:26:47 -04:00
Jean-Marc Valin
cc1c52a63b saving features 2018-06-25 14:42:56 -04:00
Jean-Marc Valin
aa970f2a60 Computing features 2018-06-25 13:56:28 -04:00
Jean-Marc Valin
54710acfe7 fix pitch 2018-06-25 02:10:31 -04:00
Jean-Marc Valin
fd7a03bdad oops, fixes noise floor 2018-06-24 23:43:52 -04:00
Jean-Marc Valin
8ca8adae2b Add LPC analysis 2018-06-24 23:00:08 -04:00
Jean-Marc Valin
e018b6f152 cleanup 2018-06-24 03:48:51 -04:00
Jean-Marc Valin
61c6391c21 Importing DSP code from RNNoise 2018-06-24 02:41:36 -04:00
Jean-Marc Valin
c41afe41f0 initial commit 2018-06-21 20:45:54 -04:00
439 changed files with 47437 additions and 2866 deletions

View File

@ -1,37 +0,0 @@
image: Visual Studio 2015
configuration:
- Debug
- DebugDLL
- DebugDLL_fixed
- Release
- ReleaseDLL
- ReleaseDLL_fixed
platform:
- Win32
- x64
environment:
api_key:
secure: kR3Ac0NjGwFnTmXdFrR8d6VXjdk5F7L4F/BilC4nvaM=
build:
project: win32\VS2015\opus.sln
parallel: true
verbosity: minimal
after_build:
- cd %APPVEYOR_BUILD_FOLDER%
- 7z a opus.zip win32\VS2015\%PLATFORM%\%CONFIGURATION%\opus.??? include\*.h
test_script:
- cd %APPVEYOR_BUILD_FOLDER%\win32\VS2015\%PLATFORM%\%CONFIGURATION%
- test_opus_api.exe
- test_opus_decode.exe
- test_opus_encode.exe
artifacts:
- path: opus.zip
on_success:
- ps: if ($env:api_key -and "$env:configuration/$env:platform" -eq "ReleaseDLL_fixed/x64") { Start-AppveyorBuild -ApiKey $env:api_key -ProjectSlug 'opus-tools' }

51
.github/workflows/autotools.yml vendored Normal file
View File

@ -0,0 +1,51 @@
name: Autotools
on: [push, pull_request]
jobs:
AutoMakeBuild:
name: AutoMake/${{ matrix.config.name }}
runs-on: ${{ matrix.config.os }}
strategy:
fail-fast: false
matrix:
config:
- {
name: "Linux/GCC",
os: ubuntu-latest,
compiler: gcc,
automakeconfig:
}
- {
name: "Linux/GCC/EnableAssertions",
os: ubuntu-latest,
compiler: gcc,
buildconfig: --enable-assertions
}
- {
name: "Linux/GCC/EnableCustomModes",
os: ubuntu-latest,
compiler: gcc,
buildconfig: --enable-assertions --enable-custom-modes
}
- {
name: "Linux/GCC/EnableDNN",
os: ubuntu-latest,
compiler: gcc,
buildconfig: --enable-assertions --enable-custom-modes --enable-dred --enable-osce
}
steps:
- uses: actions/checkout@v3
# No AutoMake on Mac so let's install it
- name: Install AutoConf, AutoMake and LibTool on MacOSX
if: matrix.config.os == 'macos-latest'
run: brew install autoconf automake libtool
- name: Autogen
run: CC=${{ matrix.config.compiler }} ./autogen.sh
- name: Configure
run: CFLAGS="-mavx -mfma -mavx2 -O2 -ffast-math" ./configure --enable-float-approx ${{ matrix.config.buildconfig }}
- name: Build
run: make -j 2
- name: Test
run: make check -j 2

252
.github/workflows/cmake.yml vendored Normal file
View File

@ -0,0 +1,252 @@
name: CMake
on: [push, pull_request]
jobs:
CMakeVersionTest:
name: Test build with CMake 3.1.0
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Download models
run: ./autogen.sh
- name: Install CMake 3.1
run: |
curl -sL https://github.com/Kitware/CMake/releases/download/v3.1.0/cmake-3.1.0-Linux-x86_64.sh -o cmakeinstall.sh
chmod +x cmakeinstall.sh
sudo ./cmakeinstall.sh --prefix=/usr/local --exclude-subdir
rm cmakeinstall.sh
sudo apt-get install libidn11
- name: Create Work Dir
run: mkdir build
- name: Configure
working-directory: ./build
run: cmake .. -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON
- name: Build
working-directory: ./build
run: make -j 2 -s
- name: Test
working-directory: ./build
run: ctest -j 2
CMakeMINGW:
name: CMake MINGW
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Download models
run: ./autogen.sh
- name: Install MINGW
run: sudo apt-get install -y mingw-w64
- name: Create Work Dir
run: mkdir build
- name: Configure
working-directory: ./build
run: cmake .. -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DCMAKE_SYSTEM_NAME=Windows -DCMAKE_C_COMPILER=x86_64-w64-mingw32-gcc
- name: Build
working-directory: ./build
run: cmake --build . -j 2 --config Release --target package
CMakeBuild:
name: CMake/${{ matrix.config.name }}
runs-on: ${{ matrix.config.os }}
strategy:
fail-fast: false
matrix:
config:
- {
name: "Android/So/ARMv8/Release",
os: ubuntu-latest,
config: Release,
args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a -DBUILD_SHARED_LIBS=ON"
}
# TODO: Android ARMv7
# - {
# name: "Android/So/ARMv7/Release",
# os: ubuntu-latest,
# config: Release,
# args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=armeabi-v7a -DBUILD_SHARED_LIBS=ON"
# }
- {
name: "Android/So/X86/Release",
os: ubuntu-latest,
config: Release,
args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86 -DBUILD_SHARED_LIBS=ON"
}
- {
name: "Android/So/X64/Release",
os: ubuntu-latest,
config: Release,
args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86_64 -DBUILD_SHARED_LIBS=ON"
}
- {
name: "Windows/Dll/X86/Release",
os: windows-latest,
config: Release,
args: -G "Visual Studio 17 2022" -A Win32 -DBUILD_SHARED_LIBS=ON
}
- {
name: "Windows/Dll/X64/Release",
os: windows-latest,
config: Release,
args: -G "Visual Studio 17 2022" -A x64 -DBUILD_SHARED_LIBS=ON
}
- {
name: "Windows/Dll/ARMv8/Release",
os: windows-latest,
config: Release,
args: -G "Visual Studio 17 2022" -A ARM64 -DBUILD_SHARED_LIBS=ON
}
- {
name: "Linux/So/X64/Release",
os: ubuntu-latest,
config: Release,
args: -DBUILD_SHARED_LIBS=ON
}
- {
name: "MacOSX/So/X64/Release",
os: macos-latest,
config: Release,
args: -DBUILD_SHARED_LIBS=ON
}
- {
name: "MacOSX/Framework/X64/Release",
os: macos-latest,
config: Release,
args: -DBUILD_FRAMEWORK=ON -DCMAKE_INSTALL_PREFIX=install
}
# Use Unix Makefiles for iOS so that Xcode does not complain about signing.
- {
name: "iOS/Dll/arm64/Release",
os: macos-latest,
config: Release,
args: -G "Unix Makefiles" -DBUILD_SHARED_LIBS=ON -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_OSX_ARCHITECTURES=arm64
}
# Use Unix Makefiles for iOS so that Xcode does not complain about signing.
- {
name: "iOS/Framework/arm64/Release",
os: macos-latest,
config: Release,
args: -G "Unix Makefiles" -DBUILD_FRAMEWORK=ON -DCMAKE_INSTALL_PREFIX=install -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_OSX_ARCHITECTURES=arm64
}
- {
name: "Windows/Lib/X86/Release",
os: windows-latest,
config: Release,
args: -G "Visual Studio 17 2022" -A Win32
}
- {
name: "Windows/Lib/X64/Release",
os: windows-latest,
config: Release,
args: -G "Visual Studio 17 2022" -A x64
}
- {
name: "Windows/Lib/armv8/Release",
os: windows-latest,
config: Release,
args: -G "Visual Studio 17 2022" -A ARM64
}
- {
name: "Linux/Lib/X64/Release",
os: ubuntu-latest,
config: Release,
args: ""
}
- {
name: "MacOSX/Lib/X64/Release",
os: macos-latest,
config: Release,
args: ""
}
# Use Unix Makefiles for iOS so that Xcode does not complain about signing.
- {
name: "iOS/Lib/arm64/Release",
os: macos-latest,
config: Release,
args: -G "Unix Makefiles" -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_OSX_ARCHITECTURES=arm64
}
- {
name: "Android/Lib/ARMv8/Release",
os: ubuntu-latest,
config: Release,
args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a"
}
# TODO: Android ARMv7
# - {
# name: "Android/Lib/ARMv7/Release",
# os: ubuntu-latest,
# config: Release,
# args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=armeabi-v7a"
# }
- {
name: "Android/Lib/X86/Release",
os: ubuntu-latest,
config: Release,
args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86"
}
- {
name: "Android/Lib/X64/Release",
os: ubuntu-latest,
config: Release,
args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86_64"
}
- {
name: "CustomModes/Linux/Lib/X64/Release",
os: ubuntu-latest,
config: Release,
args: "-DOPUS_CUSTOM_MODES=ON"
}
- {
name: "AssertionsFuzz/Windows/Lib/X64/Release",
os: windows-latest,
config: Release,
args: -G "Visual Studio 17 2022" -A x64 -DOPUS_ASSERTIONS=ON -DOPUS_FUZZING=ON
}
- {
name: "AssertionsFuzz/Linux/Lib/X64/Release",
os: ubuntu-latest,
config: Release,
args: -DOPUS_ASSERTIONS=ON -DOPUS_FUZZING=ON
}
- {
name: "AssertionsFuzz/MacOSX/Lib/X64/Release",
os: macos-latest,
config: Release,
args: -DOPUS_ASSERTIONS=ON -DOPUS_FUZZING=ON
}
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Install AutoConf, AutoMake and LibTool # Needed for autogen.sh
if: matrix.config.os == 'macos-latest'
run: brew install autoconf automake libtool
- name: Download models Windows
if: contains(matrix.config.name, 'Windows')
run: .\autogen.bat
- name: Download models
if: contains(matrix.config.name, 'MacOSX') ||
contains(matrix.config.name, 'Linux') ||
contains(matrix.config.name, 'Android') ||
contains(matrix.config.name, 'iOS')
run: ./autogen.sh
- name: Create Work Dir
run: mkdir build
- name: Configure
working-directory: ./build
run: cmake .. ${{ matrix.config.args }} -DCMAKE_BUILD_TYPE=${{ matrix.config.config }} -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON
- name: Build
working-directory: ./build
run: cmake --build . -j 2 --config ${{ matrix.config.config }} --target package
- name: Test
if: contains(matrix.config.name, 'Windows') && !contains(matrix.config.name, 'ARM') && !contains(matrix.config.name, 'Dll') ||
contains(matrix.config.name, 'MacOSX') && !contains(matrix.config.name, 'ARM') && !contains(matrix.config.name, 'Dll') ||
contains(matrix.config.name, 'Linux') && !contains(matrix.config.name, 'ARM') && !contains(matrix.config.name, 'Dll')
working-directory: ./build
run: ctest -j 2 -C ${{ matrix.config.config }} --output-on-failure

121
.github/workflows/dred.yml vendored Normal file
View File

@ -0,0 +1,121 @@
# Configs that enable Deep Redundancy (DRED)
name: DRED
on: [push, pull_request]
jobs:
CMakeBuild:
name: CMake/${{ matrix.config.name }}
runs-on: ${{ matrix.config.os }}
strategy:
fail-fast: false
matrix:
config:
- {
name: "Windows/Lib/X64/Release",
os: windows-latest,
config: Release,
args: -G "Visual Studio 17 2022" -DOPUS_X86_PRESUME_AVX2=ON
}
- {
name: "Windows/Lib/armv8/Release",
os: windows-latest,
config: Release,
args: -G "Visual Studio 17 2022" -A ARM64
}
- {
name: "Linux/Lib/X64/Release",
os: ubuntu-latest,
config: Release,
args: -DOPUS_X86_PRESUME_AVX2=ON
}
- {
name: "Android/Lib/X64/Release",
os: ubuntu-latest,
config: Release,
args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=x86_64"
}
- {
name: "Android/Lib/ARMv8/Release",
os: ubuntu-latest,
config: Release,
args: "-DCMAKE_TOOLCHAIN_FILE=${ANDROID_HOME}/ndk/25.2.9519653/build/cmake/android.toolchain.cmake -DANDROID_ABI=arm64-v8a"
}
- {
name: "MacOSX/Lib/X64/Release",
os: macos-latest,
config: Release,
# Some Macs in GitHub's lab are really old, so they don't support AVX.
args: -DOPUS_X86_PRESUME_AVX2=OFF
}
- {
name: "iOS/Lib/arm64/Release",
os: macos-latest,
config: Release,
args: -G "Unix Makefiles" -DCMAKE_SYSTEM_NAME=iOS -DCMAKE_OSX_ARCHITECTURES=arm64
}
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Install AutoConf, AutoMake and LibTool # Needed for autogen.sh
if: matrix.config.os == 'macos-latest'
run: brew install autoconf automake libtool
- name: Download models Windows
if: contains(matrix.config.name, 'Windows')
run: .\autogen.bat
- name: Download models
if: contains(matrix.config.name, 'MacOSX') ||
contains(matrix.config.name, 'Linux') ||
contains(matrix.config.name, 'Android') ||
contains(matrix.config.name, 'iOS')
run: ./autogen.sh
- name: Create Work Dir
run: mkdir build
- name: Configure
working-directory: ./build
run: cmake .. ${{ matrix.config.args }} -DCMAKE_BUILD_TYPE=${{ matrix.config.config }} -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_OSCE=ON
- name: Build
working-directory: ./build
run: cmake --build . -j 2 --config ${{ matrix.config.config }} --target package
- name: Test
if: contains(matrix.config.name, 'Windows') && !contains(matrix.config.name, 'ARM') && !contains(matrix.config.name, 'Dll') ||
contains(matrix.config.name, 'MacOSX') && !contains(matrix.config.name, 'ARM') && !contains(matrix.config.name, 'Dll') ||
contains(matrix.config.name, 'Linux') && !contains(matrix.config.name, 'ARM') && !contains(matrix.config.name, 'Dll')
working-directory: ./build
run: ctest -j 2 -C ${{ matrix.config.config }} --output-on-failure
AutoToolsBuild:
name: AutoTools/${{ matrix.config.name }}
runs-on: ${{ matrix.config.os }}
strategy:
fail-fast: false
matrix:
config:
- {
name: "Linux/GCC",
os: ubuntu-latest,
compiler: gcc,
automakeconfig:
}
- {
name: "Linux/Clang",
os: ubuntu-latest,
compiler: clang,
automakeconfig:
}
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Install AutoConf, AutoMake and LibTool on MacOSX
if: matrix.config.os == 'macos-latest'
run: brew install autoconf automake libtool
- name: Autogen
run: CC=${{ matrix.config.compiler }} ./autogen.sh
- name: Configure
run: CFLAGS="-mavx -mfma -mavx2 -O2 -ffast-math" ./configure --enable-float-approx --enable-dred --enable-osce
- name: Build
run: make -j 2
- name: Test
run: make check -j 2

15
.github/workflows/repository.yml vendored Normal file
View File

@ -0,0 +1,15 @@
name: Repository
on: [push, pull_request]
jobs:
CheckTrailingWhiteSpaces:
name: Check trailing white spaces
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Check Whitespaces
run: |
git diff-tree --check origin/opus-ng HEAD

1
.gitignore vendored
View File

@ -49,6 +49,7 @@ tests/*test
tests/test_opus_api
tests/test_opus_decode
tests/test_opus_encode
tests/test_opus_extensions
tests/test_opus_padding
tests/test_opus_projection
celt/arm/armopts.s

View File

@ -56,15 +56,17 @@ ci-fairy:
autoconf:
stage: build
tags:
- avx2
before_script:
- apt-get update &&
apt-get install -y zip doxygen git automake libtool make
apt-get install -y zip doxygen git automake libtool make wget
- !reference [.snippets, git_prep]
script:
- ./autogen.sh
- ./configure
- make -j4
- make distcheck
- CFLAGS="-mavx -mfma -mavx2 -O2 -ffast-math" ./configure --enable-float-approx --enable-dred --enable-osce
- make -j16
- DISTCHECK_CONFIGURE_FLAGS="--enable-float-approx --enable-dred --enable-osce CFLAGS='-mavx -mfma -mavx2 -O2'" make distcheck -j16
cache:
paths:
- "src/*.o"
@ -76,32 +78,38 @@ autoconf:
cmake:
stage: build
tags:
- avx2
before_script:
- apt-get update &&
apt-get install -y cmake ninja-build git
apt-get install -y cmake ninja-build git automake libtool wget
- !reference [.snippets, git_prep]
script:
- ./autogen.sh
- mkdir build
- cmake -S . -B build -G "Ninja" -DCMAKE_BUILD_TYPE=Release -DOPUS_BUILD_TESTING=ON -DOPUS_BUILD_PROGRAMS=ON
- cmake -S . -B build -G "Ninja" -DCMAKE_BUILD_TYPE=Release -DOPUS_BUILD_PROGRAMS=ON -DBUILD_TESTING=ON -DOPUS_FAST_MATH=ON -DOPUS_FLOAT_APPROX=ON -DOPUS_DRED=ON -DOPUS_OSCE=ON -DOPUS_X86_PRESUME_AVX2=ON
- cmake --build build
- cd build && ctest --output-on-failure
- cd build && ctest --output-on-failure -j 16
.meson:
image: 'debian:bookworm-slim'
stage: build
before_script:
- apt-get update &&
apt-get install -y ninja-build doxygen meson git
apt-get install -y ninja-build doxygen meson git automake libtool wget
- !reference [.snippets, git_prep]
script:
- ./autogen.sh
- mkdir builddir
- meson setup -Dtests=enabled -Ddocs=enabled -Dbuildtype=release builddir ${MESON_EXTRA_ARGS}
- meson setup -Denable-deep-plc=true -Denable-osce=true -Denable-dred=true -Dtests=enabled -Ddocs=enabled -Dbuildtype=release builddir
- meson compile -C builddir
- meson test -C builddir
#- meson dist --no-tests -C builddir
meson x86_64:
extends: '.meson'
tags:
- avx2
variables:
MESON_EXTRA_ARGS: '--werror'

0
.gitmodules vendored Normal file
View File

View File

@ -1,21 +0,0 @@
language: c
compiler:
- gcc
- clang
os:
- linux
- osx
env:
- CONFIG=""
- CONFIG="--enable-assertions"
- CONFIG="--enable-fixed-point"
- CONFIG="--enable-fixed-point --disable-float-api"
- CONFIG="--enable-fixed-point --enable-assertions"
script:
- ./autogen.sh
- ./configure $CONFIG
- make distcheck

View File

@ -71,6 +71,10 @@ set(OPUS_CHECK_ASM_HELP_STR "enable bit-exactness checks between optimized and c
option(OPUS_CHECK_ASM ${OPUS_CHECK_ASM_HELP_STR} OFF)
add_feature_info(OPUS_CHECK_ASM OPUS_CHECK_ASM ${OPUS_CHECK_ASM_HELP_STR})
set(OPUS_DNN_FLOAT_DEBUG_HELP_STR "Run DNN computations as float for debugging purposes.")
option(OPUS_DNN_FLOAT_DEBUG ${OPUS_DNN_FLOAT_DEBUG_HELP_STR} OFF)
add_feature_info(OPUS_DNN_FLOAT_DEBUG OPUS_DNN_FLOAT_DEBUG ${OPUS_DNN_FLOAT_DEBUG_HELP_STR})
set(OPUS_INSTALL_PKG_CONFIG_MODULE_HELP_STR "install pkg-config module.")
option(OPUS_INSTALL_PKG_CONFIG_MODULE ${OPUS_INSTALL_PKG_CONFIG_MODULE_HELP_STR} ON)
add_feature_info(OPUS_INSTALL_PKG_CONFIG_MODULE OPUS_INSTALL_PKG_CONFIG_MODULE ${OPUS_INSTALL_PKG_CONFIG_MODULE_HELP_STR})
@ -79,6 +83,14 @@ set(OPUS_INSTALL_CMAKE_CONFIG_MODULE_HELP_STR "install CMake package config modu
option(OPUS_INSTALL_CMAKE_CONFIG_MODULE ${OPUS_INSTALL_CMAKE_CONFIG_MODULE_HELP_STR} ON)
add_feature_info(OPUS_INSTALL_CMAKE_CONFIG_MODULE OPUS_INSTALL_CMAKE_CONFIG_MODULE ${OPUS_INSTALL_CMAKE_CONFIG_MODULE_HELP_STR})
set(OPUS_DRED_HELP_STR "enable DRED.")
option(OPUS_DRED ${OPUS_DRED_HELP_STR} OFF)
add_feature_info(OPUS_DRED OPUS_DRED ${OPUS_DRED_HELP_STR})
set(OPUS_OSCE_HELP_STR "enable OSCE.")
option(OPUS_OSCE ${OPUS_OSCE_HELP_STR} OFF)
add_feature_info(OPUS_OSCE OPUS_OSCE ${OPUS_OSCE_HELP_STR})
if(APPLE)
set(OPUS_BUILD_FRAMEWORK_HELP_STR "build Framework bundle for Apple systems.")
option(OPUS_BUILD_FRAMEWORK ${OPUS_BUILD_FRAMEWORK_HELP_STR} OFF)
@ -173,13 +185,13 @@ if(OPUS_CPU_X86 OR OPUS_CPU_X64)
OFF)
add_feature_info(OPUS_X86_MAY_HAVE_SSE4_1 OPUS_X86_MAY_HAVE_SSE4_1 ${OPUS_X86_MAY_HAVE_SSE4_1_HELP_STR})
set(OPUS_X86_MAY_HAVE_AVX_HELP_STR "does runtime check for AVX support.")
cmake_dependent_option(OPUS_X86_MAY_HAVE_AVX
${OPUS_X86_MAY_HAVE_AVX_HELP_STR}
set(OPUS_X86_MAY_HAVE_AVX2_HELP_STR "does runtime check for AVX FMA AVX2 support.")
cmake_dependent_option(OPUS_X86_MAY_HAVE_AVX2
${OPUS_X86_MAY_HAVE_AVX2_HELP_STR}
ON
"AVX_SUPPORTED; NOT OPUS_DISABLE_INTRINSICS"
"AVX2_SUPPORTED; NOT OPUS_DISABLE_INTRINSICS"
OFF)
add_feature_info(OPUS_X86_MAY_HAVE_AVX OPUS_X86_MAY_HAVE_AVX ${OPUS_X86_MAY_HAVE_AVX_HELP_STR})
add_feature_info(OPUS_X86_MAY_HAVE_AVX2 OPUS_X86_MAY_HAVE_AVX2 ${OPUS_X86_MAY_HAVE_AVX2_HELP_STR})
# PRESUME depends on MAY HAVE, but PRESUME will override runtime detection
set(OPUS_X86_PRESUME_SSE_HELP_STR "assume target CPU has SSE1 support (override runtime check).")
@ -220,13 +232,13 @@ if(OPUS_CPU_X86 OR OPUS_CPU_X64)
OFF)
add_feature_info(OPUS_X86_PRESUME_SSE4_1 OPUS_X86_PRESUME_SSE4_1 ${OPUS_X86_PRESUME_SSE4_1_HELP_STR})
set(OPUS_X86_PRESUME_AVX_HELP_STR "assume target CPU has AVX support (override runtime check).")
cmake_dependent_option(OPUS_X86_PRESUME_AVX
${OPUS_X86_PRESUME_AVX_HELP_STR}
set(OPUS_X86_PRESUME_AVX2_HELP_STR "assume target CPU has AVX FMA AVX2 support (override runtime check).")
cmake_dependent_option(OPUS_X86_PRESUME_AVX2
${OPUS_X86_PRESUME_AVX2_HELP_STR}
OFF
"OPUS_X86_MAY_HAVE_AVX; NOT OPUS_DISABLE_INTRINSICS"
"OPUS_X86_MAY_HAVE_AVX2; NOT OPUS_DISABLE_INTRINSICS"
OFF)
add_feature_info(OPUS_X86_PRESUME_AVX OPUS_X86_PRESUME_AVX ${OPUS_X86_PRESUME_AVX_HELP_STR})
add_feature_info(OPUS_X86_PRESUME_AVX2 OPUS_X86_PRESUME_AVX2 ${OPUS_X86_PRESUME_AVX2_HELP_STR})
endif()
feature_summary(WHAT ALL)
@ -274,6 +286,7 @@ target_include_directories(
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/opus>
PRIVATE ${CMAKE_CURRENT_BINARY_DIR}
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/dnn
celt
silk)
@ -309,6 +322,10 @@ if(OPUS_CHECK_ASM)
target_compile_definitions(opus PRIVATE OPUS_CHECK_ASM)
endif()
if(NOT OPUS_DNN_FLOAT_DEBUG)
target_compile_definitions(opus PRIVATE DISABLE_DEBUG_FLOAT)
endif()
if(OPUS_VAR_ARRAYS)
target_compile_definitions(opus PRIVATE VAR_ARRAYS)
elseif(OPUS_USE_ALLOCA)
@ -365,11 +382,33 @@ if(NOT OPUS_ENABLE_FLOAT_API)
target_compile_definitions(opus PRIVATE DISABLE_FLOAT_API)
endif()
if (OPUS_DEEP_PLC OR OPUS_DRED OR OPUS_OSCE)
add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources})
set(OPUS_DNN TRUE)
else()
set(OPUS_DNN FALSE)
endif()
if (OPUS_DNN)
add_sources_group(opus lpcnet ${deep_plc_headers} ${deep_plc_sources})
target_compile_definitions(opus PRIVATE ENABLE_DEEP_PLC)
endif()
if (OPUS_DRED)
add_sources_group(opus lpcnet ${dred_headers} ${dred_sources})
target_compile_definitions(opus PRIVATE ENABLE_DRED)
endif()
if (OPUS_OSCE)
add_sources_group(opus lpcnet ${osce_headers} ${osce_sources})
target_compile_definitions(opus PRIVATE ENABLE_OSCE)
endif()
if(NOT OPUS_DISABLE_INTRINSICS)
if(((OPUS_X86_MAY_HAVE_SSE AND NOT OPUS_X86_PRESUME_SSE) OR
(OPUS_X86_MAY_HAVE_SSE2 AND NOT OPUS_X86_PRESUME_SSE2) OR
(OPUS_X86_MAY_HAVE_SSE4_1 AND NOT OPUS_X86_PRESUME_SSE4_1) OR
(OPUS_X86_MAY_HAVE_AVX AND NOT OPUS_X86_PRESUME_AVX)) AND
(OPUS_X86_MAY_HAVE_AVX2 AND NOT OPUS_X86_PRESUME_AVX2)) AND
RUNTIME_CPU_CAPABILITY_DETECTION)
target_compile_definitions(opus PRIVATE OPUS_HAVE_RTCD)
if(NOT MSVC)
@ -383,6 +422,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
endif()
add_sources_group(opus celt ${celt_sources_x86_rtcd})
add_sources_group(opus silk ${silk_sources_x86_rtcd})
if (OPUS_DNN)
add_sources_group(opus lpcnet ${dnn_sources_x86_rtcd})
endif()
endif()
if(SSE1_SUPPORTED)
@ -404,9 +446,12 @@ if(NOT OPUS_DISABLE_INTRINSICS)
if(SSE2_SUPPORTED)
if(OPUS_X86_MAY_HAVE_SSE2)
add_sources_group(opus celt ${celt_sources_sse2})
if (OPUS_DNN)
add_sources_group(opus lpcnet ${dnn_sources_sse2})
endif()
target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE2)
if(NOT MSVC)
set_source_files_properties(${celt_sources_sse2} PROPERTIES COMPILE_FLAGS -msse2)
set_source_files_properties(${celt_sources_sse2} ${dnn_sources_sse2} PROPERTIES COMPILE_FLAGS -msse2)
endif()
endif()
if(OPUS_X86_PRESUME_SSE2)
@ -421,9 +466,12 @@ if(NOT OPUS_DISABLE_INTRINSICS)
if(OPUS_X86_MAY_HAVE_SSE4_1)
add_sources_group(opus celt ${celt_sources_sse4_1})
add_sources_group(opus silk ${silk_sources_sse4_1})
if (OPUS_DNN)
add_sources_group(opus lpcnet ${dnn_sources_sse4_1})
endif()
target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_SSE4_1)
if(NOT MSVC)
set_source_files_properties(${celt_sources_sse4_1} ${silk_sources_sse4_1} PROPERTIES COMPILE_FLAGS -msse4.1)
set_source_files_properties(${celt_sources_sse4_1} ${silk_sources_sse4_1} ${dnn_sources_sse4_1} PROPERTIES COMPILE_FLAGS -msse4.1)
endif()
if(OPUS_FIXED_POINT)
@ -441,22 +489,37 @@ if(NOT OPUS_DISABLE_INTRINSICS)
endif()
endif()
if(AVX_SUPPORTED)
# mostly placeholder in case of avx intrinsics is added
if(OPUS_X86_MAY_HAVE_AVX)
target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_AVX)
if(AVX2_SUPPORTED)
if(OPUS_X86_MAY_HAVE_AVX2)
add_sources_group(opus celt ${celt_sources_avx2})
add_sources_group(opus silk ${silk_sources_avx2})
add_sources_group(opus silk ${silk_sources_float_avx2})
if (OPUS_DNN)
add_sources_group(opus lpcnet ${dnn_sources_avx2})
endif()
target_compile_definitions(opus PRIVATE OPUS_X86_MAY_HAVE_AVX2)
if(MSVC)
set(AVX2_FLAGS "${AVX2_FLAGS} /arch:AVX2")
else()
set(AVX2_FLAGS "${AVX2_FLAGS} -mavx2 -mfma -mavx")
endif()
set_source_files_properties(${celt_sources_avx2} PROPERTIES COMPILE_FLAGS ${AVX2_FLAGS})
set_source_files_properties(${silk_sources_avx2} PROPERTIES COMPILE_FLAGS ${AVX2_FLAGS})
set_source_files_properties(${silk_sources_float_avx2} PROPERTIES COMPILE_FLAGS ${AVX2_FLAGS})
set_source_files_properties(${dnn_sources_avx2} PROPERTIES COMPILE_FLAGS ${AVX2_FLAGS})
endif()
if(OPUS_X86_PRESUME_AVX)
target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_AVX)
if(OPUS_X86_PRESUME_AVX2)
target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_AVX2)
target_compile_definitions(opus PRIVATE OPUS_X86_PRESUME_SSE4_1)
if(NOT MSVC)
target_compile_options(opus PRIVATE -mavx)
target_compile_options(opus PRIVATE -mavx2 -mfma -mavx)
endif()
endif()
endif()
if(MSVC)
if(AVX_SUPPORTED AND OPUS_X86_PRESUME_AVX) # on 64 bit and 32 bits
add_definitions(/arch:AVX)
if(AVX2_SUPPORTED AND OPUS_X86_PRESUME_AVX2) # on 64 bit and 32 bits
add_definitions(/arch:AVX2)
elseif(OPUS_CPU_X86) # if AVX not supported then set SSE flag
if((SSE4_1_SUPPORTED AND OPUS_X86_PRESUME_SSE4_1)
OR (SSE2_SUPPORTED AND OPUS_X86_PRESUME_SSE2))
@ -486,6 +549,9 @@ if(NOT OPUS_DISABLE_INTRINSICS)
add_sources_group(opus celt ${celt_sources_arm_neon_intr})
add_sources_group(opus silk ${silk_sources_arm_neon_intr})
if (OPUS_DNN)
add_sources_group(opus lpcnet ${dnn_sources_arm_neon})
endif()
# silk arm neon depends on main_Fix.h
target_include_directories(opus PRIVATE silk/fixed)
@ -582,6 +648,7 @@ if(OPUS_BUILD_PROGRAMS)
target_include_directories(opus_demo PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
target_include_directories(opus_demo PRIVATE silk) # debug.h
target_include_directories(opus_demo PRIVATE celt) # arch.h
target_include_directories(opus_demo PRIVATE dnn)
target_link_libraries(opus_demo PRIVATE opus ${OPUS_REQUIRED_LIBRARIES})
target_compile_definitions(opus_demo PRIVATE OPUS_BUILD)
@ -589,10 +656,6 @@ if(OPUS_BUILD_PROGRAMS)
add_executable(opus_compare ${opus_compare_sources})
target_include_directories(opus_compare PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(opus_compare PRIVATE opus ${OPUS_REQUIRED_LIBRARIES})
if(MSVC)
# move cosmetic warning to level 4 for opus_compare
target_compile_options(opus_compare PRIVATE /w44244)
endif()
endif()
if(BUILD_TESTING AND NOT BUILD_SHARED_LIBS)
@ -636,11 +699,32 @@ if(BUILD_TESTING AND NOT BUILD_SHARED_LIBS)
add_executable(test_opus_encode ${test_opus_encode_sources})
target_include_directories(test_opus_encode
PRIVATE ${CMAKE_CURRENT_BINARY_DIR} celt)
PRIVATE ${CMAKE_CURRENT_BINARY_DIR} celt dnn)
target_link_libraries(test_opus_encode PRIVATE opus)
target_compile_definitions(test_opus_encode PRIVATE OPUS_BUILD)
add_test(NAME test_opus_encode COMMAND ${CMAKE_COMMAND}
-DTEST_EXECUTABLE=$<TARGET_FILE:test_opus_encode>
-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}
-P "${PROJECT_SOURCE_DIR}/cmake/RunTest.cmake")
add_executable(test_opus_extensions ${test_opus_extensions_sources})
target_include_directories(test_opus_extensions
PRIVATE ${CMAKE_CURRENT_BINARY_DIR} celt dnn)
target_link_libraries(test_opus_extensions PRIVATE opus)
target_compile_definitions(test_opus_extensions PRIVATE OPUS_BUILD)
add_test(NAME test_opus_extensions COMMAND ${CMAKE_COMMAND}
-DTEST_EXECUTABLE=$<TARGET_FILE:test_opus_extensions>
-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}
-P "${PROJECT_SOURCE_DIR}/cmake/RunTest.cmake")
if(OPUS_DRED)
add_executable(test_opus_dred ${test_opus_dred_sources})
target_include_directories(test_opus_dred
PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
target_link_libraries(test_opus_dred PRIVATE opus)
target_compile_definitions(test_opus_dred PRIVATE OPUS_BUILD)
add_test(NAME test_opus_dred COMMAND ${CMAKE_COMMAND}
-DTEST_EXECUTABLE=$<TARGET_FILE:test_opus_dred>
-DCMAKE_SYSTEM_NAME=${CMAKE_SYSTEM_NAME}
-P "${PROJECT_SOURCE_DIR}/cmake/RunTest.cmake")
endif()
endif()

View File

@ -1,7 +1,7 @@
Copyright 2001-2011 Xiph.Org, Skype Limited, Octasic,
Copyright 2001-2023 Xiph.Org, Skype Limited, Octasic,
Jean-Marc Valin, Timothy B. Terriberry,
CSIRO, Gregory Maxwell, Mark Borgerding,
Erik de Castro Lopo
Erik de Castro Lopo, Mozilla, Amazon
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions

View File

@ -10,12 +10,25 @@ lib_LTLIBRARIES = libopus.la
DIST_SUBDIRS = doc
AM_CPPFLAGS = -I$(top_srcdir)/include -I$(top_srcdir)/celt -I$(top_srcdir)/silk \
-I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed $(NE10_CFLAGS)
-I$(top_srcdir)/silk/float -I$(top_srcdir)/silk/fixed $(NE10_CFLAGS) \
-I$(top_srcdir)/dnn
include celt_sources.mk
include lpcnet_sources.mk
include silk_sources.mk
include opus_sources.mk
LPCNET_SOURCES =
if ENABLE_DEEP_PLC
LPCNET_SOURCES += $(DEEP_PLC_SOURCES)
endif
if ENABLE_DRED
LPCNET_SOURCES += $(DRED_SOURCES)
endif
if ENABLE_OSCE
LPCNET_SOURCES += $(OSCE_SOURCES)
endif
if FIXED_POINT
SILK_SOURCES += $(SILK_SOURCES_FIXED)
if HAVE_SSE4_1
@ -29,6 +42,9 @@ SILK_SOURCES += $(SILK_SOURCES_FLOAT)
if HAVE_SSE4_1
SILK_SOURCES += $(SILK_SOURCES_SSE4_1)
endif
if HAVE_AVX2
SILK_SOURCES += $(SILK_SOURCES_FLOAT_AVX2)
endif
endif
if DISABLE_FLOAT_API
@ -40,15 +56,31 @@ if CPU_X86
if HAVE_RTCD
CELT_SOURCES += $(CELT_SOURCES_X86_RTCD)
SILK_SOURCES += $(SILK_SOURCES_X86_RTCD)
if ENABLE_DEEP_PLC
LPCNET_SOURCES += $(DNN_SOURCES_X86_RTCD)
endif
endif
if HAVE_SSE
CELT_SOURCES += $(CELT_SOURCES_SSE)
endif
if HAVE_SSE2
CELT_SOURCES += $(CELT_SOURCES_SSE2)
if ENABLE_DEEP_PLC
LPCNET_SOURCES += $(DNN_SOURCES_SSE2)
endif
endif
if HAVE_SSE4_1
CELT_SOURCES += $(CELT_SOURCES_SSE4_1)
if ENABLE_DEEP_PLC
LPCNET_SOURCES += $(DNN_SOURCES_SSE4_1)
endif
endif
if HAVE_AVX2
SILK_SOURCES += $(SILK_SOURCES_AVX2)
CELT_SOURCES += $(CELT_SOURCES_AVX2)
if ENABLE_DEEP_PLC
LPCNET_SOURCES += $(DNN_SOURCES_AVX2)
endif
endif
endif
@ -56,6 +88,18 @@ if CPU_ARM
if HAVE_RTCD
CELT_SOURCES += $(CELT_SOURCES_ARM_RTCD)
SILK_SOURCES += $(SILK_SOURCES_ARM_RTCD)
if ENABLE_DEEP_PLC
LPCNET_SOURCES += $(DNN_SOURCES_ARM_RTCD)
endif
endif
if ENABLE_DEEP_PLC
if HAVE_ARM_DOTPROD
LPCNET_SOURCES += $(DNN_SOURCES_DOTPROD)
endif
if HAVE_ARM_NEON_INTR
LPCNET_SOURCES += $(DNN_SOURCES_NEON)
endif
endif
if HAVE_ARM_NEON_INTR
@ -80,10 +124,25 @@ CLEANFILES = $(CELT_SOURCES_ARM_ASM:.s=-gnu.S) \
$(CELT_AM_SOURCES_ARM_ASM:.s.in=-gnu.S)
include celt_headers.mk
include lpcnet_headers.mk
include silk_headers.mk
include opus_headers.mk
libopus_la_SOURCES = $(CELT_SOURCES) $(SILK_SOURCES) $(OPUS_SOURCES)
LPCNET_HEAD =
if ENABLE_DEEP_PLC
LPCNET_HEAD += $(DEEP_PLC_HEAD)
endif
if ENABLE_DRED
LPCNET_HEAD += $(DRED_HEAD)
endif
if ENABLE_OSCE
LPCNET_HEAD += $(OSCE_HEAD)
endif
if ENABLE_LOSSGEN
LPCNET_HEAD += $(LOSSGEN_HEAD)
endif
libopus_la_SOURCES = $(CELT_SOURCES) $(SILK_SOURCES) $(LPCNET_SOURCES) $(OPUS_SOURCES)
libopus_la_LDFLAGS = -no-undefined -version-info @OPUS_LT_CURRENT@:@OPUS_LT_REVISION@:@OPUS_LT_AGE@
libopus_la_LIBADD = $(NE10_LIBS) $(LIBM)
if OPUS_ARM_EXTERNAL_ASM
@ -92,7 +151,7 @@ endif
pkginclude_HEADERS = include/opus.h include/opus_multistream.h include/opus_types.h include/opus_defines.h include/opus_projection.h
noinst_HEADERS = $(OPUS_HEAD) $(SILK_HEAD) $(CELT_HEAD)
noinst_HEADERS = $(OPUS_HEAD) $(SILK_HEAD) $(CELT_HEAD) $(LPCNET_HEAD)
if EXTRA_PROGRAMS
noinst_PROGRAMS = celt/tests/test_unit_cwrs32 \
@ -109,7 +168,9 @@ noinst_PROGRAMS = celt/tests/test_unit_cwrs32 \
silk/tests/test_unit_LPC_inv_pred_gain \
tests/test_opus_api \
tests/test_opus_decode \
tests/test_opus_dred \
tests/test_opus_encode \
tests/test_opus_extensions \
tests/test_opus_padding \
tests/test_opus_projection \
trivial_example
@ -126,10 +187,14 @@ TESTS = celt/tests/test_unit_cwrs32 \
tests/test_opus_api \
tests/test_opus_decode \
tests/test_opus_encode \
tests/test_opus_extensions \
tests/test_opus_padding \
tests/test_opus_projection
opus_demo_SOURCES = src/opus_demo.c
if ENABLE_LOSSGEN
opus_demo_SOURCES += $(LOSSGEN_SOURCES)
endif
opus_demo_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
@ -155,18 +220,28 @@ tests_test_opus_decode_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
tests_test_opus_padding_SOURCES = tests/test_opus_padding.c tests/test_opus_common.h
tests_test_opus_padding_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
tests_test_opus_dred_SOURCES = tests/test_opus_dred.c tests/test_opus_common.h
tests_test_opus_dred_LDADD = libopus.la $(NE10_LIBS) $(LIBM)
CELT_OBJ = $(CELT_SOURCES:.c=.lo)
SILK_OBJ = $(SILK_SOURCES:.c=.lo)
LPCNET_OBJ = $(LPCNET_SOURCES:.c=.lo)
OPUS_OBJ = $(OPUS_SOURCES:.c=.lo)
tests_test_opus_extensions_SOURCES = tests/test_opus_extensions.c tests/test_opus_common.h
tests_test_opus_extensions_LDADD = $(OPUS_OBJ) $(SILK_OBJ) $(LPCNET_OBJ) $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
if OPUS_ARM_EXTERNAL_ASM
tests_test_opus_extensions_LDADD += libarmasm.la
endif
tests_test_opus_projection_SOURCES = tests/test_opus_projection.c tests/test_opus_common.h
tests_test_opus_projection_LDADD = $(OPUS_OBJ) $(SILK_OBJ) $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
tests_test_opus_projection_LDADD = $(OPUS_OBJ) $(SILK_OBJ) $(LPCNET_OBJ) $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
if OPUS_ARM_EXTERNAL_ASM
tests_test_opus_projection_LDADD += libarmasm.la
endif
silk_tests_test_unit_LPC_inv_pred_gain_SOURCES = silk/tests/test_unit_LPC_inv_pred_gain.c
silk_tests_test_unit_LPC_inv_pred_gain_LDADD = $(SILK_OBJ) $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
silk_tests_test_unit_LPC_inv_pred_gain_LDADD = $(SILK_OBJ) $(LPCNET_OBJ) $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
if OPUS_ARM_EXTERNAL_ASM
silk_tests_test_unit_LPC_inv_pred_gain_LDADD += libarmasm.la
endif
@ -175,7 +250,7 @@ celt_tests_test_unit_cwrs32_SOURCES = celt/tests/test_unit_cwrs32.c
celt_tests_test_unit_cwrs32_LDADD = $(LIBM)
celt_tests_test_unit_dft_SOURCES = celt/tests/test_unit_dft.c
celt_tests_test_unit_dft_LDADD = $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
celt_tests_test_unit_dft_LDADD = $(CELT_OBJ) $(LPCNET_OBJ) $(NE10_LIBS) $(LIBM)
if OPUS_ARM_EXTERNAL_ASM
celt_tests_test_unit_dft_LDADD += libarmasm.la
endif
@ -187,19 +262,19 @@ celt_tests_test_unit_laplace_SOURCES = celt/tests/test_unit_laplace.c
celt_tests_test_unit_laplace_LDADD = $(LIBM)
celt_tests_test_unit_mathops_SOURCES = celt/tests/test_unit_mathops.c
celt_tests_test_unit_mathops_LDADD = $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
celt_tests_test_unit_mathops_LDADD = $(CELT_OBJ) $(LPCNET_OBJ) $(NE10_LIBS) $(LIBM)
if OPUS_ARM_EXTERNAL_ASM
celt_tests_test_unit_mathops_LDADD += libarmasm.la
endif
celt_tests_test_unit_mdct_SOURCES = celt/tests/test_unit_mdct.c
celt_tests_test_unit_mdct_LDADD = $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
celt_tests_test_unit_mdct_LDADD = $(CELT_OBJ) $(LPCNET_OBJ) $(NE10_LIBS) $(LIBM)
if OPUS_ARM_EXTERNAL_ASM
celt_tests_test_unit_mdct_LDADD += libarmasm.la
endif
celt_tests_test_unit_rotation_SOURCES = celt/tests/test_unit_rotation.c
celt_tests_test_unit_rotation_LDADD = $(CELT_OBJ) $(NE10_LIBS) $(LIBM)
celt_tests_test_unit_rotation_LDADD = $(CELT_OBJ) $(LPCNET_OBJ) $(NE10_LIBS) $(LIBM)
if OPUS_ARM_EXTERNAL_ASM
celt_tests_test_unit_rotation_LDADD += libarmasm.la
endif
@ -217,6 +292,24 @@ opus_custom_demo_LDADD = libopus.la $(LIBM)
endif
endif
if EXTRA_PROGRAMS
if ENABLE_DEEP_PLC
noinst_PROGRAMS += lpcnet_demo dump_data dump_weights_blob
lpcnet_demo_SOURCES = dnn/lpcnet_demo.c
lpcnet_demo_LDADD = $(LPCNET_OBJ) $(CELT_OBJ) $(LIBM)
dump_data_SOURCES = dnn/dump_data.c
dump_data_LDADD = $(LPCNET_OBJ) $(CELT_OBJ) $(LIBM)
dump_weights_blob_SOURCES = dnn/write_lpcnet_weights.c
dump_weights_blob_LDADD = $(LIBM)
dump_weights_blob_CFLAGS = $(AM_CFLAGS) -DDUMP_BINARY_WEIGHTS
endif
if ENABLE_DRED
TESTS += tests/test_opus_dred
endif
endif
EXTRA_DIST = opus.pc.in \
opus-uninstalled.pc.in \
opus.m4 \
@ -249,21 +342,7 @@ EXTRA_DIST = opus.pc.in \
doc/meson.build \
tests/run_vectors.sh \
celt/arm/arm2gnu.pl \
celt/arm/celt_pitch_xcorr_arm.s \
win32/VS2015/opus.vcxproj \
win32/VS2015/test_opus_encode.vcxproj.filters \
win32/VS2015/test_opus_encode.vcxproj \
win32/VS2015/opus_demo.vcxproj \
win32/VS2015/test_opus_api.vcxproj.filters \
win32/VS2015/test_opus_api.vcxproj \
win32/VS2015/test_opus_decode.vcxproj.filters \
win32/VS2015/opus_demo.vcxproj.filters \
win32/VS2015/opus.vcxproj.filters \
win32/VS2015/test_opus_decode.vcxproj \
win32/VS2015/opus.sln \
win32/VS2015/common.props \
win32/genversion.bat \
win32/config.h
celt/arm/celt_pitch_xcorr_arm.s
pkgconfigdir = $(libdir)/pkgconfig
pkgconfig_DATA = opus.pc
@ -362,21 +441,37 @@ $(SSE_OBJ): CFLAGS += $(OPUS_X86_SSE_CFLAGS)
endif
if HAVE_SSE2
SSE2_OBJ = $(CELT_SOURCES_SSE2:.c=.lo)
SSE2_OBJ = $(CELT_SOURCES_SSE2:.c=.lo) \
$(DNN_SOURCES_SSE2:.c=.lo)
$(SSE2_OBJ): CFLAGS += $(OPUS_X86_SSE2_CFLAGS)
endif
if HAVE_SSE4_1
SSE4_1_OBJ = $(CELT_SOURCES_SSE4_1:.c=.lo) \
$(DNN_SOURCES_SSE4_1:.c=.lo) \
$(SILK_SOURCES_SSE4_1:.c=.lo) \
$(SILK_SOURCES_FIXED_SSE4_1:.c=.lo)
$(SSE4_1_OBJ): CFLAGS += $(OPUS_X86_SSE4_1_CFLAGS)
endif
if HAVE_AVX2
AVX2_OBJ = $(CELT_SOURCES_AVX2:.c=.lo) \
$(SILK_SOURCES_AVX2:.c=.lo) \
$(SILK_SOURCES_FLOAT_AVX2:.c=.lo) \
$(DNN_SOURCES_AVX2:.c=.lo)
$(AVX2_OBJ): CFLAGS += $(OPUS_X86_AVX2_CFLAGS)
endif
if HAVE_ARM_NEON_INTR
ARM_NEON_INTR_OBJ = $(CELT_SOURCES_ARM_NEON_INTR:.c=.lo) \
$(SILK_SOURCES_ARM_NEON_INTR:.c=.lo) \
$(DNN_SOURCES_NEON:.c=.lo) \
$(SILK_SOURCES_FIXED_ARM_NEON_INTR:.c=.lo)
$(ARM_NEON_INTR_OBJ): CFLAGS += \
$(OPUS_ARM_NEON_INTR_CFLAGS) $(NE10_CFLAGS)
endif
if HAVE_ARM_DOTPROD
ARM_DOTPROD_OBJ = $(DNN_SOURCES_DOTPROD:.c=.lo)
$(ARM_DOTPROD_OBJ): CFLAGS += $(ARM_DOTPROD_INTR_CFLAGS)
endif

View File

@ -102,13 +102,16 @@ TESTOPUSDECODE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSDECODE_SRCS_C))
TESTOPUSENCODE_SRCS_C = tests/test_opus_encode.c tests/opus_encode_regressions.c
TESTOPUSENCODE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSENCODE_SRCS_C))
TESTOPUSEXTENSIONS_SRCS_C = tests/test_opus_extensions.c
TESTOPUSEXTENSIONS_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSEXTENSIONS_SRCS_C))
TESTOPUSPADDING_SRCS_C = tests/test_opus_padding.c
TESTOPUSPADDING_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSPADDING_SRCS_C))
OPUSCOMPARE_SRCS_C = src/opus_compare.c
OPUSCOMPARE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(OPUSCOMPARE_SRCS_C))
TESTS := test_opus_api test_opus_decode test_opus_encode test_opus_padding
TESTS := test_opus_api test_opus_decode test_opus_encode test_opus_extensions test_opus_padding
# Rules
all: lib opus_demo opus_compare $(TESTS)
@ -133,6 +136,9 @@ test_opus_decode$(EXESUFFIX): $(TESTOPUSDECODE_OBJS) $(TARGET)
test_opus_encode$(EXESUFFIX): $(TESTOPUSENCODE_OBJS) $(TARGET)
$(LINK.o.cmdline)
test_opus_extensions$(EXESUFFIX): $(TESTOPUSEXTENSIONS_OBJS) $(TARGET)
$(LINK.o.cmdline)
test_opus_padding$(EXESUFFIX): $(TESTOPUSPADDING_OBJS) $(TARGET)
$(LINK.o.cmdline)
@ -154,8 +160,10 @@ force:
clean:
rm -f opus_demo$(EXESUFFIX) opus_compare$(EXESUFFIX) $(TARGET) \
test_opus_api$(EXESUFFIX) test_opus_decode$(EXESUFFIX) \
test_opus_encode$(EXESUFFIX) test_opus_padding$(EXESUFFIX) \
test_opus_encode$(EXESUFFIX) test_opus_extensions$(EXESUFFIX) \
test_opus_padding$(EXESUFFIX)
$(OBJS) $(OPUSDEMO_OBJS) $(OPUSCOMPARE_OBJS) $(TESTOPUSAPI_OBJS) \
$(TESTOPUSDECODE_OBJS) $(TESTOPUSENCODE_OBJS) $(TESTOPUSPADDING_OBJS)
$(TESTOPUSDECODE_OBJS) $(TESTOPUSENCODE_OBJS) \
$(TESTOPUSEXTENSIONS_OBJS) $(TESTOPUSPADDING_OBJS)
.PHONY: all lib clean force check

View File

@ -100,13 +100,16 @@ TESTOPUSDECODE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSDECODE_SRCS_C))
TESTOPUSENCODE_SRCS_C = tests/test_opus_encode.c tests/opus_encode_regressions.c
TESTOPUSENCODE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSENCODE_SRCS_C))
TESTOPUSEXTENSIONS_SRCS_C = tests/test_opus_extensions.c
TESTOPUSEXTENSIONS_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSEXTENSIONS_SRCS_C))
TESTOPUSPADDING_SRCS_C = tests/test_opus_padding.c
TESTOPUSPADDING_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(TESTOPUSPADDING_SRCS_C))
OPUSCOMPARE_SRCS_C = src/opus_compare.c
OPUSCOMPARE_OBJS := $(patsubst %.c,%$(OBJSUFFIX),$(OPUSCOMPARE_SRCS_C))
TESTS := test_opus_api test_opus_decode test_opus_encode test_opus_padding
TESTS := test_opus_api test_opus_decode test_opus_encode test_opus_extensions test_opus_padding
# Rules
all: lib opus_demo opus_compare $(TESTS)
@ -131,6 +134,9 @@ test_opus_decode$(EXESUFFIX): $(TESTOPUSDECODE_OBJS) $(TARGET)
test_opus_encode$(EXESUFFIX): $(TESTOPUSENCODE_OBJS) $(TARGET)
$(LINK.o.cmdline)
test_opus_extensions$(EXESUFFIX): $(TESTOPUSEXTENSIONS_OBJS) $(TARGET)
$(LINK.o.cmdline)
test_opus_padding$(EXESUFFIX): $(TESTOPUSPADDING_OBJS) $(TARGET)
$(LINK.o.cmdline)
@ -152,8 +158,10 @@ force:
clean:
rm -f opus_demo$(EXESUFFIX) opus_compare$(EXESUFFIX) $(TARGET) \
test_opus_api$(EXESUFFIX) test_opus_decode$(EXESUFFIX) \
test_opus_encode$(EXESUFFIX) test_opus_padding$(EXESUFFIX) \
test_opus_encode$(EXESUFFIX) test_opus_extensions$(EXESUFFIX) \
test_opus_padding$(EXESUFFIX)
$(OBJS) $(OPUSDEMO_OBJS) $(OPUSCOMPARE_OBJS) $(TESTOPUSAPI_OBJS) \
$(TESTOPUSDECODE_OBJS) $(TESTOPUSENCODE_OBJS) $(TESTOPUSPADDING_OBJS)
$(TESTOPUSDECODE_OBJS) $(TESTOPUSENCODE_OBJS) \
$(TESTOPUSEXTENSIONS_OBJS) $(TESTOPUSPADDING_OBJS)
.PHONY: all lib clean force check

6
README
View File

@ -77,6 +77,8 @@ On Apple macOS, install Xcode and brew.sh, then in the Terminal enter:
% ./configure
% make
On x86, it's a good idea to use a -march= option that allows the use of AVX2.
3) Install the codec libraries (optional)
% sudo make install
@ -133,6 +135,10 @@ To run compare the code to these test vectors:
% tar -zxf opus_testvectors-rfc8251.tar.gz
% ./tests/run_vectors.sh ./ opus_newvectors 48000
== Compiling libopus for Windows and alternative build systems ==
See cmake/README.md or meson/README.md.
== Portability notes ==
This implementation uses floating-point by default but can be compiled to

13
autogen.bat Normal file
View File

@ -0,0 +1,13 @@
@echo off
REM Run this to set up the build system: configure, makefiles, etc.
setlocal enabledelayedexpansion
REM Parse the real autogen.sh script for version: findstr picks the line of
REM autogen.sh that mentions dnn/download_model.sh, and the for /F loop keeps
REM the second space-separated token of that line in %model%.
for /F "tokens=2 delims= " %%A in ('findstr "dnn/download_model.sh" autogen.sh') do (
set "model=%%A"
)
REM Pass the extracted token to dnn\download_model.bat as its only argument.
call dnn\download_model.bat %model%
echo Updating build configuration files, please wait....

View File

@ -9,6 +9,8 @@ set -e
srcdir=`dirname $0`
test -n "$srcdir" && cd "$srcdir"
dnn/download_model.sh ec04a94
echo "Updating build configuration files, please wait...."
autoreconf -isf

View File

@ -40,7 +40,8 @@ opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, c
celt_inner_prod_c, /* ARMv4 */
celt_inner_prod_c, /* EDSP */
celt_inner_prod_c, /* Media */
celt_inner_prod_neon /* NEON */
celt_inner_prod_neon,/* NEON */
celt_inner_prod_neon /* DOTPROD */
};
void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const opus_val16 *y01, const opus_val16 *y02,
@ -48,7 +49,8 @@ void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *x, const o
dual_inner_prod_c, /* ARMv4 */
dual_inner_prod_c, /* EDSP */
dual_inner_prod_c, /* Media */
dual_inner_prod_neon /* NEON */
dual_inner_prod_neon,/* NEON */
dual_inner_prod_neon /* DOTPROD */
};
# endif
@ -61,7 +63,8 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
celt_pitch_xcorr_c, /* ARMv4 */
MAY_HAVE_EDSP(celt_pitch_xcorr), /* EDSP */
MAY_HAVE_MEDIA(celt_pitch_xcorr), /* Media */
MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */
MAY_HAVE_NEON(celt_pitch_xcorr), /* NEON */
MAY_HAVE_NEON(celt_pitch_xcorr) /* DOTPROD */
};
# endif
@ -72,7 +75,8 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
celt_pitch_xcorr_c, /* ARMv4 */
celt_pitch_xcorr_c, /* EDSP */
celt_pitch_xcorr_c, /* Media */
celt_pitch_xcorr_float_neon /* Neon */
celt_pitch_xcorr_float_neon, /* Neon */
celt_pitch_xcorr_float_neon /* DOTPROD */
};
# endif
# endif /* FIXED_POINT */
@ -90,6 +94,7 @@ void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
xcorr_kernel_c, /* EDSP */
xcorr_kernel_c, /* Media */
xcorr_kernel_neon_fixed, /* Neon */
xcorr_kernel_neon_fixed /* DOTPROD */
};
#endif
@ -101,14 +106,16 @@ int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_alloc_arch_c, /* ARMv4 */
opus_fft_alloc_arch_c, /* EDSP */
opus_fft_alloc_arch_c, /* Media */
opus_fft_alloc_arm_neon /* Neon with NE10 library support */
opus_fft_alloc_arm_neon, /* Neon with NE10 library support */
opus_fft_alloc_arm_neon /* DOTPROD with NE10 library support */
};
void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
opus_fft_free_arch_c, /* ARMv4 */
opus_fft_free_arch_c, /* EDSP */
opus_fft_free_arch_c, /* Media */
opus_fft_free_arm_neon /* Neon with NE10 */
opus_fft_free_arm_neon, /* Neon with NE10 */
opus_fft_free_arm_neon /* DOTPROD with NE10 */
};
# endif /* CUSTOM_MODES */
@ -118,7 +125,8 @@ void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
opus_fft_c, /* ARMv4 */
opus_fft_c, /* EDSP */
opus_fft_c, /* Media */
opus_fft_neon /* Neon with NE10 */
opus_fft_neon, /* Neon with NE10 */
opus_fft_neon /* DOTPROD with NE10 */
};
void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
@ -127,7 +135,8 @@ void (*const OPUS_IFFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
opus_ifft_c, /* ARMv4 */
opus_ifft_c, /* EDSP */
opus_ifft_c, /* Media */
opus_ifft_neon /* Neon with NE10 */
opus_ifft_neon, /* Neon with NE10 */
opus_ifft_neon /* DOTPROD with NE10 */
};
void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
@ -139,7 +148,8 @@ void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
clt_mdct_forward_c, /* ARMv4 */
clt_mdct_forward_c, /* EDSP */
clt_mdct_forward_c, /* Media */
clt_mdct_forward_neon /* Neon with NE10 */
clt_mdct_forward_neon, /* Neon with NE10 */
clt_mdct_forward_neon /* DOTPROD with NE10 */
};
void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
@ -151,7 +161,8 @@ void (*const CLT_MDCT_BACKWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
clt_mdct_backward_c, /* ARMv4 */
clt_mdct_backward_c, /* EDSP */
clt_mdct_backward_c, /* Media */
clt_mdct_backward_neon /* Neon with NE10 */
clt_mdct_backward_neon, /* Neon with NE10 */
clt_mdct_backward_neon /* DOTPROD with NE10 */
};
# endif /* HAVE_ARM_NE10 */

View File

@ -43,6 +43,7 @@
#define OPUS_CPU_ARM_EDSP_FLAG (1<<OPUS_ARCH_ARM_EDSP)
#define OPUS_CPU_ARM_MEDIA_FLAG (1<<OPUS_ARCH_ARM_MEDIA)
#define OPUS_CPU_ARM_NEON_FLAG (1<<OPUS_ARCH_ARM_NEON)
#define OPUS_CPU_ARM_DOTPROD_FLAG (1<<OPUS_ARCH_ARM_DOTPROD)
#if defined(_MSC_VER)
/*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/
@ -126,6 +127,14 @@ opus_uint32 opus_cpu_capabilities(void)
p = strstr(buf, " neon");
if(p != NULL && (p[5] == ' ' || p[5] == '\n'))
flags |= OPUS_CPU_ARM_NEON_FLAG;
p = strstr(buf, " asimd");
if(p != NULL && (p[6] == ' ' || p[6] == '\n'))
flags |= OPUS_CPU_ARM_NEON_FLAG | OPUS_CPU_ARM_MEDIA_FLAG | OPUS_CPU_ARM_EDSP_FLAG;
# endif
# if defined(OPUS_ARM_MAY_HAVE_DOTPROD)
p = strstr(buf, " asimddp");
if(p != NULL && (p[8] == ' ' || p[8] == '\n'))
flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
# endif
}
# endif
@ -144,6 +153,13 @@ opus_uint32 opus_cpu_capabilities(void)
# endif
}
#if defined(OPUS_ARM_PRESUME_AARCH64_NEON_INTR)
flags |= OPUS_CPU_ARM_EDSP_FLAG | OPUS_CPU_ARM_MEDIA_FLAG | OPUS_CPU_ARM_NEON_FLAG;
# if defined(OPUS_ARM_PRESUME_DOTPROD)
flags |= OPUS_CPU_ARM_DOTPROD_FLAG;
# endif
#endif
fclose(cpuinfo);
}
return flags;
@ -180,7 +196,13 @@ static int opus_select_arch_impl(void)
}
arch++;
celt_assert(arch == OPUS_ARCH_ARM_NEON);
if(!(flags & OPUS_CPU_ARM_DOTPROD_FLAG)) {
celt_assert(arch == OPUS_ARCH_ARM_NEON);
return arch;
}
arch++;
celt_assert(arch == OPUS_ARCH_ARM_DOTPROD);
return arch;
}

View File

@ -46,6 +46,12 @@
# define MAY_HAVE_NEON(name) MAY_HAVE_MEDIA(name)
# endif
/* MAY_HAVE_DOTPROD(name) expands to name##_dotprod when
   OPUS_ARM_MAY_HAVE_DOTPROD is defined; otherwise it falls back to whatever
   MAY_HAVE_NEON(name) expands to. */
# if defined(OPUS_ARM_MAY_HAVE_DOTPROD)
# define MAY_HAVE_DOTPROD(name) name ## _dotprod
# else
# define MAY_HAVE_DOTPROD(name) MAY_HAVE_NEON(name)
# endif
# if defined(OPUS_ARM_PRESUME_EDSP)
# define PRESUME_EDSP(name) name ## _edsp
# else
@ -64,6 +70,12 @@
# define PRESUME_NEON(name) PRESUME_MEDIA(name)
# endif
/* PRESUME_DOTPROD(name) expands to name##_dotprod when
   OPUS_ARM_PRESUME_DOTPROD is defined; otherwise it falls back to whatever
   PRESUME_NEON(name) expands to. */
# if defined(OPUS_ARM_PRESUME_DOTPROD)
# define PRESUME_DOTPROD(name) name ## _dotprod
# else
# define PRESUME_DOTPROD(name) PRESUME_NEON(name)
# endif
# if defined(OPUS_HAVE_RTCD)
int opus_select_arch(void);
@ -71,6 +83,7 @@ int opus_select_arch(void);
#define OPUS_ARCH_ARM_EDSP (1)
#define OPUS_ARCH_ARM_MEDIA (2)
#define OPUS_ARCH_ARM_NEON (3)
#define OPUS_ARCH_ARM_DOTPROD (4)
# endif

View File

@ -97,6 +97,14 @@ void xcorr_kernel_neon_fixed(const opus_val16 * x, const opus_val16 * y, opus_va
}
#else
#if defined(__ARM_FEATURE_FMA) && defined(__ARM_ARCH_ISA_A64)
/* If we can, force the compiler to use an FMA instruction rather than break
* vmlaq_lane_f32() into fmul/fadd: every vmlaq_lane_f32() call below this
* point expands to vfmaq_lane_f32() with the same four arguments.
* NOTE(review): the additional __ARM_ARCH_ISA_A64 check presumably reflects
* vfmaq_lane_f32() being available only on AArch64 -- confirm against ACLE. */
#define vmlaq_lane_f32(a,b,c,lane) vfmaq_lane_f32(a,b,c,lane)
#endif
/*
* Function: xcorr_kernel_neon_float
* ---------------------------------

View File

@ -130,6 +130,13 @@ void dual_inner_prod_neon(const opus_val16 *x, const opus_val16 *y01, const opus
/* ========================================================================== */
#ifdef __ARM_FEATURE_FMA
/* If we can, force the compiler to use an FMA instruction rather than break
vmlaq_f32() into fmul/fadd. Every vmlaq_f32() call below this point then
expands to vfmaq_f32() with the same three arguments. */
#define vmlaq_f32(a,b,c) vfmaq_f32(a,b,c)
#endif
#ifdef OPUS_CHECK_ASM
/* This part of code simulates floating-point NEON operations. */

View File

@ -1450,7 +1450,7 @@ void quant_all_bands(int encode, const CELTMode *m, int start, int end,
if (encode && resynth)
lowband_scratch = _lowband_scratch;
else
lowband_scratch = X_+M*eBands[m->nbEBands-1];
lowband_scratch = X_+M*eBands[m->effEBands-1];
ALLOC(X_save, resynth_alloc, celt_norm);
ALLOC(Y_save, resynth_alloc, celt_norm);
ALLOC(X_save2, resynth_alloc, celt_norm);

View File

@ -41,6 +41,7 @@
#include "entenc.h"
#include "entdec.h"
#include "arch.h"
#include "lpcnet.h"
#ifdef __cplusplus
extern "C" {
@ -149,6 +150,13 @@ int celt_decoder_get_size(int channels);
int celt_decoder_init(CELTDecoder *st, opus_int32 sampling_rate, int channels);
int celt_decode_with_ec_dred(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum
#ifdef ENABLE_DEEP_PLC
,LPCNetPLCState *lpcnet
#endif
);
int celt_decode_with_ec(OpusCustomDecoder * OPUS_RESTRICT st, const unsigned char *data,
int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum);
@ -225,23 +233,13 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
opus_val16 g0, opus_val16 g1, int tapset0, int tapset1,
const opus_val16 *window, int overlap, int arch);
#ifdef NON_STATIC_COMB_FILTER_CONST_C
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12);
#endif
#ifndef OVERRIDE_COMB_FILTER_CONST
# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12))
#endif
void init_caps(const CELTMode *m,int *cap,int LM,int C);
#ifdef RESYNTH
void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem);
void deemphasis(celt_sig *in[], opus_val16 *pcm, int N, int C, int downsample, const opus_val16 *coef, celt_sig *mem, int accum);
void celt_synthesis(const CELTMode *mode, celt_norm *X, celt_sig * out_syn[],
opus_val16 *oldBandE, int start, int effEnd, int C, int CC, int isTransient,
int LM, int downsample, int silence);
int LM, int downsample, int silence, int arch);
#endif
#ifdef __cplusplus

View File

@ -51,6 +51,11 @@
#include "celt_lpc.h"
#include "vq.h"
#ifdef ENABLE_DEEP_PLC
#include "lpcnet.h"
#include "lpcnet_private.h"
#endif
/* The maximum pitch lag to allow in the pitch-based PLC. It's possible to save
CPU time in the PLC pitch search by making this smaller than MAX_PERIOD. The
current value corresponds to a pitch of 66.67 Hz. */
@ -69,6 +74,9 @@
/**********************************************************************/
#define DECODE_BUFFER_SIZE 2048
#define PLC_UPDATE_FRAMES 4
#define PLC_UPDATE_SAMPLES (PLC_UPDATE_FRAMES*FRAME_SIZE)
/** Decoder state
@brief Decoder state
*/
@ -82,6 +90,7 @@ struct OpusCustomDecoder {
int start, end;
int signalling;
int disable_inv;
int complexity;
int arch;
/* Everything beyond this point gets cleared on a reset */
@ -98,11 +107,18 @@ struct OpusCustomDecoder {
opus_val16 postfilter_gain_old;
int postfilter_tapset;
int postfilter_tapset_old;
int prefilter_and_fold;
celt_sig preemph_memD[2];
#ifdef ENABLE_DEEP_PLC
opus_int16 plc_pcm[PLC_UPDATE_SAMPLES];
int plc_fill;
float plc_preemphasis_mem;
#endif
celt_sig _decode_mem[1]; /* Size = channels*(DECODE_BUFFER_SIZE+mode->overlap) */
/* opus_val16 lpc[], Size = channels*LPC_ORDER */
/* opus_val16 lpc[], Size = channels*CELT_LPC_ORDER */
/* opus_val16 oldEBands[], Size = 2*mode->nbEBands */
/* opus_val16 oldLogE[], Size = 2*mode->nbEBands */
/* opus_val16 oldLogE2[], Size = 2*mode->nbEBands */
@ -157,7 +173,7 @@ OPUS_CUSTOM_NOSTATIC int opus_custom_decoder_get_size(const CELTMode *mode, int
{
int size = sizeof(struct CELTDecoder)
+ (channels*(DECODE_BUFFER_SIZE+mode->overlap)-1)*sizeof(celt_sig)
+ channels*LPC_ORDER*sizeof(opus_val16)
+ channels*CELT_LPC_ORDER*sizeof(opus_val16)
+ 4*2*mode->nbEBands*sizeof(opus_val16);
return size;
}
@ -499,7 +515,100 @@ static int celt_plc_pitch_search(celt_sig *decode_mem[2], int C, int arch)
return pitch_index;
}
static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
/* For every channel of the decoder, rewrites the first overlap/2 samples of
   the region starting N samples before the end of the decode buffer so that
   it can be overlap-added with the next MDCT frame. */
static void prefilter_and_fold(CELTDecoder * OPUS_RESTRICT st, int N)
{
   const OpusCustomMode *mode = st->mode;
   const int overlap = st->overlap;
   const int nb_channels = st->channels;
   int ch;
   VARDECL(opus_val32, etmp);
   ALLOC(etmp, overlap, opus_val32);
   ch = 0;
   do {
      int k;
      /* Start of the region to process in this channel's decode buffer. */
      celt_sig *frame = st->_decode_mem + ch*(DECODE_BUFFER_SIZE+overlap)
            + (DECODE_BUFFER_SIZE-N);
      /* Run the comb filter with negated post-filter gains (i.e. the
         pre-filter) over the overlap: the decoder re-applies the post-filter
         after the MDCT overlap of the next frame. */
      comb_filter(etmp, frame,
            st->postfilter_period_old, st->postfilter_period, overlap,
            -st->postfilter_gain_old, -st->postfilter_gain,
            st->postfilter_tapset_old, st->postfilter_tapset, NULL, 0, st->arch);
      /* Fold the filtered overlap onto itself with the mode window to
         simulate TDAC, so the concealed audio blends with the next MDCT. */
      for (k=0;k<overlap/2;k++)
      {
         frame[k] = MULT16_32_Q15(mode->window[k], etmp[overlap-1-k])
                  + MULT16_32_Q15(mode->window[overlap-k-1], etmp[k]);
      }
   } while (++ch<nb_channels);
}
#ifdef ENABLE_DEEP_PLC
/* Order of the resampling filter below; the filter has SINC_ORDER+1 taps. */
#define SINC_ORDER 48
/* Symmetric windowed-sinc low-pass filter, normalised so its taps sum to 1.
   It is applied with a stride of 3 samples when converting between the
   decoder's buffer and the 16-bit PLC buffers (down-sampling in
   update_plc_state(), up-sampling in the deep-PLC branch of
   celt_decode_lost()).
   Generated with the following Matlab/Octave code:
h=cos(pi/2*abs(sin([-24:24]/48*pi*23./24)).^2);
b=sinc([-24:24]/3*1.02).*h;
b=b/sum(b); */
static const float sinc_filter[SINC_ORDER+1] = {
4.2931e-05f, -0.000190293f, -0.000816132f, -0.000637162f, 0.00141662f, 0.00354764f, 0.00184368f, -0.00428274f,
-0.00856105f, -0.0034003f, 0.00930201f, 0.0159616f, 0.00489785f, -0.0169649f, -0.0259484f, -0.00596856f,
0.0286551f, 0.0405872f, 0.00649994f, -0.0509284f, -0.0716655f, -0.00665212f, 0.134336f, 0.278927f,
0.339995f, 0.278927f, 0.134336f, -0.00665212f, -0.0716655f, -0.0509284f, 0.00649994f, 0.0405872f,
0.0286551f, -0.00596856f, -0.0259484f, -0.0169649f, 0.00489785f, 0.0159616f, 0.00930201f, -0.0034003f,
-0.00856105f, -0.00428274f, 0.00184368f, 0.00354764f, 0.00141662f, -0.000637162f, -0.000816132f, -0.000190293f,
4.2931e-05f
};
/* Feeds the most recently decoded audio to the LPCNet PLC state.
   The decode buffer is reduced to one channel (channel 0 when CC == 1,
   otherwise the average of channels 0 and 1), run through the recursion
   buf[i] += PREEMPHASIS*buf[i-1], filtered with sinc_filter and decimated by
   3 into PLC_UPDATE_SAMPLES 16-bit samples. Those samples are passed to
   lpcnet_plc_update() one FRAME_SIZE frame at a time.
   lpcnet:              PLC state to update. Its fec_read_pos and fec_skip
                        fields are saved before and restored after the update.
   decode_mem:          per-channel decode buffers; the first
                        DECODE_BUFFER_SIZE samples of each are read.
   plc_preemphasis_mem: receives the last sample of the recursion output.
   CC:                  number of channels. */
void update_plc_state(LPCNetPLCState *lpcnet, celt_sig *decode_mem[2], float *plc_preemphasis_mem, int CC)
{
   int i;
   int saved_fec_read_pos, saved_fec_skip;
   int offset;
   celt_sig buf48k[DECODE_BUFFER_SIZE];
   opus_int16 buf16k[PLC_UPDATE_SAMPLES];
   if (CC == 1) OPUS_COPY(buf48k, decode_mem[0], DECODE_BUFFER_SIZE);
   else {
      for (i=0;i<DECODE_BUFFER_SIZE;i++) {
         buf48k[i] = .5*(decode_mem[0][i] + decode_mem[1][i]);
      }
   }
   /* Down-sample the last 40 ms. */
   for (i=1;i<DECODE_BUFFER_SIZE;i++) buf48k[i] += PREEMPHASIS*buf48k[i-1];
   *plc_preemphasis_mem = buf48k[DECODE_BUFFER_SIZE-1];
   /* Choose the offset so that the last filter tap of the last output sample
      lands on the last sample of the buffer. */
   offset = DECODE_BUFFER_SIZE-SINC_ORDER-1 - 3*(PLC_UPDATE_SAMPLES-1);
   celt_assert(3*(PLC_UPDATE_SAMPLES-1) + SINC_ORDER + offset == DECODE_BUFFER_SIZE-1);
   for (i=0;i<PLC_UPDATE_SAMPLES;i++) {
      int j;
      float sum = 0;
      for (j=0;j<SINC_ORDER+1;j++) {
         sum += buf48k[3*i + j + offset]*sinc_filter[j];
      }
      /* Saturate before narrowing: converting a float whose value does not
         fit in the destination integer type is undefined behaviour. */
      if (sum > 32767.f) sum = 32767.f;
      else if (sum < -32768.f) sum = -32768.f;
      buf16k[i] = (opus_int16)sum;
   }
   /* Updating the PLC state must not disturb the FEC read position. */
   saved_fec_read_pos = lpcnet->fec_read_pos;
   saved_fec_skip = lpcnet->fec_skip;
   for (i=0;i<PLC_UPDATE_FRAMES;i++) {
      lpcnet_plc_update(lpcnet, &buf16k[FRAME_SIZE*i]);
   }
   lpcnet->fec_read_pos = saved_fec_read_pos;
   lpcnet->fec_skip = saved_fec_skip;
}
#endif
static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM
#ifdef ENABLE_DEEP_PLC
,LPCNetPLCState *lpcnet
#endif
)
{
int c;
int i;
@ -527,14 +636,18 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
out_syn[c] = decode_mem[c]+DECODE_BUFFER_SIZE-N;
} while (++c<C);
lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*C);
oldBandE = lpc+C*LPC_ORDER;
oldBandE = lpc+C*CELT_LPC_ORDER;
oldLogE = oldBandE + 2*nbEBands;
oldLogE2 = oldLogE + 2*nbEBands;
backgroundLogE = oldLogE2 + 2*nbEBands;
loss_duration = st->loss_duration;
start = st->start;
#ifdef ENABLE_DEEP_PLC
noise_based = start != 0 || (lpcnet->fec_fill_pos == 0 && (st->skip_plc || loss_duration >= 80));
#else
noise_based = loss_duration >= 40 || start != 0 || st->skip_plc;
#endif
if (noise_based)
{
/* Noise-based PLC/CNG */
@ -559,9 +672,13 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
#endif
c=0; do {
OPUS_MOVE(decode_mem[c], decode_mem[c]+N,
DECODE_BUFFER_SIZE-N+(overlap>>1));
DECODE_BUFFER_SIZE-N+overlap);
} while (++c<C);
if (st->prefilter_and_fold) {
prefilter_and_fold(st, N);
}
/* Energy decay */
decay = loss_duration==0 ? QCONST16(1.5f, DB_SHIFT) : QCONST16(.5f, DB_SHIFT);
c=0; do
@ -590,6 +707,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
st->rng = seed;
celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd, C, C, 0, LM, st->downsample, 0, st->arch);
st->prefilter_and_fold = 0;
/* Skip regular PLC until we get two consecutive packets. */
st->skip_plc = 1;
} else {
int exc_length;
/* Pitch-based PLC */
@ -597,12 +717,14 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
opus_val16 *exc;
opus_val16 fade = Q15ONE;
int pitch_index;
VARDECL(opus_val32, etmp);
VARDECL(opus_val16, _exc);
VARDECL(opus_val16, fir_tmp);
if (loss_duration == 0)
{
#ifdef ENABLE_DEEP_PLC
if (lpcnet->loaded) update_plc_state(lpcnet, decode_mem, &st->plc_preemphasis_mem, C);
#endif
st->last_pitch_index = pitch_index = celt_plc_pitch_search(decode_mem, C, st->arch);
} else {
pitch_index = st->last_pitch_index;
@ -613,10 +735,9 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
decaying signal, but we can't get more than MAX_PERIOD. */
exc_length = IMIN(2*pitch_index, MAX_PERIOD);
ALLOC(etmp, overlap, opus_val32);
ALLOC(_exc, MAX_PERIOD+LPC_ORDER, opus_val16);
ALLOC(_exc, MAX_PERIOD+CELT_LPC_ORDER, opus_val16);
ALLOC(fir_tmp, exc_length, opus_val16);
exc = _exc+LPC_ORDER;
exc = _exc+CELT_LPC_ORDER;
window = mode->window;
c=0; do {
opus_val16 decay;
@ -628,16 +749,16 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
int j;
buf = decode_mem[c];
for (i=0;i<MAX_PERIOD+LPC_ORDER;i++)
exc[i-LPC_ORDER] = SROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD-LPC_ORDER+i], SIG_SHIFT);
for (i=0;i<MAX_PERIOD+CELT_LPC_ORDER;i++)
exc[i-CELT_LPC_ORDER] = SROUND16(buf[DECODE_BUFFER_SIZE-MAX_PERIOD-CELT_LPC_ORDER+i], SIG_SHIFT);
if (loss_duration == 0)
{
opus_val32 ac[LPC_ORDER+1];
opus_val32 ac[CELT_LPC_ORDER+1];
/* Compute LPC coefficients for the last MAX_PERIOD samples before
the first loss so we can work in the excitation-filter domain. */
_celt_autocorr(exc, ac, window, overlap,
LPC_ORDER, MAX_PERIOD, st->arch);
CELT_LPC_ORDER, MAX_PERIOD, st->arch);
/* Add a noise floor of -40 dB. */
#ifdef FIXED_POINT
ac[0] += SHR32(ac[0],13);
@ -645,7 +766,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
ac[0] *= 1.0001f;
#endif
/* Use lag windowing to stabilize the Levinson-Durbin recursion. */
for (i=1;i<=LPC_ORDER;i++)
for (i=1;i<=CELT_LPC_ORDER;i++)
{
/*ac[i] *= exp(-.5*(2*M_PI*.002*i)*(2*M_PI*.002*i));*/
#ifdef FIXED_POINT
@ -654,7 +775,7 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
ac[i] -= ac[i]*(0.008f*0.008f)*i*i;
#endif
}
_celt_lpc(lpc+c*LPC_ORDER, ac, LPC_ORDER);
_celt_lpc(lpc+c*CELT_LPC_ORDER, ac, CELT_LPC_ORDER);
#ifdef FIXED_POINT
/* For fixed-point, apply bandwidth expansion until we can guarantee that
no overflow can happen in the IIR filter. This means:
@ -662,13 +783,13 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
while (1) {
opus_val16 tmp=Q15ONE;
opus_val32 sum=QCONST16(1., SIG_SHIFT);
for (i=0;i<LPC_ORDER;i++)
sum += ABS16(lpc[c*LPC_ORDER+i]);
for (i=0;i<CELT_LPC_ORDER;i++)
sum += ABS16(lpc[c*CELT_LPC_ORDER+i]);
if (sum < 65535) break;
for (i=0;i<LPC_ORDER;i++)
for (i=0;i<CELT_LPC_ORDER;i++)
{
tmp = MULT16_16_Q15(QCONST16(.99f,15), tmp);
lpc[c*LPC_ORDER+i] = MULT16_16_Q15(lpc[c*LPC_ORDER+i], tmp);
lpc[c*CELT_LPC_ORDER+i] = MULT16_16_Q15(lpc[c*CELT_LPC_ORDER+i], tmp);
}
}
#endif
@ -678,8 +799,8 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
{
/* Compute the excitation for exc_length samples before the loss. We need the copy
because celt_fir() cannot filter in-place. */
celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*LPC_ORDER,
fir_tmp, exc_length, LPC_ORDER, st->arch);
celt_fir(exc+MAX_PERIOD-exc_length, lpc+c*CELT_LPC_ORDER,
fir_tmp, exc_length, CELT_LPC_ORDER, st->arch);
OPUS_COPY(exc+MAX_PERIOD-exc_length, fir_tmp, exc_length);
}
@ -737,15 +858,15 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
S1 += SHR32(MULT16_16(tmp, tmp), 10);
}
{
opus_val16 lpc_mem[LPC_ORDER];
opus_val16 lpc_mem[CELT_LPC_ORDER];
/* Copy the last decoded samples (prior to the overlap region) to
synthesis filter memory so we can have a continuous signal. */
for (i=0;i<LPC_ORDER;i++)
for (i=0;i<CELT_LPC_ORDER;i++)
lpc_mem[i] = SROUND16(buf[DECODE_BUFFER_SIZE-N-1-i], SIG_SHIFT);
/* Apply the synthesis filter to convert the excitation back into
the signal domain. */
celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*LPC_ORDER,
buf+DECODE_BUFFER_SIZE-N, extrapolation_len, LPC_ORDER,
celt_iir(buf+DECODE_BUFFER_SIZE-N, lpc+c*CELT_LPC_ORDER,
buf+DECODE_BUFFER_SIZE-N, extrapolation_len, CELT_LPC_ORDER,
lpc_mem, st->arch);
#ifdef FIXED_POINT
for (i=0; i < extrapolation_len; i++)
@ -792,23 +913,65 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
}
}
/* Apply the pre-filter to the MDCT overlap for the next frame because
the post-filter will be re-applied in the decoder after the MDCT
overlap. */
comb_filter(etmp, buf+DECODE_BUFFER_SIZE,
st->postfilter_period, st->postfilter_period, overlap,
-st->postfilter_gain, -st->postfilter_gain,
st->postfilter_tapset, st->postfilter_tapset, NULL, 0, st->arch);
/* Simulate TDAC on the concealed audio so that it blends with the
MDCT of the next frame. */
for (i=0;i<overlap/2;i++)
{
buf[DECODE_BUFFER_SIZE+i] =
MULT16_32_Q15(window[i], etmp[overlap-1-i])
+ MULT16_32_Q15(window[overlap-i-1], etmp[i]);
}
} while (++c<C);
#ifdef ENABLE_DEEP_PLC
if (lpcnet->loaded && (st->complexity >= 5 || lpcnet->fec_fill_pos > 0)) {
float overlap_mem;
int samples_needed16k;
celt_sig *buf;
VARDECL(float, buf_copy);
buf = decode_mem[0];
ALLOC(buf_copy, C*overlap, float);
c=0; do {
OPUS_COPY(buf_copy+c*overlap, &decode_mem[c][DECODE_BUFFER_SIZE-N], overlap);
} while (++c<C);
/* Need enough samples from the PLC to cover the frame size, resampling delay,
and the overlap at the end. */
samples_needed16k = (N+SINC_ORDER+overlap)/3;
if (loss_duration == 0) {
st->plc_fill = 0;
}
while (st->plc_fill < samples_needed16k) {
lpcnet_plc_conceal(lpcnet, &st->plc_pcm[st->plc_fill]);
st->plc_fill += FRAME_SIZE;
}
/* Resample to 48 kHz. */
for (i=0;i<(N+overlap)/3;i++) {
int j;
float sum;
for (sum=0, j=0;j<17;j++) sum += 3*st->plc_pcm[i+j]*sinc_filter[3*j];
buf[DECODE_BUFFER_SIZE-N+3*i] = sum;
for (sum=0, j=0;j<16;j++) sum += 3*st->plc_pcm[i+j+1]*sinc_filter[3*j+2];
buf[DECODE_BUFFER_SIZE-N+3*i+1] = sum;
for (sum=0, j=0;j<16;j++) sum += 3*st->plc_pcm[i+j+1]*sinc_filter[3*j+1];
buf[DECODE_BUFFER_SIZE-N+3*i+2] = sum;
}
OPUS_MOVE(st->plc_pcm, &st->plc_pcm[N/3], st->plc_fill-N/3);
st->plc_fill -= N/3;
for (i=0;i<N;i++) {
float tmp = buf[DECODE_BUFFER_SIZE-N+i];
buf[DECODE_BUFFER_SIZE-N+i] -= PREEMPHASIS*st->plc_preemphasis_mem;
st->plc_preemphasis_mem = tmp;
}
overlap_mem = st->plc_preemphasis_mem;
for (i=0;i<overlap;i++) {
float tmp = buf[DECODE_BUFFER_SIZE+i];
buf[DECODE_BUFFER_SIZE+i] -= PREEMPHASIS*overlap_mem;
overlap_mem = tmp;
}
/* For now, we just do mono PLC. */
if (C==2) OPUS_COPY(decode_mem[1], decode_mem[0], DECODE_BUFFER_SIZE+overlap);
c=0; do {
/* Cross-fade with 48-kHz non-neural PLC for the first 2.5 ms to avoid a discontinuity. */
if (loss_duration == 0) {
for (i=0;i<overlap;i++) decode_mem[c][DECODE_BUFFER_SIZE-N+i] = (1-window[i])*buf_copy[c*overlap+i] + (window[i])*decode_mem[c][DECODE_BUFFER_SIZE-N+i];
}
} while (++c<C);
}
#endif
st->prefilter_and_fold = 1;
}
/* Saturate to something large to avoid wrap-around. */
@ -817,8 +980,12 @@ static void celt_decode_lost(CELTDecoder * OPUS_RESTRICT st, int N, int LM)
RESTORE_STACK;
}
int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)
int celt_decode_with_ec_dred(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum
#ifdef ENABLE_DEEP_PLC
,LPCNetPLCState *lpcnet
#endif
)
{
int c, i, N;
int spread_decision;
@ -881,7 +1048,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
frame_size *= st->downsample;
lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+overlap)*CC);
oldBandE = lpc+CC*LPC_ORDER;
oldBandE = lpc+CC*CELT_LPC_ORDER;
oldLogE = oldBandE + 2*nbEBands;
oldLogE2 = oldLogE + 2*nbEBands;
backgroundLogE = oldLogE2 + 2*nbEBands;
@ -935,15 +1102,25 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
if (data == NULL || len<=1)
{
celt_decode_lost(st, N, LM);
celt_decode_lost(st, N, LM
#ifdef ENABLE_DEEP_PLC
, lpcnet
#endif
);
deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
RESTORE_STACK;
return frame_size/st->downsample;
}
#ifdef ENABLE_DEEP_PLC
else {
/* FIXME: This is a bit of a hack just to make sure opus_decode_native() knows we're no longer in PLC. */
if (lpcnet) lpcnet->blend = 0;
}
#endif
/* Check if there are at least two packets received consecutively before
* turning on the pitch-based PLC */
st->skip_plc = st->loss_duration != 0;
if (st->loss_duration == 0) st->skip_plc = 0;
if (dec == NULL)
{
@ -1006,6 +1183,36 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
/* Decode the global flags (first symbols in the stream) */
intra_ener = tell+3<=total_bits ? ec_dec_bit_logp(dec, 3) : 0;
/* If recovering from packet loss, make sure we make the energy prediction safe to reduce the
risk of getting loud artifacts. */
if (!intra_ener && st->loss_duration != 0) {
c=0; do
{
opus_val16 safety = 0;
int missing = IMIN(10, st->loss_duration>>LM);
if (LM==0) safety = QCONST16(1.5f,DB_SHIFT);
else if (LM==1) safety = QCONST16(.5f,DB_SHIFT);
for (i=start;i<end;i++)
{
if (oldBandE[c*nbEBands+i] < MAX16(oldLogE[c*nbEBands+i], oldLogE2[c*nbEBands+i])) {
/* If energy is going down already, continue the trend. */
opus_val32 slope;
opus_val32 E0, E1, E2;
E0 = oldBandE[c*nbEBands+i];
E1 = oldLogE[c*nbEBands+i];
E2 = oldLogE2[c*nbEBands+i];
slope = MAX32(E1 - E0, HALF32(E2 - E0));
E0 -= MAX32(0, (1+missing)*slope);
oldBandE[c*nbEBands+i] = MAX32(-QCONST16(20.f,DB_SHIFT), E0);
} else {
/* Otherwise take the min of the last frames. */
oldBandE[c*nbEBands+i] = MIN16(MIN16(oldBandE[c*nbEBands+i], oldLogE[c*nbEBands+i]), oldLogE2[c*nbEBands+i]);
}
/* Shorter frames have more natural fluctuations -- play it safe. */
oldBandE[c*nbEBands+i] -= safety;
}
} while (++c<2);
}
/* Get band energies */
unquant_coarse_energy(mode, start, end, oldBandE,
intra_ener, dec, C, LM);
@ -1073,7 +1280,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
unquant_fine_energy(mode, start, end, oldBandE, fine_quant, dec, C);
c=0; do {
OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap/2);
OPUS_MOVE(decode_mem[c], decode_mem[c]+N, DECODE_BUFFER_SIZE-N+overlap);
} while (++c<CC);
/* Decode fixed codebook */
@ -1109,7 +1316,9 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
for (i=0;i<C*nbEBands;i++)
oldBandE[i] = -QCONST16(28.f,DB_SHIFT);
}
if (st->prefilter_and_fold) {
prefilter_and_fold(st, N);
}
celt_synthesis(mode, X, out_syn, oldBandE, start, effEnd,
C, CC, isTransient, LM, st->downsample, silence, st->arch);
@ -1173,6 +1382,7 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
deemphasis(out_syn, pcm, N, CC, st->downsample, mode->preemph, st->preemph_memD, accum);
st->loss_duration = 0;
st->prefilter_and_fold = 0;
RESTORE_STACK;
if (ec_tell(dec) > 8*len)
return OPUS_INTERNAL_ERROR;
@ -1181,6 +1391,15 @@ int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *dat
return frame_size/st->downsample;
}
/* Decodes a frame without a deep-PLC state.
   Forwards every argument unchanged to celt_decode_with_ec_dred() and returns
   its result. When ENABLE_DEEP_PLC is defined, NULL is passed for the extra
   lpcnet argument.
   NOTE(review): in builds with ENABLE_DEEP_PLC the packet-loss path of
   celt_decode_with_ec_dred() hands lpcnet to celt_decode_lost(), which reads
   lpcnet->fec_fill_pos and lpcnet->loaded; confirm that a NULL lpcnet is
   handled there before relying on this wrapper for lost packets. */
int celt_decode_with_ec(CELTDecoder * OPUS_RESTRICT st, const unsigned char *data,
int len, opus_val16 * OPUS_RESTRICT pcm, int frame_size, ec_dec *dec, int accum)
{
return celt_decode_with_ec_dred(st, data, len, pcm, frame_size, dec, accum
#ifdef ENABLE_DEEP_PLC
, NULL
#endif
);
}
#ifdef CUSTOM_MODES
@ -1254,6 +1473,26 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
va_start(ap, request);
switch (request)
{
case OPUS_SET_COMPLEXITY_REQUEST:
{
opus_int32 value = va_arg(ap, opus_int32);
if(value<0 || value>10)
{
goto bad_arg;
}
st->complexity = value;
}
break;
case OPUS_GET_COMPLEXITY_REQUEST:
{
opus_int32 *value = va_arg(ap, opus_int32*);
if (!value)
{
goto bad_arg;
}
*value = st->complexity;
}
break;
case CELT_SET_START_BAND_REQUEST:
{
opus_int32 value = va_arg(ap, opus_int32);
@ -1300,7 +1539,7 @@ int opus_custom_decoder_ctl(CELTDecoder * OPUS_RESTRICT st, int request, ...)
int i;
opus_val16 *lpc, *oldBandE, *oldLogE, *oldLogE2;
lpc = (opus_val16*)(st->_decode_mem+(DECODE_BUFFER_SIZE+st->overlap)*st->channels);
oldBandE = lpc+st->channels*LPC_ORDER;
oldBandE = lpc+st->channels*CELT_LPC_ORDER;
oldLogE = oldBandE + 2*st->mode->nbEBands;
oldLogE2 = oldLogE + 2*st->mode->nbEBands;
OPUS_CLEAR((char*)&st->DECODER_RESET_START,

View File

@ -281,6 +281,9 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
/* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
for (i=0;i<len;i++)
{
#ifndef FIXED_POINT
float mem00;
#endif
opus_val32 x,y;
x = SHR32(in[i+c*len],SIG_SHIFT);
y = ADD32(mem0, x);
@ -288,8 +291,13 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
mem0 = mem1 + y - SHL32(x,1);
mem1 = x - SHR32(y,1);
#else
/* Original code:
mem0 = mem1 + y - 2*x;
mem1 = x - .5f*y;
Modified code to shorten dependency chains: */
mem00=mem0;
mem0 = mem0 - x + .5f*mem1;
mem1 = x - mem00;
#endif
tmp[i] = SROUND16(y, 2);
/*printf("%f ", tmp[i]);*/
@ -322,10 +330,11 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
#ifdef FIXED_POINT
/* FIXME: Use PSHR16() instead */
tmp[i] = mem0 + PSHR32(x2-mem0,forward_shift);
#else
tmp[i] = mem0 + MULT16_16_P15(forward_decay,x2-mem0);
#endif
mem0 = tmp[i];
#else
mem0 = x2 + (1.f-forward_decay)*mem0;
tmp[i] = forward_decay*mem0;
#endif
}
mem0=0;
@ -337,11 +346,13 @@ static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int
#ifdef FIXED_POINT
/* FIXME: Use PSHR16() instead */
tmp[i] = mem0 + PSHR32(tmp[i]-mem0,3);
#else
tmp[i] = mem0 + MULT16_16_P15(QCONST16(0.125f,15),tmp[i]-mem0);
#endif
mem0 = tmp[i];
maxE = MAX16(maxE, mem0);
#else
mem0 = tmp[i] + 0.875f*mem0;
tmp[i] = 0.125f*mem0;
maxE = MAX16(maxE, 0.125f*mem0);
#endif
}
/*for (i=0;i<len2;i++)printf("%f ", tmp[i]/mean);printf("\n");*/
@ -1565,10 +1576,13 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
vbr_rate = 0;
tmp = st->bitrate*frame_size;
if (tell>1)
tmp += tell;
tmp += tell*mode->Fs;
if (st->bitrate!=OPUS_BITRATE_MAX)
{
nbCompressedBytes = IMAX(2, IMIN(nbCompressedBytes,
(tmp+4*mode->Fs)/(8*mode->Fs)-!!st->signalling));
ec_enc_shrink(enc, nbCompressedBytes);
}
effectiveBytes = nbCompressedBytes - nbFilledBytes;
}
equiv_rate = ((opus_int32)nbCompressedBytes*8*50 << (3-LM)) - (40*C+20)*((400>>LM) - 50);
@ -2246,7 +2260,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
if (anti_collapse_on)
{
anti_collapse(mode, X, collapse_masks, LM, C, N,
start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng);
start, end, oldBandE, oldLogE, oldLogE2, pulses, st->rng, st->arch);
}
c=0; do {
@ -2265,15 +2279,15 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
st->prefilter_period_old=IMAX(st->prefilter_period_old, COMBFILTER_MINPERIOD);
comb_filter(out_mem[c], out_mem[c], st->prefilter_period_old, st->prefilter_period, mode->shortMdctSize,
st->prefilter_gain_old, st->prefilter_gain, st->prefilter_tapset_old, st->prefilter_tapset,
mode->window, overlap);
mode->window, overlap, st->arch);
if (LM!=0)
comb_filter(out_mem[c]+mode->shortMdctSize, out_mem[c]+mode->shortMdctSize, st->prefilter_period, pitch_index, N-mode->shortMdctSize,
st->prefilter_gain, gain1, st->prefilter_tapset, prefilter_tapset,
mode->window, overlap);
mode->window, overlap, st->arch);
} while (++c<CC);
/* We reuse freq[] as scratch space for the de-emphasis */
deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD);
deemphasis(out_mem, (opus_val16*)pcm, N, CC, st->upsample, mode->preemph, st->preemph_memD, 0);
st->prefilter_period_old = st->prefilter_period;
st->prefilter_gain_old = st->prefilter_gain;
st->prefilter_tapset_old = st->prefilter_tapset;

View File

@ -44,7 +44,7 @@ int p
opus_val32 r;
opus_val32 error = ac[0];
#ifdef FIXED_POINT
opus_val32 lpc[LPC_ORDER];
opus_val32 lpc[CELT_LPC_ORDER];
#else
float *lpc = _lpc;
#endif

View File

@ -35,7 +35,7 @@
#include "x86/celt_lpc_sse.h"
#endif
#define LPC_ORDER 24
#define CELT_LPC_ORDER 24
void _celt_lpc(opus_val16 *_lpc, const opus_val32 *ac, int p);

View File

@ -35,19 +35,20 @@
(defined(OPUS_ARM_ASM) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR))
#include "arm/armcpu.h"
/* We currently support 4 ARM variants:
/* We currently support 5 ARM variants:
* arch[0] -> ARMv4
* arch[1] -> ARMv5E
* arch[2] -> ARMv6
* arch[3] -> NEON
* arch[4] -> NEON+DOTPROD
*/
#define OPUS_ARCHMASK 3
#define OPUS_ARCHMASK 7
#elif defined(OPUS_HAVE_RTCD) && \
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
(defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
#include "x86/x86cpu.h"
/* We currently support 5 x86 variants:

View File

@ -195,6 +195,27 @@ int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb){
return ret;
}
/*Decodes one symbol using an "inverse" CDF table with 16-bit entries.
  The table is scanned from its first entry until the scaled entry no longer
  exceeds the decoder's current value; the index reached is the symbol.
  The decoder's val and rng are then updated and renormalized.*/
int ec_dec_icdf16(ec_dec *_this,const opus_uint16 *_icdf,unsigned _ftb){
  opus_uint32 scale;
  opus_uint32 value;
  opus_uint32 lower;
  opus_uint32 upper;
  int         sym;
  value=_this->val;
  lower=_this->rng;
  scale=lower>>_ftb;
  sym=0;
  for(;;){
    /*Keep the previous bound: it is the top of the selected interval.*/
    upper=lower;
    lower=IMUL32(scale,_icdf[sym]);
    if(value>=lower)break;
    sym++;
  }
  _this->val=value-lower;
  _this->rng=upper-lower;
  ec_dec_normalize(_this);
  return sym;
}
opus_uint32 ec_dec_uint(ec_dec *_this,opus_uint32 _ft){
unsigned ft;
unsigned s;

View File

@ -81,6 +81,16 @@ int ec_dec_bit_logp(ec_dec *_this,unsigned _logp);
Return: The decoded symbol s.*/
int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb);
/*Decodes a symbol given an "inverse" CDF table.
No call to ec_dec_update() is necessary after this call.
_icdf: The "inverse" CDF, such that symbol s falls in the range
[s>0?ft-_icdf[s-1]:0,ft-_icdf[s]), where ft=1<<_ftb.
The values must be monotonically non-increasing, and the last value
must be 0.
_ftb: The number of bits of precision in the cumulative distribution.
Return: The decoded symbol s.*/
int ec_dec_icdf16(ec_dec *_this,const opus_uint16 *_icdf,unsigned _ftb);
/*Extracts a raw unsigned integer with a non-power-of-2 range from the stream.
The bits must have been encoded with ec_enc_uint().
No call to ec_dec_update() is necessary after this call.

View File

@ -172,6 +172,17 @@ void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb){
ec_enc_normalize(_this);
}
/*Encodes symbol _s using an "inverse" CDF table with 16-bit entries.
  The first symbol only shrinks the range; any later symbol also advances
  the low end of the range past the preceding symbols.*/
void ec_enc_icdf16(ec_enc *_this,int _s,const opus_uint16 *_icdf,unsigned _ftb){
  opus_uint32 scale;
  scale=_this->rng>>_ftb;
  if(_s<=0){
    _this->rng-=IMUL32(scale,_icdf[_s]);
  }
  else{
    _this->val+=_this->rng-IMUL32(scale,_icdf[_s-1]);
    _this->rng=IMUL32(scale,_icdf[_s-1]-_icdf[_s]);
  }
  ec_enc_normalize(_this);
}
void ec_enc_uint(ec_enc *_this,opus_uint32 _fl,opus_uint32 _ft){
unsigned ft;
unsigned fl;

View File

@ -64,6 +64,15 @@ void ec_enc_bit_logp(ec_enc *_this,int _val,unsigned _logp);
_ftb: The number of bits of precision in the cumulative distribution.*/
void ec_enc_icdf(ec_enc *_this,int _s,const unsigned char *_icdf,unsigned _ftb);
/*Encodes a symbol given an "inverse" CDF table.
_s: The index of the symbol to encode.
_icdf: The "inverse" CDF, such that symbol _s falls in the range
[_s>0?ft-_icdf[_s-1]:0,ft-_icdf[_s]), where ft=1<<_ftb.
The values must be monotonically non-increasing, and the last value
must be 0.
_ftb: The number of bits of precision in the cumulative distribution.*/
void ec_enc_icdf16(ec_enc *_this,int _s,const opus_uint16 *_icdf,unsigned _ftb);
/*Encodes a raw unsigned integer in the stream.
_fl: The integer to encode.
_ft: The number of integers that can be encoded (one more than the max).

View File

@ -132,3 +132,104 @@ int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay)
ec_dec_update(dec, fl, IMIN(fl+fs,32768), 32768);
return val;
}
/* Encodes `value` in two stages with ec_enc_icdf16() at 15 bits of precision.
   Stage 1 codes a three-way symbol (0: zero, 1: positive, 2: negative) from a
   table derived from p0. Stage 2, for non-zero values only, codes
   abs(value)-1 as a run of symbols in [0,7] from a table derived from decay;
   a symbol of 7 means "add 7 and keep going". */
void ec_laplace_encode_p0(ec_enc *enc, int value, opus_uint16 p0, opus_uint16 decay)
{
   opus_uint16 sign_icdf[3];
   opus_uint16 icdf[8];
   int sign_sym;
   int remaining;
   int k;
   sign_icdf[0] = 32768-p0;
   sign_icdf[1] = sign_icdf[0]/2;
   sign_icdf[2] = 0;
   if (value > 0) sign_sym = 1;
   else if (value < 0) sign_sym = 2;
   else sign_sym = 0;
   ec_enc_icdf16(enc, sign_sym, sign_icdf, 15);
   /* Nothing more to code for zero. */
   if (value == 0) return;
   remaining = abs(value) - 1;
   /* Each table entry is the previous one scaled by decay, with a floor that
      keeps the entries strictly decreasing down to the final 0. */
   icdf[0] = IMAX(7, decay);
   for (k=1;k<7;k++)
   {
      icdf[k] = IMAX(7-k, (icdf[k-1] * (opus_int32)decay) >> 15);
   }
   icdf[7] = 0;
   /* Emit escape symbols while at least 7 remains, then the final symbol. */
   while (remaining >= 7)
   {
      ec_enc_icdf16(enc, 7, icdf, 15);
      remaining -= 7;
   }
   ec_enc_icdf16(enc, remaining, icdf, 15);
}
/* Decodes a value written by ec_laplace_encode_p0() with the same p0 and
   decay. Reads the three-way zero/positive/negative symbol first and, when it
   is non-zero, accumulates magnitude symbols until one is not the escape
   value 7.
   Returns the decoded signed value. */
int ec_laplace_decode_p0(ec_dec *dec, opus_uint16 p0, opus_uint16 decay)
{
   opus_uint16 sign_icdf[3];
   opus_uint16 icdf[8];
   int sign_sym;
   int magnitude;
   int step;
   int k;
   sign_icdf[0] = 32768-p0;
   sign_icdf[1] = sign_icdf[0]/2;
   sign_icdf[2] = 0;
   sign_sym = ec_dec_icdf16(dec, sign_icdf, 15);
   if (sign_sym == 0) return 0;
   /* Same magnitude table as the encoder builds. */
   icdf[0] = IMAX(7, decay);
   for (k=1;k<7;k++)
   {
      icdf[k] = IMAX(7-k, (icdf[k-1] * (opus_int32)decay) >> 15);
   }
   icdf[7] = 0;
   magnitude = 1;
   for (;;)
   {
      step = ec_dec_icdf16(dec, icdf, 15);
      magnitude += step;
      if (step != 7) break;
   }
   /* Symbol 2 marks a negative value. */
   return sign_sym == 2 ? -magnitude : magnitude;
}
#if 0
#include <stdio.h>
#define NB_VALS 10
#define DATA_SIZE 10000
int main() {
ec_enc enc;
ec_dec dec;
unsigned char *ptr;
int i;
int decay, p0;
int val[NB_VALS] = {6, 7, 8, 9, 10, 11, 12, 13, 14, 15};
/*for (i=0;i<NB_VALS;i++) {
val[i] = -log(rand()/(float)RAND_MAX);
if (rand()%2) val[i] = -val[i];
}*/
p0 = 16000;
decay = 16000;
ptr = (unsigned char *)malloc(DATA_SIZE);
ec_enc_init(&enc,ptr,DATA_SIZE);
for (i=0;i<NB_VALS;i++) {
printf("%d ", val[i]);
}
printf("\n");
for (i=0;i<NB_VALS;i++) {
ec_laplace_encode_p0(&enc, val[i], p0, decay);
}
ec_enc_done(&enc);
ec_dec_init(&dec,ec_get_buffer(&enc),ec_range_bytes(&enc));
for (i=0;i<NB_VALS;i++) {
val[i] = ec_laplace_decode_p0(&dec, p0, decay);
}
for (i=0;i<NB_VALS;i++) {
printf("%d ", val[i]);
}
printf("\n");
}
#endif

View File

@ -26,6 +26,9 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef LAPLACE_H
#define LAPLACE_H
#include "entenc.h"
#include "entdec.h"
@ -46,3 +49,9 @@ void ec_laplace_encode(ec_enc *enc, int *value, unsigned fs, int decay);
@return Value decoded
*/
int ec_laplace_decode(ec_dec *dec, unsigned fs, int decay);
int ec_laplace_decode_p0(ec_dec *dec, opus_uint16 p0, opus_uint16 decay);
void ec_laplace_encode_p0(ec_enc *enc, int value, opus_uint16 p0, opus_uint16 decay);
#endif

View File

@ -230,6 +230,12 @@ static OPUS_INLINE opus_val32 celt_exp2_frac(opus_val16 x)
frac = SHL16(x, 4);
return ADD16(D0, MULT16_16_Q15(frac, ADD16(D1, MULT16_16_Q15(frac, ADD16(D2 , MULT16_16_Q15(D3,frac))))));
}
#undef D0
#undef D1
#undef D2
#undef D3
/** Base-2 exponential approximation (2^x). (Q10 input, Q16 output) */
static OPUS_INLINE opus_val32 celt_exp2(opus_val16 x)
{

View File

@ -6,6 +6,8 @@ celt_sse2_sources = sources['CELT_SOURCES_SSE2']
celt_sse4_1_sources = sources['CELT_SOURCES_SSE4_1']
celt_avx2_sources = sources['CELT_SOURCES_AVX2']
celt_neon_intr_sources = sources['CELT_SOURCES_ARM_NEON_INTR']
celt_static_libs = []
@ -14,7 +16,7 @@ if host_cpu_family in ['x86', 'x86_64'] and opus_conf.has('OPUS_HAVE_RTCD')
celt_sources += sources['CELT_SOURCES_X86_RTCD']
endif
foreach intr_name : ['sse', 'sse2', 'sse4_1', 'neon_intr']
foreach intr_name : ['sse', 'sse2', 'sse4_1', 'avx2', 'neon_intr']
have_intr = get_variable('have_' + intr_name)
if not have_intr
continue

View File

@ -27,8 +27,8 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __CELT_MIPSR1_H__
#define __CELT_MIPSR1_H__
#ifndef CELT_MIPSR1_H__
#define CELT_MIPSR1_H__
#ifdef HAVE_CONFIG_H
#include "config.h"
@ -149,4 +149,4 @@ void comb_filter(opus_val32 *y, opus_val32 *x, int T0, int T1, int N,
}
}
#endif /* __CELT_MIPSR1_H__ */
#endif /* CELT_MIPSR1_H__ */

View File

@ -38,8 +38,8 @@
MDCT implementation in FFMPEG, but has differences in signs, ordering
and scaling in many places.
*/
#ifndef __MDCT_MIPSR1_H__
#define __MDCT_MIPSR1_H__
#ifndef MDCT_MIPSR1_H__
#define MDCT_MIPSR1_H__
#ifndef SKIP_CONFIG_H
#ifdef HAVE_CONFIG_H
@ -285,4 +285,4 @@ void clt_mdct_backward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scala
}
}
}
#endif /* __MDCT_MIPSR1_H__ */
#endif /* MDCT_MIPSR1_H__ */

View File

@ -26,8 +26,8 @@
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef __VQ_MIPSR1_H__
#define __VQ_MIPSR1_H__
#ifndef VQ_MIPSR1_H__
#define VQ_MIPSR1_H__
#ifdef HAVE_CONFIG_H
#include "config.h"
@ -113,4 +113,4 @@ void renormalise_vector(celt_norm *X, int N, opus_val16 gain, int arch)
/*return celt_sqrt(E);*/
}
#endif /* __VQ_MIPSR1_H__ */
#endif /* VQ_MIPSR1_H__ */

View File

@ -41,7 +41,7 @@
#include <string.h>
#include <stdlib.h>
/** Opus wrapper for malloc(). To do your own dynamic allocation, all you need to do is replace this function and opus_free */
/** Opus wrapper for malloc(). To do your own dynamic allocation replace this function, opus_realloc, and opus_free */
#ifndef OVERRIDE_OPUS_ALLOC
static OPUS_INLINE void *opus_alloc (size_t size)
{
@ -49,7 +49,15 @@ static OPUS_INLINE void *opus_alloc (size_t size)
}
#endif
/** Same as celt_alloc(), except that the area is only needed inside a CELT call (might cause problem with wideband though) */
#ifndef OVERRIDE_OPUS_REALLOC
/** Opus wrapper for realloc(). To do your own dynamic allocation replace this function, opus_alloc, and opus_free */
static OPUS_INLINE void *opus_realloc (void *ptr, size_t size)
{
return realloc(ptr, size);
}
#endif
/** Used only for non-threadsafe pseudostack.
If desired, this can always return the same area of memory rather than allocating a new one every time. */
#ifndef OVERRIDE_OPUS_ALLOC_SCRATCH
static OPUS_INLINE void *opus_alloc_scratch (size_t size)
{
@ -58,7 +66,7 @@ static OPUS_INLINE void *opus_alloc_scratch (size_t size)
}
#endif
/** Opus wrapper for free(). To do your own dynamic allocation, all you need to do is replace this function and opus_alloc */
/** Opus wrapper for free(). To do your own dynamic allocation replace this function, opus_alloc, and opus_realloc */
#ifndef OVERRIDE_OPUS_FREE
static OPUS_INLINE void opus_free (void *ptr)
{

View File

@ -189,4 +189,15 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
# define celt_pitch_xcorr celt_pitch_xcorr_c
#endif
/* The C version of comb_filter_const is only declared here when
   NON_STATIC_COMB_FILTER_CONST_C is defined. */
#ifdef NON_STATIC_COMB_FILTER_CONST_C
void comb_filter_const_c(opus_val32 *y, opus_val32 *x, int T, int N,
opus_val16 g10, opus_val16 g11, opus_val16 g12);
#endif
/* Default mapping, used unless OVERRIDE_COMB_FILTER_CONST is defined:
   comb_filter_const() evaluates and discards arch, then calls the C version. */
#ifndef OVERRIDE_COMB_FILTER_CONST
# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
((void)(arch),comb_filter_const_c(y, x, T, N, g10, g11, g12))
#endif
#endif

101
celt/x86/pitch_avx.c Normal file
View File

@ -0,0 +1,101 @@
/* Copyright (c) 2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <immintrin.h>
#include "x86cpu.h"
#include "pitch.h"
#if defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(FIXED_POINT)
/* Like the "regular" xcorr_kernel(), but computes 8 results at a time. */
static void xcorr_kernel_avx(const float *x, const float *y, float sum[8], int len)
{
__m256 xsum0, xsum1, xsum2, xsum3, xsum4, xsum5, xsum6, xsum7;
xsum7 = xsum6 = xsum5 = xsum4 = xsum3 = xsum2 = xsum1 = xsum0 = _mm256_setzero_ps();
int i;
__m256 x0;
/* Compute 8 inner products using partial sums. */
for (i=0;i<len-7;i+=8)
{
x0 = _mm256_loadu_ps(x+i);
xsum0 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i ), xsum0);
xsum1 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+1), xsum1);
xsum2 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+2), xsum2);
xsum3 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+3), xsum3);
xsum4 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+4), xsum4);
xsum5 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+5), xsum5);
xsum6 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+6), xsum6);
xsum7 = _mm256_fmadd_ps(x0, _mm256_loadu_ps(y+i+7), xsum7);
}
if (i != len) {
static const int mask[15] = {-1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, 0, 0};
__m256i m;
m = _mm256_loadu_si256((__m256i*)(void*)(mask + 7+i-len));
x0 = _mm256_maskload_ps(x+i, m);
xsum0 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i , m), xsum0);
xsum1 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+1, m), xsum1);
xsum2 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+2, m), xsum2);
xsum3 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+3, m), xsum3);
xsum4 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+4, m), xsum4);
xsum5 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+5, m), xsum5);
xsum6 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+6, m), xsum6);
xsum7 = _mm256_fmadd_ps(x0, _mm256_maskload_ps(y+i+7, m), xsum7);
}
/* 8 horizontal adds. */
/* Compute [0 4] [1 5] [2 6] [3 7] */
xsum0 = _mm256_add_ps(_mm256_permute2f128_ps(xsum0, xsum4, 2<<4), _mm256_permute2f128_ps(xsum0, xsum4, 1 | (3<<4)));
xsum1 = _mm256_add_ps(_mm256_permute2f128_ps(xsum1, xsum5, 2<<4), _mm256_permute2f128_ps(xsum1, xsum5, 1 | (3<<4)));
xsum2 = _mm256_add_ps(_mm256_permute2f128_ps(xsum2, xsum6, 2<<4), _mm256_permute2f128_ps(xsum2, xsum6, 1 | (3<<4)));
xsum3 = _mm256_add_ps(_mm256_permute2f128_ps(xsum3, xsum7, 2<<4), _mm256_permute2f128_ps(xsum3, xsum7, 1 | (3<<4)));
/* Compute [0 1 4 5] [2 3 6 7] */
xsum0 = _mm256_hadd_ps(xsum0, xsum1);
xsum1 = _mm256_hadd_ps(xsum2, xsum3);
/* Compute [0 1 2 3 4 5 6 7] */
xsum0 = _mm256_hadd_ps(xsum0, xsum1);
_mm256_storeu_ps(sum, xsum0);
}
/* Fills xcorr[0..max_pitch-1], where xcorr[k] is the inner product of
   _x[0..len-1] with _y[k..k+len-1]. Lags are handled 8 at a time by
   xcorr_kernel_avx(); any remaining lags use celt_inner_prod(). */
void celt_pitch_xcorr_avx2(const float *_x, const float *_y, float *xcorr, int len, int max_pitch, int arch)
{
    int lag = 0;
    celt_assert(max_pitch>0);
    (void)arch;
    /* Complete groups of 8 consecutive lags. */
    while (lag < max_pitch-7)
    {
        xcorr_kernel_avx(_x, _y+lag, xcorr+lag, len);
        lag += 8;
    }
    /* Fewer than 8 lags left: one inner product per lag. */
    while (lag < max_pitch)
    {
        xcorr[lag] = celt_inner_prod(_x, _y+lag, len, arch);
        lag++;
    }
}
#endif

View File

@ -131,12 +131,6 @@ extern opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(FIXED_POINT)
#define OVERRIDE_DUAL_INNER_PROD
#define OVERRIDE_COMB_FILTER_CONST
#undef dual_inner_prod
#undef comb_filter_const
void dual_inner_prod_sse(const opus_val16 *x,
const opus_val16 *y01,
const opus_val16 *y02,
@ -154,13 +148,17 @@ void comb_filter_const_sse(opus_val32 *y,
#if defined(OPUS_X86_PRESUME_SSE)
#define OVERRIDE_DUAL_INNER_PROD
#define OVERRIDE_COMB_FILTER_CONST
# define dual_inner_prod(x, y01, y02, N, xy1, xy2, arch) \
((void)(arch),dual_inner_prod_sse(x, y01, y02, N, xy1, xy2))
# define comb_filter_const(y, x, T, N, g10, g11, g12, arch) \
((void)(arch),comb_filter_const_sse(y, x, T, N, g10, g11, g12))
#else
#elif defined(OPUS_HAVE_RTCD)
#define OVERRIDE_DUAL_INNER_PROD
#define OVERRIDE_COMB_FILTER_CONST
extern void (*const DUAL_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
const opus_val16 *x,
const opus_val16 *y01,
@ -187,6 +185,32 @@ extern void (*const COMB_FILTER_CONST_IMPL[OPUS_ARCHMASK + 1])(
#define NON_STATIC_COMB_FILTER_CONST_C
#endif
#endif
/* AVX2 version of celt_pitch_xcorr(); declared before either selection
   branch below. */
void celt_pitch_xcorr_avx2(const float *_x, const float *_y, float *xcorr, int len, int max_pitch, int arch);
#if defined(OPUS_X86_PRESUME_AVX2)
/* AVX2 is presumed: celt_pitch_xcorr is bound directly to the AVX2 version. */
#define OVERRIDE_PITCH_XCORR
# define celt_pitch_xcorr celt_pitch_xcorr_avx2
#elif defined(OPUS_HAVE_RTCD) && defined(OPUS_X86_MAY_HAVE_AVX2)
/* AVX2 may be present: celt_pitch_xcorr dispatches through a function-pointer
   table indexed by (arch & OPUS_ARCHMASK). */
#define OVERRIDE_PITCH_XCORR
extern void (*const PITCH_XCORR_IMPL[OPUS_ARCHMASK + 1])(
const float *_x,
const float *_y,
float *xcorr,
int len,
int max_pitch,
int arch
);
#define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*PITCH_XCORR_IMPL[(arch) & OPUS_ARCHMASK])(_x, _y, xcorr, len, max_pitch, arch))
#endif /* OPUS_X86_PRESUME_AVX2 / (OPUS_HAVE_RTCD && OPUS_X86_MAY_HAVE_AVX2) */
#endif /* OPUS_X86_MAY_HAVE_SSE && !FIXED_POINT */
#endif

View File

@ -28,16 +28,18 @@
#define VQ_SSE_H
#if defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(FIXED_POINT)
#define OVERRIDE_OP_PVQ_SEARCH
opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch);
#if defined(OPUS_X86_PRESUME_SSE2)
#define OVERRIDE_OP_PVQ_SEARCH
#define op_pvq_search(x, iy, K, N, arch) \
(op_pvq_search_sse2(x, iy, K, N, arch))
#else
#elif defined(OPUS_HAVE_RTCD)
#define OVERRIDE_OP_PVQ_SEARCH
extern opus_val16 (*const OP_PVQ_SEARCH_IMPL[OPUS_ARCHMASK + 1])(
celt_norm *_X, int *iy, int K, int N, int arch);

View File

@ -75,7 +75,7 @@ opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
sums = _mm_add_ps(sums, x4);
/* Clear y and iy in case we don't do the projection. */
_mm_storeu_ps(&y[j], _mm_setzero_ps());
_mm_storeu_si128((__m128i*)&iy[j], _mm_setzero_si128());
_mm_storeu_si128((__m128i*)(void*)&iy[j], _mm_setzero_si128());
_mm_storeu_ps(&X[j], x4);
_mm_storeu_ps(&signy[j], s4);
}
@ -116,7 +116,7 @@ opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
rx4 = _mm_mul_ps(x4, rcp4);
iy4 = _mm_cvttps_epi32(rx4);
pulses_sum = _mm_add_epi32(pulses_sum, iy4);
_mm_storeu_si128((__m128i*)&iy[j], iy4);
_mm_storeu_si128((__m128i*)(void*)&iy[j], iy4);
y4 = _mm_cvtepi32_ps(iy4);
xy4 = _mm_add_ps(xy4, _mm_mul_ps(x4, y4));
yy4 = _mm_add_ps(yy4, _mm_mul_ps(y4, y4));
@ -205,10 +205,10 @@ opus_val16 op_pvq_search_sse2(celt_norm *_X, int *iy, int K, int N, int arch)
{
__m128i y4;
__m128i s4;
y4 = _mm_loadu_si128((__m128i*)&iy[j]);
y4 = _mm_loadu_si128((__m128i*)(void*)&iy[j]);
s4 = _mm_castps_si128(_mm_loadu_ps(&signy[j]));
y4 = _mm_xor_si128(_mm_add_epi32(y4, s4), s4);
_mm_storeu_si128((__m128i*)&iy[j], y4);
_mm_storeu_si128((__m128i*)(void*)&iy[j], y4);
}
RESTORE_STACK;
return yy;

View File

@ -0,0 +1,47 @@
/* Copyright (c) 2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* When compiling with MSVC (_MSC_VER defined), define the __SSE__, __SSE2__
   and __SSE4_1__ feature macros for each instruction set enabled through the
   matching OPUS_X86_MAY_HAVE_* option, unless the macro is already defined.
   NOTE(review): presumably needed because other code tests these macros and
   MSVC does not provide them itself -- confirm against the users of this
   header. */
#ifdef _MSC_VER
# ifdef OPUS_X86_MAY_HAVE_SSE
# ifndef __SSE__
# define __SSE__
# endif
# endif
# ifdef OPUS_X86_MAY_HAVE_SSE2
# ifndef __SSE2__
# define __SSE2__
# endif
# endif
# ifdef OPUS_X86_MAY_HAVE_SSE4_1
# ifndef __SSE4_1__
# define __SSE4_1__
# endif
# endif
#endif

View File

@ -90,6 +90,26 @@ opus_val32 (*const CELT_INNER_PROD_IMPL[OPUS_ARCHMASK + 1])(
# else
#if defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)
/* Function-pointer table for celt_pitch_xcorr(), with OPUS_ARCHMASK + 1 slots.
   The first four slots use the generic C version; the fifth goes through
   MAY_HAVE_AVX2(), which selects the _avx2 variant when
   OPUS_X86_MAY_HAVE_AVX2 is defined. */
void (*const PITCH_XCORR_IMPL[OPUS_ARCHMASK + 1])(
const float *_x,
const float *_y,
float *xcorr,
int len,
int max_pitch,
int arch
) = {
celt_pitch_xcorr_c, /* non-sse */
celt_pitch_xcorr_c,
celt_pitch_xcorr_c,
celt_pitch_xcorr_c,
MAY_HAVE_AVX2(celt_pitch_xcorr)
};
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)
void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(

View File

@ -39,7 +39,7 @@
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
(defined(OPUS_X86_MAY_HAVE_AVX) && !defined(OPUS_X86_PRESUME_AVX)))
(defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
#if defined(_MSC_VER)
@ -105,7 +105,7 @@ typedef struct CPU_Feature{
int HW_SSE2;
int HW_SSE41;
/* SIMD: 256-bit */
int HW_AVX;
int HW_AVX2;
} CPU_Feature;
static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
@ -121,13 +121,19 @@ static void opus_cpu_feature_check(CPU_Feature *cpu_feature)
cpu_feature->HW_SSE = (info[3] & (1 << 25)) != 0;
cpu_feature->HW_SSE2 = (info[3] & (1 << 26)) != 0;
cpu_feature->HW_SSE41 = (info[2] & (1 << 19)) != 0;
cpu_feature->HW_AVX = (info[2] & (1 << 28)) != 0;
cpu_feature->HW_AVX2 = (info[2] & (1 << 28)) != 0 && (info[2] & (1 << 12)) != 0;
if (cpu_feature->HW_AVX2 && nIds >= 7) {
cpuid(info, 7);
cpu_feature->HW_AVX2 = cpu_feature->HW_AVX2 && (info[1] & (1 << 5)) != 0;
} else {
cpu_feature->HW_AVX2 = 0;
}
}
else {
cpu_feature->HW_SSE = 0;
cpu_feature->HW_SSE2 = 0;
cpu_feature->HW_SSE41 = 0;
cpu_feature->HW_AVX = 0;
cpu_feature->HW_AVX2 = 0;
}
}
@ -157,7 +163,7 @@ static int opus_select_arch_impl(void)
}
arch++;
if (!cpu_feature.HW_AVX)
if (!cpu_feature.HW_AVX2)
{
return arch;
}

View File

@ -46,13 +46,17 @@
# define MAY_HAVE_SSE4_1(name) name ## _c
# endif
# if defined(OPUS_X86_MAY_HAVE_AVX)
# define MAY_HAVE_AVX(name) name ## _avx
# if defined(OPUS_X86_MAY_HAVE_AVX2)
# define MAY_HAVE_AVX2(name) name ## _avx2
# else
# define MAY_HAVE_AVX(name) name ## _c
# define MAY_HAVE_AVX2(name) name ## _c
# endif
# if defined(OPUS_HAVE_RTCD)
# if defined(OPUS_HAVE_RTCD) && \
((defined(OPUS_X86_MAY_HAVE_SSE) && !defined(OPUS_X86_PRESUME_SSE)) || \
(defined(OPUS_X86_MAY_HAVE_SSE2) && !defined(OPUS_X86_PRESUME_SSE2)) || \
(defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_PRESUME_SSE4_1)) || \
(defined(OPUS_X86_MAY_HAVE_AVX2) && !defined(OPUS_X86_PRESUME_AVX2)))
int opus_select_arch(void);
# endif
@ -68,6 +72,6 @@ int opus_select_arch(void);
(_mm_cvtepi8_epi32(_mm_cvtsi32_si128(OP_LOADU_EPI32(x))))
#define OP_CVTEPI16_EPI32_M64(x) \
(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(x))))
(_mm_cvtepi16_epi32(_mm_loadl_epi64((__m128i *)(void*)(x))))
#endif

View File

@ -50,4 +50,5 @@ celt/mips/pitch_mipsr1.h \
celt/mips/vq_mipsr1.h \
celt/x86/pitch_sse.h \
celt/x86/vq_sse.h \
celt/x86/x86_arch_macros.h \
celt/x86/x86cpu.h

View File

@ -33,6 +33,9 @@ CELT_SOURCES_SSE4_1 = \
celt/x86/celt_lpc_sse4_1.c \
celt/x86/pitch_sse4_1.c
CELT_SOURCES_AVX2 = \
celt/x86/pitch_avx.c
CELT_SOURCES_ARM_RTCD = \
celt/arm/armcpu.c \
celt/arm/arm_celt_map.c

View File

@ -102,7 +102,10 @@ if(MINGW)
endif()
endif()
if(NOT MSVC)
if(MSVC)
# move cosmetic warnings to level 4
add_compile_options(/w44244 /w44305 /w44267)
else()
set(WARNING_LIST -Wall -W -Wstrict-prototypes -Wextra -Wcast-align -Wnested-externs -Wshadow)
include(CheckCCompilerFlag)
foreach(WARNING_FLAG ${WARNING_LIST})

View File

@ -47,10 +47,12 @@ function(check_flag NAME FLAG)
endfunction()
include(CheckIncludeFile)
# function to check if compiler supports SSE, SSE2, SSE4.1 and AVX if target
# systems may not have SSE support then use OPUS_MAY_HAVE_SSE option if target
# system is guaranteed to have SSE support then OPUS_PRESUME_SSE can be used to
# skip SSE runtime check
# This function determines if the compiler has support for SSE, SSE2, SSE4.1, AVX,
# AVX2 and FMA. Should the target systems potentially lack SSE support, the
# OPUS_MAY_HAVE_SSE option is recommended for use. If, however, the target system is
# assured to support SSE, the OPUS_PRESUME_SSE option can be employed, thus
# eliminating the necessity for an SSE runtime check.
function(opus_detect_sse COMPILER_SUPPORT_SIMD)
message(STATUS "Check SIMD support by compiler")
check_include_file(xmmintrin.h HAVE_XMMINTRIN_H) # SSE1
@ -111,20 +113,20 @@ function(opus_detect_sse COMPILER_SUPPORT_SIMD)
PARENT_SCOPE)
endif()
check_include_file(immintrin.h HAVE_IMMINTRIN_H) # AVX
check_include_file(immintrin.h HAVE_IMMINTRIN_H) # AVX2
if(HAVE_IMMINTRIN_H)
if(MSVC)
check_flag(AVX /arch:AVX)
check_flag(AVX2 /arch:AVX2)
else()
check_flag(AVX -mavx)
check_flag(AVX2 -mavx2 -mfma -mavx)
endif()
else()
set(AVX_SUPPORTED
set(AVX2_SUPPORTED
0
PARENT_SCOPE)
endif()
if(SSE1_SUPPORTED OR SSE2_SUPPORTED OR SSE4_1_SUPPORTED OR AVX_SUPPORTED)
if(SSE1_SUPPORTED OR SSE2_SUPPORTED OR SSE4_1_SUPPORTED OR AVX2_SUPPORTED)
set(COMPILER_SUPPORT_SIMD 1 PARENT_SCOPE)
else()
message(STATUS "No SIMD support in compiler")
@ -215,7 +217,7 @@ function(get_opus_sources SOURCE_GROUP MAKE_FILE SOURCES)
if(${list_length} LESS 1)
message(
FATAL_ERROR
"No files parsed succesfully from ${SOURCE_GROUP} in ${MAKE_FILE}")
"No files parsed successfully from ${SOURCE_GROUP} in ${MAKE_FILE}")
endif()
# remove trailing whitespaces

View File

@ -13,6 +13,8 @@ get_opus_sources(SILK_SOURCES_X86_RTCD silk_sources.mk silk_sources_x86_rtcd)
get_opus_sources(SILK_SOURCES_SSE4_1 silk_sources.mk silk_sources_sse4_1)
get_opus_sources(SILK_SOURCES_FIXED_SSE4_1 silk_sources.mk
silk_sources_fixed_sse4_1)
get_opus_sources(SILK_SOURCES_AVX2 silk_sources.mk silk_sources_avx2)
get_opus_sources(SILK_SOURCES_FLOAT_AVX2 silk_sources.mk silk_sources_float_avx2)
get_opus_sources(SILK_SOURCES_ARM_RTCD silk_sources.mk silk_sources_arm_rtcd)
get_opus_sources(SILK_SOURCES_ARM_NEON_INTR silk_sources.mk
silk_sources_arm_neon_intr)
@ -29,6 +31,7 @@ get_opus_sources(CELT_SOURCES_X86_RTCD celt_sources.mk celt_sources_x86_rtcd)
get_opus_sources(CELT_SOURCES_SSE celt_sources.mk celt_sources_sse)
get_opus_sources(CELT_SOURCES_SSE2 celt_sources.mk celt_sources_sse2)
get_opus_sources(CELT_SOURCES_SSE4_1 celt_sources.mk celt_sources_sse4_1)
get_opus_sources(CELT_SOURCES_AVX2 celt_sources.mk celt_sources_avx2)
get_opus_sources(CELT_SOURCES_ARM_RTCD celt_sources.mk celt_sources_arm_rtcd)
get_opus_sources(CELT_SOURCES_ARM_ASM celt_sources.mk celt_sources_arm_asm)
get_opus_sources(CELT_AM_SOURCES_ARM_ASM celt_sources.mk
@ -37,13 +40,30 @@ get_opus_sources(CELT_SOURCES_ARM_NEON_INTR celt_sources.mk
celt_sources_arm_neon_intr)
get_opus_sources(CELT_SOURCES_ARM_NE10 celt_sources.mk celt_sources_arm_ne10)
get_opus_sources(DEEP_PLC_HEAD lpcnet_headers.mk deep_plc_headers)
get_opus_sources(DRED_HEAD lpcnet_headers.mk dred_headers)
get_opus_sources(OSCE_HEAD lpcnet_headers.mk osce_headers)
get_opus_sources(DEEP_PLC_SOURCES lpcnet_sources.mk deep_plc_sources)
get_opus_sources(DRED_SOURCES lpcnet_sources.mk dred_sources)
get_opus_sources(OSCE_SOURCES lpcnet_sources.mk osce_sources)
get_opus_sources(DNN_SOURCES_X86_RTCD lpcnet_sources.mk dnn_sources_x86_rtcd)
get_opus_sources(DNN_SOURCES_SSE2 lpcnet_sources.mk dnn_sources_sse2)
get_opus_sources(DNN_SOURCES_SSE4_1 lpcnet_sources.mk dnn_sources_sse4_1)
get_opus_sources(DNN_SOURCES_AVX2 lpcnet_sources.mk dnn_sources_avx2)
get_opus_sources(DNN_SOURCES_NEON lpcnet_sources.mk dnn_sources_arm_neon)
get_opus_sources(DNN_SOURCES_DOTPROD lpcnet_sources.mk dnn_sources_arm_dotprod)
get_opus_sources(opus_demo_SOURCES Makefile.am opus_demo_sources)
get_opus_sources(opus_custom_demo_SOURCES Makefile.am opus_custom_demo_sources)
get_opus_sources(opus_compare_SOURCES Makefile.am opus_compare_sources)
get_opus_sources(tests_test_opus_api_SOURCES Makefile.am test_opus_api_sources)
get_opus_sources(tests_test_opus_encode_SOURCES Makefile.am
test_opus_encode_sources)
get_opus_sources(tests_test_opus_extensions_SOURCES Makefile.am
test_opus_extensions_sources)
get_opus_sources(tests_test_opus_decode_SOURCES Makefile.am
test_opus_decode_sources)
get_opus_sources(tests_test_opus_padding_SOURCES Makefile.am
test_opus_padding_sources)
get_opus_sources(tests_test_opus_dred_SOURCES Makefile.am
test_opus_dred_sources)

View File

@ -162,12 +162,39 @@ AS_IF([test "$enable_custom_modes" = "yes"],[
AM_CONDITIONAL([CUSTOM_MODES], [test "$enable_custom_modes" = "yes"])
AC_ARG_ENABLE([dred],
[AS_HELP_STRING([--enable-dred], [Use Deep REDundancy (DRED)])],,
[enable_dred=no])
AS_IF([test "$enable_dred" = "yes"],[
AC_DEFINE([ENABLE_DRED], [1], [DRED])
])
AM_CONDITIONAL([ENABLE_DRED], [test "$enable_dred" = "yes"])
AC_ARG_ENABLE([deep-plc],
[AS_HELP_STRING([--enable-deep-plc], [Use deep PLC for SILK])],,
[enable_deep_plc=no])
AS_IF([test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"],[
AC_DEFINE([ENABLE_DEEP_PLC], [1], [Deep PLC])
])
AM_CONDITIONAL([ENABLE_DEEP_PLC], [test "$enable_deep_plc" = "yes" || test "$enable_dred" = "yes" || test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
AC_ARG_ENABLE([lossgen],
[AS_HELP_STRING([--enable-lossgen], [Build opus_demo with packet loss simulator])],,
[enable_lossgen=no])
AS_IF([test "$enable_lossgen" = "yes"],[
AC_DEFINE([ENABLE_LOSSGEN], [1], [LOSSGEN])
])
AM_CONDITIONAL([ENABLE_LOSSGEN], [test "$enable_lossgen" = "yes"])
has_float_approx=no
#case "$host_cpu" in
#i[[3456]]86 | x86_64 | powerpc64 | powerpc32 | ia64)
# has_float_approx=yes
# ;;
#esac
case "$host_cpu" in
i[[3456]]86 | x86_64 | arm* | aarch64* | powerpc64 | powerpc32 | ia64)
has_float_approx=yes
;;
esac
AC_ARG_ENABLE([float-approx],
[AS_HELP_STRING([--enable-float-approx], [enable fast approximations for floating point])],
@ -202,7 +229,7 @@ AS_IF([test x"${enable_asm}" = x"yes"],[
case $host_cpu in
arm*)
dnl Currently we only have asm for fixed-point
AS_IF([test "$enable_float" != "yes"],[
#AS_IF([test "$enable_float" != "yes"],[
cpu_arm=yes
AC_DEFINE([OPUS_ARM_ASM], [], [Make use of ARM asm optimization])
AS_GCC_INLINE_ASSEMBLY(
@ -316,6 +343,18 @@ AS_IF([test x"${enable_asm}" = x"yes"],[
)
])
AC_SUBST(OPUS_ARM_MAY_HAVE_NEON)
AS_IF([test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1"],[
AC_DEFINE(OPUS_ARM_MAY_HAVE_DOTPROD, 1,
[Define if compiler supports DOTPROD instructions])
AS_IF([test x"$OPUS_ARM_PRESUME_DOTPROD" = x"1"], [
AC_DEFINE(OPUS_ARM_PRESUME_DOTPROD, 1,
[Define if binary requires DOTPROD instruction support])
asm_optimization="$asm_optimization (DOTPROD)"
],
[rtcd_support="$rtcd_support (DOTPROD)"]
)
])
AC_SUBST(OPUS_ARM_MAY_HAVE_DOTPROD)
dnl Make sure turning on RTCD gets us at least one
dnl instruction set.
AS_IF([test x"$rtcd_support" != x""],
@ -336,7 +375,7 @@ AS_IF([test x"${enable_asm}" = x"yes"],[
[*** ARM assembly requires perl -- disabling optimizations])
asm_optimization="(missing perl dependency for ARM)"
])
])
#])
;;
esac
],[
@ -352,13 +391,14 @@ AM_CONDITIONAL([OPUS_ARM_EXTERNAL_ASM],
AM_CONDITIONAL([HAVE_SSE], [false])
AM_CONDITIONAL([HAVE_SSE2], [false])
AM_CONDITIONAL([HAVE_SSE4_1], [false])
AM_CONDITIONAL([HAVE_AVX], [false])
AM_CONDITIONAL([HAVE_AVX2], [false])
m4_define([DEFAULT_X86_SSE_CFLAGS], [-msse])
m4_define([DEFAULT_X86_SSE2_CFLAGS], [-msse2])
m4_define([DEFAULT_X86_SSE4_1_CFLAGS], [-msse4.1])
m4_define([DEFAULT_X86_AVX_CFLAGS], [-mavx])
m4_define([DEFAULT_X86_AVX2_CFLAGS], [-mavx -mfma -mavx2])
m4_define([DEFAULT_ARM_NEON_INTR_CFLAGS], [-mfpu=neon])
m4_define([DEFAULT_ARM_DOTPROD_INTR_CFLAGS], ["-march=armv8.2-a+dotprod"])
# With GCC on ARM32 softfp architectures (e.g. Android, or older Ubuntu) you need to specify
# -mfloat-abi=softfp for -mfpu=neon to work. However, on ARM32 hardfp architectures (e.g. newer Ubuntu),
# this option will break things.
@ -374,14 +414,16 @@ AS_CASE([$host],
AC_ARG_VAR([X86_SSE_CFLAGS], [C compiler flags to compile SSE intrinsics @<:@default=]DEFAULT_X86_SSE_CFLAGS[@:>@])
AC_ARG_VAR([X86_SSE2_CFLAGS], [C compiler flags to compile SSE2 intrinsics @<:@default=]DEFAULT_X86_SSE2_CFLAGS[@:>@])
AC_ARG_VAR([X86_SSE4_1_CFLAGS], [C compiler flags to compile SSE4.1 intrinsics @<:@default=]DEFAULT_X86_SSE4_1_CFLAGS[@:>@])
AC_ARG_VAR([X86_AVX_CFLAGS], [C compiler flags to compile AVX intrinsics @<:@default=]DEFAULT_X86_AVX_CFLAGS[@:>@])
AC_ARG_VAR([X86_AVX2_CFLAGS], [C compiler flags to compile AVX2 intrinsics @<:@default=]DEFAULT_X86_AVX2_CFLAGS[@:>@])
AC_ARG_VAR([ARM_NEON_INTR_CFLAGS], [C compiler flags to compile ARM NEON intrinsics @<:@default=]DEFAULT_ARM_NEON_INTR_CFLAGS / DEFAULT_ARM_NEON_SOFTFP_INTR_CFLAGS[@:>@])
AC_ARG_VAR([ARM_DOTPROD_INTR_CFLAGS], [C compiler flags to compile ARM DOTPROD intrinsics @<:@default=]DEFAULT_ARM_DOTPROD_INTR_CFLAGS[@:>@])
AS_VAR_SET_IF([X86_SSE_CFLAGS], [], [AS_VAR_SET([X86_SSE_CFLAGS], "DEFAULT_X86_SSE_CFLAGS")])
AS_VAR_SET_IF([X86_SSE2_CFLAGS], [], [AS_VAR_SET([X86_SSE2_CFLAGS], "DEFAULT_X86_SSE2_CFLAGS")])
AS_VAR_SET_IF([X86_SSE4_1_CFLAGS], [], [AS_VAR_SET([X86_SSE4_1_CFLAGS], "DEFAULT_X86_SSE4_1_CFLAGS")])
AS_VAR_SET_IF([X86_AVX_CFLAGS], [], [AS_VAR_SET([X86_AVX_CFLAGS], "DEFAULT_X86_AVX_CFLAGS")])
AS_VAR_SET_IF([X86_AVX2_CFLAGS], [], [AS_VAR_SET([X86_AVX2_CFLAGS], "DEFAULT_X86_AVX2_CFLAGS")])
AS_VAR_SET_IF([ARM_NEON_INTR_CFLAGS], [], [AS_VAR_SET([ARM_NEON_INTR_CFLAGS], ["$RESOLVED_DEFAULT_ARM_NEON_INTR_CFLAGS"])])
AS_VAR_SET_IF([ARM_DOTPROD_INTR_CFLAGS], [], [AS_VAR_SET([ARM_DOTPROD_INTR_CFLAGS], ["DEFAULT_ARM_DOTPROD_INTR_CFLAGS"])])
AC_DEFUN([OPUS_PATH_NE10],
[
@ -525,6 +567,46 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
intrinsics_support="$intrinsics_support (NEON [Aarch64])"
])
OPUS_CHECK_INTRINSICS(
[Aarch64 dotprod],
[$ARM_DOTPROD_INTR_CFLAGS],
[OPUS_ARM_MAY_HAVE_DOTPROD],
[OPUS_ARM_PRESUME_DOTPROD],
[[#include <arm_neon.h>
]],
[[
static int32x4_t acc;
static int8x16_t a, b;
acc = vdotq_s32(acc, a, b);
]]
)
dnl DOTPROD is supported by the compiler but not presumed on the target:
dnl export the DOTPROD compiler flags (ARM_DOTPROD_INTR_CFLAGS, the variable
dnl the intrinsics check was run with) through OPUS_ARM_DOTPROD_INTR_CFLAGS.
AS_IF([test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1" && test x"$OPUS_ARM_PRESUME_DOTPROD" != x"1"],
   [
      OPUS_ARM_DOTPROD_INTR_CFLAGS="$ARM_DOTPROD_INTR_CFLAGS"
      AC_SUBST([OPUS_ARM_DOTPROD_INTR_CFLAGS])
   ]
)
AS_IF([test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1"],
[
AC_DEFINE([OPUS_ARM_MAY_HAVE_DOTPROD], 1, [Compiler supports Aarch64 DOTPROD Intrinsics])
intrinsics_support="$intrinsics_support (DOTPROD)"
AS_IF([test x"$OPUS_ARM_PRESUME_DOTPROD" = x"1"],
[
AC_DEFINE([OPUS_ARM_PRESUME_DOTPROD], 1, [Define if binary requires Aarch64 dotprod Intrinsics])
intrinsics_support="$intrinsics_support (DOTPROD [Aarch64])"
])
AS_IF([test x"$enable_rtcd" != x"no" && test x"$OPUS_ARM_PRESUME_DOTPROD" != x"1"],
[AS_IF([test x"$rtcd_support" = x"no"],
[rtcd_support="ARM (DOTPROD Intrinsics)"],
[rtcd_support="$rtcd_support (DOTPROD Intrinsics)"])])
]
)
AS_IF([test x"$intrinsics_support" = x""],
[intrinsics_support=no],
[intrinsics_support="ARM$intrinsics_support"])
@ -601,24 +683,24 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
]
)
OPUS_CHECK_INTRINSICS(
[AVX],
[$X86_AVX_CFLAGS],
[OPUS_X86_MAY_HAVE_AVX],
[OPUS_X86_PRESUME_AVX],
[AVX2],
[$X86_AVX2_CFLAGS],
[OPUS_X86_MAY_HAVE_AVX2],
[OPUS_X86_PRESUME_AVX2],
[[#include <immintrin.h>
#include <time.h>
]],
[[
__m256 mtest;
mtest = _mm256_set1_ps((float)time(NULL));
mtest = _mm256_addsub_ps(mtest, mtest);
return _mm_cvtss_si32(_mm256_extractf128_ps(mtest, 0));
mtest = _mm256_fmadd_ps(mtest, mtest, mtest);
return _mm256_extract_epi16(_mm256_cvttps_epi32(mtest), 0);
]]
)
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1" && test x"$OPUS_X86_PRESUME_AVX" != x"1"],
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1" && test x"$OPUS_X86_PRESUME_AVX2" != x"1"],
[
OPUS_X86_AVX_CFLAGS="$X86_AVX_CFLAGS"
AC_SUBST([OPUS_X86_AVX_CFLAGS])
OPUS_X86_AVX2_CFLAGS="$X86_AVX2_CFLAGS"
AC_SUBST([OPUS_X86_AVX2_CFLAGS])
]
)
AS_IF([test x"$rtcd_support" = x"no"], [rtcd_support=""])
@ -660,17 +742,17 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
[
AC_MSG_WARN([Compiler does not support SSE4.1 intrinsics])
])
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"],
AS_IF([test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1"],
[
AC_DEFINE([OPUS_X86_MAY_HAVE_AVX], 1, [Compiler supports X86 AVX Intrinsics])
intrinsics_support="$intrinsics_support AVX"
AC_DEFINE([OPUS_X86_MAY_HAVE_AVX2], 1, [Compiler supports X86 AVX2 Intrinsics])
intrinsics_support="$intrinsics_support AVX2"
AS_IF([test x"$OPUS_X86_PRESUME_AVX" = x"1"],
[AC_DEFINE([OPUS_X86_PRESUME_AVX], 1, [Define if binary requires AVX intrinsics support])],
[rtcd_support="$rtcd_support AVX"])
AS_IF([test x"$OPUS_X86_PRESUME_AVX2" = x"1"],
[AC_DEFINE([OPUS_X86_PRESUME_AVX2], 1, [Define if binary requires AVX2 intrinsics support])],
[rtcd_support="$rtcd_support AVX2"])
],
[
AC_MSG_WARN([Compiler does not support AVX intrinsics])
AC_MSG_WARN([Compiler does not support AVX2 intrinsics])
])
AS_IF([test x"$intrinsics_support" = x""],
@ -742,6 +824,8 @@ AS_IF([test x"$enable_intrinsics" = x"yes"],[
])
AM_CONDITIONAL([CPU_ARM], [test "$cpu_arm" = "yes"])
AM_CONDITIONAL([HAVE_ARM_DOTPROD],
[test x"$OPUS_ARM_MAY_HAVE_DOTPROD" = x"1"])
AM_CONDITIONAL([HAVE_ARM_NEON_INTR],
[test x"$OPUS_ARM_MAY_HAVE_NEON_INTR" = x"1"])
AM_CONDITIONAL([HAVE_ARM_NE10],
@ -753,8 +837,8 @@ AM_CONDITIONAL([HAVE_SSE2],
[test x"$OPUS_X86_MAY_HAVE_SSE2" = x"1"])
AM_CONDITIONAL([HAVE_SSE4_1],
[test x"$OPUS_X86_MAY_HAVE_SSE4_1" = x"1"])
AM_CONDITIONAL([HAVE_AVX],
[test x"$OPUS_X86_MAY_HAVE_AVX" = x"1"])
AM_CONDITIONAL([HAVE_AVX2],
[test x"$OPUS_X86_MAY_HAVE_AVX2" = x"1"])
AM_CONDITIONAL([HAVE_RTCD],
[test x"$enable_rtcd" = x"yes" -a x"$rtcd_support" != x"no"])
@ -813,6 +897,47 @@ AS_IF([test "$enable_doc" = "yes"], [
HAVE_DOXYGEN=no
])
AC_ARG_ENABLE([dot-product],
AS_HELP_STRING([--disable-dot-product], [Disable dot product implementation]),,
enable_dot_product=yes)
AS_IF([test "$enable_dot_product" = "no"], [
AC_DEFINE([DISABLE_DOT_PROD], [1], [Disable dot product instructions])
])
AC_ARG_ENABLE([dnn-debug-float],
AS_HELP_STRING([--enable-dnn-debug-float], [Use floating-point DNN computation everywhere]),,
enable_dnn_debug_float=no)
AS_IF([test "$enable_dnn_debug_float" = "no"], [
AC_DEFINE([DISABLE_DEBUG_FLOAT], [1], [Disable DNN debug float])
])
dnl The default must set enable_osce_training_data, which is the variable
dnl read by every test on this option.
AC_ARG_ENABLE([osce-training-data],
   AS_HELP_STRING([--enable-osce-training-data], [enables feature output for SILK enhancement]),,
   [enable_osce_training_data=no]
)
AS_IF([test "$enable_osce_training_data" = "yes"], [
AC_DEFINE([ENABLE_OSCE_TRAINING_DATA], [1], [Enable dumping of OSCE training data])
])
AC_MSG_CHECKING([argument osce training data])
AS_IF([test "$enable_osce_training_data" = "yes"], [
AC_MSG_RESULT([yes])
], [AC_MSG_RESULT([no])])
dnl Help text matches the description given to the ENABLE_OSCE define.
AC_ARG_ENABLE([osce],
   AS_HELP_STRING([--enable-osce], [Enable Opus Speech Coding Enhancement (OSCE)]),,
   [enable_osce=no]
)
AS_IF([test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"], [
AC_DEFINE([ENABLE_OSCE], [1], [Enable Opus Speech Coding Enhancement])
])
AM_CONDITIONAL([ENABLE_OSCE], [test "$enable_osce" = "yes" || test "$enable_osce_training_data" = "yes"])
AM_CONDITIONAL([HAVE_DOXYGEN], [test "$HAVE_DOXYGEN" = "yes"])
AC_ARG_ENABLE([extra-programs],

24
dnn/LPCNet.yml Normal file
View File

@ -0,0 +1,24 @@
#
# install
# conda env create -f=LPCNet.yml
#
# update
# conda env update -f=LPCNet.yml
#
# activate
# conda activate LPCNet
#
# remove
# conda remove --name LPCNet --all
#
name: LPCNet
channels:
- anaconda
- conda-forge
dependencies:
- keras==2.2.4
- python>=3.6
- tensorflow-gpu==1.12.0
- cudatoolkit
- h5py
- numpy

1
dnn/README Normal file
View File

@ -0,0 +1 @@
See README.md

126
dnn/README.md Normal file
View File

@ -0,0 +1,126 @@
# LPCNet
Low complexity implementation of the WaveRNN-based LPCNet algorithm, as described in:
- J.-M. Valin, J. Skoglund, [LPCNet: Improving Neural Speech Synthesis Through Linear Prediction](https://jmvalin.ca/papers/lpcnet_icassp2019.pdf), *Proc. International Conference on Acoustics, Speech and Signal Processing (ICASSP)*, arXiv:1810.11846, 2019.
- J.-M. Valin, U. Isik, P. Smaragdis, A. Krishnaswamy, [Neural Speech Synthesis on a Shoestring: Improving the Efficiency of LPCNet](https://jmvalin.ca/papers/improved_lpcnet.pdf), *Proc. ICASSP*, arxiv:2106.04129, 2022.
- K. Subramani, J.-M. Valin, U. Isik, P. Smaragdis, A. Krishnaswamy, [End-to-end LPCNet: A Neural Vocoder With Fully-Differentiable LPC Estimation](https://jmvalin.ca/papers/lpcnet_end2end.pdf), *Proc. INTERSPEECH*, arxiv:2106.04129, 2022.
For coding/PLC applications of LPCNet, see:
- J.-M. Valin, J. Skoglund, [A Real-Time Wideband Neural Vocoder at 1.6 kb/s Using LPCNet](https://jmvalin.ca/papers/lpcnet_codec.pdf), *Proc. INTERSPEECH*, arxiv:1903.12087, 2019.
- J. Skoglund, J.-M. Valin, [Improving Opus Low Bit Rate Quality with Neural Speech Synthesis](https://jmvalin.ca/papers/opusnet.pdf), *Proc. INTERSPEECH*, arxiv:1905.04628, 2020.
- J.-M. Valin, A. Mustafa, C. Montgomery, T.B. Terriberry, M. Klingbeil, P. Smaragdis, A. Krishnaswamy, [Real-Time Packet Loss Concealment With Mixed Generative and Predictive Model](https://jmvalin.ca/papers/lpcnet_plc.pdf), *Proc. INTERSPEECH*, arxiv:2205.05785, 2022.
- J.-M. Valin, J. Büthe, A. Mustafa, [Low-Bitrate Redundancy Coding of Speech Using a Rate-Distortion-Optimized Variational Autoencoder](https://jmvalin.ca/papers/valin_dred.pdf), *Proc. ICASSP*, arXiv:2212.04453, 2023. ([blog post](https://www.amazon.science/blog/neural-encoding-enables-more-efficient-recovery-of-lost-audio-packets))
# Introduction
Work-in-progress software for researching low-CPU-complexity algorithms for speech synthesis and compression by applying linear prediction techniques to WaveRNN. High-quality speech can be synthesised on regular CPUs (around 3 GFLOPS) with SIMD support (SSE2, SSSE3, AVX, AVX2/FMA, and NEON are currently supported). The code also supports very low bitrate compression at 1.6 kb/s.
The BSD-licensed software is written in C and Python/Keras. For training, a GTX 1080 Ti or better is recommended.
This software is an open source starting point for LPCNet/WaveRNN-based speech synthesis and coding.
# Using the existing software
You can build the code using:
```
./autogen.sh
./configure
make
```
Note that the autogen.sh script is used when building from Git and will automatically download the latest model
(models are too large to put in Git). By default, LPCNet will attempt to use 8-bit dot product instructions on AVX\*/Neon to
speed up inference. To disable that (e.g. to avoid quantization effects when retraining), add --disable-dot-product to the
configure script. LPCNet does not yet have a complete implementation for some of the integer operations on the ARMv7
architecture, so for now you will also need --disable-dot-product to successfully compile on 32-bit ARM.
It is highly recommended to set the CFLAGS environment variable to enable AVX or NEON *prior* to running configure, otherwise
no vectorization will take place and the code will be very slow. On a recent x86 CPU, something like
```
export CFLAGS='-Ofast -g -march=native'
```
should work. On ARM, you can enable Neon with:
```
export CFLAGS='-Ofast -g -mfpu=neon'
```
While not strictly required, the -Ofast flag will help with auto-vectorization, especially for dot products that
cannot be optimized without -ffast-math (which -Ofast enables). Additionally, -falign-loops=32 has been shown to
help on x86.
You can test the capabilities of LPCNet using the lpcnet\_demo application. To encode a file:
```
./lpcnet_demo -encode input.pcm compressed.bin
```
where input.pcm is a 16-bit (machine endian) PCM file sampled at 16 kHz. The raw compressed data (no header)
is written to compressed.bin and consists of 8 bytes per 40-ms packet.
To decode:
```
./lpcnet_demo -decode compressed.bin output.pcm
```
where output.pcm is also 16-bit, 16 kHz PCM.
Alternatively, you can run the uncompressed analysis/synthesis using -features
instead of -encode and -synthesis instead of -decode.
The same functionality is available in the form of a library. See include/lpcnet.h for the API.
To try packet loss concealment (PLC), you first need a PLC model, which you can get with:
```
./download_model.sh plc-3b1eab4
```
or (for the PLC challenge submission):
```
./download_model.sh plc_challenge
```
PLC can be tested with:
```
./lpcnet_demo -plc_file noncausal_dc error_pattern.txt input.pcm output.pcm
```
where error_pattern.txt is a text file with one entry per 20-ms packet, with 1 meaning "packet lost" and 0 meaning "packet not lost".
noncausal_dc selects the non-causal (5-ms look-ahead) variant with special handling for DC offsets. It's also possible to use "noncausal", "causal",
or "causal_dc".
# Training a new model
This codebase is also meant for research and it is possible to train new models. These are the steps to do that:
1. Set up a Keras system with GPU.
1. Generate training data:
```
./dump_data -train input.s16 features.f32 data.s16
```
where the first file contains 16 kHz 16-bit raw PCM audio (no header) and the other files are output files. This program makes several passes over the data with different filters to generate a large amount of training data.
1. Now that you have your files, train with:
```
python3 training_tf2/train_lpcnet.py features.f32 data.s16 model_name
```
and it will generate an h5 file for each iteration, with model\_name as prefix. If it stops with a
"Failed to allocate RNN reserve space" message, try specifying a smaller --batch-size for train\_lpcnet.py.
1. You can synthesise speech with Python and your GPU card (very slow):
```
./dump_data -test test_input.s16 test_features.f32
./training_tf2/test_lpcnet.py lpcnet_model_name.h5 test_features.f32 test.s16
```
1. Or with C on a CPU (C inference is much faster):
First extract the model files nnet\_data.h and nnet\_data.c:
```
./training_tf2/dump_lpcnet.py lpcnet_model_name.h5
```
and move the generated nnet\_data.\* files to the src/ directory.
Then you just need to rebuild the software and use lpcnet\_demo as explained above.
# Speech Material for Training
Suitable training material can be obtained from [Open Speech and Language Resources](https://www.openslr.org/). See the datasets.txt file for details on suitable training data.
# Reading Further
1. [LPCNet: DSP-Boosted Neural Speech Synthesis](https://people.xiph.org/~jm/demo/lpcnet/)
1. [A Real-Time Wideband Neural Vocoder at 1.6 kb/s Using LPCNet](https://people.xiph.org/~jm/demo/lpcnet_codec/)
1. Sample model files (check compatibility): https://media.xiph.org/lpcnet/data/

449
dnn/adaconvtest.c Normal file
View File

@ -0,0 +1,449 @@
#include "lace_data.h"
#include "nolace_data.h"
#include "osce.h"
#include "nndsp.h"
#include <stdlib.h>
#include <stdio.h>
#include <math.h>
extern const WeightArray lacelayers_arrays[];
extern const WeightArray nolacelayers_arrays[];
/* Sizes of the fixed scratch buffers shared by the *_compare() routines. */
enum {
    ADATEST_MAX_FRAME_FLOATS = 512, /* capacity of each per-frame float buffer */
    ADATEST_WINDOW_LENGTH = 40,     /* length of the precomputed overlap window */
    ADATEST_MAX_PATH = 256          /* capacity of a test vector file name */
};

/* Builds "<prefix><suffix>" into path and opens that file for binary reading.
   Terminates the program with a diagnostic if the name does not fit into
   path_size bytes or if the file cannot be opened. */
static FILE *adatest_open_vector(const char *prefix, const char *suffix, char *path, size_t path_size)
{
    char message[512];
    FILE *f;
    int len;

    len = snprintf(path, path_size, "%s%s", prefix, suffix);
    if (len < 0 || (size_t)len >= path_size)
    {
        fprintf(stderr, "test vector file name too long: %s%s\n", prefix, suffix);
        exit(1);
    }
    /* format the perror() prefix before fopen() so that errno still belongs
       to fopen() when perror() is called */
    snprintf(message, sizeof(message), "could not open file %s", path);
    /* the vectors are raw binary data, so avoid text-mode translation */
    f = fopen(path, "rb");
    if (f == NULL)
    {
        perror(message);
        exit(1);
    }
    return f;
}

/* Terminates the program if count elements would not fit into one of the
   ADATEST_MAX_FRAME_FLOATS-sized scratch buffers. */
static void adatest_check_count(const char *what, int count)
{
    if (count < 0 || count > ADATEST_MAX_FRAME_FLOATS)
    {
        fprintf(stderr, "%s (%d) does not fit the %d-element test buffers\n",
                what, count, (int)ADATEST_MAX_FRAME_FLOATS);
        exit(1);
    }
}

/* Reads exactly count elements of elem_size bytes from f into dst, or
   terminates the program naming the frame and file that came up short. */
static void adatest_read_frame(void *dst, size_t elem_size, int count, FILE *f, int i_frame, const char *path)
{
    if (fread(dst, elem_size, (size_t)count, f) != (size_t)count)
    {
        fprintf(stderr, "could not read frame %d from %s\n", i_frame, path);
        exit(1);
    }
}

/* Root-mean-square difference between ref and out over count samples. */
static float adatest_rmse(const float *ref, const float *out, int count)
{
    int i_sample;
    float mse = 0;

    for (i_sample = 0; i_sample < count; i_sample ++)
    {
        mse += pow(ref[i_sample] - out[i_sample], 2);
    }
    return sqrt(mse / count);
}

/* Feeds num_frames frames from the test vectors "<prefix>_features.f32" and
   "<prefix>_x_in.f32" through adaconv_process_frame() and prints, per frame,
   the RMSE against the reference output stored in "<prefix>_x_out.f32".
   hAdaConv is re-initialised on entry. Any file or size problem terminates
   the program with exit status 1. */
void adaconv_compare(
    const char * prefix,
    int num_frames,
    AdaConvState* hAdaConv,
    LinearLayer *kernel_layer,
    LinearLayer *gain_layer,
    int feature_dim,
    int frame_size,
    int overlap_size,
    int in_channels,
    int out_channels,
    int kernel_size,
    int left_padding,
    float filter_gain_a,
    float filter_gain_b,
    float shape_gain
)
{
    char feature_file[ADATEST_MAX_PATH];
    char x_in_file[ADATEST_MAX_PATH];
    char x_out_file[ADATEST_MAX_PATH];
    int i_frame;
    int in_count, out_count;
    float features[ADATEST_MAX_FRAME_FLOATS];
    float x_in[ADATEST_MAX_FRAME_FLOATS];
    float x_out_ref[ADATEST_MAX_FRAME_FLOATS];
    float x_out[ADATEST_MAX_FRAME_FLOATS];
    float window[ADATEST_WINDOW_LENGTH];
    FILE *f_features, *f_x_in, *f_x_out;

    /* reject layer dimensions that would overrun the stack buffers */
    adatest_check_count("feature_dim", feature_dim);
    adatest_check_count("frame_size", frame_size);
    adatest_check_count("in_channels", in_channels);
    adatest_check_count("out_channels", out_channels);
    in_count = frame_size * in_channels;
    out_count = frame_size * out_channels;
    adatest_check_count("frame_size * in_channels", in_count);
    adatest_check_count("frame_size * out_channels", out_count);

    init_adaconv_state(hAdaConv);
    /* NOTE(review): the window is always computed for ADATEST_WINDOW_LENGTH
       samples, independently of overlap_size -- confirm that all callers use
       an overlap of that length. */
    compute_overlap_window(window, ADATEST_WINDOW_LENGTH);

    f_features = adatest_open_vector(prefix, "_features.f32", feature_file, sizeof(feature_file));
    f_x_in = adatest_open_vector(prefix, "_x_in.f32", x_in_file, sizeof(x_in_file));
    f_x_out = adatest_open_vector(prefix, "_x_out.f32", x_out_file, sizeof(x_out_file));

    for (i_frame = 0; i_frame < num_frames; i_frame ++)
    {
        adatest_read_frame(features, sizeof(float), feature_dim, f_features, i_frame, feature_file);
        adatest_read_frame(x_in, sizeof(float), in_count, f_x_in, i_frame, x_in_file);
        adatest_read_frame(x_out_ref, sizeof(float), out_count, f_x_out, i_frame, x_out_file);

        adaconv_process_frame(hAdaConv, x_out, x_in, features, kernel_layer, gain_layer, feature_dim,
            frame_size, overlap_size, in_channels, out_channels, kernel_size, left_padding,
            filter_gain_a, filter_gain_b, shape_gain, window, 0);

        printf("rmse[%d] %f\n", i_frame, adatest_rmse(x_out_ref, x_out, out_count));
    }

    fclose(f_features);
    fclose(f_x_in);
    fclose(f_x_out);
}

/* Feeds num_frames frames from "<prefix>_features.f32", "<prefix>_x_in.f32"
   and the per-frame pitch lag in "<prefix>_p_in.s32" (one native int per
   frame) through adacomb_process_frame() and prints, per frame, the RMSE
   against the reference output stored in "<prefix>_x_out.f32".
   hAdaComb is re-initialised on entry. Any file or size problem terminates
   the program with exit status 1. */
void adacomb_compare(
    const char * prefix,
    int num_frames,
    AdaCombState* hAdaComb,
    LinearLayer *kernel_layer,
    LinearLayer *gain_layer,
    LinearLayer *global_gain_layer,
    int feature_dim,
    int frame_size,
    int overlap_size,
    int kernel_size,
    int left_padding,
    float filter_gain_a,
    float filter_gain_b,
    float log_gain_limit
)
{
    char feature_file[ADATEST_MAX_PATH];
    char x_in_file[ADATEST_MAX_PATH];
    char p_in_file[ADATEST_MAX_PATH];
    char x_out_file[ADATEST_MAX_PATH];
    int i_frame;
    float features[ADATEST_MAX_FRAME_FLOATS];
    float x_in[ADATEST_MAX_FRAME_FLOATS];
    float x_out_ref[ADATEST_MAX_FRAME_FLOATS];
    float x_out[ADATEST_MAX_FRAME_FLOATS];
    int pitch_lag;
    float window[ADATEST_WINDOW_LENGTH];
    FILE *f_features, *f_x_in, *f_p_in, *f_x_out;

    /* reject layer dimensions that would overrun the stack buffers */
    adatest_check_count("feature_dim", feature_dim);
    adatest_check_count("frame_size", frame_size);

    init_adacomb_state(hAdaComb);
    /* NOTE(review): the window is always computed for ADATEST_WINDOW_LENGTH
       samples, independently of overlap_size -- confirm that all callers use
       an overlap of that length. */
    compute_overlap_window(window, ADATEST_WINDOW_LENGTH);

    f_features = adatest_open_vector(prefix, "_features.f32", feature_file, sizeof(feature_file));
    f_x_in = adatest_open_vector(prefix, "_x_in.f32", x_in_file, sizeof(x_in_file));
    f_p_in = adatest_open_vector(prefix, "_p_in.s32", p_in_file, sizeof(p_in_file));
    f_x_out = adatest_open_vector(prefix, "_x_out.f32", x_out_file, sizeof(x_out_file));

    for (i_frame = 0; i_frame < num_frames; i_frame ++)
    {
        adatest_read_frame(features, sizeof(float), feature_dim, f_features, i_frame, feature_file);
        adatest_read_frame(x_in, sizeof(float), frame_size, f_x_in, i_frame, x_in_file);
        adatest_read_frame(&pitch_lag, sizeof(int), 1, f_p_in, i_frame, p_in_file);
        adatest_read_frame(x_out_ref, sizeof(float), frame_size, f_x_out, i_frame, x_out_file);

        adacomb_process_frame(hAdaComb, x_out, x_in, features, kernel_layer, gain_layer, global_gain_layer,
            pitch_lag, feature_dim, frame_size, overlap_size, kernel_size, left_padding, filter_gain_a, filter_gain_b, log_gain_limit, window, 0);

        printf("rmse[%d] %f\n", i_frame, adatest_rmse(x_out_ref, x_out, frame_size));
    }

    fclose(f_features);
    fclose(f_x_in);
    fclose(f_p_in);
    fclose(f_x_out);
}

/* Feeds num_frames frames from "<prefix>_features.f32" and
   "<prefix>_x_in.f32" through adashape_process_frame() and prints, per frame,
   the RMSE against the reference output stored in "<prefix>_x_out.f32".
   hAdaShape is re-initialised on entry. Any file or size problem terminates
   the program with exit status 1. */
void adashape_compare(
    const char * prefix,
    int num_frames,
    AdaShapeState* hAdaShape,
    LinearLayer *alpha1,
    LinearLayer *alpha2,
    int feature_dim,
    int frame_size,
    int avg_pool_k
)
{
    char feature_file[ADATEST_MAX_PATH];
    char x_in_file[ADATEST_MAX_PATH];
    char x_out_file[ADATEST_MAX_PATH];
    int i_frame;
    float features[ADATEST_MAX_FRAME_FLOATS];
    float x_in[ADATEST_MAX_FRAME_FLOATS];
    float x_out_ref[ADATEST_MAX_FRAME_FLOATS];
    float x_out[ADATEST_MAX_FRAME_FLOATS];
    FILE *f_features, *f_x_in, *f_x_out;

    /* reject layer dimensions that would overrun the stack buffers */
    adatest_check_count("feature_dim", feature_dim);
    adatest_check_count("frame_size", frame_size);

    init_adashape_state(hAdaShape);

    f_features = adatest_open_vector(prefix, "_features.f32", feature_file, sizeof(feature_file));
    f_x_in = adatest_open_vector(prefix, "_x_in.f32", x_in_file, sizeof(x_in_file));
    f_x_out = adatest_open_vector(prefix, "_x_out.f32", x_out_file, sizeof(x_out_file));

    for (i_frame = 0; i_frame < num_frames; i_frame ++)
    {
        adatest_read_frame(features, sizeof(float), feature_dim, f_features, i_frame, feature_file);
        adatest_read_frame(x_in, sizeof(float), frame_size, f_x_in, i_frame, x_in_file);
        adatest_read_frame(x_out_ref, sizeof(float), frame_size, f_x_out, i_frame, x_out_file);

        adashape_process_frame(hAdaShape, x_out, x_in, features, alpha1, alpha2, feature_dim,
            frame_size, avg_pool_k, 0);

        printf("rmse[%d] %f\n", i_frame, adatest_rmse(x_out_ref, x_out, frame_size));
    }

    fclose(f_features);
    fclose(f_x_in);
    fclose(f_x_out);
}
int main()
{
LACELayers hLACE;
NOLACELayers hNoLACE;
AdaConvState hAdaConv;
AdaCombState hAdaComb;
AdaShapeState hAdaShape;
init_adaconv_state(&hAdaConv);
init_lacelayers(&hLACE, lacelayers_arrays);
init_nolacelayers(&hNoLACE, nolacelayers_arrays);
printf("\ntesting lace.af1 (1 in, 1 out)...\n");
adaconv_compare(
"testvectors/lace_af1",
5,
&hAdaConv,
&hLACE.lace_af1_kernel,
&hLACE.lace_af1_gain,
LACE_AF1_FEATURE_DIM,
LACE_AF1_FRAME_SIZE,
LACE_AF1_OVERLAP_SIZE,
LACE_AF1_IN_CHANNELS,
LACE_AF1_OUT_CHANNELS,
LACE_AF1_KERNEL_SIZE,
LACE_AF1_LEFT_PADDING,
LACE_AF1_FILTER_GAIN_A,
LACE_AF1_FILTER_GAIN_B,
LACE_AF1_SHAPE_GAIN
);
printf("\ntesting nolace.af1 (1 in, 2 out)...\n");
adaconv_compare(
"testvectors/nolace_af1",
5,
&hAdaConv,
&hNoLACE.nolace_af1_kernel,
&hNoLACE.nolace_af1_gain,
NOLACE_AF1_FEATURE_DIM,
NOLACE_AF1_FRAME_SIZE,
NOLACE_AF1_OVERLAP_SIZE,
NOLACE_AF1_IN_CHANNELS,
NOLACE_AF1_OUT_CHANNELS,
NOLACE_AF1_KERNEL_SIZE,
NOLACE_AF1_LEFT_PADDING,
NOLACE_AF1_FILTER_GAIN_A,
NOLACE_AF1_FILTER_GAIN_B,
NOLACE_AF1_SHAPE_GAIN
);
printf("testing nolace.af4 (2 in, 1 out)...\n");
adaconv_compare(
"testvectors/nolace_af4",
5,
&hAdaConv,
&hNoLACE.nolace_af4_kernel,
&hNoLACE.nolace_af4_gain,
NOLACE_AF4_FEATURE_DIM,
NOLACE_AF4_FRAME_SIZE,
NOLACE_AF4_OVERLAP_SIZE,
NOLACE_AF4_IN_CHANNELS,
NOLACE_AF4_OUT_CHANNELS,
NOLACE_AF4_KERNEL_SIZE,
NOLACE_AF4_LEFT_PADDING,
NOLACE_AF4_FILTER_GAIN_A,
NOLACE_AF4_FILTER_GAIN_B,
NOLACE_AF4_SHAPE_GAIN
);
printf("\ntesting nolace.af2 (2 in, 2 out)...\n");
adaconv_compare(
"testvectors/nolace_af2",
5,
&hAdaConv,
&hNoLACE.nolace_af2_kernel,
&hNoLACE.nolace_af2_gain,
NOLACE_AF2_FEATURE_DIM,
NOLACE_AF2_FRAME_SIZE,
NOLACE_AF2_OVERLAP_SIZE,
NOLACE_AF2_IN_CHANNELS,
NOLACE_AF2_OUT_CHANNELS,
NOLACE_AF2_KERNEL_SIZE,
NOLACE_AF2_LEFT_PADDING,
NOLACE_AF2_FILTER_GAIN_A,
NOLACE_AF2_FILTER_GAIN_B,
NOLACE_AF2_SHAPE_GAIN
);
printf("\ntesting lace.cf1...\n");
adacomb_compare(
"testvectors/lace_cf1",
5,
&hAdaComb,
&hLACE.lace_cf1_kernel,
&hLACE.lace_cf1_gain,
&hLACE.lace_cf1_global_gain,
LACE_CF1_FEATURE_DIM,
LACE_CF1_FRAME_SIZE,
LACE_CF1_OVERLAP_SIZE,
LACE_CF1_KERNEL_SIZE,
LACE_CF1_LEFT_PADDING,
LACE_CF1_FILTER_GAIN_A,
LACE_CF1_FILTER_GAIN_B,
LACE_CF1_LOG_GAIN_LIMIT
);
printf("\ntesting nolace.tdshape1...\n");
adashape_compare(
"testvectors/nolace_tdshape1",
5,
&hAdaShape,
&hNoLACE.nolace_tdshape1_alpha1,
&hNoLACE.nolace_tdshape1_alpha2,
NOLACE_TDSHAPE1_FEATURE_DIM,
NOLACE_TDSHAPE1_FRAME_SIZE,
NOLACE_TDSHAPE1_AVG_POOL_K
);
return 0;
}
/* gcc -DVAR_ARRAYS -DENABLE_OSCE -I ../include -I ../silk -I . -I ../celt adaconvtest.c nndsp.c lace_data.c nolace_data.c nnet.c nnet_default.c ../celt/pitch.c ../celt/celt_lpc.c parse_lpcnet_weights.c -lm -o adaconvtest */

88
dnn/arm/arm_dnn_map.c Normal file
View File

@ -0,0 +1,88 @@
/* Copyright (c) 2018-2019 Mozilla
2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "arm/armcpu.h"
#include "nnet.h"
#if defined(OPUS_HAVE_RTCD)
/* Run-time dispatch table for compute_linear(). The compute_linear() macro in
   dnn_arm.h selects an entry with (arch & OPUS_ARCHMASK). The first three
   entries use the generic C implementation (presumably the pre-NEON
   architecture levels -- confirm against arm/armcpu.h), followed by the NEON
   and DOTPROD entries.
   NOTE(review): this table is only defined when OPUS_ARM_MAY_HAVE_DOTPROD is
   set, whereas dnn_arm.h declares and uses it whenever RTCD is enabled and
   either OPUS_ARM_MAY_HAVE_DOTPROD or OPUS_ARM_MAY_HAVE_NEON is set (and
   neither "presume" branch applies). Confirm that the build cannot produce a
   NEON-only RTCD configuration, which would leave the symbol undefined. */
#if (defined(OPUS_ARM_MAY_HAVE_DOTPROD) && !defined(OPUS_ARM_PRESUME_DOTPROD))
void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
const LinearLayer *linear,
float *out,
const float *in
) = {
compute_linear_c, /* default */
compute_linear_c,
compute_linear_c,
MAY_HAVE_NEON(compute_linear), /* neon */
MAY_HAVE_DOTPROD(compute_linear) /* dotprod */
};
#endif
#if (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON)) && !defined(OPUS_ARM_PRESUME_NEON)
/* Run-time dispatch table for compute_activation(). The compute_activation()
   macro in dnn_arm.h selects an entry with (arch & OPUS_ARCHMASK). The first
   three entries use the generic C implementation, followed by the NEON and
   DOTPROD entries. */
void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
float *output,
const float *input,
int N,
int activation
) = {
compute_activation_c, /* default */
compute_activation_c,
compute_activation_c,
MAY_HAVE_NEON(compute_activation), /* neon */
MAY_HAVE_DOTPROD(compute_activation) /* dotprod */
};
/* Run-time dispatch table for compute_conv2d(). The compute_conv2d() macro in
   dnn_arm.h selects an entry with (arch & OPUS_ARCHMASK). The first three
   entries use the generic C implementation, followed by the NEON and DOTPROD
   entries. */
void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
const Conv2dLayer *conv,
float *out,
float *mem,
const float *in,
int height,
int hstride,
int activation
) = {
compute_conv2d_c, /* default */
compute_conv2d_c,
compute_conv2d_c,
MAY_HAVE_NEON(compute_conv2d), /* neon */
MAY_HAVE_DOTPROD(compute_conv2d) /* dotprod */
};
#endif
#endif

104
dnn/arm/dnn_arm.h Normal file
View File

@ -0,0 +1,104 @@
/* Copyright (c) 2011-2019 Mozilla
2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DNN_ARM_H
#define DNN_ARM_H
#include "cpu_support.h"
#include "opus_types.h"
/* Prototypes of the ARM-specific (NEON and DOTPROD) variants of the DNN
   primitives. They take the same arguments as the dispatch macros below,
   minus the trailing arch parameter.
   NOTE(review): LinearLayer and Conv2dLayer are not declared by the headers
   included here, so this header presumably relies on being included after
   their definition -- confirm. */
void compute_linear_dotprod(const LinearLayer *linear, float *out, const float *in);
void compute_linear_neon(const LinearLayer *linear, float *out, const float *in);
void compute_activation_neon(float *output, const float *input, int N, int activation);
void compute_activation_dotprod(float *output, const float *input, int N, int activation);
void compute_conv2d_neon(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
void compute_conv2d_dotprod(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
/* Selection of the compute_linear() implementation:
   - DOTPROD presumed at build time: call the DOTPROD variant directly;
   - NEON intrinsics presumed and DOTPROD not even possible: call the NEON
     variant directly;
   - otherwise, with RTCD and at least one of NEON/DOTPROD possible: dispatch
     through DNN_COMPUTE_LINEAR_IMPL indexed by (arch & OPUS_ARCHMASK).
   In the direct-call cases arch is evaluated and discarded. Whenever one of
   the branches is taken, OVERRIDE_COMPUTE_LINEAR is defined. */
#if defined(OPUS_ARM_PRESUME_DOTPROD)
#define OVERRIDE_COMPUTE_LINEAR
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_dotprod(linear, out, in))
#elif defined(OPUS_ARM_PRESUME_NEON_INTR) && !defined(OPUS_ARM_MAY_HAVE_DOTPROD)
#define OVERRIDE_COMPUTE_LINEAR
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_neon(linear, out, in))
#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON))
extern void (*const DNN_COMPUTE_LINEAR_IMPL[OPUS_ARCHMASK + 1])(
const LinearLayer *linear,
float *out,
const float *in
);
#define OVERRIDE_COMPUTE_LINEAR
#define compute_linear(linear, out, in, arch) \
((*DNN_COMPUTE_LINEAR_IMPL[(arch) & OPUS_ARCHMASK])(linear, out, in))
#endif
/* Selection of the compute_activation() and compute_conv2d() implementations:
   - NEON presumed at build time: call the NEON variants directly (arch is
     evaluated and discarded);
   - otherwise, with RTCD and at least one of NEON/DOTPROD possible: dispatch
     through DNN_COMPUTE_ACTIVATION_IMPL / DNN_COMPUTE_CONV2D_IMPL indexed by
     (arch & OPUS_ARCHMASK).
   Whenever one of the branches is taken, OVERRIDE_COMPUTE_ACTIVATION and
   OVERRIDE_COMPUTE_CONV2D are defined. */
#if defined(OPUS_ARM_PRESUME_NEON)
#define OVERRIDE_COMPUTE_ACTIVATION
#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_neon(output, input, N, activation))
#define OVERRIDE_COMPUTE_CONV2D
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_neon(conv, out, mem, in, height, hstride, activation))
#elif defined(OPUS_HAVE_RTCD) && (defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON))
extern void (*const DNN_COMPUTE_ACTIVATION_IMPL[OPUS_ARCHMASK + 1])(
float *output,
const float *input,
int N,
int activation
);
#define OVERRIDE_COMPUTE_ACTIVATION
#define compute_activation(output, input, N, activation, arch) \
((*DNN_COMPUTE_ACTIVATION_IMPL[(arch) & OPUS_ARCHMASK])(output, input, N, activation))
extern void (*const DNN_COMPUTE_CONV2D_IMPL[OPUS_ARCHMASK + 1])(
const Conv2dLayer *conv,
float *out,
float *mem,
const float *in,
int height,
int hstride,
int activation
);
#define OVERRIDE_COMPUTE_CONV2D
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) \
((*DNN_COMPUTE_CONV2D_IMPL[(arch) & OPUS_ARCHMASK])(conv, out, mem, in, height, hstride, activation))
#endif
#endif /* DNN_ARM_H */

38
dnn/arm/nnet_dotprod.c Normal file
View File

@ -0,0 +1,38 @@
/* Copyright (c) 2018-2019 Mozilla
2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
/* Fail the build early if this file is compiled without the DOTPROD feature
   enabled in the compiler. */
#ifndef __ARM_FEATURE_DOTPROD
#error nnet_dotprod.c is being compiled without DOTPROD enabled
#endif
/* NOTE(review): presumably nnet_arch.h uses RTCD_ARCH to name the functions it
   defines (e.g. the *_dotprod functions declared in dnn_arm.h) -- confirm. */
#define RTCD_ARCH dotprod
#include "nnet_arch.h"

38
dnn/arm/nnet_neon.c Normal file
View File

@ -0,0 +1,38 @@
/* Copyright (c) 2018-2019 Mozilla
2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
/* Fail the build early if this file is compiled without NEON enabled in the
   compiler (either spelling of the predefined macro is accepted). */
#if !(defined(__ARM_NEON__) || defined(__ARM_NEON))
#error nnet_neon.c is being compiled without Neon enabled
#endif
/* NOTE(review): presumably nnet_arch.h uses RTCD_ARCH to name the functions it
   defines (e.g. the *_neon functions declared in dnn_arm.h) -- confirm. */
#define RTCD_ARCH neon
#include "nnet_arch.h"

245
dnn/burg.c Normal file
View File

@ -0,0 +1,245 @@
/***********************************************************************
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of Internet Society, IETF or IETF Trust, nor the
names of specific contributors, may be used to endorse or promote
products derived from this software without specific prior written
permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <math.h>
#include <string.h>
#include <assert.h>
#include "burg.h"
#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384*/
#define SILK_MAX_ORDER_LPC 16
#define FIND_LPC_COND_FAC 1e-5f
/* Energy (sum of squared samples) of a float array, accumulated in double */
static double silk_energy_FLP(
    const float *data,
    int dataSize
)
{
    const float *p = data;
    int remaining = dataSize;
    double energy = 0.0;

    /* main part: four samples per pass, summed together before being added
       to the running total */
    while( remaining >= 4 ) {
        const double s0 = p[ 0 ];
        const double s1 = p[ 1 ];
        const double s2 = p[ 2 ];
        const double s3 = p[ 3 ];
        double quad = s0 * s0;

        quad += s1 * s1;
        quad += s2 * s2;
        quad += s3 * s3;
        energy += quad;

        p += 4;
        remaining -= 4;
    }

    /* tail: at most three samples are left */
    while( remaining > 0 ) {
        const double s = *p++;

        energy += s * s;
        remaining--;
    }

    assert( energy >= 0.0 );
    return energy;
}
/* Dot product of two float arrays of equal length, accumulated in double */
static double silk_inner_product_FLP(
    const float *data1,
    const float *data2,
    int dataSize
)
{
    const float *a = data1;
    const float *b = data2;
    int remaining = dataSize;
    double dot = 0.0;

    /* main part: four products per pass, summed together before being added
       to the running total */
    while( remaining >= 4 ) {
        double quad = a[ 0 ] * (double)b[ 0 ];

        quad += a[ 1 ] * (double)b[ 1 ];
        quad += a[ 2 ] * (double)b[ 2 ];
        quad += a[ 3 ] * (double)b[ 3 ];
        dot += quad;

        a += 4;
        b += 4;
        remaining -= 4;
    }

    /* tail: at most three products are left */
    while( remaining > 0 ) {
        dot += *a * (double)*b;
        a++;
        b++;
        remaining--;
    }

    return dot;
}
/* Burg-method linear prediction analysis (per the function name) over nb_subfr subframes
   stacked back to back in x. Writes D prediction coefficients to A and returns the residual
   energy as a float. The order recursion stops early, zeroing the remaining coefficients,
   as soon as the inverse prediction gain would fall to minInvGain or below.
   Only subfr_length * nb_subfr <= MAX_FRAME_SIZE is asserted; D indexes work arrays sized by
   SILK_MAX_ORDER_LPC and is not range-checked here. */
float silk_burg_analysis( /* O returns residual energy */
float A[], /* O prediction coefficients (length order) */
const float x[], /* I input signal, length: nb_subfr*(D+L_sub) */
const float minInvGain, /* I minimum inverse prediction gain */
const int subfr_length, /* I input signal subframe length (incl. D preceding samples) */
const int nb_subfr, /* I number of subframes stacked in x */
const int D /* I order */
)
{
int k, n, s, reached_max_gain;
double C0, invGain, num, nrg_f, nrg_b, rc, Atmp, tmp1, tmp2;
const float *x_ptr;
double C_first_row[ SILK_MAX_ORDER_LPC ], C_last_row[ SILK_MAX_ORDER_LPC ];
double CAf[ SILK_MAX_ORDER_LPC + 1 ], CAb[ SILK_MAX_ORDER_LPC + 1 ];
/* Af[] is not initialized up front: iteration n only reads Af[ k ] for k < n, which earlier iterations have written */
double Af[ SILK_MAX_ORDER_LPC ];
assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
/* Compute autocorrelations, added over subframes */
C0 = silk_energy_FLP( x, nb_subfr * subfr_length );
memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( double ) );
/* C_first_row[ n - 1 ] accumulates the lag-n correlation of every subframe */
for( s = 0; s < nb_subfr; s++ ) {
x_ptr = x + s * subfr_length;
for( n = 1; n < D + 1; n++ ) {
C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n );
}
}
memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( double ) );
/* Initialize */
/* Zero-lag term: the signal energy plus a FIND_LPC_COND_FAC fraction of it and a small constant */
CAb[ 0 ] = CAf[ 0 ] = C0 + FIND_LPC_COND_FAC * C0 + 1e-9f;
invGain = 1.0f;
reached_max_gain = 0;
/* Order recursion: iteration n produces coefficient Af[ n ] */
for( n = 0; n < D; n++ ) {
/* Update first row of correlation matrix (without first element) */
/* Update last row of correlation matrix (without last element, stored in reversed order) */
/* Update C * Af */
/* Update C * flipud(Af) (stored in reversed order) */
for( s = 0; s < nb_subfr; s++ ) {
x_ptr = x + s * subfr_length;
/* tmp1 / tmp2 start from the sample n positions in from the start / end of the subframe */
tmp1 = x_ptr[ n ];
tmp2 = x_ptr[ subfr_length - n - 1 ];
for( k = 0; k < n; k++ ) {
C_first_row[ k ] -= x_ptr[ n ] * x_ptr[ n - k - 1 ];
C_last_row[ k ] -= x_ptr[ subfr_length - n - 1 ] * x_ptr[ subfr_length - n + k ];
Atmp = Af[ k ];
tmp1 += x_ptr[ n - k - 1 ] * Atmp;
tmp2 += x_ptr[ subfr_length - n + k ] * Atmp;
}
for( k = 0; k <= n; k++ ) {
CAf[ k ] -= tmp1 * x_ptr[ n - k ];
CAb[ k ] -= tmp2 * x_ptr[ subfr_length - n + k - 1 ];
}
}
tmp1 = C_first_row[ n ];
tmp2 = C_last_row[ n ];
for( k = 0; k < n; k++ ) {
Atmp = Af[ k ];
tmp1 += C_last_row[ n - k - 1 ] * Atmp;
tmp2 += C_first_row[ n - k - 1 ] * Atmp;
}
CAf[ n + 1 ] = tmp1;
CAb[ n + 1 ] = tmp2;
/* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
num = CAb[ n + 1 ];
nrg_b = CAb[ 0 ];
nrg_f = CAf[ 0 ];
for( k = 0; k < n; k++ ) {
Atmp = Af[ k ];
num += CAb[ n - k ] * Atmp;
nrg_b += CAb[ k + 1 ] * Atmp;
nrg_f += CAf[ k + 1 ] * Atmp;
}
assert( nrg_f > 0.0 );
assert( nrg_b > 0.0 );
/* Calculate the next order reflection (parcor) coefficient */
rc = -2.0 * num / ( nrg_f + nrg_b );
assert( rc > -1.0 && rc < 1.0 );
/* Update inverse prediction gain */
tmp1 = invGain * ( 1.0 - rc * rc );
if( tmp1 <= minInvGain ) {
/* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
rc = sqrt( 1.0 - minInvGain / invGain );
if( num > 0 ) {
/* Ensure adjusted reflection coefficients has the original sign */
rc = -rc;
}
invGain = minInvGain;
reached_max_gain = 1;
} else {
invGain = tmp1;
}
/* Update the AR coefficients */
/* Entries are updated in mirrored pairs ( k, n - k - 1 ); for odd n the middle entry pairs with itself */
for( k = 0; k < (n + 1) >> 1; k++ ) {
tmp1 = Af[ k ];
tmp2 = Af[ n - k - 1 ];
Af[ k ] = tmp1 + rc * tmp2;
Af[ n - k - 1 ] = tmp2 + rc * tmp1;
}
Af[ n ] = rc;
if( reached_max_gain ) {
/* Reached max prediction gain; set remaining coefficients to zero and exit loop */
for( k = n + 1; k < D; k++ ) {
Af[ k ] = 0.0;
}
break;
}
/* Update C * Af and C * Ab */
for( k = 0; k <= n + 1; k++ ) {
tmp1 = CAf[ k ];
CAf[ k ] += rc * CAb[ n - k + 1 ];
CAb[ n - k + 1 ] += rc * tmp1;
}
}
if( reached_max_gain ) {
/* Convert to float */
/* The output coefficients are the negated internal Af[] values */
for( k = 0; k < D; k++ ) {
A[ k ] = (float)( -Af[ k ] );
}
/* Subtract energy of preceding samples from C0 */
for( s = 0; s < nb_subfr; s++ ) {
C0 -= silk_energy_FLP( x + s * subfr_length, D );
}
/* Approximate residual energy */
nrg_f = C0 * invGain;
} else {
/* Compute residual energy and store coefficients as float */
nrg_f = CAf[ 0 ];
/* tmp1 accumulates 1 + the sum of the squared coefficients */
tmp1 = 1.0;
for( k = 0; k < D; k++ ) {
Atmp = Af[ k ];
nrg_f += CAf[ k + 1 ] * Atmp;
tmp1 += Atmp * Atmp;
A[ k ] = (float)(-Atmp);
}
/* Subtract FIND_LPC_COND_FAC * C0, the term added to the zero-lag value at initialization, scaled by tmp1 */
nrg_f -= FIND_LPC_COND_FAC * C0 * tmp1;
}
/* Return residual energy */
return (float)nrg_f;
}

View File

@ -1,5 +1,5 @@
/***********************************************************************
Copyright (c) 2011, Skype Limited. All rights reserved.
Copyright (c) 2006-2011, Skype Limited. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
@ -25,40 +25,17 @@ ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
***********************************************************************/
#ifndef CONFIG_H
#define CONFIG_H
#ifndef BURG_H
#define BURG_H
#define USE_ALLOCA 1
/* Comment out the next line for floating-point code */
/*#define FIXED_POINT 1 */
#define OPUS_BUILD 1
#if defined(_M_IX86) || defined(_M_X64)
/* Can always compile SSE intrinsics (no special compiler flags necessary) */
#define OPUS_X86_MAY_HAVE_SSE
#define OPUS_X86_MAY_HAVE_SSE2
#define OPUS_X86_MAY_HAVE_SSE4_1
/* Presume SSE functions, if compiled to use SSE/SSE2/AVX (note that AMD64 implies SSE2, and AVX
implies SSE4.1) */
#if defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 1)) || defined(__AVX__)
#define OPUS_X86_PRESUME_SSE 1
#endif
#if defined(_M_X64) || (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || defined(__AVX__)
#define OPUS_X86_PRESUME_SSE2 1
#endif
#if defined(__AVX__)
#define OPUS_X86_PRESUME_SSE4_1 1
#endif
#if !defined(OPUS_X86_PRESUME_SSE4_1) || !defined(OPUS_X86_PRESUME_SSE2) || !defined(OPUS_X86_PRESUME_SSE)
#define OPUS_HAVE_RTCD 1
#endif
/* Linear prediction analysis of the subframes stacked in x: fills A with D prediction
   coefficients and returns the residual energy. */
float silk_burg_analysis( /* O returns residual energy */
float A[], /* O prediction coefficients (length order) */
const float x[], /* I input signal, length: nb_subfr*(D+L_sub) */
const float minInvGain, /* I minimum inverse prediction gain */
const int subfr_length, /* I input signal subframe length (incl. D preceding samples) */
const int nb_subfr, /* I number of subframes stacked in x */
const int D /* I order */
);
#endif
#include "version.h"
#endif /* CONFIG_H */

56
dnn/common.h Normal file
View File

@ -0,0 +1,56 @@
#ifndef COMMON_H
#define COMMON_H
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "opus_defines.h"
/* Natural logarithm of 256 (8 * ln 2), used by the mu-law conversions in this header */
#define LOG256 5.5451774445f
/* Fast approximation of log2(x), meant for positive, normal floats.
   The bits of x are reinterpreted as an integer: the biased exponent field gives the integer
   part, the exponent is then rebased to 127 so the value lies in [1, 2), and a cubic
   polynomial in (value - 1.5) approximates the fractional part.
   Relies on float and int both being 32 bits wide with IEEE-754 single-precision layout. */
static OPUS_INLINE float log2_approx(float x)
{
   int integer;
   float frac;
   union {
      float f;
      int i;
   } in;
   in.f = x;
   integer = (in.i>>23)-127;
   /* Rebase the exponent using unsigned arithmetic: "integer" is negative whenever x < 1,
      and left-shifting a negative int is undefined behavior. The resulting bit pattern is
      the same one the signed shift produces on two's-complement targets. */
   in.i = (int)((unsigned)in.i - ((unsigned)integer<<23));
   frac = in.f - 1.5f;
   frac = -0.41445418f + frac*(0.95909232f
          + frac*(-0.33951290f + frac*0.16541097f));
   return 1+integer+frac;
}
/* Natural-logarithm approximation: log2_approx(x) scaled by 0.69315f (approximately ln 2) */
#define log_approx(x) (0.69315f*log2_approx(x))
/* Expand a mu-law code to a linear value.
   The code is centered by subtracting 128; the result is
   sign * (32768/255) * (exp(|u - 128| / 128 * LOG256) - 1). */
static OPUS_INLINE float ulaw2lin(float u)
{
   const float lin_per_step = 32768.f/255.f;
   float sign;
   u = u - 128.f;
   if (u >= 0.f) sign = 1.f;
   else sign = -1.f;
   u = fabs(u);
   return sign*lin_per_step*(exp(u/128.*LOG256)-1);
}
/* Compress a linear value to a mu-law code.
   Computes 128 + sign * 128 * log_approx(1 + |x| * 255/32768) / LOG256, limits the
   result to the range 0..255 and rounds it to the nearest integer. */
static OPUS_INLINE int lin2ulaw(float x)
{
   const float gain = 255.f/32768.f;
   float code;
   int sign;
   if (x >= 0) sign = 1;
   else sign = -1;
   x = fabs(x);
   code = (sign*(128*log_approx(1+gain*x)/LOG256));
   code = 128 + code;
   /* Keep the code inside the 8-bit range before rounding */
   if (code < 0) code = 0;
   else if (code > 255) code = 255;
   return (int)floor(.5 + code);
}
#endif

173
dnn/datasets.txt Normal file
View File

@ -0,0 +1,173 @@
The following datasets can be used to train a language-independent LPCNet model.
A good choice is to include all the data from these datasets, except for
hi_fi_tts for which only a small subset is recommended (since it's very large
but has few speakers). Note that this data typically needs to be resampled
before it can be used.
https://www.openslr.org/resources/30/si_lk.tar.gz
https://www.openslr.org/resources/32/af_za.tar.gz
https://www.openslr.org/resources/32/st_za.tar.gz
https://www.openslr.org/resources/32/tn_za.tar.gz
https://www.openslr.org/resources/32/xh_za.tar.gz
https://www.openslr.org/resources/37/bn_bd.zip
https://www.openslr.org/resources/37/bn_in.zip
https://www.openslr.org/resources/41/jv_id_female.zip
https://www.openslr.org/resources/41/jv_id_male.zip
https://www.openslr.org/resources/42/km_kh_male.zip
https://www.openslr.org/resources/43/ne_np_female.zip
https://www.openslr.org/resources/44/su_id_female.zip
https://www.openslr.org/resources/44/su_id_male.zip
https://www.openslr.org/resources/61/es_ar_female.zip
https://www.openslr.org/resources/61/es_ar_male.zip
https://www.openslr.org/resources/63/ml_in_female.zip
https://www.openslr.org/resources/63/ml_in_male.zip
https://www.openslr.org/resources/64/mr_in_female.zip
https://www.openslr.org/resources/65/ta_in_female.zip
https://www.openslr.org/resources/65/ta_in_male.zip
https://www.openslr.org/resources/66/te_in_female.zip
https://www.openslr.org/resources/66/te_in_male.zip
https://www.openslr.org/resources/69/ca_es_female.zip
https://www.openslr.org/resources/69/ca_es_male.zip
https://www.openslr.org/resources/70/en_ng_female.zip
https://www.openslr.org/resources/70/en_ng_male.zip
https://www.openslr.org/resources/71/es_cl_female.zip
https://www.openslr.org/resources/71/es_cl_male.zip
https://www.openslr.org/resources/72/es_co_female.zip
https://www.openslr.org/resources/72/es_co_male.zip
https://www.openslr.org/resources/73/es_pe_female.zip
https://www.openslr.org/resources/73/es_pe_male.zip
https://www.openslr.org/resources/74/es_pr_female.zip
https://www.openslr.org/resources/75/es_ve_female.zip
https://www.openslr.org/resources/75/es_ve_male.zip
https://www.openslr.org/resources/76/eu_es_female.zip
https://www.openslr.org/resources/76/eu_es_male.zip
https://www.openslr.org/resources/77/gl_es_female.zip
https://www.openslr.org/resources/77/gl_es_male.zip
https://www.openslr.org/resources/78/gu_in_female.zip
https://www.openslr.org/resources/78/gu_in_male.zip
https://www.openslr.org/resources/79/kn_in_female.zip
https://www.openslr.org/resources/79/kn_in_male.zip
https://www.openslr.org/resources/80/my_mm_female.zip
https://www.openslr.org/resources/83/irish_english_male.zip
https://www.openslr.org/resources/83/midlands_english_female.zip
https://www.openslr.org/resources/83/midlands_english_male.zip
https://www.openslr.org/resources/83/northern_english_female.zip
https://www.openslr.org/resources/83/northern_english_male.zip
https://www.openslr.org/resources/83/scottish_english_female.zip
https://www.openslr.org/resources/83/scottish_english_male.zip
https://www.openslr.org/resources/83/southern_english_female.zip
https://www.openslr.org/resources/83/southern_english_male.zip
https://www.openslr.org/resources/83/welsh_english_female.zip
https://www.openslr.org/resources/83/welsh_english_male.zip
https://www.openslr.org/resources/86/yo_ng_female.zip
https://www.openslr.org/resources/86/yo_ng_male.zip
https://www.openslr.org/resources/109/hi_fi_tts_v0.tar.gz
The corresponding citations for all these datasets are:
@inproceedings{demirsahin-etal-2020-open,
title = {{Open-source Multi-speaker Corpora of the English Accents in the British Isles}},
author = {Demirsahin, Isin and Kjartansson, Oddur and Gutkin, Alexander and Rivera, Clara},
booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},
month = may,
year = {2020},
pages = {6532--6541},
address = {Marseille, France},
publisher = {European Language Resources Association (ELRA)},
url = {https://www.aclweb.org/anthology/2020.lrec-1.804},
ISBN = {979-10-95546-34-4},
}
@inproceedings{kjartansson-etal-2020-open,
title = {{Open-Source High Quality Speech Datasets for Basque, Catalan and Galician}},
author = {Kjartansson, Oddur and Gutkin, Alexander and Butryna, Alena and Demirsahin, Isin and Rivera, Clara},
booktitle = {Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)},
year = {2020},
pages = {21--27},
month = may,
address = {Marseille, France},
publisher = {European Language Resources Association (ELRA)},
url = {https://www.aclweb.org/anthology/2020.sltu-1.3},
ISBN = {979-10-95546-35-1},
}
@inproceedings{guevara-rukoz-etal-2020-crowdsourcing,
title = {{Crowdsourcing Latin American Spanish for Low-Resource Text-to-Speech}},
author = {Guevara-Rukoz, Adriana and Demirsahin, Isin and He, Fei and Chu, Shan-Hui Cathy and Sarin, Supheakmungkol and Pipatsrisawat, Knot and Gutkin, Alexander and Butryna, Alena and Kjartansson, Oddur},
booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},
year = {2020},
month = may,
address = {Marseille, France},
publisher = {European Language Resources Association (ELRA)},
url = {https://www.aclweb.org/anthology/2020.lrec-1.801},
pages = {6504--6513},
ISBN = {979-10-95546-34-4},
}
@inproceedings{he-etal-2020-open,
title = {{Open-source Multi-speaker Speech Corpora for Building Gujarati, Kannada, Malayalam, Marathi, Tamil and Telugu Speech Synthesis Systems}},
author = {He, Fei and Chu, Shan-Hui Cathy and Kjartansson, Oddur and Rivera, Clara and Katanova, Anna and Gutkin, Alexander and Demirsahin, Isin and Johny, Cibu and Jansche, Martin and Sarin, Supheakmungkol and Pipatsrisawat, Knot},
booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},
month = may,
year = {2020},
address = {Marseille, France},
publisher = {European Language Resources Association (ELRA)},
pages = {6494--6503},
url = {https://www.aclweb.org/anthology/2020.lrec-1.800},
ISBN = "{979-10-95546-34-4}",
}
@inproceedings{kjartansson-etal-tts-sltu2018,
title = {{A Step-by-Step Process for Building TTS Voices Using Open Source Data and Framework for Bangla, Javanese, Khmer, Nepali, Sinhala, and Sundanese}},
author = {Keshan Sodimana and Knot Pipatsrisawat and Linne Ha and Martin Jansche and Oddur Kjartansson and Pasindu De Silva and Supheakmungkol Sarin},
booktitle = {Proc. The 6th Intl. Workshop on Spoken Language Technologies for Under-Resourced Languages (SLTU)},
year = {2018},
address = {Gurugram, India},
month = aug,
pages = {66--70},
URL = {http://dx.doi.org/10.21437/SLTU.2018-14}
}
@inproceedings{oo-etal-2020-burmese,
title = {{Burmese Speech Corpus, Finite-State Text Normalization and Pronunciation Grammars with an Application to Text-to-Speech}},
author = {Oo, Yin May and Wattanavekin, Theeraphol and Li, Chenfang and De Silva, Pasindu and Sarin, Supheakmungkol and Pipatsrisawat, Knot and Jansche, Martin and Kjartansson, Oddur and Gutkin, Alexander},
booktitle = {Proceedings of The 12th Language Resources and Evaluation Conference (LREC)},
month = may,
year = {2020},
pages = "6328--6339",
address = {Marseille, France},
publisher = {European Language Resources Association (ELRA)},
url = {https://www.aclweb.org/anthology/2020.lrec-1.777},
ISBN = {979-10-95546-34-4},
}
@inproceedings{van-niekerk-etal-2017,
title = {{Rapid development of TTS corpora for four South African languages}},
author = {Daniel van Niekerk and Charl van Heerden and Marelie Davel and Neil Kleynhans and Oddur Kjartansson and Martin Jansche and Linne Ha},
booktitle = {Proc. Interspeech 2017},
pages = {2178--2182},
address = {Stockholm, Sweden},
month = aug,
year = {2017},
URL = {http://dx.doi.org/10.21437/Interspeech.2017-1139}
}
@inproceedings{gutkin-et-al-yoruba2020,
title = {{Developing an Open-Source Corpus of Yoruba Speech}},
author = {Alexander Gutkin and I{\c{s}}{\i}n Demir{\c{s}}ahin and Oddur Kjartansson and Clara Rivera and K\d{\'o}lá Túb\d{\`o}sún},
booktitle = {Proceedings of Interspeech 2020},
pages = {404--408},
month = {October},
year = {2020},
address = {Shanghai, China},
publisher = {International Speech and Communication Association (ISCA)},
doi = {10.21437/Interspeech.2020-1096},
url = {http://dx.doi.org/10.21437/Interspeech.2020-1096},
}
@article{bakhturina2021hi,
title={{Hi-Fi Multi-Speaker English TTS Dataset}},
author={Bakhturina, Evelina and Lavrukhin, Vitaly and Ginsburg, Boris and Zhang, Yang},
journal={arXiv preprint arXiv:2104.01497},
year={2021}
}

9
dnn/download_model.bat Normal file
View File

@ -0,0 +1,9 @@
@echo off
rem Fetches and unpacks a model archive. The first argument is the identifier
rem that forms part of the archive file name.
set model=opus_data-%1.tar.gz
rem Download only when the archive is not already present in the current directory.
if not exist %model% (
echo Downloading latest model
powershell -Command "(New-Object System.Net.WebClient).DownloadFile('https://media.xiph.org/opus/models/%model%', '%model%')"
)
rem Unpack the archive into the current directory.
tar -xvzf %model%

10
dnn/download_model.sh Executable file
View File

@ -0,0 +1,10 @@
#!/bin/sh
# Download (unless it is already present) and unpack the model archive
# opus_data-<id>.tar.gz, where <id> is the first argument.
# set -e makes the script stop at the first failing command, so a failed
# download is never followed by an attempt to unpack.
set -e
model="opus_data-$1.tar.gz"
# Every expansion of $model is quoted so that an argument containing spaces
# or glob characters stays a single word.
if [ ! -f "$model" ]; then
    echo "Downloading latest model"
    wget "https://media.xiph.org/opus/models/$model"
fi
tar xvomf "$model"

42
dnn/dred_rdovae.h Normal file
View File

@ -0,0 +1,42 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DRED_RDOVAE_H
#define DRED_RDOVAE_H
#include <stdlib.h>
#include "opus_types.h"
/* Decoder and encoder model types; only forward-declared here, the definitions live elsewhere */
typedef struct RDOVAEDec RDOVAEDec;
typedef struct RDOVAEEnc RDOVAEEnc;
/* Decoder and encoder running state; aliases for struct RDOVAEDecStruct and struct RDOVAEEncStruct */
typedef struct RDOVAEDecStruct RDOVAEDecState;
typedef struct RDOVAEEncStruct RDOVAEEncState;
#endif

139
dnn/dred_rdovae_dec.c Normal file
View File

@ -0,0 +1,139 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "dred_rdovae_dec.h"
#include "dred_rdovae_constants.h"
#include "os_support.h"
/* Zero dilation consecutive chunks of len floats at mem, but only while *init is still 0.
   *init is set to 1 on every call, whether or not anything was cleared. */
static void conv1_cond_init(float *mem, int len, int dilation, int *init)
{
    if (*init == 0) {
        int chunk = 0;
        while (chunk < dilation) {
            OPUS_CLEAR(mem + chunk*len, len);
            chunk++;
        }
    }
    *init = 1;
}
/* Decode nb_latents latent vectors in sequence with a fresh, zeroed decoder state that is
   initialized from state. Latent j is read from latents[j*DRED_LATENT_DIM] and its output
   is written starting at features[4*j*DRED_NUM_FEATURES]. */
void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents, int arch)
{
    RDOVAEDecState dec;
    int frame;
    memset(&dec, 0, sizeof(dec));
    dred_rdovae_dec_init_states(&dec, model, state, arch);
    for (frame = 0; frame < nb_latents; frame++)
    {
        float *out = &features[4*frame*DRED_NUM_FEATURES];
        const float *z = &latents[frame*DRED_LATENT_DIM];
        dred_rdovae_decode_qframe(&dec, model, out, z, arch);
    }
}
/* Derive the five GRU states of the decoder from initial_state: two dense layers with tanh
   activation produce one concatenated vector, which is then split across gru1..gru5 in
   order. The initialized flag is reset to 0; the conv states are not touched here. */
void dred_rdovae_dec_init_states(
    RDOVAEDecState *h,            /* io: state buffer handle */
    const RDOVAEDec *model,
    const float *initial_state,  /* i: initial state */
    int arch
    )
{
    float hidden[DEC_HIDDEN_INIT_OUT_SIZE];
    float state_init[DEC_GRU1_STATE_SIZE+DEC_GRU2_STATE_SIZE+DEC_GRU3_STATE_SIZE+DEC_GRU4_STATE_SIZE+DEC_GRU5_STATE_SIZE];
    float *src = state_init;

    compute_generic_dense(&model->dec_hidden_init, hidden, initial_state, ACTIVATION_TANH, arch);
    compute_generic_dense(&model->dec_gru_init, state_init, hidden, ACTIVATION_TANH, arch);

    /* Hand out consecutive slices of state_init, one per GRU */
    OPUS_COPY(h->gru1_state, src, DEC_GRU1_STATE_SIZE);
    src += DEC_GRU1_STATE_SIZE;
    OPUS_COPY(h->gru2_state, src, DEC_GRU2_STATE_SIZE);
    src += DEC_GRU2_STATE_SIZE;
    OPUS_COPY(h->gru3_state, src, DEC_GRU3_STATE_SIZE);
    src += DEC_GRU3_STATE_SIZE;
    OPUS_COPY(h->gru4_state, src, DEC_GRU4_STATE_SIZE);
    src += DEC_GRU4_STATE_SIZE;
    OPUS_COPY(h->gru5_state, src, DEC_GRU5_STATE_SIZE);

    h->initialized = 0;
}
/* Decode one latent vector into qframe.
   The layers run in sequence (dense, then five GRU+GLU / conv pairs) and every layer output
   is appended to buffer, so each layer sees the concatenation of all earlier outputs; the
   final dense layer maps the whole buffer to qframe.
   On the first call after dec_state->initialized was reset to 0, all five conv states are
   cleared before use. */
void dred_rdovae_decode_qframe(
    RDOVAEDecState *dec_state,       /* io: state buffer handle */
    const RDOVAEDec *model,
    float *qframe,                   /* o: quadruple feature frame (four concatenated frames in reverse order) */
    const float *input,              /* i: latent vector */
    int arch
    )
{
    float buffer[DEC_DENSE1_OUT_SIZE + DEC_GRU1_OUT_SIZE + DEC_GRU2_OUT_SIZE + DEC_GRU3_OUT_SIZE + DEC_GRU4_OUT_SIZE + DEC_GRU5_OUT_SIZE
                 + DEC_CONV1_OUT_SIZE + DEC_CONV2_OUT_SIZE + DEC_CONV3_OUT_SIZE + DEC_CONV4_OUT_SIZE + DEC_CONV5_OUT_SIZE];
    int output_index = 0;
    /* conv1_cond_init() sets the flag it is given to 1 on every call. Passing the shared
       dec_state->initialized to all five calls would therefore clear only conv1, so each
       call gets its own copy of the value the flag had on entry. */
    const int was_initialized = dec_state->initialized;
    int conv_init;

    /* run decoder stack and concatenate output in buffer*/
    compute_generic_dense(&model->dec_dense1, &buffer[output_index], input, ACTIVATION_TANH, arch);
    output_index += DEC_DENSE1_OUT_SIZE;

    compute_generic_gru(&model->dec_gru1_input, &model->dec_gru1_recurrent, dec_state->gru1_state, buffer, arch);
    compute_glu(&model->dec_glu1, &buffer[output_index], dec_state->gru1_state, arch);
    output_index += DEC_GRU1_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(dec_state->conv1_state, output_index, 1, &conv_init);
    compute_generic_conv1d(&model->dec_conv1, &buffer[output_index], dec_state->conv1_state, buffer, output_index, ACTIVATION_TANH, arch);
    output_index += DEC_CONV1_OUT_SIZE;

    compute_generic_gru(&model->dec_gru2_input, &model->dec_gru2_recurrent, dec_state->gru2_state, buffer, arch);
    compute_glu(&model->dec_glu2, &buffer[output_index], dec_state->gru2_state, arch);
    output_index += DEC_GRU2_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(dec_state->conv2_state, output_index, 1, &conv_init);
    compute_generic_conv1d(&model->dec_conv2, &buffer[output_index], dec_state->conv2_state, buffer, output_index, ACTIVATION_TANH, arch);
    output_index += DEC_CONV2_OUT_SIZE;

    compute_generic_gru(&model->dec_gru3_input, &model->dec_gru3_recurrent, dec_state->gru3_state, buffer, arch);
    compute_glu(&model->dec_glu3, &buffer[output_index], dec_state->gru3_state, arch);
    output_index += DEC_GRU3_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(dec_state->conv3_state, output_index, 1, &conv_init);
    compute_generic_conv1d(&model->dec_conv3, &buffer[output_index], dec_state->conv3_state, buffer, output_index, ACTIVATION_TANH, arch);
    output_index += DEC_CONV3_OUT_SIZE;

    compute_generic_gru(&model->dec_gru4_input, &model->dec_gru4_recurrent, dec_state->gru4_state, buffer, arch);
    compute_glu(&model->dec_glu4, &buffer[output_index], dec_state->gru4_state, arch);
    output_index += DEC_GRU4_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(dec_state->conv4_state, output_index, 1, &conv_init);
    compute_generic_conv1d(&model->dec_conv4, &buffer[output_index], dec_state->conv4_state, buffer, output_index, ACTIVATION_TANH, arch);
    output_index += DEC_CONV4_OUT_SIZE;

    compute_generic_gru(&model->dec_gru5_input, &model->dec_gru5_recurrent, dec_state->gru5_state, buffer, arch);
    compute_glu(&model->dec_glu5, &buffer[output_index], dec_state->gru5_state, arch);
    output_index += DEC_GRU5_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(dec_state->conv5_state, output_index, 1, &conv_init);
    compute_generic_conv1d(&model->dec_conv5, &buffer[output_index], dec_state->conv5_state, buffer, output_index, ACTIVATION_TANH, arch);
    output_index += DEC_CONV5_OUT_SIZE;

    /* All conv states have now been through their first-use check */
    dec_state->initialized = 1;

    compute_generic_dense(&model->dec_output, qframe, buffer, ACTIVATION_LINEAR, arch);
}

53
dnn/dred_rdovae_dec.h Normal file
View File

@ -0,0 +1,53 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DRED_RDOVAE_DEC_H
#define DRED_RDOVAE_DEC_H
#include "dred_rdovae.h"
#include "dred_rdovae_dec_data.h"
#include "dred_rdovae_stats_data.h"
/* Running state of the decoder: one recurrent state per GRU layer and one memory buffer
   per conv layer. */
struct RDOVAEDecStruct {
/* Reset to 0 by dred_rdovae_dec_init_states(); set to 1 once dred_rdovae_decode_qframe() has run its first-use conv clearing */
int initialized;
/* GRU states: loaded by dred_rdovae_dec_init_states() and updated on every decoded frame */
float gru1_state[DEC_GRU1_STATE_SIZE];
float gru2_state[DEC_GRU2_STATE_SIZE];
float gru3_state[DEC_GRU3_STATE_SIZE];
float gru4_state[DEC_GRU4_STATE_SIZE];
float gru5_state[DEC_GRU5_STATE_SIZE];
/* Memories handed to the conv layers; not written by dred_rdovae_dec_init_states() */
float conv1_state[DEC_CONV1_STATE_SIZE];
float conv2_state[DEC_CONV2_STATE_SIZE];
float conv3_state[DEC_CONV3_STATE_SIZE];
float conv4_state[DEC_CONV4_STATE_SIZE];
float conv5_state[DEC_CONV5_STATE_SIZE];
};
/* Fill the GRU states of h from initial_state and reset h->initialized to 0 */
void dred_rdovae_dec_init_states(RDOVAEDecState *h, const RDOVAEDec *model, const float * initial_state, int arch);
/* Decode one latent vector z into qframe, updating the state h */
void dred_rdovae_decode_qframe(RDOVAEDecState *h, const RDOVAEDec *model, float *qframe, const float * z, int arch);
/* Decode nb_latents latent vectors into features, using a temporary decoder state initialized from state */
void DRED_rdovae_decode_all(const RDOVAEDec *model, float *features, const float *state, const float *latents, int nb_latents, int arch);
#endif

110
dnn/dred_rdovae_enc.c Normal file
View File

@ -0,0 +1,110 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <math.h>
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "dred_rdovae_enc.h"
#include "os_support.h"
#include "dred_rdovae_constants.h"
/* Zero dilation consecutive chunks of len floats at mem, but only while *init is still 0.
   *init is set to 1 on every call, whether or not anything was cleared. */
static void conv1_cond_init(float *mem, int len, int dilation, int *init)
{
    if (*init == 0) {
        int chunk = 0;
        while (chunk < dilation) {
            OPUS_CLEAR(mem + chunk*len, len);
            chunk++;
        }
    }
    *init = 1;
}
/* Encode one input frame into a latent vector and an initial-state vector.
   The layers run in sequence (dense, then five GRU / conv pairs) and every layer output is
   appended to buffer, so each layer sees the concatenation of all earlier outputs. Two
   dense heads then map the whole buffer to the padded latents and the padded state, of
   which the first DRED_LATENT_DIM and DRED_STATE_DIM values are copied to the outputs.
   On the first call with enc_state->initialized equal to 0, all five conv states are
   cleared before use. */
void dred_rdovae_encode_dframe(
    RDOVAEEncState *enc_state,           /* io: encoder state */
    const RDOVAEEnc *model,
    float *latents,                      /* o: latent vector */
    float *initial_state,                /* o: initial state */
    const float *input,                  /* i: double feature frame (concatenated) */
    int arch
    )
{
    float padded_latents[DRED_PADDED_LATENT_DIM];
    float padded_state[DRED_PADDED_STATE_DIM];
    float buffer[ENC_DENSE1_OUT_SIZE + ENC_GRU1_OUT_SIZE + ENC_GRU2_OUT_SIZE + ENC_GRU3_OUT_SIZE + ENC_GRU4_OUT_SIZE + ENC_GRU5_OUT_SIZE
               + ENC_CONV1_OUT_SIZE + ENC_CONV2_OUT_SIZE + ENC_CONV3_OUT_SIZE + ENC_CONV4_OUT_SIZE + ENC_CONV5_OUT_SIZE];
    float state_hidden[GDENSE1_OUT_SIZE];
    int output_index = 0;
    /* conv1_cond_init() sets the flag it is given to 1 on every call. Passing the shared
       enc_state->initialized to all five calls would therefore clear only conv1, so each
       call gets its own copy of the value the flag had on entry. */
    const int was_initialized = enc_state->initialized;
    int conv_init;

    /* run encoder stack and concatenate output in buffer*/
    compute_generic_dense(&model->enc_dense1, &buffer[output_index], input, ACTIVATION_TANH, arch);
    output_index += ENC_DENSE1_OUT_SIZE;

    compute_generic_gru(&model->enc_gru1_input, &model->enc_gru1_recurrent, enc_state->gru1_state, buffer, arch);
    OPUS_COPY(&buffer[output_index], enc_state->gru1_state, ENC_GRU1_OUT_SIZE);
    output_index += ENC_GRU1_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(enc_state->conv1_state, output_index, 1, &conv_init);
    compute_generic_conv1d(&model->enc_conv1, &buffer[output_index], enc_state->conv1_state, buffer, output_index, ACTIVATION_TANH, arch);
    output_index += ENC_CONV1_OUT_SIZE;

    compute_generic_gru(&model->enc_gru2_input, &model->enc_gru2_recurrent, enc_state->gru2_state, buffer, arch);
    OPUS_COPY(&buffer[output_index], enc_state->gru2_state, ENC_GRU2_OUT_SIZE);
    output_index += ENC_GRU2_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(enc_state->conv2_state, output_index, 2, &conv_init);
    compute_generic_conv1d_dilation(&model->enc_conv2, &buffer[output_index], enc_state->conv2_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
    output_index += ENC_CONV2_OUT_SIZE;

    compute_generic_gru(&model->enc_gru3_input, &model->enc_gru3_recurrent, enc_state->gru3_state, buffer, arch);
    OPUS_COPY(&buffer[output_index], enc_state->gru3_state, ENC_GRU3_OUT_SIZE);
    output_index += ENC_GRU3_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(enc_state->conv3_state, output_index, 2, &conv_init);
    compute_generic_conv1d_dilation(&model->enc_conv3, &buffer[output_index], enc_state->conv3_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
    output_index += ENC_CONV3_OUT_SIZE;

    compute_generic_gru(&model->enc_gru4_input, &model->enc_gru4_recurrent, enc_state->gru4_state, buffer, arch);
    OPUS_COPY(&buffer[output_index], enc_state->gru4_state, ENC_GRU4_OUT_SIZE);
    output_index += ENC_GRU4_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(enc_state->conv4_state, output_index, 2, &conv_init);
    compute_generic_conv1d_dilation(&model->enc_conv4, &buffer[output_index], enc_state->conv4_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
    output_index += ENC_CONV4_OUT_SIZE;

    compute_generic_gru(&model->enc_gru5_input, &model->enc_gru5_recurrent, enc_state->gru5_state, buffer, arch);
    OPUS_COPY(&buffer[output_index], enc_state->gru5_state, ENC_GRU5_OUT_SIZE);
    output_index += ENC_GRU5_OUT_SIZE;
    conv_init = was_initialized;
    conv1_cond_init(enc_state->conv5_state, output_index, 2, &conv_init);
    compute_generic_conv1d_dilation(&model->enc_conv5, &buffer[output_index], enc_state->conv5_state, buffer, output_index, 2, ACTIVATION_TANH, arch);
    output_index += ENC_CONV5_OUT_SIZE;

    /* All conv states have now been through their first-use check */
    enc_state->initialized = 1;

    compute_generic_dense(&model->enc_zdense, padded_latents, buffer, ACTIVATION_LINEAR, arch);
    OPUS_COPY(latents, padded_latents, DRED_LATENT_DIM);

    /* next, calculate initial state */
    compute_generic_dense(&model->gdense1, state_hidden, buffer, ACTIVATION_TANH, arch);
    compute_generic_dense(&model->gdense2, padded_state, state_hidden, ACTIVATION_LINEAR, arch);
    OPUS_COPY(initial_state, padded_state, DRED_STATE_DIM);
}

52
dnn/dred_rdovae_enc.h Normal file
View File

@ -0,0 +1,52 @@
/* Copyright (c) 2022 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef DRED_RDOVAE_ENC_H
#define DRED_RDOVAE_ENC_H
#include "dred_rdovae.h"
#include "dred_rdovae_enc_data.h"
/* Persistent state of the RDOVAE encoder network, carried from one
   dred_rdovae_encode_dframe() call to the next. */
struct RDOVAEEncStruct {
/* Flag handed (by address) to conv1_cond_init() together with each dilated
   conv state buffer. */
int initialized;
/* Recurrent state of the five GRU layers. */
float gru1_state[ENC_GRU1_STATE_SIZE];
float gru2_state[ENC_GRU2_STATE_SIZE];
float gru3_state[ENC_GRU3_STATE_SIZE];
float gru4_state[ENC_GRU4_STATE_SIZE];
float gru5_state[ENC_GRU5_STATE_SIZE];
/* History buffers of the five 1-D convolution layers. */
float conv1_state[ENC_CONV1_STATE_SIZE];
/* conv2..conv5 are allocated at twice the nominal state size; they are the
   layers run with a dilation argument of 2
   (NOTE(review): presumably that is why they are doubled -- confirm). */
float conv2_state[2*ENC_CONV2_STATE_SIZE];
float conv3_state[2*ENC_CONV3_STATE_SIZE];
float conv4_state[2*ENC_CONV4_STATE_SIZE];
float conv5_state[2*ENC_CONV5_STATE_SIZE];
};
void dred_rdovae_encode_dframe(RDOVAEEncState *enc_state, const RDOVAEEnc *model, float *latents, float *initial_state, const float *input, int arch);
#endif

280
dnn/dump_data.c Normal file
View File

@ -0,0 +1,280 @@
/* Copyright (c) 2017-2018 Mozilla */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <unistd.h>
#include "kiss_fft.h"
#include "common.h"
#include <math.h>
#include "freq.h"
#include "pitch.h"
#include "arch.h"
#include <assert.h>
#include "lpcnet.h"
#include "lpcnet_private.h"
#include "os_support.h"
#include "cpu_support.h"
/* Second-order recursive filter with a unit direct feed-through.
   For every sample:
     y[n]   = x[n] + mem[0]
     mem[0] = mem[1] + b[0]*x[n] - a[0]*y[n]
     mem[1] =          b[1]*x[n] - a[1]*y[n]
   The state updates are evaluated in double precision. The input sample is
   read before the output is stored, so y and x may point to the same buffer.
   mem[] carries the filter state across calls. */
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
  int n = 0;
  while (n < N) {
    const float in = x[n];
    const float out = x[n] + mem[0];
    mem[0] = mem[1] + (b[0]*(double)in - a[0]*(double)out);
    mem[1] = (b[1]*(double)in - a[1]*(double)out);
    y[n] = out;
    n++;
  }
}
/* Draw one pseudo-random value in [-0.5, 0.5] from rand(). */
static float uni_rand(void) {
  const double unit = rand()/(double)RAND_MAX;
  return unit-.5;
}
/* Fill the two-tap coefficient pairs a[] and b[] with random values in
   [-0.375, 0.375]. The draws happen in the order a[0], a[1], b[0], b[1]. */
static void rand_resp(float *a, float *b) {
  float *coef[2];
  int k;
  coef[0] = a;
  coef[1] = b;
  for (k=0;k<2;k++) {
    coef[k][0] = .75*uni_rand();
    coef[k][1] = .75*uni_rand();
  }
}
/* Fill noise[0..FRAME_SIZE-1] with rounded integer noise whose spread is
   controlled by noise_std. Each sample is the scaled difference of
   log_approx() applied to two independent rand() draws. */
void compute_noise(int *noise, float noise_std) {
  const double scale = noise_std*.707;
  int *dst = noise;
  int *const end = noise + FRAME_SIZE;
  while (dst != end) {
    *dst = (int)floor(.5 + scale*(log_approx(rand()/(float)RAND_MAX)-log_approx(rand()/(float)RAND_MAX)));
    dst++;
  }
}
/* Round x to the nearest integer and saturate it to [-32767, 32767]. */
static opus_int16 float2short(float x)
{
  const int rounded = (int)floor(.5+x);
  if (rounded > 32767) return 32767;
  if (rounded < -32767) return -32767;
  return rounded;
}
/* Write one frame of interleaved 16-bit training pairs to file.
   For each sample the pair is (previous synthesis-memory sample, target PCM
   sample). The synthesis memory st->sig_mem is then advanced by a mu-law
   quantized excitation to which noise[i] has been added.
     st    : encoder state; reads st->features, reads and updates st->sig_mem
     pcm   : FRAME_SIZE target samples
     noise : FRAME_SIZE integer offsets added to the mu-law excitation index
     file  : destination stream (the fwrite() result is not checked) */
void write_audio(LPCNetEncState *st, const opus_int16 *pcm, const int *noise, FILE *file) {
int i;
/* data[2*i] is the "signal in" sample, data[2*i+1] the "signal out" sample. */
opus_int16 data[2*FRAME_SIZE];
for (i=0;i<FRAME_SIZE;i++) {
float p=0;
float e;
int j;
/* Prediction from the LPC_ORDER values stored at features[NB_BANDS+2...]
   (presumably the LPC coefficients -- confirm) and the signal memory. */
for (j=0;j<LPC_ORDER;j++) p -= st->features[NB_BANDS+2+j]*st->sig_mem[j];
/* Mu-law index of the prediction residual. */
e = lin2ulaw(pcm[i] - p);
/* Signal in. */
data[2*i] = float2short(st->sig_mem[0]);
/* Signal out. */
data[2*i+1] = pcm[i];
/* Simulate error on excitation. */
e += noise[i];
/* Keep the perturbed index inside the 0..255 mu-law range. */
e = IMIN(255, IMAX(0, e));
/* Shift the signal memory by one and insert the newest reconstructed sample. */
OPUS_MOVE(&st->sig_mem[1], &st->sig_mem[0], LPC_ORDER-1);
st->sig_mem[0] = p + ulaw2lin(e);
}
/* 4*FRAME_SIZE bytes: 2*FRAME_SIZE samples, assuming opus_int16 is 2 bytes. */
fwrite(data, 4*FRAME_SIZE, 1, file);
}
/* Training/test data dumper.
   Reads 16-bit PCM speech frame by frame and applies a high-pass filter, a
   random response filter and a gain ramp. In training modes the gain and the
   response are re-drawn periodically, and -ptrain additionally mixes in noise
   from a second file. Writes the resulting features (and, for -train/-btrain,
   the interleaved PCM training pairs).

   Modes (argv[1]):
     -train  <speech> <features out> <pcm out>
     -test   <speech> <features out>
     -btrain / -btest : same arguments, additionally dump Burg cepstra
     -ptrain <noise> <speech> <features out> : pitch features with noise mixing
     -ptest  <speech> <features out>          : pitch features

   Returns 0 on success, 1 on a usage error. Other failures (file open, read
   error, encoder allocation) terminate through exit(1). */
int main(int argc, char **argv) {
  int i;
  char *argv0;
  int count=0;
  static const float a_hp[2] = {-1.99599, 0.99600};
  static const float b_hp[2] = {-2, 1};
  float a_sig[2] = {0};
  float b_sig[2] = {0};
  float mem_hp_x[2]={0};
  float mem_resp_x[2]={0};
  float mem_preemph=0;
  float x[FRAME_SIZE];
  int gain_change_count=0;
  FILE *f1;
  FILE *ffeat;
  FILE *fpcm=NULL;
  opus_int16 pcm[FRAME_SIZE]={0};
  int noisebuf[FRAME_SIZE]={0};
  opus_int16 tmp[FRAME_SIZE] = {0};
  float speech_gain=1;
  float old_speech_gain = 1;
  int one_pass_completed = 0;
  LPCNetEncState *st;
  float noise_std=0;
  int training = -1;
  int burg = 0;
  int pitch = 0;
  FILE *fnoise = NULL;
  float noise_gain = 0;
  long noise_size=0;
  int arch;
  srand(getpid());
  arch = opus_select_arch();
  st = lpcnet_encoder_create();
  if (st == NULL) {
    fprintf(stderr, "Error allocating encoder state\n");
    exit(1);
  }
  argv0=argv[0];
  if (argc == 5 && strcmp(argv[1], "-btrain")==0) {
    burg = 1;
    training = 1;
  }
  else if (argc == 4 && strcmp(argv[1], "-btest")==0) {
    burg = 1;
    training = 0;
  }
  else if (argc == 5 && strcmp(argv[1], "-ptrain")==0) {
    pitch = 1;
    training = 1;
    fnoise = fopen(argv[2], "rb");
    /* Without this check a missing noise file crashes in fseek(NULL, ...). */
    if (fnoise == NULL) {
      fprintf(stderr,"Error opening input noise file: %s\n", argv[2]);
      exit(1);
    }
    fseek(fnoise, 0, SEEK_END);
    noise_size = ftell(fnoise);
    fseek(fnoise, 0, SEEK_SET);
    /* Skip the noise argument so the remaining arguments line up with the
       other modes (argv[2] = speech, argv[3] = features). */
    argv++;
  }
  else if (argc == 4 && strcmp(argv[1], "-ptest")==0) {
    pitch = 1;
    training = 0;
  }
  else if (argc == 5 && strcmp(argv[1], "-train")==0) training = 1;
  else if (argc == 4 && strcmp(argv[1], "-test")==0) training = 0;
  if (training == -1) {
    fprintf(stderr, "usage: %s -train <speech> <features out> <pcm out>\n", argv0);
    fprintf(stderr, " or %s -test <speech> <features out>\n", argv0);
    return 1;
  }
  /* The speech input is raw binary PCM, so open it in binary mode. */
  f1 = fopen(argv[2], "rb");
  if (f1 == NULL) {
    fprintf(stderr,"Error opening input .s16 16kHz speech input file: %s\n", argv[2]);
    exit(1);
  }
  ffeat = fopen(argv[3], "wb");
  if (ffeat == NULL) {
    fprintf(stderr,"Error opening output feature file: %s\n", argv[3]);
    exit(1);
  }
  if (training && !pitch) {
    fpcm = fopen(argv[4], "wb");
    if (fpcm == NULL) {
      fprintf(stderr,"Error opening output PCM file: %s\n", argv[4]);
      exit(1);
    }
  }
  while (1) {
    size_t ret;
    ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
    if (feof(f1) || ret != FRAME_SIZE) {
      /* Test modes stop at end of input; training modes loop over the file. */
      if (!training) break;
      rewind(f1);
      ret = fread(tmp, sizeof(opus_int16), FRAME_SIZE, f1);
      if (ret != FRAME_SIZE) {
        fprintf(stderr, "error reading\n");
        exit(1);
      }
      one_pass_completed = 1;
    }
    for (i=0;i<FRAME_SIZE;i++) x[i] = tmp[i];
    if (count*FRAME_SIZE_5MS>=10000000 && one_pass_completed) break;
    /* Periodically re-draw the gain, the response filter and the noise setup. */
    if (training && ++gain_change_count > 2821) {
      float tmp1, tmp2;
      speech_gain = pow(10., (-30+(rand()%40))/20.);
      if (rand()&1) speech_gain = -speech_gain;
      if (rand()%20==0) speech_gain *= .01;
      if (!pitch && rand()%100==0) speech_gain = 0;
      gain_change_count = 0;
      rand_resp(a_sig, b_sig);
      tmp1 = rand()/(float)RAND_MAX;
      tmp2 = rand()/(float)RAND_MAX;
      noise_std = ABS16(-1.5*log(1e-4+tmp1)-.5*log(1e-4+tmp2));
      if (fnoise != NULL) {
        long pos;
        /* Randomize the fraction because rand() only gives us 31 bits. */
        float frac_pos = rand()/(float)RAND_MAX;
        pos = (frac_pos*noise_size);
        /* 32-bit alignment. */
        pos = pos/4 * 4;
        if (pos > noise_size-500000) pos = noise_size-500000;
        /* A noise file shorter than 500000 bytes would give a negative offset. */
        if (pos < 0) pos = 0;
        noise_gain = pow(10., (-15+(rand()%40))/20.);
        if (rand()%10==0) noise_gain = 0;
        fseek(fnoise, pos, SEEK_SET);
      }
    }
    if (fnoise != NULL) {
      opus_int16 noise[FRAME_SIZE];
      size_t nread;
      nread = fread(noise, sizeof(opus_int16), FRAME_SIZE, fnoise);
      /* On a short read (end of the noise file) the tail of noise[] would be
         uninitialized; treat the missing samples as silence. */
      for (i=(int)nread;i<FRAME_SIZE;i++) noise[i] = 0;
      for (i=0;i<FRAME_SIZE;i++) x[i] += noise[i]*noise_gain;
    }
    biquad(x, mem_hp_x, x, b_hp, a_hp, FRAME_SIZE);
    biquad(x, mem_resp_x, x, b_sig, a_sig, FRAME_SIZE);
    /* Ramp linearly from the previous gain to the current one over the frame. */
    for (i=0;i<FRAME_SIZE;i++) {
      float g;
      float f = (float)i/FRAME_SIZE;
      g = f*speech_gain + (1-f)*old_speech_gain;
      x[i] *= g;
    }
    if (burg) {
      float ceps[2*NB_BANDS];
      burg_cepstral_analysis(ceps, x);
      fwrite(ceps, sizeof(float), 2*NB_BANDS, ffeat);
    }
    preemphasis(x, &mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
    /* Add uniform noise in [-0.5, 0.5] to every sample. */
    for (i=0;i<FRAME_SIZE;i++) x[i] += rand()/(float)RAND_MAX - .5;
    /* PCM is delayed by 1/2 frame to make the features centered on the frames. */
    for (i=0;i<FRAME_SIZE-TRAINING_OFFSET;i++) pcm[i+TRAINING_OFFSET] = float2short(x[i]);
    compute_frame_features(st, x, arch);
    if (fpcm) {
      compute_noise(noisebuf, noise_std);
    }
    if (pitch) {
      /* Pitch mode stores the cross-correlation and IF features as signed bytes. */
      signed char pitch_features[PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES];
      for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
        pitch_features[i] = floor(.5 + 127.f*st->xcorr_features[i]);
      }
      for (i=0;i<PITCH_IF_FEATURES;i++) {
        pitch_features[i+PITCH_MAX_PERIOD-PITCH_MIN_PERIOD] = floor(.5 + 127.f*st->if_features[i]);
      }
      fwrite(pitch_features, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD+PITCH_IF_FEATURES, 1, ffeat);
    } else {
      fwrite(st->features, sizeof(float), NB_TOTAL_FEATURES, ffeat);
    }
    if (fpcm) write_audio(st, pcm, noisebuf, fpcm);
    /* Carry the tail of this frame over as the head of the next delayed PCM frame. */
    for (i=0;i<TRAINING_OFFSET;i++) pcm[i] = float2short(x[i+FRAME_SIZE-TRAINING_OFFSET]);
    old_speech_gain = speech_gain;
    count++;
  }
  fclose(f1);
  fclose(ffeat);
  if (fpcm) fclose(fpcm);
  if (fnoise) fclose(fnoise);
  lpcnet_encoder_destroy(st);
  return 0;
}

104
dnn/dump_lpcnet_tables.c Normal file
View File

@ -0,0 +1,104 @@
/* Copyright (c) 2017-2018 Mozilla
Copyright (c) 2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <math.h>
#include <stdio.h>
#include "freq.h"
#include "kiss_fft.h"
/* Generate lpcnet_tables.c in the current directory.
   The generated file holds the FFT state for WINDOW_SIZE (bit-reversal table,
   twiddles, factors), the analysis half window and the DCT table.
   Returns 0 on success, 1 if the output file cannot be created or written, or
   if the FFT state cannot be allocated. */
int main(void) {
  int i;
  FILE *file;
  kiss_fft_state *kfft;
  float half_window[OVERLAP_SIZE];
  float dct_table[NB_BANDS*NB_BANDS];
  file=fopen("lpcnet_tables.c", "wb");
  /* Every fprintf() below would dereference a NULL stream otherwise. */
  if (file == NULL) {
    fprintf(stderr, "Error opening output file: lpcnet_tables.c\n");
    return 1;
  }
  fprintf(file, "/* The contents of this file was automatically generated by dump_lpcnet_tables.c*/\n\n");
  fprintf(file, "#ifdef HAVE_CONFIG_H\n");
  fprintf(file, "#include \"config.h\"\n");
  fprintf(file, "#endif\n");
  fprintf(file, "#include \"kiss_fft.h\"\n\n");
  kfft = opus_fft_alloc_twiddles(WINDOW_SIZE, NULL, NULL, NULL, 0);
  /* The state is dereferenced right away; bail out if allocation failed. */
  if (kfft == NULL) {
    fprintf(stderr, "Error allocating FFT state\n");
    fclose(file);
    return 1;
  }
  fprintf(file, "static const arch_fft_state arch_fft = {0, NULL};\n\n");
  fprintf (file, "static const opus_int16 fft_bitrev[%d] = {\n", kfft->nfft);
  for (i=0;i<kfft->nfft;i++)
    fprintf (file, "%d,%c", kfft->bitrev[i],(i+16)%15==0?'\n':' ');
  fprintf (file, "};\n\n");
  fprintf (file, "static const kiss_twiddle_cpx fft_twiddles[%d] = {\n", kfft->nfft);
  for (i=0;i<kfft->nfft;i++)
    fprintf (file, "{%#0.9gf, %#0.9gf},%c", kfft->twiddles[i].r, kfft->twiddles[i].i,(i+3)%2==0?'\n':' ');
  fprintf (file, "};\n\n");
  fprintf(file, "const kiss_fft_state kfft = {\n");
  fprintf(file, "%d, /* nfft */\n", kfft->nfft);
  fprintf(file, "%#0.8gf, /* scale */\n", kfft->scale);
  fprintf(file, "%d, /* shift */\n", kfft->shift);
  fprintf(file, "{");
  for (i=0;i<2*MAXFACTORS;i++) {
    fprintf(file, "%d, ", kfft->factors[i]);
  }
  fprintf(file, "}, /* factors */\n");
  fprintf(file, "fft_bitrev, /* bitrev*/\n");
  fprintf(file, "fft_twiddles, /* twiddles*/\n");
  fprintf(file, "(arch_fft_state *)&arch_fft, /* arch_fft*/\n");
  fprintf(file, "};\n\n");
  /* Half window: sin(pi/2 * sin^2(pi/2 * (i+.5)/OVERLAP_SIZE)). */
  for (i=0;i<OVERLAP_SIZE;i++)
    half_window[i] = sin(.5*M_PI*sin(.5*M_PI*(i+.5)/OVERLAP_SIZE) * sin(.5*M_PI*(i+.5)/OVERLAP_SIZE));
  fprintf(file, "const float half_window[] = {\n");
  for (i=0;i<OVERLAP_SIZE;i++)
    fprintf (file, "%#0.9gf,%c", half_window[i],(i+6)%5==0?'\n':' ');
  fprintf(file, "};\n\n");
  /* DCT basis; the j==0 column is scaled by sqrt(1/2). */
  for (i=0;i<NB_BANDS;i++) {
    int j;
    for (j=0;j<NB_BANDS;j++) {
      dct_table[i*NB_BANDS + j] = cos((i+.5)*j*M_PI/NB_BANDS);
      if (j==0) dct_table[i*NB_BANDS + j] *= sqrt(.5);
    }
  }
  fprintf(file, "const float dct_table[] = {\n");
  for (i=0;i<NB_BANDS*NB_BANDS;i++)
    fprintf (file, "%#0.9gf,%c", dct_table[i],(i+6)%5==0?'\n':' ');
  fprintf(file, "};\n");
  /* fclose() flushes; a failure here means the generated file is incomplete. */
  if (fclose(file) != 0) {
    fprintf(stderr, "Error writing output file: lpcnet_tables.c\n");
    return 1;
  }
  return 0;
}

225
dnn/fargan.c Normal file
View File

@ -0,0 +1,225 @@
/* Copyright (c) 2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "fargan.h"
#include "os_support.h"
#include "freq.h"
#include "fargan_data.h"
#include "lpcnet.h"
#include "pitch.h"
#include "nnet.h"
#include "lpcnet_private.h"
#include "cpu_support.h"
#define FARGAN_FEATURES (NB_FEATURES)
/* Run the conditioning network for one frame.
   The frame features are concatenated with the pitch-period embedding row
   (period-32, clamped to 0..223), then passed through fdense1, the stateful
   fconv1 and fdense2, all with tanh activation. The result goes to cond[]. */
static void compute_fargan_cond(FARGANState *st, float *cond, const float *features, int period)
{
  FARGAN *model = &st->model;
  float fdense1_in[NB_FEATURES+COND_NET_PEMBED_OUT_SIZE];
  float fconv1_in[COND_NET_FCONV1_IN_SIZE];
  float fconv1_out[COND_NET_FCONV1_OUT_SIZE];
  int embed_row;
  celt_assert(FARGAN_FEATURES+COND_NET_PEMBED_OUT_SIZE == model->cond_net_fdense1.nb_inputs);
  celt_assert(COND_NET_FCONV1_IN_SIZE == model->cond_net_fdense1.nb_outputs);
  celt_assert(COND_NET_FCONV1_OUT_SIZE == model->cond_net_fconv1.nb_outputs);

  /* Clamp the embedding row index to the table range. */
  embed_row = period-32;
  if (embed_row > 223) embed_row = 223;
  if (embed_row < 0) embed_row = 0;

  /* Input layout: [ features | period embedding ]. */
  OPUS_COPY(fdense1_in, features, NB_FEATURES);
  OPUS_COPY(&fdense1_in[NB_FEATURES], &model->cond_net_pembed.float_weights[embed_row*COND_NET_PEMBED_OUT_SIZE], COND_NET_PEMBED_OUT_SIZE);

  compute_generic_dense(&model->cond_net_fdense1, fconv1_in, fdense1_in, ACTIVATION_TANH, st->arch);
  compute_generic_conv1d(&model->cond_net_fconv1, fconv1_out, st->cond_conv1_state, fconv1_in, COND_NET_FCONV1_IN_SIZE, ACTIVATION_TANH, st->arch);
  compute_generic_dense(&model->cond_net_fdense2, cond, fconv1_out, ACTIVATION_TANH, st->arch);
}
/* In-place de-emphasis of one subframe:
     pcm[n] += FARGAN_DEEMPHASIS * previous output sample.
   *deemph_mem holds the previous output and is updated after every sample. */
static void fargan_deemphasis(float *pcm, float *deemph_mem) {
  float *sample;
  float *const end = pcm + FARGAN_SUBFRAME_SIZE;
  for (sample=pcm; sample!=end; sample++) {
    *sample += FARGAN_DEEMPHASIS * *deemph_mem;
    *deemph_mem = *sample;
  }
}
/* Synthesize one subframe (FARGAN_SUBFRAME_SIZE samples) into pcm.
     st     : state; must already be primed (cont_initialized is asserted).
              Reads and updates the conv/GRU states, pitch_buf and deemph_mem.
     pcm    : output, FARGAN_SUBFRAME_SIZE samples, de-emphasized
     cond   : conditioning vector for this subframe (FARGAN_COND_SIZE values)
     period : pitch period used to fetch the prediction from pitch_buf */
static void run_fargan_subframe(FARGANState *st, float *pcm, const float *cond, int period)
{
int i, pos;
float fwc0_in[SIG_NET_INPUT_SIZE];
float gru1_in[SIG_NET_FWC0_CONV_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
float gru2_in[SIG_NET_GRU1_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
float gru3_in[SIG_NET_GRU2_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE];
float pred[FARGAN_SUBFRAME_SIZE+4];
float prev[FARGAN_SUBFRAME_SIZE];
float pitch_gate[4];
float gain;
float gain_1;
/* NOTE(review): fixed 10000-float scratch buffer. The writes below reach up to
   SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE+
   SIG_NET_FWC0_CONV_OUT_SIZE+2*FARGAN_SUBFRAME_SIZE entries; confirm against
   sig_net_skip_dense.nb_inputs before shrinking it. */
float skip_cat[10000];
float skip_out[SIG_NET_SKIP_DENSE_OUT_SIZE];
FARGAN *model;
celt_assert(st->cont_initialized);
model = &st->model;
/* Subframe gain: exp() of a linear dense layer applied to the conditioning. */
compute_generic_dense(&model->sig_net_cond_gain_dense, &gain, cond, ACTIVATION_LINEAR, st->arch);
gain = exp(gain);
/* Inverse gain used to normalize the history; 1e-5 guards the division. */
gain_1 = 1.f/(1e-5f + gain);
/* Pitch prediction: FARGAN_SUBFRAME_SIZE+4 samples starting period+2 samples
   before the end of pitch_buf, normalized and clipped to [-1, 1]. When the
   read position reaches the end of the buffer it steps back by one period. */
pos = PITCH_MAX_PERIOD-period-2;
for (i=0;i<FARGAN_SUBFRAME_SIZE+4;i++) {
pred[i] = MIN32(1.f, MAX32(-1.f, gain_1*st->pitch_buf[IMAX(0, pos)]));
pos++;
if (pos == PITCH_MAX_PERIOD) pos -= period;
}
/* Previous subframe, normalized and clipped to [-1, 1]. */
for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) prev[i] = MAX32(-1.f, MIN16(1.f, gain_1*st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE+i]));
/* First conv input layout: [ cond | pred | prev ]. */
OPUS_COPY(&fwc0_in[0], &cond[0], FARGAN_COND_SIZE);
OPUS_COPY(&fwc0_in[FARGAN_COND_SIZE], pred, FARGAN_SUBFRAME_SIZE+4);
OPUS_COPY(&fwc0_in[FARGAN_COND_SIZE+FARGAN_SUBFRAME_SIZE+4], prev, FARGAN_SUBFRAME_SIZE);
compute_generic_conv1d(&model->sig_net_fwc0_conv, gru1_in, st->fwc0_mem, fwc0_in, SIG_NET_INPUT_SIZE, ACTIVATION_TANH, st->arch);
celt_assert(SIG_NET_FWC0_GLU_GATE_OUT_SIZE == model->sig_net_fwc0_glu_gate.nb_outputs);
/* GLU is applied in place on the conv output held in gru1_in. */
compute_glu(&model->sig_net_fwc0_glu_gate, gru1_in, gru1_in, st->arch);
/* Four sigmoid gates, one for each place the pitch prediction is injected
   (gru1, gru2, gru3 and the skip layer). */
compute_generic_dense(&model->sig_net_gain_dense_out, pitch_gate, gru1_in, ACTIVATION_SIGMOID, st->arch);
/* Each GRU input is [ previous layer output | gated pred (offset by 2) | prev ]. */
for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru1_in[SIG_NET_FWC0_GLU_GATE_OUT_SIZE+i] = pitch_gate[0]*pred[i+2];
OPUS_COPY(&gru1_in[SIG_NET_FWC0_GLU_GATE_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
compute_generic_gru(&model->sig_net_gru1_input, &model->sig_net_gru1_recurrent, st->gru1_state, gru1_in, st->arch);
compute_glu(&model->sig_net_gru1_glu_gate, gru2_in, st->gru1_state, st->arch);
for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru2_in[SIG_NET_GRU1_OUT_SIZE+i] = pitch_gate[1]*pred[i+2];
OPUS_COPY(&gru2_in[SIG_NET_GRU1_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
compute_generic_gru(&model->sig_net_gru2_input, &model->sig_net_gru2_recurrent, st->gru2_state, gru2_in, st->arch);
compute_glu(&model->sig_net_gru2_glu_gate, gru3_in, st->gru2_state, st->arch);
for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) gru3_in[SIG_NET_GRU2_OUT_SIZE+i] = pitch_gate[2]*pred[i+2];
OPUS_COPY(&gru3_in[SIG_NET_GRU2_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
compute_generic_gru(&model->sig_net_gru3_input, &model->sig_net_gru3_recurrent, st->gru3_state, gru3_in, st->arch);
/* Skip concatenation layout:
   [ gru1 GLU out | gru2 GLU out | gru3 GLU out | fwc0 out | gated pred | prev ].
   The gru3 GLU output is written straight into its slot. */
compute_glu(&model->sig_net_gru3_glu_gate, &skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE], st->gru3_state, st->arch);
OPUS_COPY(skip_cat, gru2_in, SIG_NET_GRU1_OUT_SIZE);
OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE], gru3_in, SIG_NET_GRU2_OUT_SIZE);
OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE], gru1_in, SIG_NET_FWC0_CONV_OUT_SIZE);
for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE+SIG_NET_FWC0_CONV_OUT_SIZE+i] = pitch_gate[3]*pred[i+2];
OPUS_COPY(&skip_cat[SIG_NET_GRU1_OUT_SIZE+SIG_NET_GRU2_OUT_SIZE+SIG_NET_GRU3_OUT_SIZE+SIG_NET_FWC0_CONV_OUT_SIZE+FARGAN_SUBFRAME_SIZE], prev, FARGAN_SUBFRAME_SIZE);
compute_generic_dense(&model->sig_net_skip_dense, skip_out, skip_cat, ACTIVATION_TANH, st->arch);
compute_glu(&model->sig_net_skip_glu_gate, skip_out, skip_out, st->arch);
compute_generic_dense(&model->sig_net_sig_dense_out, pcm, skip_out, ACTIVATION_TANH, st->arch);
/* Undo the gain normalization on the network output. */
for (i=0;i<FARGAN_SUBFRAME_SIZE;i++) pcm[i] *= gain;
/* Slide the history by one subframe and append the new samples. This happens
   before de-emphasis, so pitch_buf keeps the pre-de-emphasis signal. */
OPUS_MOVE(st->pitch_buf, &st->pitch_buf[FARGAN_SUBFRAME_SIZE], PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE);
OPUS_COPY(&st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE], pcm, FARGAN_SUBFRAME_SIZE);
fargan_deemphasis(pcm, &st->deemph_mem);
}
/* Prime the synthesizer so it can continue an existing signal.
     st        : state to prime; cont_initialized is set to 1
     pcm0      : FARGAN_CONT_SAMPLES samples of the signal to continue from
     features0 : 5 consecutive feature vectors of NB_FEATURES values each
   After the call, pitch_buf ends with the pre-emphasized tail of pcm0 and
   deemph_mem holds the last sample of pcm0. */
void fargan_cont(FARGANState *st, const float *pcm0, const float *features0)
{
int i;
float cond[COND_NET_FDENSE2_OUT_SIZE];
float x0[FARGAN_CONT_SAMPLES];
/* Output of the warm-up subframes; discarded. */
float dummy[FARGAN_SUBFRAME_SIZE];
int period=0;
/* Pre-load features. */
/* Each pass updates the conditioning-network state; only the last cond[] is
   used below. last_period ends up as the period of the 4th feature vector. */
for (i=0;i<5;i++) {
const float *features = &features0[i*NB_FEATURES];
st->last_period = period;
period = (int)floor(.5+256./pow(2.f,((1./60.)*((features[NB_BANDS]+1.5)*60))));
compute_fargan_cond(st, cond, features, period);
}
/* Pre-emphasize the input (inverse of the output de-emphasis); the first
   sample has no predecessor and is set to 0. */
x0[0] = 0;
for (i=1;i<FARGAN_CONT_SAMPLES;i++) {
x0[i] = pcm0[i] - FARGAN_DEEMPHASIS*pcm0[i-1];
}
/* Seed the end of the pitch history with the first frame of x0. */
OPUS_COPY(&st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_FRAME_SIZE], x0, FARGAN_FRAME_SIZE);
st->cont_initialized = 1;
/* Run one frame of subframes to warm up the network state, overwriting each
   synthesized subframe in pitch_buf with the matching samples from x0. */
for (i=0;i<FARGAN_NB_SUBFRAMES;i++) {
run_fargan_subframe(st, dummy, &cond[i*FARGAN_COND_SIZE], st->last_period);
OPUS_COPY(&st->pitch_buf[PITCH_MAX_PERIOD-FARGAN_SUBFRAME_SIZE], &x0[FARGAN_FRAME_SIZE+i*FARGAN_SUBFRAME_SIZE], FARGAN_SUBFRAME_SIZE);
}
/* The warm-up changed deemph_mem; restore it to the last real sample. */
st->deemph_mem = pcm0[FARGAN_CONT_SAMPLES-1];
}
/* Zero the whole state and select the CPU architecture. Unless
   USE_WEIGHTS_FILE is defined, the model is also initialized from the
   compiled-in fargan_arrays and the result is asserted to be 0. */
void fargan_init(FARGANState *st)
{
  int ret = 0;
  OPUS_CLEAR(st, 1);
  st->arch = opus_select_arch();
#ifndef USE_WEIGHTS_FILE
  ret = init_fargan(&st->model, fargan_arrays);
#endif
  celt_assert(ret == 0);
}
/* Initialize the model from a serialized weights blob.
     st   : state whose model is initialized
     data : weights blob
     len  : size of the blob in bytes
   Returns 0 on success, -1 if the blob produced no weight list or if
   init_fargan() rejected it. */
int fargan_load_model(FARGANState *st, const unsigned char *data, int len) {
  WeightArray *list = NULL;
  int ret;
  parse_weights(&list, data, len);
  /* Never hand a missing list to init_fargan(): report a load failure instead. */
  if (list == NULL) return -1;
  ret = init_fargan(&st->model, list);
  opus_free(list);
  return (ret == 0) ? 0 : -1;
}
/* Synthesize one frame (FARGAN_NB_SUBFRAMES subframes) into pcm.
   The conditioning is computed from the current features. The subframes are
   run with the period stored from the previous call, and the new period is
   stored afterwards for the next frame. */
static void fargan_synthesize_impl(FARGANState *st, float *pcm, const float *features)
{
  float cond[COND_NET_FDENSE2_OUT_SIZE];
  int new_period;
  int sf;
  celt_assert(st->cont_initialized);

  new_period = (int)floor(.5+256./pow(2.f,((1./60.)*((features[NB_BANDS]+1.5)*60))));
  compute_fargan_cond(st, cond, features, new_period);
  for (sf=0;sf<FARGAN_NB_SUBFRAMES;sf++) {
    run_fargan_subframe(st, pcm + sf*FARGAN_SUBFRAME_SIZE, cond + sf*FARGAN_COND_SIZE, st->last_period);
  }
  st->last_period = new_period;
}
/* Public float entry point: synthesize one frame of float samples into pcm
   from one feature vector. Forwards directly to fargan_synthesize_impl(). */
void fargan_synthesize(FARGANState *st, float *pcm, const float *features)
{
fargan_synthesize_impl(st, pcm, features);
}
/* 16-bit entry point: synthesize one frame and convert it to opus_int16.
   Samples are scaled by 32768, clamped to [-32767, 32767] and rounded to the
   nearest integer. pcm receives FARGAN_FRAME_SIZE samples. */
void fargan_synthesize_int(FARGANState *st, opus_int16 *pcm, const float *features)
{
  int i;
  float fpcm[FARGAN_FRAME_SIZE];
  fargan_synthesize(st, fpcm, features);
  /* Iterate over exactly the samples held in fpcm[], so the loop bound can
     never run past the local buffer. */
  for (i=0;i<FARGAN_FRAME_SIZE;i++) pcm[i] = (int)floor(.5 + MIN32(32767, MAX32(-32767, 32768.f*fpcm[i])));
}

68
dnn/fargan.h Normal file
View File

@ -0,0 +1,68 @@
/* Copyright (c) 2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FARGAN_H
#define FARGAN_H
#include "freq.h"
#include "fargan_data.h"
#include "pitchdnn.h"
#define FARGAN_CONT_SAMPLES 320
#define FARGAN_NB_SUBFRAMES 4
#define FARGAN_SUBFRAME_SIZE 40
#define FARGAN_FRAME_SIZE (FARGAN_NB_SUBFRAMES*FARGAN_SUBFRAME_SIZE)
#define FARGAN_COND_SIZE (COND_NET_FDENSE2_OUT_SIZE/FARGAN_NB_SUBFRAMES)
#define FARGAN_DEEMPHASIS 0.85f
#define SIG_NET_INPUT_SIZE (FARGAN_COND_SIZE+2*FARGAN_SUBFRAME_SIZE+4)
#define SIG_NET_FWC0_STATE_SIZE (2*SIG_NET_INPUT_SIZE)
#define FARGAN_MAX_RNN_NEURONS SIG_NET_GRU1_OUT_SIZE
/* Complete synthesizer state: model weights plus all signal and network
   memories carried from one subframe to the next. */
typedef struct {
/* Network weights. */
FARGAN model;
/* CPU architecture selector passed to every compute_* call. */
int arch;
/* Non-zero once fargan_cont() has primed the state; synthesis asserts on it. */
int cont_initialized;
/* Last de-emphasized output sample. */
float deemph_mem;
/* Most recent PITCH_MAX_PERIOD synthesized samples (before de-emphasis);
   the pitch prediction is read from here. */
float pitch_buf[PITCH_MAX_PERIOD];
/* History of the conditioning-network convolution. */
float cond_conv1_state[COND_NET_FCONV1_STATE_SIZE];
/* History of the first signal-network convolution. */
float fwc0_mem[SIG_NET_FWC0_STATE_SIZE];
/* Recurrent state of the three signal-network GRUs. */
float gru1_state[SIG_NET_GRU1_STATE_SIZE];
float gru2_state[SIG_NET_GRU2_STATE_SIZE];
float gru3_state[SIG_NET_GRU3_STATE_SIZE];
/* Pitch period of the previous frame; synthesis uses it for the current one. */
int last_period;
} FARGANState;
void fargan_init(FARGANState *st);
int fargan_load_model(FARGANState *st, const unsigned char *data, int len);
void fargan_cont(FARGANState *st, const float *pcm0, const float *features0);
void fargan_synthesize(FARGANState *st, float *pcm, const float *features);
void fargan_synthesize_int(FARGANState *st, opus_int16 *pcm, const float *features);
#endif /* FARGAN_H */

328
dnn/freq.c Normal file
View File

@ -0,0 +1,328 @@
/* Copyright (c) 2017-2018 Mozilla */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "kiss_fft.h"
#include <math.h>
#include "freq.h"
#include "pitch.h"
#include "arch.h"
#include "burg.h"
#include <assert.h>
#include "os_support.h"
#define SQUARE(x) ((x)*(x))
/* Band edges. Each entry is multiplied by WINDOW_SIZE_5MS to obtain the first
   FFT bin of the band; the comment row gives the matching frequencies. */
static const opus_int16 eband5ms[] = {
/*0 200 400 600 800 1k 1.2 1.4 1.6 2k 2.4 2.8 3.2 4k 4.8 5.6 6.8 8k*/
0, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 16, 20, 24, 28, 34, 40
};
/* Per-band scale factors applied to the band energies in lpc_from_cepstrum()
   before they are interpolated back to a spectrum. */
static const float compensation[] = {
0.8f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 1.f, 0.666667f, 0.5f, 0.5f, 0.5f, 0.333333f, 0.25f, 0.25f, 0.2f, 0.166667f, 0.173913f
};
extern const kiss_fft_state kfft;
extern const float half_window[OVERLAP_SIZE];
extern const float dct_table[NB_BANDS*NB_BANDS];
static void compute_band_energy_inverse(float *bandE, const kiss_fft_cpx *X) {
int i;
float sum[NB_BANDS] = {0};
for (i=0;i<NB_BANDS-1;i++)
{
int j;
int band_size;
band_size = (eband5ms[i+1]-eband5ms[i])*WINDOW_SIZE_5MS;
for (j=0;j<band_size;j++) {
float tmp;
float frac = (float)j/band_size;
tmp = SQUARE(X[(eband5ms[i]*WINDOW_SIZE_5MS) + j].r);
tmp += SQUARE(X[(eband5ms[i]*WINDOW_SIZE_5MS) + j].i);
tmp = 1.f/(tmp + 1e-9);
sum[i] += (1-frac)*tmp;
sum[i+1] += frac*tmp;
}
}
sum[0] *= 2;
sum[NB_BANDS-1] *= 2;
for (i=0;i<NB_BANDS;i++)
{
bandE[i] = sum[i];
}
}
/* Derive order-p LPC coefficients from an autocorrelation sequence with a
   Levinson-Durbin style recursion. lpc[] and rc[] are cleared first, so they
   stay all-zero when ac[0] is 0. Returns the remaining prediction error.
   NOTE(review): the SHR32/SHL32 by 3 and MULT32_32_Q31 come from the
   fixed-point macro set; presumably they reduce to plain float operations in
   this build -- confirm in arch.h. */
static float lpcn_lpc(
opus_val16 *lpc, /* out: [0...p-1] LPC coefficients */
opus_val16 *rc, /* out: [0...p-1] reflection coefficients */
const opus_val32 *ac, /* in: [0...p] autocorrelation values */
int p
)
{
int i, j;
opus_val32 r;
/* The prediction error starts at the signal energy. */
opus_val32 error = ac[0];
OPUS_CLEAR(lpc, p);
OPUS_CLEAR(rc, p);
if (ac[0] != 0)
{
for (i = 0; i < p; i++) {
/* Sum up this iteration's reflection coefficient */
opus_val32 rr = 0;
for (j = 0; j < i; j++)
rr += MULT32_32_Q31(lpc[j],ac[i - j]);
rr += SHR32(ac[i + 1],3);
r = -SHL32(rr,3)/error;
rc[i] = r;
/* Update LPC coefficients and total error */
lpc[i] = SHR32(r,3);
/* Symmetric in-place update: pair j with i-1-j so both ends are updated
   from their old values. */
for (j = 0; j < (i+1)>>1; j++)
{
opus_val32 tmp1, tmp2;
tmp1 = lpc[j];
tmp2 = lpc[i-1-j];
lpc[j] = tmp1 + MULT32_32_Q31(r,tmp2);
lpc[i-1-j] = tmp2 + MULT32_32_Q31(r,tmp1);
}
error = error - MULT32_32_Q31(MULT32_32_Q31(r,r),error);
/* Bail out once we get 30 dB gain */
if (error<.001f*ac[0])
break;
}
}
return error;
}
void lpcn_compute_band_energy(float *bandE, const kiss_fft_cpx *X) {
int i;
float sum[NB_BANDS] = {0};
for (i=0;i<NB_BANDS-1;i++)
{
int j;
int band_size;
band_size = (eband5ms[i+1]-eband5ms[i])*WINDOW_SIZE_5MS;
for (j=0;j<band_size;j++) {
float tmp;
float frac = (float)j/band_size;
tmp = SQUARE(X[(eband5ms[i]*WINDOW_SIZE_5MS) + j].r);
tmp += SQUARE(X[(eband5ms[i]*WINDOW_SIZE_5MS) + j].i);
sum[i] += (1-frac)*tmp;
sum[i+1] += frac*tmp;
}
}
sum[0] *= 2;
sum[NB_BANDS-1] *= 2;
for (i=0;i<NB_BANDS;i++)
{
bandE[i] = sum[i];
}
}
/* Compute a cepstrum from a Burg LPC analysis of pcm.
     pcm           : len input samples (len <= FRAME_SIZE, asserted)
     burg_cepstrum : output, NB_BANDS coefficients
     len           : number of input samples
     order         : LPC order (<= LPC_ORDER, asserted)
   The LPC filter's inverse band energies are turned into a log band spectrum,
   floored, and then DCT-transformed. */
static void compute_burg_cepstrum(const float *pcm, float *burg_cepstrum, int len, int order) {
int i;
float burg_in[FRAME_SIZE];
float burg_lpc[LPC_ORDER];
float x[WINDOW_SIZE];
float Eburg[NB_BANDS];
float g;
kiss_fft_cpx LPC[FREQ_SIZE];
float Ly[NB_BANDS];
float logMax = -2;
float follow = -2;
assert(order <= LPC_ORDER);
assert(len <= FRAME_SIZE);
/* Pre-emphasis; this yields len-1 samples. */
for (i=0;i<len-1;i++) burg_in[i] = pcm[i+1] - PREEMPHASIS*pcm[i];
g = silk_burg_analysis(burg_lpc, burg_in, 1e-3, len-1, 1, order);
/* Normalize the value returned by the analysis by a sample count. */
g /= len - 2*(order-1);
/* Build the impulse response of the prediction filter, with each coefficient
   scaled by .995^(i+1), zero-padded to WINDOW_SIZE. */
OPUS_CLEAR(x, WINDOW_SIZE);
x[0] = 1;
for (i=0;i<order;i++) x[i+1] = -burg_lpc[i]*pow(.995, i+1);
forward_transform(LPC, x);
/* The inverse of the filter's power response gives the spectral envelope. */
compute_band_energy_inverse(Eburg, LPC);
for (i=0;i<NB_BANDS;i++) Eburg[i] *= .45*g*(1.f/((float)WINDOW_SIZE*WINDOW_SIZE*WINDOW_SIZE));
/* Log energies, floored at 8 below the running maximum and at 2.5 below the
   decaying follower of the previous bands. */
for (i=0;i<NB_BANDS;i++) {
Ly[i] = log10(1e-2+Eburg[i]);
Ly[i] = MAX16(logMax-8, MAX16(follow-2.5, Ly[i]));
logMax = MAX16(logMax, Ly[i]);
follow = MAX16(follow-2.5, Ly[i]);
}
dct(burg_cepstrum, Ly);
/* Fixed offset on c0; lpc_from_cepstrum() adds 4 back. */
burg_cepstrum[0] += - 4;
}
/* Burg cepstral analysis of one frame, done on its two halves.
   On return ceps[0..NB_BANDS-1] holds the average of the two half-frame
   cepstra and ceps[NB_BANDS..2*NB_BANDS-1] holds their difference
   (first half minus second half). */
void burg_cepstral_analysis(float *ceps, const float *x) {
  const int half = FRAME_SIZE/2;
  float *first = ceps;
  float *second = ceps + NB_BANDS;
  int band;
  compute_burg_cepstrum(x, first, half, LPC_ORDER);
  compute_burg_cepstrum(x + half, second, half, LPC_ORDER);
  for (band=0;band<NB_BANDS;band++) {
    const float c0 = first[band];
    const float c1 = second[band];
    first[band] = .5*(c0+c1);
    second[band] = (c0-c1);
  }
}
/* Expand NB_BANDS band values into a per-bin gain curve of FREQ_SIZE floats
   by linear interpolation between consecutive band values. Bins above the
   last band edge are left at 0. */
static void interp_band_gain(float *g, const float *bandE) {
  int i;
  /* memset() takes a byte count: clear FREQ_SIZE floats, not FREQ_SIZE bytes,
     so the bins the loop below never writes are really zero. */
  memset(g, 0, FREQ_SIZE*sizeof(*g));
  for (i=0;i<NB_BANDS-1;i++)
  {
    int j;
    int band_size;
    band_size = (eband5ms[i+1]-eband5ms[i])*WINDOW_SIZE_5MS;
    for (j=0;j<band_size;j++) {
      float frac = (float)j/band_size;
      g[(eband5ms[i]*WINDOW_SIZE_5MS) + j] = (1-frac)*bandE[i] + frac*bandE[i+1];
    }
  }
}
/* Forward DCT over NB_BANDS values: out[k] is the dot product of in[] with
   column k of dct_table, scaled by sqrt(2/NB_BANDS). */
void dct(float *out, const float *in) {
  const double norm = sqrt(2./NB_BANDS);
  int k;
  for (k=0;k<NB_BANDS;k++) {
    const float *column = &dct_table[k];
    float acc = 0;
    int n;
    for (n=0;n<NB_BANDS;n++) {
      acc += in[n] * column[n*NB_BANDS];
    }
    out[k] = acc*norm;
  }
}
/* Inverse DCT over NB_BANDS values: out[n] is the dot product of in[] with
   row n of dct_table, scaled by sqrt(2/NB_BANDS). */
static void idct(float *out, const float *in) {
  const double norm = sqrt(2./NB_BANDS);
  int n;
  for (n=0;n<NB_BANDS;n++) {
    const float *row = &dct_table[n*NB_BANDS];
    float acc = 0;
    int k;
    for (k=0;k<NB_BANDS;k++) {
      acc += in[k] * row[k];
    }
    out[n] = acc*norm;
  }
}
/* FFT of WINDOW_SIZE real samples. Only the first FREQ_SIZE bins of the
   result are written to out[]. */
void forward_transform(kiss_fft_cpx *out, const float *in) {
  kiss_fft_cpx time_buf[WINDOW_SIZE];
  kiss_fft_cpx freq_buf[WINDOW_SIZE];
  int n;
  int k;
  /* Promote the real input to complex with a zero imaginary part. */
  for (n=0;n<WINDOW_SIZE;n++) {
    time_buf[n].r = in[n];
    time_buf[n].i = 0;
  }
  opus_fft(&kfft, time_buf, freq_buf, 0);
  for (k=0;k<FREQ_SIZE;k++) {
    out[k] = freq_buf[k];
  }
}
/* Inverse FFT from FREQ_SIZE bins back to WINDOW_SIZE real samples.
   The upper half of the spectrum is rebuilt by conjugate symmetry. The
   forward FFT is reused, and its output is read in reverse order and scaled
   by WINDOW_SIZE. */
static void inverse_transform(float *out, const kiss_fft_cpx *in) {
  kiss_fft_cpx spec[WINDOW_SIZE];
  kiss_fft_cpx sig[WINDOW_SIZE];
  int k;
  int n;
  for (k=0;k<FREQ_SIZE;k++) {
    spec[k] = in[k];
  }
  /* Mirror bins: spec[k] = conj(spec[WINDOW_SIZE-k]). */
  for (k=FREQ_SIZE;k<WINDOW_SIZE;k++) {
    const int mirror = WINDOW_SIZE - k;
    spec[k].r = spec[mirror].r;
    spec[k].i = -spec[mirror].i;
  }
  opus_fft(&kfft, spec, sig, 0);
  /* output in reverse order for IFFT. */
  for (n=0;n<WINDOW_SIZE;n++) {
    const int src = (n == 0) ? 0 : WINDOW_SIZE - n;
    out[n] = WINDOW_SIZE*sig[src].r;
  }
}
/* Derives LPC_ORDER prediction coefficients from NB_BANDS band values.
   The band values are interpolated to a per-bin spectrum, which is
   inverse-transformed; the first LPC_ORDER+1 output samples are used as
   autocorrelation lags, conditioned, and passed to lpcn_lpc().
   Returns whatever lpcn_lpc() returns. */
static float lpc_from_bands(float *lpc, const float *Ex)
{
  float autocorr[LPC_ORDER+1];
  float refl[LPC_ORDER];
  float bins[FREQ_SIZE];
  kiss_fft_cpx spectrum[FREQ_SIZE];
  float lags[WINDOW_SIZE];
  int k;
  interp_band_gain(bins, Ex);
  bins[FREQ_SIZE-1] = 0;
  /* Purely real spectrum: imaginary parts are zero. */
  for (k = 0; k < FREQ_SIZE; k++) {
    spectrum[k].r = bins[k];
    spectrum[k].i = 0;
  }
  inverse_transform(lags, spectrum);
  for (k = 0; k <= LPC_ORDER; k++) autocorr[k] = lags[k];
  /* -40 dB noise floor. Note that 320/12 is an integer division (26) that
     happens before the floating-point division by 38. */
  autocorr[0] += autocorr[0]*1e-4 + 320/12/38.;
  /* Lag windowing. */
  for (k = 1; k <= LPC_ORDER; k++) autocorr[k] *= (1 - 6e-5*k*k);
  return lpcn_lpc(lpc, refl, autocorr, LPC_ORDER);
}
/* Scales the LPC_ORDER coefficients in place: coefficient i (0-based) is
   multiplied by gamma raised to the power i+1. */
void lpc_weighting(float *lpc, float gamma)
{
  float scale = gamma;
  float *coef = lpc;
  float *const end = lpc + LPC_ORDER;
  while (coef != end) {
    *coef *= scale;
    scale *= gamma;
    coef++;
  }
}
/* Computes LPC coefficients from an NB_BANDS-element cepstrum.
   Adds 4 to the first coefficient, applies idct(), converts each band from
   log10 to linear with pow(10, .), scales it by compensation[], and hands
   the bands to lpc_from_bands(), whose return value is returned. */
float lpc_from_cepstrum(float *lpc, const float *cepstrum)
{
  float bands[NB_BANDS];
  float shifted[NB_BANDS];
  int b;
  for (b = 0; b < NB_BANDS; b++) shifted[b] = cepstrum[b];
  shifted[0] += 4;
  idct(bands, shifted);
  for (b = 0; b < NB_BANDS; b++) {
    bands[b] = pow(10.f, bands[b])*compensation[b];
  }
  return lpc_from_bands(lpc, bands);
}
/* Applies half_window[] symmetrically, in place, to a buffer of WINDOW_SIZE
   samples: the first OVERLAP_SIZE samples are tapered with the window read
   forwards and the last OVERLAP_SIZE samples with the same window mirrored. */
void apply_window(float *x) {
  float *head = x;
  float *tail = x + WINDOW_SIZE - 1;
  int n;
  for (n = 0; n < OVERLAP_SIZE; n++) {
    *head *= half_window[n];
    *tail *= half_window[n];
    head++;
    tail--;
  }
}

61
dnn/freq.h Normal file
View File

@ -0,0 +1,61 @@
/* Copyright (c) 2017-2018 Mozilla */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FREQ_H
#define FREQ_H
#include "kiss_fft.h"
/* Number of LPC coefficients. */
#define LPC_ORDER 16
/* Pre-emphasis coefficient. */
#define PREEMPHASIS (0.85f)
/* Sizes expressed in units that are multiplied by 80 samples below (the _5MS
suffix suggests 5 ms units, i.e. 80 samples per 5 ms -- TODO confirm). */
#define FRAME_SIZE_5MS (2)
#define OVERLAP_SIZE_5MS (2)
#define TRAINING_OFFSET_5MS (1)
#define WINDOW_SIZE_5MS (FRAME_SIZE_5MS + OVERLAP_SIZE_5MS)
/* The same sizes in samples: FRAME_SIZE = 160, OVERLAP_SIZE = 160,
TRAINING_OFFSET = 80, WINDOW_SIZE = 320. */
#define FRAME_SIZE (80*FRAME_SIZE_5MS)
#define OVERLAP_SIZE (80*OVERLAP_SIZE_5MS)
#define TRAINING_OFFSET (80*TRAINING_OFFSET_5MS)
#define WINDOW_SIZE (FRAME_SIZE + OVERLAP_SIZE)
/* Number of complex bins kept from a WINDOW_SIZE-point transform (161). */
#define FREQ_SIZE (WINDOW_SIZE/2 + 1)
/* Number of bands, and therefore of cepstral coefficients per analysis. */
#define NB_BANDS 18
#define NB_BANDS_1 (NB_BANDS - 1)
/* Presumably fills bandE with per-band energies of the spectrum X; the
   definition is not part of this header -- verify against freq.c. */
void lpcn_compute_band_energy(float *bandE, const kiss_fft_cpx *X);
/* Writes 2*NB_BANDS values to ceps from FRAME_SIZE input samples in x. */
void burg_cepstral_analysis(float *ceps, const float *x);
/* Applies the analysis window in place to WINDOW_SIZE samples. */
void apply_window(float *x);
/* NB_BANDS-point transform of `in` into `out`. */
void dct(float *out, const float *in);
/* Transforms WINDOW_SIZE real samples into FREQ_SIZE complex bins. */
void forward_transform(kiss_fft_cpx *out, const float *in);
/* Fills lpc with LPC_ORDER coefficients computed from an NB_BANDS cepstrum. */
float lpc_from_cepstrum(float *lpc, const float *cepstrum);
/* Multiplies coefficient i (0-based) of lpc by gamma^(i+1), in place. */
void lpc_weighting(float *lpc, float gamma);
#endif

322
dnn/fwgan.c Normal file
View File

@ -0,0 +1,322 @@
/* Copyright (c) 2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "fwgan.h"
#include "os_support.h"
#include "freq.h"
#include "fwgan_data.h"
#include "lpcnet.h"
#include "pitch.h"
#include "nnet.h"
#include "lpcnet_private.h"
/* Length of the per-subframe input vector: one quarter of the upsampler
output plus FWGAN_FRAME_SIZE/2 pitch-embedding values. */
#define FEAT_IN_SIZE (BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4 + FWGAN_FRAME_SIZE/2)
/* Number of features fed to the upsampler: one fewer than NB_FEATURES. */
#define FWGAN_FEATURES (NB_FEATURES-1)
/* Generates one subframe of pitch embeddings.
   `phase` is a two-element (real, imaginary) value that is advanced by an
   angle of w0 once per sample. For each of the SUBFRAME_SIZE samples the
   imaginary part is written to pembed[n] and the real part to
   pembed[SUBFRAME_SIZE+n]. `phase` is updated in place and rescaled to unit
   norm before returning. */
static void pitch_embeddings(float *pembed, float *phase, double w0) {
  int n;
  float rot_re, rot_im;
  float inv_norm;
#if 1
  /* Truncated Taylor series for cos(w0) and sin(w0); adequate because w0 is
     always small. */
  float w0_sq = w0*w0;
  rot_re = 1 - .5*w0_sq*(1.f - 0.083333333f*w0_sq);
  rot_im = w0*(1 - 0.166666667f*w0_sq*(1.f - 0.05f*w0_sq));
#else
  rot_re = cos(w0);
  rot_im = sin(w0);
#endif
  /* Instead of evaluating a sine and cosine per sample, keep the phase as a
     complex number and multiply it by (rot_re + i*rot_im) each sample. */
  for (n = 0; n < SUBFRAME_SIZE; n++) {
    const float re = phase[0]*rot_re - phase[1]*rot_im;
    const float im = phase[0]*rot_im + phase[1]*rot_re;
    phase[0] = re;
    phase[1] = im;
    pembed[n] = im;
    pembed[SUBFRAME_SIZE+n] = re;
  }
  /* Bring the phase back to unit norm once per subframe so that rounding
     errors do not accumulate (this could likely be done less often). */
  inv_norm = 1.f/sqrt(phase[0]*phase[0] + phase[1]*phase[1]);
  phase[0] *= inv_norm;
  phase[1] *= inv_norm;
}
/* Computes LPC coefficients from the cepstral features and weights them:
   coefficient k (0-based) is multiplied by FWGAN_GAMMA^(k+1). */
static void compute_wlpc(float lpc[LPC_ORDER], const float *features) {
  float weight;
  int k;
  lpc_from_cepstrum(lpc, features);
  weight = FWGAN_GAMMA;
  for (k = 0; k < LPC_ORDER; k++) {
    lpc[k] *= weight;
    weight *= FWGAN_GAMMA;
  }
}
/* Runs the feature upsampler: a single dense layer with tanh activation that
   maps FWGAN_FEATURES inputs to BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE
   conditioning values written to `cond`. */
static void run_fwgan_upsampler(FWGANState *st, float *cond, const float *features)
{
  FWGAN *model = &st->model;
  /* The compile-time sizes must agree with the layer that was loaded. */
  celt_assert(FWGAN_FEATURES == model->bfcc_with_corr_upsampler_fc.nb_inputs);
  celt_assert(BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE == model->bfcc_with_corr_upsampler_fc.nb_outputs);
  compute_generic_dense(&model->bfcc_with_corr_upsampler_fc, cond, features, ACTIVATION_TANH);
}
static void fwgan_synthesize_impl(FWGANState *st, float *pcm, const float *lpc, const float *features);
/* Initializes the synthesis state from existing audio ("continuation").
   st         state to initialize
   pcm0       CONT_PCM_INPUTS samples that precede the audio to be synthesized
   features0  features used for the LPC analysis and for the first frame
   On return the filter memories and all layer states are set,
   cont_initialized is non-zero, and st->pcm_buf holds the last
   FWGAN_FRAME_SIZE-SUBFRAME_SIZE samples of a first synthesized frame. */
void fwgan_cont(FWGANState *st, const float *pcm0, const float *features0)
{
int i;
float norm2, norm_1;
float wpcm0[CONT_PCM_INPUTS];
float cont_inputs[CONT_PCM_INPUTS+1];
/* tmp1/tmp2 are ping-pong buffers for the continuation network layers. */
float tmp1[MAX_CONT_SIZE];
float tmp2[MAX_CONT_SIZE];
float lpc[LPC_ORDER];
float new_pcm[FWGAN_FRAME_SIZE];
FWGAN *model;
/* NOTE(review): only the real part of the phase is set here; embed_phase[1]
   keeps whatever value it had (zero right after fwgan_init()) -- confirm this
   is intended if fwgan_cont() can be called more than once on a state. */
st->embed_phase[0] = 1;
model = &st->model;
compute_wlpc(lpc, features0);
/* Deemphasis memory is just the last continuation sample. */
st->deemph_mem = pcm0[CONT_PCM_INPUTS-1];
/* Apply analysis filter, considering that the preemphasis and deemphasis filter
cancel each other in this case since the LPC filter is constant across that boundary.
*/
for (i=LPC_ORDER;i<CONT_PCM_INPUTS;i++) {
int j;
wpcm0[i] = pcm0[i];
for (j=0;j<LPC_ORDER;j++) wpcm0[i] += lpc[j]*pcm0[i-j-1];
}
/* FIXME: Make this less stupid. */
/* The first LPC_ORDER outputs have no filter history, so they are filled
   with the first value that could be computed. */
for (i=0;i<LPC_ORDER;i++) wpcm0[i] = wpcm0[LPC_ORDER];
/* The memory of the pre-emphasis is the last sample of the weighted signal
(ignoring preemphasis+deemphasis combination). */
st->preemph_mem = wpcm0[CONT_PCM_INPUTS-1];
/* The memory of the synthesis filter is the pre-emphasized continuation. */
for (i=0;i<LPC_ORDER;i++) st->syn_mem[i] = pcm0[CONT_PCM_INPUTS-1-i] - FWGAN_DEEMPHASIS*pcm0[CONT_PCM_INPUTS-2-i];
/* Network input: element 0 is the log of the signal norm, the remaining
   CONT_PCM_INPUTS elements are the weighted signal scaled to unit norm. */
norm2 = celt_inner_prod(wpcm0, wpcm0, CONT_PCM_INPUTS, st->arch);
norm_1 = 1.f/sqrt(1e-8f + norm2);
for (i=0;i<CONT_PCM_INPUTS;i++) cont_inputs[i+1] = norm_1*wpcm0[i];
cont_inputs[0] = log(sqrt(norm2) + 1e-7f);
/* Continuation network */
compute_generic_dense(&model->cont_net_0, tmp1, cont_inputs, ACTIVATION_TANH);
compute_generic_dense(&model->cont_net_2, tmp2, tmp1, ACTIVATION_TANH);
compute_generic_dense(&model->cont_net_4, tmp1, tmp2, ACTIVATION_TANH);
compute_generic_dense(&model->cont_net_6, tmp2, tmp1, ACTIVATION_TANH);
compute_generic_dense(&model->cont_net_8, tmp1, tmp2, ACTIVATION_TANH);
celt_assert(CONT_NET_10_OUT_SIZE == model->cont_net_10.nb_outputs);
compute_generic_dense(&model->cont_net_10, st->cont, tmp1, ACTIVATION_TANH);
/* Computing continuation for each layer. */
/* Each assert checks that the state array in FWGANState is exactly as large
   as the output of the layer that fills it. */
celt_assert(RNN_GRU_STATE_SIZE == model->rnn_cont_fc_0.nb_outputs);
compute_generic_dense(&model->rnn_cont_fc_0, st->rnn_state, st->cont, ACTIVATION_TANH);
celt_assert(FWC1_STATE_SIZE == model->fwc1_cont_fc_0.nb_outputs);
compute_generic_dense(&model->fwc1_cont_fc_0, st->fwc1_state, st->cont, ACTIVATION_TANH);
celt_assert(FWC2_STATE_SIZE == model->fwc2_cont_fc_0.nb_outputs);
compute_generic_dense(&model->fwc2_cont_fc_0, st->fwc2_state, st->cont, ACTIVATION_TANH);
celt_assert(FWC3_STATE_SIZE == model->fwc3_cont_fc_0.nb_outputs);
compute_generic_dense(&model->fwc3_cont_fc_0, st->fwc3_state, st->cont, ACTIVATION_TANH);
celt_assert(FWC4_STATE_SIZE == model->fwc4_cont_fc_0.nb_outputs);
compute_generic_dense(&model->fwc4_cont_fc_0, st->fwc4_state, st->cont, ACTIVATION_TANH);
celt_assert(FWC5_STATE_SIZE == model->fwc5_cont_fc_0.nb_outputs);
compute_generic_dense(&model->fwc5_cont_fc_0, st->fwc5_state, st->cont, ACTIVATION_TANH);
celt_assert(FWC6_STATE_SIZE == model->fwc6_cont_fc_0.nb_outputs);
compute_generic_dense(&model->fwc6_cont_fc_0, st->fwc6_state, st->cont, ACTIVATION_TANH);
celt_assert(FWC7_STATE_SIZE == model->fwc7_cont_fc_0.nb_outputs);
compute_generic_dense(&model->fwc7_cont_fc_0, st->fwc7_state, st->cont, ACTIVATION_TANH);
/* 1 means "continuation done, first subframe not yet run"; the first call to
   run_fwgan_subframe() moves this to 2. */
st->cont_initialized = 1;
/* Process the first frame, discard the first subframe, and keep the rest for the first
synthesis call. */
fwgan_synthesize_impl(st, new_pcm, lpc, features0);
OPUS_COPY(st->pcm_buf, &new_pcm[SUBFRAME_SIZE], FWGAN_FRAME_SIZE-SUBFRAME_SIZE);
}
/* Scales one subframe by the gain stored from the previous call, then stores
   the gain derived from c0, 10^(0.5*c0/sqrt(18)), in *last_gain for the next
   call. The gain therefore takes effect one subframe late. */
static void apply_gain(float *pcm, float c0, float *last_gain) {
  const float previous = *last_gain;
  int n;
  for (n = 0; n < SUBFRAME_SIZE; n++) {
    pcm[n] *= previous;
  }
  *last_gain = pow(10.f, (0.5f*c0/sqrt(18.f)));
}
/* All-pole synthesis filter over one subframe, in place.
   Each output sample is the input minus the dot product of the filter memory
   (most recent output first) with last_lpc, i.e. the coefficients stored by
   the previous call. After filtering, `lpc` is copied into last_lpc so that
   it is the set used by the next call. */
static void fwgan_lpc_syn(float *pcm, float *mem, const float *lpc, float last_lpc[LPC_ORDER]) {
  int n;
  for (n = 0; n < SUBFRAME_SIZE; n++) {
    int tap;
    for (tap = 0; tap < LPC_ORDER; tap++) {
      pcm[n] -= mem[tap]*last_lpc[tap];
    }
    /* Shift the history by one and insert the new output at the front. */
    for (tap = LPC_ORDER-1; tap > 0; tap--) {
      mem[tap] = mem[tap-1];
    }
    mem[0] = pcm[n];
  }
  OPUS_COPY(last_lpc, lpc, LPC_ORDER);
}
/* First-order pre-emphasis over one subframe, in place:
   y[n] = x[n] - FWGAN_DEEMPHASIS*x[n-1]. *preemph_mem carries the last input
   sample from one call to the next. */
static void fwgan_preemphasis(float *pcm, float *preemph_mem) {
  int n = 0;
  while (n < SUBFRAME_SIZE) {
    const float input = pcm[n];
    pcm[n] -= FWGAN_DEEMPHASIS * *preemph_mem;
    *preemph_mem = input;
    n++;
  }
}
/* First-order de-emphasis over one subframe, in place:
   y[n] = x[n] + FWGAN_DEEMPHASIS*y[n-1]. *deemph_mem carries the last output
   sample from one call to the next. */
static void fwgan_deemphasis(float *pcm, float *deemph_mem) {
  int n = 0;
  while (n < SUBFRAME_SIZE) {
    pcm[n] += FWGAN_DEEMPHASIS * *deemph_mem;
    *deemph_mem = pcm[n];
    n++;
  }
}
/* Synthesizes one subframe of SUBFRAME_SIZE samples into pcm.
   st    synthesis state (layer states and filter memories are updated)
   cond  this subframe's quarter of the upsampler output
   w0    phase increment per sample for the pitch embedding
   lpc   LPC coefficients of the current frame
   c0    first feature, from which the gain is derived */
static void run_fwgan_subframe(FWGANState *st, float *pcm, const float *cond, double w0, const float *lpc, float c0)
{
/* tmp1/tmp2 are ping-pong buffers for the layer chain below. */
float tmp1[FWC1_FC_0_OUT_SIZE];
float tmp2[IMAX(RNN_GRU_STATE_SIZE, FWC2_FC_0_OUT_SIZE)];
float feat_in[FEAT_IN_SIZE];
float rnn_in[FEAT_IN_CONV1_CONV_OUT_SIZE];
float pembed[FWGAN_FRAME_SIZE/2];
FWGAN *model;
model = &st->model;
pitch_embeddings(pembed, st->embed_phase, w0);
/* Interleave bfcc_cond and pembed for each subframe in feat_in. */
/* NOTE(review): the pitch embedding is written at offset 0 with length
   FWGAN_FRAME_SIZE/2 and the conditioning right after offset
   BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4; the two regions only tile feat_in
   without overlap if those two sizes are equal -- confirm. */
OPUS_COPY(&feat_in[BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4], &cond[0], BFCC_WITH_CORR_UPSAMPLER_FC_OUT_SIZE/4);
OPUS_COPY(&feat_in[0], &pembed[0], FWGAN_FRAME_SIZE/2);
compute_generic_conv1d(&model->feat_in_conv1_conv, rnn_in, st->cont_conv1_mem, feat_in, FEAT_IN_CONV1_CONV_IN_SIZE, ACTIVATION_LINEAR);
celt_assert(FEAT_IN_NL1_GATE_OUT_SIZE == model->feat_in_nl1_gate.nb_outputs);
compute_gated_activation(&model->feat_in_nl1_gate, rnn_in, rnn_in, ACTIVATION_TANH);
if (st->cont_initialized == 1) {
/* On the very first subframe we stop here. We only want to run the feat_in layer since the
others are initialized via the continuation network. */
OPUS_CLEAR(pcm, SUBFRAME_SIZE);
st->cont_initialized = 2;
/* Still record this subframe's gain and LPC so that the next subframe sees
   them as the "previous" values. */
apply_gain(pcm, c0, &st->last_gain);
OPUS_COPY(st->last_lpc, lpc, LPC_ORDER);
return;
}
compute_generic_gru(&model->rnn_gru_input, &model->rnn_gru_recurrent, st->rnn_state, rnn_in);
celt_assert(IMAX(RNN_GRU_STATE_SIZE, FWC2_FC_0_OUT_SIZE) >= model->rnn_nl_gate.nb_outputs);
compute_gated_activation(&model->rnn_nl_gate, tmp2, st->rnn_state, ACTIVATION_TANH);
/* Seven conv + gated-activation stages, alternating between tmp1 and tmp2;
   the last gated activation writes straight into pcm. */
compute_generic_conv1d(&model->fwc1_fc_0, tmp1, st->fwc1_state, tmp2, RNN_GRU_STATE_SIZE, ACTIVATION_LINEAR);
compute_gated_activation(&model->fwc1_fc_1_gate, tmp1, tmp1, ACTIVATION_TANH);
compute_generic_conv1d(&model->fwc2_fc_0, tmp2, st->fwc2_state, tmp1, FWC1_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
compute_gated_activation(&model->fwc2_fc_1_gate, tmp2, tmp2, ACTIVATION_TANH);
compute_generic_conv1d(&model->fwc3_fc_0, tmp1, st->fwc3_state, tmp2, FWC2_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
compute_gated_activation(&model->fwc3_fc_1_gate, tmp1, tmp1, ACTIVATION_TANH);
compute_generic_conv1d(&model->fwc4_fc_0, tmp2, st->fwc4_state, tmp1, FWC3_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
compute_gated_activation(&model->fwc4_fc_1_gate, tmp2, tmp2, ACTIVATION_TANH);
compute_generic_conv1d(&model->fwc5_fc_0, tmp1, st->fwc5_state, tmp2, FWC4_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
compute_gated_activation(&model->fwc5_fc_1_gate, tmp1, tmp1, ACTIVATION_TANH);
compute_generic_conv1d(&model->fwc6_fc_0, tmp2, st->fwc6_state, tmp1, FWC5_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
compute_gated_activation(&model->fwc6_fc_1_gate, tmp2, tmp2, ACTIVATION_TANH);
compute_generic_conv1d(&model->fwc7_fc_0, tmp1, st->fwc7_state, tmp2, FWC6_FC_0_OUT_SIZE, ACTIVATION_LINEAR);
compute_gated_activation(&model->fwc7_fc_1_gate, pcm, tmp1, ACTIVATION_TANH);
/* Post-processing of the network output: gain, pre-emphasis, LPC synthesis,
   de-emphasis, in that order. */
apply_gain(pcm, c0, &st->last_gain);
fwgan_preemphasis(pcm, &st->preemph_mem);
fwgan_lpc_syn(pcm, st->syn_mem, lpc, st->last_lpc);
fwgan_deemphasis(pcm, &st->deemph_mem);
}
/* Resets the whole state to zero and initializes the model from the built-in
   weight arrays (fwgan_arrays). A failure of init_fwgan() is only reported
   through celt_assert(). */
void fwgan_init(FWGANState *st)
{
  int ret;
  OPUS_CLEAR(st, 1);
  ret = init_fwgan(&st->model, fwgan_arrays);
  celt_assert(ret == 0);
  /* ret is otherwise only read inside the assertion; keep it "used" in
     builds where the assertion expands to nothing. */
  (void)ret;
  /* FIXME: perform arch detection. */
}
/* Initializes the model from a serialized weight blob.
   st    state whose model is (re)initialized
   data  pointer to the weight data
   len   length of the weight data in bytes
   Returns 0 on success and -1 if no weight list could be obtained from the
   blob or init_fwgan() reported a failure. */
int fwgan_load_model(FWGANState *st, const unsigned char *data, int len) {
  WeightArray *list = NULL;
  int ret;
  parse_weights(&list, data, len);
  /* Never hand a missing list to init_fwgan(). This assumes parse_weights()
     leaves the list NULL when it cannot parse the data -- TODO confirm. */
  if (list == NULL) return -1;
  ret = init_fwgan(&st->model, list);
  opus_free(list);
  return (ret == 0) ? 0 : -1;
}
/* Synthesizes one frame (NB_SUBFRAMES subframes) into pcm.
   The network input is the feature vector with its second-to-last element
   dropped and 0.5 added to the last one. The pitch period is derived from
   features[NB_BANDS] and converted to a per-sample phase increment, and the
   upsampler output is split into four equal parts, one per subframe.
   Requires that the state was initialized by fwgan_cont(). */
static void fwgan_synthesize_impl(FWGANState *st, float *pcm, const float *features)
;
/* Produces one frame of FWGAN_FRAME_SIZE float samples in pcm.
   The output lags the synthesis by three subframes: it is made of the
   FWGAN_FRAME_SIZE-SUBFRAME_SIZE samples buffered by the previous call
   followed by the first subframe of the newly synthesized frame; the rest
   of the new frame is buffered in st->pcm_buf for the next call. */
void fwgan_synthesize(FWGANState *st, float *pcm, const float *features)
{
  float fresh[FWGAN_FRAME_SIZE];
  float lpc[LPC_ORDER];
  const int carried = FWGAN_FRAME_SIZE-SUBFRAME_SIZE;
  compute_wlpc(lpc, features);
  fwgan_synthesize_impl(st, fresh, lpc, features);
  /* Output = buffered tail of the previous frame + head of the new one. */
  OPUS_COPY(pcm, st->pcm_buf, carried);
  OPUS_COPY(pcm + carried, fresh, SUBFRAME_SIZE);
  /* Keep the remainder of the new frame for the next call. */
  OPUS_COPY(st->pcm_buf, fresh + SUBFRAME_SIZE, carried);
}
/* 16-bit integer front end to fwgan_synthesize().
   Synthesizes one frame in float, scales it by 32768, clamps it to
   [-32767, 32767] and rounds to the nearest integer.
   pcm must have room for FWGAN_FRAME_SIZE samples. */
void fwgan_synthesize_int(FWGANState *st, opus_int16 *pcm, const float *features)
{
  int i;
  float fpcm[FWGAN_FRAME_SIZE];
  fwgan_synthesize(st, fpcm, features);
  /* Convert exactly as many samples as fwgan_synthesize() produced; the
     bound is the size of fpcm rather than a constant from another module. */
  for (i=0;i<FWGAN_FRAME_SIZE;i++) pcm[i] = (int)floor(.5 + MIN32(32767, MAX32(-32767, 32768.f*fpcm[i])));
}

83
dnn/fwgan.h Normal file
View File

@ -0,0 +1,83 @@
/* Copyright (c) 2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef FWGAN_H
#define FWGAN_H
#include "freq.h"
#include "fwgan_data.h"
/* NOTE(review): same value as CONT_PCM_INPUTS below; presumably the number of
samples callers must pass to fwgan_cont() -- confirm. */
#define FWGAN_CONT_SAMPLES 320
/* A frame is made of NB_SUBFRAMES subframes of SUBFRAME_SIZE samples (160). */
#define NB_SUBFRAMES 4
#define SUBFRAME_SIZE 40
#define FWGAN_FRAME_SIZE (NB_SUBFRAMES*SUBFRAME_SIZE)
/* Number of past PCM samples read by fwgan_cont(). */
#define CONT_PCM_INPUTS 320
/* Size of the scratch buffers used by the continuation network. */
#define MAX_CONT_SIZE CONT_NET_0_OUT_SIZE
/* Per-tap weighting factor applied to the LPC coefficients. */
#define FWGAN_GAMMA 0.92f
/* Coefficient of the first-order pre-/de-emphasis filters. */
#define FWGAN_DEEMPHASIS 0.85f
/* FIXME: Derive those from the model rather than hardcoding. */
#define FWC1_STATE_SIZE 512
#define FWC2_STATE_SIZE 512
#define FWC3_STATE_SIZE 256
#define FWC4_STATE_SIZE 256
#define FWC5_STATE_SIZE 128
#define FWC6_STATE_SIZE 128
#define FWC7_STATE_SIZE 80
/* Complete state of one FWGAN synthesizer instance. */
typedef struct {
/* Network weights. */
FWGAN model;
/* Architecture selector passed to celt_inner_prod(). */
int arch;
/* 0 after fwgan_init(), 1 once fwgan_cont() has run, 2 after the first
   subframe has been processed. */
int cont_initialized;
/* Pitch-embedding phase as (real, imaginary). */
float embed_phase[2];
/* Gain computed for the previous subframe; applied to the current one. */
float last_gain;
/* LPC coefficients of the previous subframe; used by the synthesis filter. */
float last_lpc[LPC_ORDER];
/* Synthesis filter history, most recent sample first. */
float syn_mem[LPC_ORDER];
/* One-sample memories of the pre-emphasis and de-emphasis filters. */
float preemph_mem;
float deemph_mem;
/* Synthesized samples held back for the next fwgan_synthesize() call (the
   first FWGAN_FRAME_SIZE-SUBFRAME_SIZE entries are used). */
float pcm_buf[FWGAN_FRAME_SIZE];
/* Output of the continuation network. */
float cont[CONT_NET_10_OUT_SIZE];
/* Memory of the input convolution. */
float cont_conv1_mem[FEAT_IN_CONV1_CONV_STATE_SIZE];
/* Recurrent state of the GRU. */
float rnn_state[RNN_GRU_STATE_SIZE];
/* Memories of the seven convolution stages that follow the GRU. */
float fwc1_state[FWC1_STATE_SIZE];
float fwc2_state[FWC2_STATE_SIZE];
float fwc3_state[FWC3_STATE_SIZE];
float fwc4_state[FWC4_STATE_SIZE];
float fwc5_state[FWC5_STATE_SIZE];
float fwc6_state[FWC6_STATE_SIZE];
float fwc7_state[FWC7_STATE_SIZE];
} FWGANState;
/* Clears the state and loads the built-in weights. */
void fwgan_init(FWGANState *st);
/* Loads weights from a binary blob; returns 0 on success, -1 on failure. */
int fwgan_load_model(FWGANState *st, const unsigned char *data, int len);
/* Primes the state from CONT_PCM_INPUTS past samples (pcm0) and the matching
   features; must be called before synthesis. */
void fwgan_cont(FWGANState *st, const float *pcm0, const float *features0);
/* Synthesizes FWGAN_FRAME_SIZE float samples. */
void fwgan_synthesize(FWGANState *st, float *pcm, const float *features);
/* Same as fwgan_synthesize(), with 16-bit integer output. */
void fwgan_synthesize_int(FWGANState *st, opus_int16 *pcm, const float *features);
#endif /* FWGAN_H */

81
dnn/kiss99.c Normal file
View File

@ -0,0 +1,81 @@
/*Daala video codec
Copyright (c) 2012 Daala project contributors. All rights reserved.
Author: Timothy B. Terriberry
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "kiss99.h"
/*Seeds the generator from an arbitrary byte string.
  The four state words start from fixed constants; the seed bytes are then
   XORed in four at a time (one byte per state word), stepping the generator
   after each complete group.
  Up to three leftover bytes are mixed into z, w and jsr without a further
   step.
  _data may be shorter than four bytes, or empty.*/
void kiss99_srand(kiss99_ctx *_this,const unsigned char *_data,int _ndata){
  int pos;
  _this->z=362436069;
  _this->w=521288629;
  _this->jsr=123456789;
  _this->jcong=380116160;
  /*pos is the index of the first byte of the current group of four.*/
  pos=0;
  while(pos+3<_ndata){
    _this->z^=_data[pos];
    _this->w^=_data[pos+1];
    _this->jsr^=_data[pos+2];
    _this->jcong^=_data[pos+3];
    kiss99_rand(_this);
    pos+=4;
  }
  /*Partial final group (0 to 3 bytes).*/
  if(pos<_ndata)_this->z^=_data[pos];
  if(pos+1<_ndata)_this->w^=_data[pos+1];
  if(pos+2<_ndata)_this->jsr^=_data[pos+2];
  /*Nudge the state away from the values that lead to short cycles.
    Given the way the state is initialized these are unlikely, but they are
     possible, so rule them out.
    See Gregory G. Rose: "KISS: A Bit Too Simple", Cryptography and
     Communications, vol. 10, pp. 123--137, 2018.*/
  if(_this->z==0||_this->z==0x9068FFFF)_this->z++;
  if(_this->w==0||_this->w==0x464FFFFF)_this->w++;
  if(_this->jsr==0)_this->jsr++;
}
/*Advances the generator by one step and returns the next 32-bit value.
  The result combines three sub-generators: a pair of 16-bit
   multiply-with-carry generators (z, w), a 3-shift xorshift register (jsr)
   and a linear congruential generator (jcong).*/
uint32_t kiss99_rand(kiss99_ctx *_this){
  uint32_t z_next;
  uint32_t w_next;
  uint32_t xs;
  uint32_t lcg;
  z_next=36969*(_this->z&0xFFFF)+(_this->z>>16);
  w_next=18000*(_this->w&0xFFFF)+(_this->w>>16);
  /*The shift amounts 13 and 17 are swapped with respect to the original 1999
     algorithm so that the register has a single cycle of maximal length,
     matching KISS11.
    KISS11 itself is not used because of the impractically large (16 MB)
     internal state of the full algorithm.*/
  xs=_this->jsr;
  xs^=xs<<13;
  xs^=xs>>17;
  xs^=xs<<5;
  lcg=69069*_this->jcong+1234567;
  _this->z=z_next;
  _this->w=w_next;
  _this->jsr=xs;
  _this->jcong=lcg;
  return (((z_next<<16)+w_next)^lcg)+xs;
}

46
dnn/kiss99.h Normal file
View File

@ -0,0 +1,46 @@
/*Daala video codec
Copyright (c) 2012 Daala project contributors. All rights reserved.
Author: Timothy B. Terriberry
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
- Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS AS IS
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.*/
#if !defined(_kiss99_H)
# define _kiss99_H (1)
# include <stdint.h>
/*KISS PRNG from George Marsaglia (1999 version).
See https://en.wikipedia.org/wiki/KISS_(algorithm) for details.
This is suitable for simulations, but not for use in cryptographic contexts.*/
/*Generator state: four 32-bit words, one or two per sub-generator.*/
typedef struct kiss99_ctx kiss99_ctx;
struct kiss99_ctx{
/*z and w feed the two multiply-with-carry recurrences in kiss99_rand().*/
uint32_t z;
uint32_t w;
/*Shift-register state; kiss99_srand() keeps it non-zero.*/
uint32_t jsr;
/*Linear congruential generator state.*/
uint32_t jcong;
};
/*Seeds the generator from _ndata bytes at _data.*/
void kiss99_srand(kiss99_ctx *_this,const unsigned char *_data,int _ndata);
/*Returns the next 32-bit pseudo-random value and advances the state.*/
uint32_t kiss99_rand(kiss99_ctx *_this);
#endif

134
dnn/lossgen.c Normal file
View File

@ -0,0 +1,134 @@
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "arch.h"
#include <math.h>
#include "lossgen.h"
#include "os_support.h"
#include "nnet.h"
#include "assert.h"
/* Disable RTCD for this. */
#define RTCD_ARCH c
/* Override assert to avoid undefined/redefined symbols. */
#undef celt_assert
#define celt_assert assert
/* Directly include the C files we need since the symbols won't be exposed if we link in a shared object. */
#include "parse_lpcnet_weights.c"
#include "nnet_arch.h"
#undef compute_linear
#undef compute_activation
/* Force the C version since the SIMD versions may be hidden. */
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_c(linear, out, in))
#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_c(output, input, N, activation))
#define MAX_RNN_NEURONS_ALL IMAX(LOSSGEN_GRU1_STATE_SIZE, LOSSGEN_GRU2_STATE_SIZE)
/* These two functions are copied from nnet.c to make sure we don't have linking issues. */
/* One GRU step, updating `state` in place.
   input_weights      maps `in` to the 3*N gate pre-activations
   recurrent_weights  maps `state` (N values) to 3*N recurrent terms
   state              N-element recurrent state, read and overwritten
   in                 input vector; must not alias `state`
   arch               forwarded to the linear/activation helpers
   The gate buffer is laid out as [z | r | h], each N elements long. */
void compute_generic_gru_lossgen(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch)
{
  float gates[3*MAX_RNN_NEURONS_ALL];
  float rec[3*MAX_RNN_NEURONS_ALL];
  float *z_gate;
  float *r_gate;
  float *cand;
  int n;
  int k;
  celt_assert(3*recurrent_weights->nb_inputs == recurrent_weights->nb_outputs);
  celt_assert(input_weights->nb_outputs == recurrent_weights->nb_outputs);
  celt_assert(recurrent_weights->nb_outputs <= 3*MAX_RNN_NEURONS_ALL);
  celt_assert(in != state);
  n = recurrent_weights->nb_inputs;
  z_gate = gates;
  r_gate = gates + n;
  cand = gates + 2*n;
  compute_linear(input_weights, gates, in, arch);
  compute_linear(recurrent_weights, rec, state, arch);
  /* z and r: sigmoid of input term plus recurrent term. */
  for (k = 0; k < 2*n; k++) {
    gates[k] += rec[k];
  }
  compute_activation(gates, gates, 2*n, ACTIVATION_SIGMOID, arch);
  /* Candidate: tanh of input term plus r-gated recurrent term. */
  for (k = 0; k < n; k++) {
    cand[k] += rec[2*n+k]*r_gate[k];
  }
  compute_activation(cand, cand, n, ACTIVATION_TANH, arch);
  /* New state: z keeps the old state, (1-z) takes the candidate. */
  for (k = 0; k < n; k++) {
    state[k] = z_gate[k]*state[k] + (1-z_gate[k])*cand[k];
  }
}
/* Fully-connected layer: applies the linear map of `layer` to `input`, then
   the requested activation, in place, to the layer->nb_outputs results in
   `output`. */
void compute_generic_dense_lossgen(const LinearLayer *layer, float *output, const float *input, int activation, int arch)
{
  const int nb_out = layer->nb_outputs;
  compute_linear(layer, output, input, arch);
  compute_activation(output, output, nb_out, activation, arch);
}
/* Draws the next loss decision.
   The network (dense -> GRU -> GRU -> dense with sigmoid output) is fed the
   previous decision and percent_loss and produces a single value, which is
   compared against a uniform draw from rand(): the result is 1 when the draw
   falls below the network output and 0 otherwise. The decision is stored in
   st->last_loss and returned. */
int sample_loss(
  LossGenState *st,
  float percent_loss)
{
  LossGen *model = &st->model;
  float hidden[LOSSGEN_DENSE_IN_OUT_SIZE];
  float net_in[2];
  float prob;
  net_in[0] = st->last_loss;
  net_in[1] = percent_loss;
  compute_generic_dense_lossgen(&model->lossgen_dense_in, hidden, net_in, ACTIVATION_TANH, 0);
  compute_generic_gru_lossgen(&model->lossgen_gru1_input, &model->lossgen_gru1_recurrent, st->gru1_state, hidden, 0);
  compute_generic_gru_lossgen(&model->lossgen_gru2_input, &model->lossgen_gru2_recurrent, st->gru2_state, st->gru1_state, 0);
  compute_generic_dense_lossgen(&model->lossgen_dense_out, &prob, st->gru2_state, ACTIVATION_SIGMOID, 0);
  st->last_loss = (float)rand()/RAND_MAX < prob;
  return st->last_loss;
}
/* Clears the state and, unless USE_WEIGHTS_FILE is defined, initializes the
   model from the built-in weight arrays. With USE_WEIGHTS_FILE the model is
   left cleared. */
void lossgen_init(LossGenState *st)
{
  int ret;
  OPUS_CLEAR(st, 1);
  ret = 0;
#ifndef USE_WEIGHTS_FILE
  ret = init_lossgen(&st->model, lossgen_arrays);
#endif
  celt_assert(ret == 0);
  /* Avoids an unused-variable warning when the assertion is compiled out. */
  (void)ret;
}
/* Initializes the model from a serialized weight blob.
   st    state whose model is (re)initialized
   data  pointer to the weight data
   len   length of the weight data in bytes
   Returns 0 on success and -1 if no weight list could be obtained from the
   blob or init_lossgen() reported a failure. */
int lossgen_load_model(LossGenState *st, const unsigned char *data, int len) {
  WeightArray *list = NULL;
  int ret;
  parse_weights(&list, data, len);
  /* Never hand a missing list to init_lossgen(). This assumes parse_weights()
     leaves the list NULL when it cannot parse the data -- TODO confirm. */
  if (list == NULL) return -1;
  ret = init_lossgen(&st->model, list);
  opus_free(list);
  return (ret == 0) ? 0 : -1;
}
#if 0
#include <stdio.h>
/* Disabled test driver: prints <length> loss decisions, one per line, for the
   given loss percentage. */
int main(int argc, char **argv) {
int i, N;
float p;
LossGenState st;
if (argc!=3) {
fprintf(stderr, "usage: lossgen <percentage> <length>\n");
return 1;
}
lossgen_init(&st);
/* atof()/atoi() do not report malformed input. */
p = atof(argv[1]);
N = atoi(argv[2]);
for (i=0;i<N;i++) {
printf("%d\n", sample_loss(&st, p));
}
}
#endif

28
dnn/lossgen.h Normal file
View File

@ -0,0 +1,28 @@
#ifndef LOSSGEN_H
#define LOSSGEN_H
#include "lossgen_data.h"
/* NOTE(review): these pitch constants are not referenced by the loss
generator code visible here -- confirm whether they belong in this header. */
#define PITCH_MIN_PERIOD 32
#define PITCH_MAX_PERIOD 256
/* One value per candidate period in [PITCH_MIN_PERIOD, PITCH_MAX_PERIOD) (224). */
#define NB_XCORR_FEATURES (PITCH_MAX_PERIOD-PITCH_MIN_PERIOD)
/* State of the packet-loss generator. */
typedef struct {
/* Network weights. */
LossGen model;
/* Recurrent states of the two GRU layers. */
float gru1_state[LOSSGEN_GRU1_STATE_SIZE];
float gru2_state[LOSSGEN_GRU2_STATE_SIZE];
/* Previous decision returned by sample_loss() (0 or 1); fed back as input. */
int last_loss;
} LossGenState;
/* Clears the state and loads the built-in weights when they are compiled in. */
void lossgen_init(LossGenState *st);
/* Loads weights from a binary blob; returns 0 on success, -1 on failure. */
int lossgen_load_model(LossGenState *st, const unsigned char *data, int len);
/* Returns the next loss decision (1 = lost, 0 = received) for the requested
   loss level. */
int sample_loss(
LossGenState *st,
float percent_loss);
#endif

283
dnn/lpcnet.c Normal file
View File

@ -0,0 +1,283 @@
/* Copyright (c) 2018 Mozilla */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <math.h>
#include <stdio.h>
#include "nnet_data.h"
#include "nnet.h"
#include "common.h"
#include "arch.h"
#include "lpcnet.h"
#include "lpcnet_private.h"
#include "os_support.h"
#define PREEMPH 0.85f
#define PDF_FLOOR 0.002
#define FRAME_INPUT_SIZE (NB_FEATURES + EMBED_PITCH_OUT_SIZE)
#if 0
/* Debug helper: prints the N values of x on one line, each followed by a
   space, then a newline. */
static void print_vector(float *x, int N)
{
  const float *p = x;
  int left = N;
  while (left > 0) {
    printf("%f ", *p);
    p++;
    left--;
  }
  printf("\n");
}
#endif
#ifdef END2END
/* Converts LPC_ORDER reflection coefficients (rc) into LPC coefficients
   (lpc) with a step-up recursion: at stage i the first i coefficients are
   updated from the previous stage using coefficient i. */
void rc2lpc(float *lpc, const float *rc)
{
  float cur[LPC_ORDER];
  float next[LPC_ORDER] = {0.0};
  int stage;
  int k;
  for (k = 0; k < LPC_ORDER; k++) cur[k] = rc[k];
  for (stage = 0; stage < LPC_ORDER; stage++)
  {
    /* Compute the whole stage into `next` first: every output depends on
       not-yet-updated values of `cur`. */
    for (k = 0; k < stage; k++)
    {
      next[k] = cur[k] + cur[stage]*cur[stage - k - 1];
    }
    for (k = 0; k < stage; k++)
    {
      cur[k] = next[k];
    }
  }
  for (k = 0; k < LPC_ORDER; k++) lpc[k] = cur[k];
}
#endif
/* Runs the frame-rate part of the network for one feature vector.
   lpcnet           state (conv memories, LPC history, frame counter updated)
   gru_a_condition  output: conditioning for GRU A
   gru_b_condition  output: conditioning for GRU B
   lpc              output: LPC_ORDER coefficients for this frame
   features         input: NB_FEATURES values */
void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features)
{
NNetState *net;
float condition[FEATURE_DENSE2_OUT_SIZE];
float in[FRAME_INPUT_SIZE];
float conv1_out[FEATURE_CONV1_OUT_SIZE];
float conv2_out[FEATURE_CONV2_OUT_SIZE];
float dense1_out[FEATURE_DENSE1_OUT_SIZE];
int pitch;
float rc[LPC_ORDER];
/* Matches the Python code -- the 0.1 avoids rounding issues. */
pitch = (int)floor(.1 + 50*features[NB_BANDS]+100);
/* Keep the embedding index within [33, 255]. */
pitch = IMIN(255, IMAX(33, pitch));
net = &lpcnet->nnet;
/* Network input = features followed by the pitch embedding. */
OPUS_COPY(in, features, NB_FEATURES);
compute_embedding(&lpcnet->model.embed_pitch, &in[NB_FEATURES], pitch);
compute_conv1d(&lpcnet->model.feature_conv1, conv1_out, net->feature_conv1_state, in);
/* Until enough frames have been seen, the convolution outputs are zeroed. */
if (lpcnet->frame_count < FEATURE_CONV1_DELAY) OPUS_CLEAR(conv1_out, FEATURE_CONV1_OUT_SIZE);
compute_conv1d(&lpcnet->model.feature_conv2, conv2_out, net->feature_conv2_state, conv1_out);
if (lpcnet->frame_count < FEATURES_DELAY) OPUS_CLEAR(conv2_out, FEATURE_CONV2_OUT_SIZE);
_lpcnet_compute_dense(&lpcnet->model.feature_dense1, dense1_out, conv2_out);
_lpcnet_compute_dense(&lpcnet->model.feature_dense2, condition, dense1_out);
/* The first LPC_ORDER conditioning values are kept as reflection
   coefficients; they are only consumed in END2END builds. */
OPUS_COPY(rc, condition, LPC_ORDER);
_lpcnet_compute_dense(&lpcnet->model.gru_a_dense_feature, gru_a_condition, condition);
_lpcnet_compute_dense(&lpcnet->model.gru_b_dense_feature, gru_b_condition, condition);
#ifdef END2END
rc2lpc(lpc, rc);
#elif FEATURES_DELAY>0
/* Output the LPC computed FEATURES_DELAY frames ago, shift the history by
   one slot, and store the LPC of the current features in slot 0. */
memcpy(lpc, lpcnet->old_lpc[FEATURES_DELAY-1], LPC_ORDER*sizeof(lpc[0]));
memmove(lpcnet->old_lpc[1], lpcnet->old_lpc[0], (FEATURES_DELAY-1)*LPC_ORDER*sizeof(lpc[0]));
lpc_from_cepstrum(lpcnet->old_lpc[0], features);
#else
lpc_from_cepstrum(lpc, features);
#endif
#ifdef LPC_GAMMA
lpc_weighting(lpc, LPC_GAMMA);
#endif
/* The counter only matters for the start-up checks above, so it saturates. */
if (lpcnet->frame_count < 1000) lpcnet->frame_count++;
}
/* Queues one feature vector in lpcnet->feature_buffer without running the
   frame network. The buffer keeps at most (conv1 kernel + conv2 kernel - 2)
   vectors; once full, the oldest vector is dropped to make room. */
void run_frame_network_deferred(LPCNetState *lpcnet, const float *features)
{
  const int capacity = lpcnet->model.feature_conv1.kernel_size + lpcnet->model.feature_conv2.kernel_size - 2;
  celt_assert(capacity <= MAX_FEATURE_BUFFER_SIZE);
  if (lpcnet->feature_buffer_fill != capacity) {
    /* Still room: the new vector takes the next free slot. */
    lpcnet->feature_buffer_fill++;
  } else {
    /* Full: shift everything down by one vector, discarding the oldest. */
    OPUS_MOVE(lpcnet->feature_buffer, &lpcnet->feature_buffer[NB_FEATURES], (capacity-1)*NB_FEATURES);
  }
  OPUS_COPY(&lpcnet->feature_buffer[(lpcnet->feature_buffer_fill-1)*NB_FEATURES], features, NB_FEATURES);
}
/* Runs the frame network on every vector queued by
   run_frame_network_deferred(), oldest first, then empties the queue.
   The conditioning and LPC outputs are discarded; only the state updates
   made by run_frame_network() are kept. */
void run_frame_network_flush(LPCNetState *lpcnet)
{
  float unused_lpc[LPC_ORDER];
  float unused_a[3*GRU_A_STATE_SIZE];
  float unused_b[3*GRU_B_STATE_SIZE];
  int frame = 0;
  while (frame < lpcnet->feature_buffer_fill) {
    run_frame_network(lpcnet, unused_a, unused_b, unused_lpc, &lpcnet->feature_buffer[frame*NB_FEATURES]);
    frame++;
  }
  lpcnet->feature_buffer_fill = 0;
}
/* Runs the sample-rate part of the network for one sample and returns the
   value drawn by sample_mdense(). The GRU A input is built from its per-frame
   conditioning plus embeddings of last_sig, pred and last_exc; GRU B is fed
   the updated GRU A state together with its own conditioning. Both GRU
   states in lpcnet->nnet are updated. */
int run_sample_network(LPCNetState *lpcnet, const float *gru_a_condition, const float *gru_b_condition, int last_exc, int last_sig, int pred, const float *sampling_logit_table, kiss99_ctx *rng)
{
  float a_input[3*GRU_A_STATE_SIZE];
  float b_input[3*GRU_B_STATE_SIZE];
  float b_feed[GRU_A_STATE_SIZE+FEATURE_DENSE2_OUT_SIZE];
  NNetState *state = &lpcnet->nnet;
#if 1
  compute_gru_a_input(a_input, gru_a_condition, GRU_A_STATE_SIZE, &lpcnet->model.gru_a_embed_sig, last_sig, &lpcnet->model.gru_a_embed_pred, pred, &lpcnet->model.gru_a_embed_exc, last_exc);
#else
  OPUS_COPY(a_input, gru_a_condition, 3*GRU_A_STATE_SIZE);
  accum_embedding(&lpcnet->model.gru_a_embed_sig, a_input, last_sig);
  accum_embedding(&lpcnet->model.gru_a_embed_pred, a_input, pred);
  accum_embedding(&lpcnet->model.gru_a_embed_exc, a_input, last_exc);
#endif
  /*compute_gru3(&gru_a, net->gru_a_state, gru_a_input);*/
  compute_sparse_gru(&lpcnet->model.sparse_gru_a, state->gru_a_state, a_input);
  /* GRU B sees the freshly updated GRU A state. */
  OPUS_COPY(b_feed, state->gru_a_state, GRU_A_STATE_SIZE);
  OPUS_COPY(b_input, gru_b_condition, 3*GRU_B_STATE_SIZE);
  compute_gruB(&lpcnet->model.gru_b, b_input, state->gru_b_state, b_feed);
  return sample_mdense(&lpcnet->model.dual_fc, state->gru_b_state, sampling_logit_table, rng);
}
/* Returns the size in bytes of an LPCNetState, for callers that allocate the
   state themselves before calling lpcnet_init(). Defined with an explicit
   (void) parameter list so the definition is a real prototype. */
int lpcnet_get_size(void)
{
  return sizeof(LPCNetState);
}
/* Resets the synthesis state: zeroes every field from LPCNET_RESET_START to
   the end of the structure, sets last_exc to the code for a zero sample and
   re-seeds the random generator with a fixed string, so the state after a
   reset is always the same. Fields before LPCNET_RESET_START are untouched. */
void lpcnet_reset(LPCNetState *lpcnet)
{
  const char *seed = "LPCNet";
  char *reset_from = (char*)&lpcnet->LPCNET_RESET_START;
  OPUS_CLEAR(reset_from, sizeof(LPCNetState) - (reset_from - (char*)lpcnet));
  lpcnet->last_exc = lin2ulaw(0.f);
  kiss99_srand(&lpcnet->rng, (const unsigned char *)seed, strlen(seed));
}
/* Initializes a caller-allocated synthesis state: fills the 256-entry
   sampling logit table, loads the built-in model unless USE_WEIGHTS_FILE is
   defined, and resets the state. Returns the model initialization result
   (0 on success; always 0 when USE_WEIGHTS_FILE is defined). */
int lpcnet_init(LPCNetState *lpcnet)
{
  int ret = 0;
  int level;
  for (level=0;level<256;level++) {
    /* Probabilities span [.025, .975]; the table stores their logit. */
    const float prob = .025f+.95f*level/255.f;
    lpcnet->sampling_logit_table[level] = -log((1-prob)/prob);
  }
#ifndef USE_WEIGHTS_FILE
  ret = init_lpcnet_model(&lpcnet->model, lpcnet_arrays);
#endif
  lpcnet_reset(lpcnet);
  celt_assert(ret == 0);
  return ret;
}
/* Loads the synthesis model from a weights blob of len bytes.
   The weight list produced by parse_weights() is freed before returning.
   Returns 0 when init_lpcnet_model() succeeds and -1 otherwise. */
int lpcnet_load_model(LPCNetState *st, const unsigned char *data, int len) {
  WeightArray *weights;
  int status;
  parse_weights(&weights, data, len);
  status = init_lpcnet_model(&st->model, weights);
  opus_free(weights);
  return (status == 0) ? 0 : -1;
}
LPCNetState *lpcnet_create()
{
LPCNetState *lpcnet;
lpcnet = (LPCNetState *)opus_alloc(lpcnet_get_size(), 1);
OPUS_CLEAR(lpcnet, 1);
lpcnet_init(lpcnet);
return lpcnet;
}
/* Releases a synthesis state obtained from lpcnet_create(). */
void lpcnet_destroy(LPCNetState *lpcnet)
{
  opus_free(lpcnet);
}
/* Clears only the signal-dependent part of the state: both GRU states, the
   LPC_ORDER-sample signal history, the last excitation and the de-emphasis
   memory. The frame-rate network state and the model are left as they are. */
void lpcnet_reset_signal(LPCNetState *lpcnet)
{
  OPUS_CLEAR(lpcnet->nnet.gru_a_state, GRU_A_STATE_SIZE);
  OPUS_CLEAR(lpcnet->nnet.gru_b_state, GRU_B_STATE_SIZE);
  OPUS_CLEAR(lpcnet->last_sig, LPC_ORDER);
  lpcnet->last_exc = lin2ulaw(0.f);
  lpcnet->deemph_mem = 0;
}
/* Generates N output samples using the conditioning and LPC currently stored
   in lpcnet (as written by run_frame_network()).
   The first `preload` entries of output are treated as already-known samples:
   they are fed through the state update but are not overwritten. The
   remaining entries are written with newly synthesized samples. */
void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, opus_int16 *output, int N, int preload)
{
int i;
/* Until more than FEATURES_DELAY frames have been processed, output zeros. */
if (lpcnet->frame_count <= FEATURES_DELAY)
{
OPUS_CLEAR(output, N);
return;
}
for (i=0;i<N;i++)
{
int j;
float pcm;
int exc;
int last_sig_ulaw;
int pred_ulaw;
float pred = 0;
/* Linear prediction from the last LPC_ORDER samples. */
for (j=0;j<LPC_ORDER;j++) pred -= lpcnet->last_sig[j]*lpcnet->lpc[j];
last_sig_ulaw = lin2ulaw(lpcnet->last_sig[0]);
pred_ulaw = lin2ulaw(pred);
/* The sample network is always run so that its GRU states advance, even
   when the sampled excitation is replaced below. */
exc = run_sample_network(lpcnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->last_exc, last_sig_ulaw, pred_ulaw, lpcnet->sampling_logit_table, &lpcnet->rng);
if (i < preload) {
/* Known sample: undo the PREEMPH*deemph_mem term added further down and
   derive the excitation from it instead of using the sampled one. */
exc = lin2ulaw(output[i]-PREEMPH*lpcnet->deemph_mem - pred);
pcm = output[i]-PREEMPH*lpcnet->deemph_mem;
} else {
pcm = pred + ulaw2lin(exc);
}
/* Shift the signal history by one and insert the new sample at index 0. */
OPUS_MOVE(&lpcnet->last_sig[1], &lpcnet->last_sig[0], LPC_ORDER-1);
lpcnet->last_sig[0] = pcm;
lpcnet->last_exc = exc;
/* First-order recursive filter on the output; its memory is stored before
   the clamp below is applied. */
pcm += PREEMPH*lpcnet->deemph_mem;
lpcnet->deemph_mem = pcm;
if (pcm<-32767) pcm = -32767;
if (pcm>32767) pcm = 32767;
/* Only generated samples are written; preloaded ones are left untouched. */
if (i >= preload) output[i] = (int)floor(.5 + pcm);
}
}
/* Processes one feature vector and synthesizes N samples from it: the frame
   network refreshes the conditioning and LPC held in lpcnet, then the
   sample loop writes output (entries below `preload` are taken as given). */
void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N, int preload)
{
  run_frame_network(lpcnet, lpcnet->gru_a_condition, lpcnet->gru_b_condition, lpcnet->lpc, features);
  lpcnet_synthesize_tail_impl(lpcnet, output, N, preload);
}
/* Public entry point: synthesizes N samples from one feature vector with no
   preloaded samples (preload = 0). */
void lpcnet_synthesize(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N)
{
  lpcnet_synthesize_impl(lpcnet, features, output, N, 0);
}

183
dnn/lpcnet.h Normal file
View File

@ -0,0 +1,183 @@
/* Copyright (c) 2018 Mozilla */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef LPCNET_H_
#define LPCNET_H_
#include "opus_types.h"
#define NB_FEATURES 20
#define NB_TOTAL_FEATURES 36
/** Number of audio samples in a feature frame (not for encoding/decoding). */
#define LPCNET_FRAME_SIZE (160)
typedef struct LPCNetState LPCNetState;
typedef struct LPCNetDecState LPCNetDecState;
typedef struct LPCNetEncState LPCNetEncState;
typedef struct LPCNetPLCState LPCNetPLCState;
/** Gets the size of an <code>LPCNetDecState</code> structure.
* @returns The size in bytes.
*/
int lpcnet_decoder_get_size(void);
/** Initializes a previously allocated decoder state
* The memory pointed to by st must be at least the size returned by lpcnet_decoder_get_size().
* This is intended for applications which use their own allocator instead of malloc.
* @see lpcnet_decoder_create(),lpcnet_decoder_get_size()
* @param [in] st <tt>LPCNetDecState*</tt>: Decoder state
* @retval 0 Success
*/
int lpcnet_decoder_init(LPCNetDecState *st);
void lpcnet_reset(LPCNetState *lpcnet);
/** Allocates and initializes a decoder state.
* @returns The newly created state
*/
LPCNetDecState *lpcnet_decoder_create(void);
/** Frees an <code>LPCNetDecState</code> allocated by lpcnet_decoder_create().
* @param[in] st <tt>LPCNetDecState*</tt>: State to be freed.
*/
void lpcnet_decoder_destroy(LPCNetDecState *st);
/** Decodes a packet of LPCNET_COMPRESSED_SIZE bytes (currently 8) into LPCNET_PACKET_SAMPLES samples (currently 640).
* @param [in] st <tt>LPCNetDecState*</tt>: Decoder state
* @param [in] buf <tt>const unsigned char *</tt>: Compressed packet
* @param [out] pcm <tt>opus_int16 *</tt>: Decoded audio
* @retval 0 Success
*/
int lpcnet_decode(LPCNetDecState *st, const unsigned char *buf, opus_int16 *pcm);
/** Gets the size of an <code>LPCNetEncState</code> structure.
* @returns The size in bytes.
*/
int lpcnet_encoder_get_size(void);
/** Initializes a previously allocated encoder state
* The memory pointed to by st must be at least the size returned by lpcnet_encoder_get_size().
* This is intended for applications which use their own allocator instead of malloc.
* @see lpcnet_encoder_create(),lpcnet_encoder_get_size()
* @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
* @retval 0 Success
*/
int lpcnet_encoder_init(LPCNetEncState *st);
int lpcnet_encoder_load_model(LPCNetEncState *st, const unsigned char *data, int len);
/** Allocates and initializes an encoder state.
* @returns The newly created state
*/
LPCNetEncState *lpcnet_encoder_create(void);
/** Frees an <code>LPCNetEncState</code> allocated by lpcnet_encoder_create().
* @param[in] st <tt>LPCNetEncState*</tt>: State to be freed.
*/
void lpcnet_encoder_destroy(LPCNetEncState *st);
/** Encodes LPCNET_PACKET_SAMPLES speech samples (currently 640) into a packet of LPCNET_COMPRESSED_SIZE bytes (currently 8).
* @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
* @param [in] pcm <tt>const opus_int16 *</tt>: Input speech to be encoded
* @param [out] buf <tt>unsigned char *</tt>: Compressed packet
* @retval 0 Success
*/
int lpcnet_encode(LPCNetEncState *st, const opus_int16 *pcm, unsigned char *buf);
/** Compute features on LPCNET_FRAME_SIZE speech samples (currently 160) and output features for one 10-ms frame.
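* @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
* @param [in] pcm <tt>const opus_int16 *</tt>: Input speech to be analyzed
* @param [out] features <tt>float[NB_TOTAL_FEATURES]</tt>: Feature vector for the frame
* @param [in] arch <tt>int</tt>: Architecture identifier (e.g. as returned by opus_select_arch())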
* @retval 0 Success
*/
int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES], int arch);
/** Compute features on LPCNET_FRAME_SIZE speech samples (currently 160) and output features for one 10-ms frame.
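* @param [in] st <tt>LPCNetEncState*</tt>: Encoder state
* @param [in] pcm <tt>const float *</tt>: Input speech to be analyzed
* @param [out] features <tt>float[NB_TOTAL_FEATURES]</tt>: Feature vector for the frame
* @param [in] arch <tt>int</tt>: Architecture identifier (e.g. as returned by opus_select_arch())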
* @retval 0 Success
*/
int lpcnet_compute_single_frame_features_float(LPCNetEncState *st, const float *pcm, float features[NB_TOTAL_FEATURES], int arch);
/** Gets the size of an <code>LPCNetState</code> structure.
* @returns The size in bytes.
*/
int lpcnet_get_size(void);
/** Initializes a previously allocated synthesis state
* The memory pointed to by st must be at least the size returned by lpcnet_get_size().
* This is intended for applications which use their own allocator instead of malloc.
* @see lpcnet_create(),lpcnet_get_size()
* @param [in] st <tt>LPCNetState*</tt>: Synthesis state
* @retval 0 Success
*/
int lpcnet_init(LPCNetState *st);
/** Allocates and initializes a synthesis state.
* @returns The newly created state
*/
LPCNetState *lpcnet_create(void);
/** Frees an <code>LPCNetState</code> allocated by lpcnet_create().
* @param[in] st <tt>LPCNetState*</tt>: State to be freed.
*/
void lpcnet_destroy(LPCNetState *st);
/** Synthesizes speech from an LPCNet feature vector.
* @param [in] st <tt>LPCNetState*</tt>: Synthesis state
* @param [in] features <tt>const float *</tt>: Feature vector for one frame
* @param [out] output <tt>opus_int16 *</tt>: Synthesized speech
* @param [in] N <tt>int</tt>: Number of samples to generate
* (This function does not return a value.)
*/
void lpcnet_synthesize(LPCNetState *st, const float *features, opus_int16 *output, int N);
int lpcnet_plc_init(LPCNetPLCState *st);
void lpcnet_plc_reset(LPCNetPLCState *st);
int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm);
int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm);
void lpcnet_plc_fec_add(LPCNetPLCState *st, const float *features);
void lpcnet_plc_fec_clear(LPCNetPLCState *st);
int lpcnet_load_model(LPCNetState *st, const unsigned char *data, int len);
int lpcnet_plc_load_model(LPCNetPLCState *st, const unsigned char *data, int len);
#endif

193
dnn/lpcnet_demo.c Normal file
View File

@ -0,0 +1,193 @@
/* Copyright (c) 2018 Mozilla */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <math.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "arch.h"
#include "lpcnet.h"
#include "freq.h"
#include "os_support.h"
#include "fargan.h"
#include "cpu_support.h"
#ifdef USE_WEIGHTS_FILE
# if __unix__
# include <fcntl.h>
# include <sys/mman.h>
# include <unistd.h>
# include <sys/stat.h>
/* When available, mmap() is preferable to reading the file, as it leads to
better resource utilization, especially if multiple processes are using the same
file (mapping will be shared in cache). */
/* Maps the file `filename` read-only into memory.
   On success returns the mapping and stores its size in *len; release it
   with free_blob(). On any failure (file missing, empty, too large for an
   int, or mmap() error) returns NULL and sets *len to 0. */
unsigned char *load_blob(const char *filename, int *len) {
  int fd;
  void *map;
  struct stat st;
  *len = 0;
  fd = open(filename, O_RDONLY);
  if (fd < 0) return NULL;
  /* fstat() on the open descriptor describes the same file that gets mapped. */
  if (fstat(fd, &st) != 0 || st.st_size <= 0 || (off_t)(int)st.st_size != st.st_size) {
    close(fd);
    return NULL;
  }
  map = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_SHARED, fd, 0);
  /* The mapping stays valid after the descriptor is closed. */
  close(fd);
  if (map == MAP_FAILED) return NULL;
  *len = (int)st.st_size;
  return map;
}
void free_blob(unsigned char *blob, int len) {
munmap(blob, len);
}
# else
/* Reads the whole file `filename` into a malloc()ed buffer.
   On success returns the buffer and stores the number of bytes actually read
   in *len; release it with free_blob(). Returns NULL with *len set to 0 if
   the file cannot be opened, is empty, is too large for an int, or if memory
   cannot be allocated. */
unsigned char *load_blob(const char *filename, int *len) {
  FILE *file;
  unsigned char *data;
  long size;
  *len = 0;
  /* Binary mode: the blob must not go through newline translation. */
  file = fopen(filename, "rb");
  if (file == NULL) return NULL;
  if (fseek(file, 0L, SEEK_END) != 0
      || (size = ftell(file)) <= 0
      || (long)(int)size != size
      || fseek(file, 0L, SEEK_SET) != 0) {
    fclose(file);
    return NULL;
  }
  data = malloc((size_t)size);
  if (data == NULL) {
    fclose(file);
    return NULL;
  }
  *len = (int)fread(data, 1, (size_t)size, file);
  fclose(file);
  return data;
}
/* Releases a buffer returned by load_blob(). The length is not needed for a
   heap buffer; the parameter exists to match the mmap()-based variant. */
void free_blob(unsigned char *blob, int len) {
  (void)len;
  free(blob);
}
# endif
#endif
#define MODE_FEATURES 2
/*#define MODE_SYNTHESIS 3*/
#define MODE_ADDLPC 5
#define MODE_FWGAN_SYNTHESIS 6
#define MODE_FARGAN_SYNTHESIS 7
/* Prints the command-line help on stderr and exits with status 1.
   The option names printed here match the ones main() compares against
   (in particular "-fargan-synthesis", spelled with a hyphen). */
void usage(void) {
  fprintf(stderr, "usage: lpcnet_demo -features <input.pcm> <features.f32>\n");
  fprintf(stderr, " lpcnet_demo -fargan-synthesis <features.f32> <output.pcm>\n");
  fprintf(stderr, " lpcnet_demo -addlpc <features_without_lpc.f32> <features_with_lpc.lpc>\n\n");
  fprintf(stderr, " plc_options:\n");
  fprintf(stderr, " causal: normal (causal) PLC\n");
  fprintf(stderr, " codec: normal (causal) PLC without cross-fade (will glitch)\n");
  exit(1);
}
/* Command-line driver: lpcnet_demo <mode> <input> <output>.
     -features          16-bit PCM in, NB_TOTAL_FEATURES floats per frame out
     -fargan-synthesis  feature file in, 16-bit PCM out
     -addlpc            feature file in, same features with LPC filled in out
   Any other invocation prints the usage text and exits with status 1. */
int main(int argc, char **argv) {
  int mode=0;
  int arch;
  FILE *fin, *fout;
#ifdef USE_WEIGHTS_FILE
  int len;
  unsigned char *data;
  const char *filename = "weights_blob.bin";
#endif
  arch = opus_select_arch();
  /* Exactly three arguments are required; check before reading argv[1]. */
  if (argc != 4) usage();
  if (strcmp(argv[1], "-features") == 0) mode=MODE_FEATURES;
  else if (strcmp(argv[1], "-fargan-synthesis") == 0) mode=MODE_FARGAN_SYNTHESIS;
  else if (strcmp(argv[1], "-addlpc") == 0) mode=MODE_ADDLPC;
  else usage();
  fin = fopen(argv[2], "rb");
  if (fin == NULL) {
    fprintf(stderr, "Can't open %s\n", argv[2]);
    exit(1);
  }
  fout = fopen(argv[3], "wb");
  if (fout == NULL) {
    fprintf(stderr, "Can't open %s\n", argv[3]);
    exit(1);
  }
#ifdef USE_WEIGHTS_FILE
  data = load_blob(filename, &len);
#endif
  if (mode == MODE_FEATURES) {
    LPCNetEncState *net;
    net = lpcnet_encoder_create();
    if (net == NULL) {
      fprintf(stderr, "Can't allocate encoder state\n");
      exit(1);
    }
    /* One feature vector per complete input frame; a trailing partial frame
       is dropped. */
    while (1) {
      float features[NB_TOTAL_FEATURES];
      opus_int16 pcm[LPCNET_FRAME_SIZE];
      size_t ret;
      ret = fread(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fin);
      if (feof(fin) || ret != LPCNET_FRAME_SIZE) break;
      lpcnet_compute_single_frame_features(net, pcm, features, arch);
      fwrite(features, sizeof(float), NB_TOTAL_FEATURES, fout);
    }
    lpcnet_encoder_destroy(net);
  } else if (mode == MODE_FARGAN_SYNTHESIS) {
    FARGANState fargan;
    size_t ret, i;
    /* Zero-initialized so that a short input file never leaves the priming
       features indeterminate. */
    float in_features[5*NB_TOTAL_FEATURES] = {0};
    float zeros[320] = {0};
    fargan_init(&fargan);
#ifdef USE_WEIGHTS_FILE
    fargan_load_model(&fargan, data, len);
#endif
    /* uncomment the following to align with Python code */
    /*ret = fread(&in_features[0], sizeof(in_features[0]), NB_TOTAL_FEATURES, fin);*/
    /* Prime the synthesizer with five frames. Each read brings in a full
       NB_TOTAL_FEATURES record but advances by only NB_FEATURES, so the next
       read overwrites the extra values and the first NB_FEATURES of each
       frame end up packed back to back. */
    for (i=0;i<5;i++) {
      ret = fread(&in_features[i*NB_FEATURES], sizeof(in_features[0]), NB_TOTAL_FEATURES, fin);
      if (ret != NB_TOTAL_FEATURES) break;
    }
    fargan_cont(&fargan, zeros, in_features);
    while (1) {
      float features[NB_FEATURES];
      float fpcm[LPCNET_FRAME_SIZE];
      opus_int16 pcm[LPCNET_FRAME_SIZE];
      ret = fread(in_features, sizeof(features[0]), NB_TOTAL_FEATURES, fin);
      if (feof(fin) || ret != NB_TOTAL_FEATURES) break;
      OPUS_COPY(features, in_features, NB_FEATURES);
      fargan_synthesize(&fargan, fpcm, features);
      /* Scale to 16-bit, clamp to +/-32767 and round to nearest. */
      for (i=0;i<LPCNET_FRAME_SIZE;i++) pcm[i] = (int)floor(.5 + MIN32(32767, MAX32(-32767, 32768.f*fpcm[i])));
      fwrite(pcm, sizeof(pcm[0]), LPCNET_FRAME_SIZE, fout);
    }
  } else if (mode == MODE_ADDLPC) {
    float features[NB_TOTAL_FEATURES];
    size_t ret;
    while (1) {
      ret = fread(features, sizeof(features[0]), NB_TOTAL_FEATURES, fin);
      if (ret != NB_TOTAL_FEATURES || feof(fin)) break;
      /* The LPC coefficients are written starting at index NB_FEATURES of
         the record. */
      lpc_from_cepstrum(&features[NB_FEATURES], &features[0]);
      fwrite(features, sizeof(features[0]), NB_TOTAL_FEATURES, fout);
    }
  } else {
    fprintf(stderr, "unknown action\n");
  }
  fclose(fin);
  fclose(fout);
#ifdef USE_WEIGHTS_FILE
  free_blob(data, len);
#endif
  return 0;
}

230
dnn/lpcnet_enc.c Normal file
View File

@ -0,0 +1,230 @@
/* Copyright (c) 2017-2019 Mozilla */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include "kiss_fft.h"
#include "common.h"
#include <math.h>
#include "freq.h"
#include "pitch.h"
#include "arch.h"
#include <assert.h>
#include "lpcnet_private.h"
#include "lpcnet.h"
#include "os_support.h"
#include "_kiss_fft_guts.h"
#include "celt_lpc.h"
#include "mathops.h"
/* Returns the size in bytes of an LPCNetEncState, for callers that allocate
   the encoder state themselves. */
int lpcnet_encoder_get_size(void)
{
  return sizeof(LPCNetEncState);
}
/* Initializes a caller-allocated encoder state: zeroes the whole structure,
   then initializes the embedded pitch DNN. Always returns 0. */
int lpcnet_encoder_init(LPCNetEncState *st)
{
  memset(st, 0, sizeof *st);
  pitchdnn_init(&st->pitchdnn);
  return 0;
}
/* Loads the encoder's pitch DNN from a weights blob of len bytes and returns
   the result of pitchdnn_load_model(). */
int lpcnet_encoder_load_model(LPCNetEncState *st, const unsigned char *data, int len)
{
  return pitchdnn_load_model(&st->pitchdnn, data, len);
}
LPCNetEncState *lpcnet_encoder_create(void) {
LPCNetEncState *st;
st = opus_alloc(lpcnet_encoder_get_size());
lpcnet_encoder_init(st);
return st;
}
/* Releases an encoder state obtained from lpcnet_encoder_create(). */
void lpcnet_encoder_destroy(LPCNetEncState *st)
{
  opus_free(st);
}
/* Builds one analysis window from the OVERLAP_SIZE samples saved from the
   previous call followed by the FRAME_SIZE new samples in `in`, saves the
   tail of `in` for the next call, then windows and transforms the buffer.
   X receives the spectrum and Ex the band energies computed from it. */
static void frame_analysis(LPCNetEncState *st, kiss_fft_cpx *X, float *Ex, const float *in)
{
  float window[WINDOW_SIZE];
  OPUS_COPY(window, st->analysis_mem, OVERLAP_SIZE);
  OPUS_COPY(&window[OVERLAP_SIZE], in, FRAME_SIZE);
  /* Remember the last OVERLAP_SIZE input samples for the next frame. */
  OPUS_COPY(st->analysis_mem, &in[FRAME_SIZE-OVERLAP_SIZE], OVERLAP_SIZE);
  apply_window(window);
  forward_transform(X, window);
  lpcn_compute_band_energy(Ex, X);
}
static void biquad(float *y, float mem[2], const float *x, const float *b, const float *a, int N) {
int i;
float mem0, mem1;
mem0 = mem[0];
mem1 = mem[1];
for (i=0;i<N;i++) {
float xi, yi, mem00;
xi = x[i];
yi = x[i] + mem0;
mem00 = mem0;
/* Original code:
mem0 = mem1 + (b[0]*xi - a[0]*yi);
mem1 = (b[1]*xi - a[1]*yi);
Modified to reduce dependency chains: (the +1e-30f forces the ordering and has no effect on the output)
*/
mem0 = (b[0]-a[0])*xi + mem1 - a[0]*mem0;
mem1 = (b[1]-a[1])*xi + 1e-30f - a[1]*mem00;
y[i] = yi;
}
mem[0] = mem0;
mem[1] = mem1;
}
/* Base-10 logarithm built on celt_log2(); 0.30103 is log10(2). */
#define celt_log10(x) (0.3010299957f*celt_log2(x))
/* Analyzes one frame of FRAME_SIZE samples from `in` and updates the feature
   fields of st:
     - st->if_features: per-bin values derived from the current and previous
       spectrum, for bins below PITCH_IF_MAX_FREQ;
     - st->xcorr_features: normalized cross-correlations over the lag range;
     - st->features: DCT of the log band energies, then the pitch value from
       the pitch DNN at [NB_BANDS], a correlation measure at [NB_BANDS+1] and
       the LPC coefficients from [NB_BANDS+2] on.
   The analysis, pitch and filter memories in st are advanced by one frame. */
void compute_frame_features(LPCNetEncState *st, const float *in, int arch) {
float aligned_in[FRAME_SIZE];
int i;
float Ly[NB_BANDS];
float follow, logMax;
kiss_fft_cpx X[FREQ_SIZE];
float Ex[NB_BANDS];
float xcorr[PITCH_MAX_PERIOD];
float ener0;
float ener;
float x[FRAME_SIZE+LPC_ORDER];
float frame_corr;
float xy, xx, yy;
int pitch;
float ener_norm[PITCH_MAX_PERIOD - PITCH_MIN_PERIOD];
/* [b,a]=ellip(2, 2, 20, 1200/8000); */
static const float lp_b[2] = {-0.84946f, 1.f};
static const float lp_a[2] = {-1.54220f, 0.70781f};
/* The first TRAINING_OFFSET samples of the aligned frame come from the end of
   the previous frame; they must be taken before frame_analysis() overwrites
   st->analysis_mem. */
OPUS_COPY(aligned_in, &st->analysis_mem[OVERLAP_SIZE-TRAINING_OFFSET], TRAINING_OFFSET);
frame_analysis(st, X, Ex, in);
/* Bin 0 only contributes a scaled log energy, clamped to [-1, 1]. */
st->if_features[0] = MAX16(-1.f, MIN16(1.f, (1.f/64)*(10.f*celt_log10(1e-15f + X[0].r*X[0].r)-6.f)));
/* Other bins contribute three values each: the real and imaginary parts of
   the C_MULC product of the current and previous bin, normalized to unit
   magnitude, and the clamped scaled log energy of the current bin. */
for (i=1;i<PITCH_IF_MAX_FREQ;i++) {
kiss_fft_cpx prod;
float norm_1;
C_MULC(prod, X[i], st->prev_if[i]);
norm_1 = 1.f/sqrt(1e-15f + prod.r*prod.r + prod.i*prod.i);
C_MULBYSCALAR(prod, norm_1);
st->if_features[3*i-2] = prod.r;
st->if_features[3*i-1] = prod.i;
st->if_features[3*i] = MAX16(-1.f, MIN16(1.f, (1.f/64)*(10.f*celt_log10(1e-15f + X[i].r*X[i].r + X[i].i*X[i].i)-6.f)));
}
OPUS_COPY(st->prev_if, X, PITCH_IF_MAX_FREQ);
/*for (i=0;i<88;i++) printf("%f ", st->if_features[i]);printf("\n");*/
/* Log band energies with two floors: no band may be more than 8 below the
   running maximum, nor drop by more than 2.5 from the follower of the
   preceding bands. */
logMax = -2;
follow = -2;
for (i=0;i<NB_BANDS;i++) {
Ly[i] = celt_log10(1e-2f+Ex[i]);
Ly[i] = MAX16(logMax-8, MAX16(follow-2.5f, Ly[i]));
logMax = MAX16(logMax, Ly[i]);
follow = MAX16(follow-2.5f, Ly[i]);
}
dct(st->features, Ly);
st->features[0] -= 4;
lpc_from_cepstrum(st->lpc, st->features);
for (i=0;i<LPC_ORDER;i++) st->features[NB_BANDS+2+i] = st->lpc[i];
/* Slide both history buffers left by one frame, keeping PITCH_MAX_PERIOD
   samples of history in front of the frame filled in below. */
OPUS_MOVE(st->exc_buf, &st->exc_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
OPUS_MOVE(st->lp_buf, &st->lp_buf[FRAME_SIZE], PITCH_MAX_PERIOD);
OPUS_COPY(&aligned_in[TRAINING_OFFSET], in, FRAME_SIZE-TRAINING_OFFSET);
/* x = LPC_ORDER samples of filter memory followed by the aligned frame. */
OPUS_COPY(&x[0], st->pitch_mem, LPC_ORDER);
OPUS_COPY(&x[LPC_ORDER], aligned_in, FRAME_SIZE);
OPUS_COPY(st->pitch_mem, &aligned_in[FRAME_SIZE-LPC_ORDER], LPC_ORDER);
/* Filter the aligned frame with the frame's LPC coefficients into lp_buf. */
celt_fir(&x[LPC_ORDER], st->lpc, &st->lp_buf[PITCH_MAX_PERIOD], FRAME_SIZE, LPC_ORDER, arch);
/* exc_buf holds the filtered signal plus .7 times its previous sample. */
for (i=0;i<FRAME_SIZE;i++) {
st->exc_buf[PITCH_MAX_PERIOD+i] = st->lp_buf[PITCH_MAX_PERIOD+i] + .7f*st->pitch_filt;
st->pitch_filt = st->lp_buf[PITCH_MAX_PERIOD+i];
/*printf("%f\n", st->exc_buf[PITCH_MAX_PERIOD+i]);*/
}
/* The new part of lp_buf is then filtered in place with the biquad. */
biquad(&st->lp_buf[PITCH_MAX_PERIOD], st->lp_mem, &st->lp_buf[PITCH_MAX_PERIOD], lp_b, lp_a, FRAME_SIZE);
{
double ener1;
float *buf = st->exc_buf;
celt_pitch_xcorr(&buf[PITCH_MAX_PERIOD], buf, xcorr, FRAME_SIZE, PITCH_MAX_PERIOD-PITCH_MIN_PERIOD, arch);
ener0 = celt_inner_prod(&buf[PITCH_MAX_PERIOD], &buf[PITCH_MAX_PERIOD], FRAME_SIZE, arch);
ener1 = celt_inner_prod(&buf[0], &buf[0], FRAME_SIZE, arch);
/*printf("%f\n", st->frame_weight[sub]);*/
/* ener1 is the energy of the FRAME_SIZE-sample window starting at buf[i];
   it is updated incrementally (in double) as the window slides by one. */
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
ener = 1 + ener0 + ener1;
st->xcorr_features[i] = 2*xcorr[i];
ener_norm[i] = ener;
ener1 += buf[i+FRAME_SIZE]*(double)buf[i+FRAME_SIZE] - buf[i]*(double)buf[i];
/*printf("%f ", st->xcorr_features[i]);*/
}
/* Split in a separate loop so the compiler can vectorize it */
for (i=0;i<PITCH_MAX_PERIOD-PITCH_MIN_PERIOD;i++) {
st->xcorr_features[i] /= ener_norm[i];
}
/*printf("\n");*/
}
st->dnn_pitch = compute_pitchdnn(&st->pitchdnn, st->if_features, st->xcorr_features, arch);
/* Convert the DNN output into a lag in samples: 256 / 2^(dnn_pitch + 1.5). */
pitch = (int)floor(.5+256./pow(2.f,((1./60.)*((st->dnn_pitch+1.5)*60))));
/* Correlation between the new frame of lp_buf and the same frame delayed by
   `pitch` samples. */
xx = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD], &st->lp_buf[PITCH_MAX_PERIOD], FRAME_SIZE, arch);
yy = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD-pitch], &st->lp_buf[PITCH_MAX_PERIOD-pitch], FRAME_SIZE, arch);
xy = celt_inner_prod(&st->lp_buf[PITCH_MAX_PERIOD], &st->lp_buf[PITCH_MAX_PERIOD-pitch], FRAME_SIZE, arch);
/*printf("%f %f\n", frame_corr, xy/sqrt(1e-15+xx*yy));*/
frame_corr = xy/sqrt(1+xx*yy);
/* Smooth mapping log(1+exp(5c)), scaled so that c = 1 maps to 1. */
frame_corr = log(1.f+exp(5.f*frame_corr))/log(1+exp(5.f));
st->features[NB_BANDS] = st->dnn_pitch;
st->features[NB_BANDS + 1] = frame_corr-.5f;
}
/* First-order filter y[i] = x[i] - coef*x[i-1] over N samples.
   *mem holds -coef times the last input sample of the previous call and is
   updated for the next one. y may be the same buffer as x. */
void preemphasis(float *y, float *mem, const float *x, float coef, int N) {
  int k = 0;
  while (k < N) {
    const float in = x[k];
    const float out = in + *mem;
    *mem = -coef*in;
    y[k] = out;
    k++;
  }
}
/* Shared implementation for the int16 and float entry points.
   Applies pre-emphasis to x in place (x is modified), runs the frame
   analysis, and copies NB_TOTAL_FEATURES values from st->features into
   features. Always returns 0. */
static int lpcnet_compute_single_frame_features_impl(LPCNetEncState *st, float *x, float features[NB_TOTAL_FEATURES], int arch)
{
  preemphasis(x, &st->mem_preemph, x, PREEMPHASIS, FRAME_SIZE);
  compute_frame_features(st, x, arch);
  OPUS_COPY(features, &st->features[0], NB_TOTAL_FEATURES);
  return 0;
}
/* Computes the features of one frame of FRAME_SIZE 16-bit samples.
   The samples are converted to float in a local buffer, so pcm itself is
   never modified. Always returns 0. */
int lpcnet_compute_single_frame_features(LPCNetEncState *st, const opus_int16 *pcm, float features[NB_TOTAL_FEATURES], int arch)
{
  float frame[FRAME_SIZE];
  int k = 0;
  while (k < FRAME_SIZE) {
    frame[k] = pcm[k];
    k++;
  }
  lpcnet_compute_single_frame_features_impl(st, frame, features, arch);
  return 0;
}
/* Computes the features of one frame of FRAME_SIZE float samples.
   The input is copied to a local buffer first because the implementation
   pre-emphasizes its input in place. Always returns 0. */
int lpcnet_compute_single_frame_features_float(LPCNetEncState *st, const float *pcm, float features[NB_TOTAL_FEATURES], int arch)
{
  float frame[FRAME_SIZE];
  int k = 0;
  while (k < FRAME_SIZE) {
    frame[k] = pcm[k];
    k++;
  }
  lpcnet_compute_single_frame_features_impl(st, frame, features, arch);
  return 0;
}

211
dnn/lpcnet_plc.c Normal file
View File

@ -0,0 +1,211 @@
/* Copyright (c) 2021 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "lpcnet_private.h"
#include "lpcnet.h"
#include "plc_data.h"
#include "os_support.h"
#include "common.h"
#include "cpu_support.h"
#ifndef M_PI
#define M_PI 3.141592653
#endif
/* Comment this out to have LPCNet update its state on every good packet (slow). */
#define PLC_SKIP_UPDATES
/* Resets the concealment state: zeroes every field from
   LPCNET_PLC_RESET_START to the end of the structure, re-initializes the
   embedded feature encoder, clears the PCM history and puts the analysis and
   prediction positions at the end of that history. Fields located before
   LPCNET_PLC_RESET_START are untouched. */
void lpcnet_plc_reset(LPCNetPLCState *st)
{
  char *reset_from = (char*)&st->LPCNET_PLC_RESET_START;
  OPUS_CLEAR(reset_from, sizeof(LPCNetPLCState) - (reset_from - (char*)st));
  lpcnet_encoder_init(&st->enc);
  OPUS_CLEAR(st->pcm, PLC_BUF_SIZE);
  st->analysis_pos = PLC_BUF_SIZE;
  st->predict_pos = PLC_BUF_SIZE;
  st->analysis_gap = 1;
  st->loss_count = 0;
  st->blend = 0;
}
/* Initializes a concealment state: selects the CPU architecture, initializes
   the FARGAN synthesizer and the feature encoder, loads the built-in PLC
   model unless USE_WEIGHTS_FILE is defined, then resets the state.
   st->loaded is set to 1 only when a model was loaded here. Returns the model
   initialization result (always 0 when USE_WEIGHTS_FILE is defined). */
int lpcnet_plc_init(LPCNetPLCState *st)
{
  int ret = 0;
  st->arch = opus_select_arch();
  fargan_init(&st->fargan);
  lpcnet_encoder_init(&st->enc);
  st->loaded = 0;
#ifndef USE_WEIGHTS_FILE
  ret = init_plcmodel(&st->model, plcmodel_arrays);
  if (ret == 0) st->loaded = 1;
#endif
  celt_assert(ret == 0);
  lpcnet_plc_reset(st);
  return ret;
}
/* Loads the PLC model, the encoder model and the FARGAN model, in that
   order, from the same weights blob. Loading stops at the first failure and
   that error code is returned; st->loaded is set to 1 only when all three
   succeed, in which case 0 is returned. */
int lpcnet_plc_load_model(LPCNetPLCState *st, const unsigned char *data, int len)
{
  WeightArray *weights;
  int ret;
  parse_weights(&weights, data, len);
  ret = init_plcmodel(&st->model, weights);
  /* The parsed list is only needed by init_plcmodel(). */
  opus_free(weights);
  if (ret != 0) return ret;
  ret = lpcnet_encoder_load_model(&st->enc, data, len);
  if (ret != 0) return ret;
  ret = fargan_load_model(&st->fargan, data, len);
  if (ret != 0) return ret;
  st->loaded = 1;
  return ret;
}
/* Appends one FEC feature vector (NB_FEATURES floats) to the FEC queue.
   A NULL `features` pointer records a missing vector by incrementing
   st->fec_skip instead of storing anything.
   When the write position reaches PLC_MAX_FEC, the vectors that have not
   been read yet are moved to the start of the queue. If nothing has been
   read (the queue is full of pending vectors), the new vector is dropped. */
void lpcnet_plc_fec_add(LPCNetPLCState *st, const float *features) {
  if (features == NULL) {
    st->fec_skip++;
    return;
  }
  if (st->fec_fill_pos == PLC_MAX_FEC) {
    /* Compact: discard the already-consumed vectors in front of fec_read_pos. */
    OPUS_MOVE(&st->fec[0][0], &st->fec[st->fec_read_pos][0], (st->fec_fill_pos-st->fec_read_pos)*NB_FEATURES);
    st->fec_fill_pos = st->fec_fill_pos-st->fec_read_pos;
    st->fec_read_pos -= st->fec_read_pos;
    /* Compaction freed no slot, so storing would write at index PLC_MAX_FEC.
       NOTE(review): assumes st->fec holds exactly PLC_MAX_FEC vectors; the
       declaration is not visible here -- confirm. */
    if (st->fec_fill_pos >= PLC_MAX_FEC) return;
  }
  OPUS_COPY(&st->fec[st->fec_fill_pos][0], features, NB_FEATURES);
  st->fec_fill_pos++;
}
/* Empties the FEC queue: read position, write position and the count of
   pending skipped vectors all go back to zero. */
void lpcnet_plc_fec_clear(LPCNetPLCState *st)
{
  st->fec_skip = 0;
  st->fec_fill_pos = 0;
  st->fec_read_pos = 0;
}
/* Runs the PLC prediction network once: input dense layer (tanh), two GRU
   layers whose states live in st->plc_net, and a linear output layer that
   writes the prediction to `out`. The GRU states are updated as a side
   effect. A model must already be loaded (asserted). */
static void compute_plc_pred(LPCNetPLCState *st, float *out, const float *in)
{
  float dense_in[PLC_DENSE_IN_OUT_SIZE];
  PLCNetState *state = &st->plc_net;
  PLCModel *weights = &st->model;
  celt_assert(st->loaded);
  compute_generic_dense(&weights->plc_dense_in, dense_in, in, ACTIVATION_TANH, 0);
  compute_generic_gru(&weights->plc_gru1_input, &weights->plc_gru1_recurrent, state->gru1_state, dense_in, 0);
  compute_generic_gru(&weights->plc_gru2_input, &weights->plc_gru2_recurrent, state->gru2_state, state->gru1_state, 0);
  compute_generic_dense(&weights->plc_dense_out, out, state->gru2_state, ACTIVATION_LINEAR, 0);
}
/* Produces the next feature vector in `out`.
   If the FEC queue has an unread vector and no skip is pending, that vector
   is used, the prediction network is still run on it (output discarded) so
   its state follows along, and 1 is returned. Otherwise the vector is
   predicted by the network from an all-zero input, one pending skip (if any)
   is consumed, and 0 is returned. */
static int get_fec_or_pred(LPCNetPLCState *st, float *out)
{
  const int have_fec = (st->fec_read_pos != st->fec_fill_pos) && (st->fec_skip==0);
  if (!have_fec) {
    float zeros[2*NB_BANDS+NB_FEATURES+1] = {0};
    compute_plc_pred(st, out, zeros);
    if (st->fec_skip > 0) st->fec_skip--;
    return 0;
  }
  {
    float plc_features[2*NB_BANDS+NB_FEATURES+1] = {0};
    float discard[NB_FEATURES];
    OPUS_COPY(out, &st->fec[st->fec_read_pos][0], NB_FEATURES);
    st->fec_read_pos++;
    /* Update PLC state using FEC, so without Burg features. */
    OPUS_COPY(&plc_features[2*NB_BANDS], out, NB_FEATURES);
    plc_features[2*NB_BANDS+NB_FEATURES] = -1;
    compute_plc_pred(st, discard, plc_features);
    return 1;
  }
}
/* Pushes one feature vector into st->cont_features, which keeps the last
   CONT_VECTORS vectors: the oldest is dropped and the new one goes last. */
static void queue_features(LPCNetPLCState *st, const float *features)
{
  float *newest = &st->cont_features[(CONT_VECTORS-1)*NB_FEATURES];
  OPUS_MOVE(&st->cont_features[0], &st->cont_features[NB_FEATURES], (CONT_VECTORS-1)*NB_FEATURES);
  OPUS_COPY(newest, features, NB_FEATURES);
}
/* In this causal version of the code, the DNN model implemented by compute_plc_pred()
needs to generate two feature vectors to conceal the first lost packet.*/
/* Feeds one correctly received frame of FRAME_SIZE samples to the PLC.
   The frame is appended (scaled by 1/32768) to the PCM history, the analysis
   and prediction positions move back by one frame to follow the shifted
   history, and the loss counters are cleared. No analysis is run here.
   Always returns 0. */
int lpcnet_plc_update(LPCNetPLCState *st, opus_int16 *pcm)
{
  int k;
  float *newest = &st->pcm[PLC_BUF_SIZE-FRAME_SIZE];
  if (st->analysis_pos - FRAME_SIZE >= 0) {
    st->analysis_pos -= FRAME_SIZE;
  } else {
    /* The analysis position cannot move back any further. */
    st->analysis_gap = 1;
  }
  if (st->predict_pos - FRAME_SIZE >= 0) {
    st->predict_pos -= FRAME_SIZE;
  }
  OPUS_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE-FRAME_SIZE);
  for (k=0;k<FRAME_SIZE;k++) {
    newest[k] = (1.f/32768.f)*pcm[k];
  }
  st->blend = 0;
  st->loss_count = 0;
  return 0;
}
/* Offset added to features[0] of a concealed frame, indexed by the number of
   consecutive frames produced without FEC data (st->loss_count). */
static const float att_table[10] = {0, 0, -.2, -.2, -.4, -.4, -.8, -.8, -1.6, -1.6};
/* Produces one concealed frame of FRAME_SIZE samples in pcm.
   On the first lost frame after good audio (st->blend == 0) the not yet
   analyzed part of the PCM history is analyzed first, two feature vectors
   are generated and the FARGAN synthesizer is primed. Every call then
   obtains one more feature vector (FEC if available, prediction otherwise),
   attenuates it according to the loss count, synthesizes the frame and
   appends it to the PCM history. A model must be loaded (asserted).
   Always returns 0. */
int lpcnet_plc_conceal(LPCNetPLCState *st, opus_int16 *pcm) {
int i;
celt_assert(st->loaded);
if (st->blend == 0) {
int count = 0;
/* Start from the older of the two saved prediction-network states. */
st->plc_net = st->plc_bak[0];
/* Analyze every complete frame of history from analysis_pos onwards. */
while (st->analysis_pos + FRAME_SIZE <= PLC_BUF_SIZE) {
float x[FRAME_SIZE];
float plc_features[2*NB_BANDS+NB_FEATURES+1];
celt_assert(st->analysis_pos >= 0);
/* The history is stored scaled by 1/32768; undo that for the analysis. */
for (i=0;i<FRAME_SIZE;i++) x[i] = 32768.f*st->pcm[st->analysis_pos+i];
burg_cepstral_analysis(plc_features, x);
lpcnet_compute_single_frame_features_float(&st->enc, x, st->features, st->arch);
/* The first frame analyzed after a gap is not used to update the predictor,
   nor are frames located before predict_pos. */
if ((!st->analysis_gap || count>0) && st->analysis_pos >= st->predict_pos) {
queue_features(st, st->features);
OPUS_COPY(&plc_features[2*NB_BANDS], st->features, NB_FEATURES);
/* The last input is set to 1 here, -1 for FEC data and 0 for pure prediction. */
plc_features[2*NB_BANDS+NB_FEATURES] = 1;
/* Keep the two most recent predictor states before each update. */
st->plc_bak[0] = st->plc_bak[1];
st->plc_bak[1] = st->plc_net;
compute_plc_pred(st, st->features, plc_features);
}
st->analysis_pos += FRAME_SIZE;
count++;
}
/* Generate two feature vectors before priming the synthesizer, saving the
   predictor state ahead of each one. */
st->plc_bak[0] = st->plc_bak[1];
st->plc_bak[1] = st->plc_net;
get_fec_or_pred(st, st->features);
queue_features(st, st->features);
st->plc_bak[0] = st->plc_bak[1];
st->plc_bak[1] = st->plc_net;
get_fec_or_pred(st, st->features);
queue_features(st, st->features);
/* Prime FARGAN with the end of the PCM history and the queued features. */
fargan_cont(&st->fargan, &st->pcm[PLC_BUF_SIZE-FARGAN_CONT_SAMPLES], st->cont_features);
st->analysis_gap = 0;
}
st->plc_bak[0] = st->plc_bak[1];
st->plc_bak[1] = st->plc_net;
/* A frame backed by FEC data resets the loss count. */
if (get_fec_or_pred(st, st->features)) st->loss_count = 0;
else st->loss_count++;
/* Lower features[0] as the loss gets longer, never below -10: table values
   for the first nine frames, then 2 more per additional frame. */
if (st->loss_count >= 10) st->features[0] = MAX16(-10, st->features[0]+att_table[9] - 2*(st->loss_count-9));
else st->features[0] = MAX16(-10, st->features[0]+att_table[st->loss_count]);
fargan_synthesize_int(&st->fargan, pcm, &st->features[0]);
queue_features(st, st->features);
if (st->analysis_pos - FRAME_SIZE >= 0) st->analysis_pos -= FRAME_SIZE;
else st->analysis_gap = 1;
st->predict_pos = PLC_BUF_SIZE;
/* Append the concealed frame to the history, like a received frame. */
OPUS_MOVE(st->pcm, &st->pcm[FRAME_SIZE], PLC_BUF_SIZE-FRAME_SIZE);
for (i=0;i<FRAME_SIZE;i++) st->pcm[PLC_BUF_SIZE-FRAME_SIZE+i] = (1.f/32768.f)*pcm[i];
st->blend = 1;
return 0;
}

90
dnn/lpcnet_private.h Normal file
View File

@ -0,0 +1,90 @@
#ifndef LPCNET_PRIVATE_H
#define LPCNET_PRIVATE_H
#include <stdio.h>
#include "freq.h"
#include "lpcnet.h"
#include "plc_data.h"
#include "pitchdnn.h"
#include "fargan.h"
/* Pitch frame length, and the length of a buffer holding PITCH_MAX_PERIOD plus
   one pitch frame (sizes exc_buf and lp_buf in LPCNetEncState). */
#define PITCH_FRAME_SIZE 320
#define PITCH_BUF_SIZE (PITCH_MAX_PERIOD+PITCH_FRAME_SIZE)
/* Capacity, in feature vectors, of the FEC queue in LPCNetPLCState. */
#define PLC_MAX_FEC 100
/* NOTE(review): not referenced anywhere in this header -- confirm it is still used. */
#define MAX_FEATURE_BUFFER_SIZE 4
/* Sizes of prev_if and if_features in LPCNetEncState. */
#define PITCH_IF_MAX_FREQ 30
#define PITCH_IF_FEATURES (3*PITCH_IF_MAX_FREQ - 2)
/* Number of feature vectors kept in LPCNetPLCState.cont_features; PLC_BUF_SIZE
   is derived from it as well. */
#define CONT_VECTORS 5
/* NOTE(review): not referenced anywhere in this header -- confirm it is still used. */
#define FEATURES_DELAY 1
/* State of the feature analysis stage. LPCNetPLCState embeds one instance as
   `enc`, and compute_frame_features() operates on it.
   NOTE(review): the per-field notes below are inferred from names and array
   sizes only; confirm against the analysis code before relying on them. */
struct LPCNetEncState{
PitchDNNState pitchdnn;
/* OVERLAP_SIZE samples of analysis memory (presumably carried between frames). */
float analysis_mem[OVERLAP_SIZE];
float mem_preemph;
/* One complex value per bin, PITCH_IF_MAX_FREQ bins. */
kiss_fft_cpx prev_if[PITCH_IF_MAX_FREQ];
float if_features[PITCH_IF_FEATURES];
/* One entry per candidate period between PITCH_MIN_PERIOD and PITCH_MAX_PERIOD. */
float xcorr_features[PITCH_MAX_PERIOD - PITCH_MIN_PERIOD];
float dnn_pitch;
float pitch_mem[LPC_ORDER];
float pitch_filt;
/* Two buffers of PITCH_BUF_SIZE entries each. */
float exc_buf[PITCH_BUF_SIZE];
float lp_buf[PITCH_BUF_SIZE];
float lp_mem[4];
float lpc[LPC_ORDER];
float features[NB_TOTAL_FEATURES];
float sig_mem[LPC_ORDER];
float burg_cepstrum[2*NB_BANDS];
};
/* Recurrent state of the PLC prediction network: one state vector per GRU
   layer. LPCNetPLCState keeps a live copy (plc_net) and two backups (plc_bak),
   which are copied around as whole structs. */
typedef struct {
float gru1_state[PLC_GRU1_STATE_SIZE];
float gru2_state[PLC_GRU2_STATE_SIZE];
} PLCNetState;
/* Length of the PCM history kept by the PLC: CONT_VECTORS+5 frames. */
#define PLC_BUF_SIZE ((CONT_VECTORS+5)*FRAME_SIZE)
/* Complete packet loss concealment state. Field order matters: see
   LPCNET_PLC_RESET_START below. */
struct LPCNetPLCState {
PLCModel model;
/* Synthesizer state used by fargan_cont() and fargan_synthesize_int(). */
FARGANState fargan;
/* Feature analysis state passed to the feature computation. */
LPCNetEncState enc;
/* lpcnet_plc_conceal() asserts this is non-zero. */
int loaded;
int arch;
/* Names the first field of the resettable part of the struct (presumably the
   reset code clears from this field to the end -- confirm in the reset
   function). Do not reorder the fields around it. */
#define LPCNET_PLC_RESET_START fec
/* Queue of FEC feature vectors, read at fec_read_pos; vectors are available
   while fec_read_pos differs from fec_fill_pos. */
float fec[PLC_MAX_FEC][NB_FEATURES];
/* Set when analysis_pos could not be moved back by a whole frame; cleared
   after the first concealed frame has been prepared. */
int analysis_gap;
int fec_read_pos;
int fec_fill_pos;
/* While positive, FEC is not used; decremented once per predicted frame. */
int fec_skip;
/* Index into pcm of the next frame to analyse. */
int analysis_pos;
/* Analysed frames located before this index are not fed to the prediction
   network; set to PLC_BUF_SIZE after every concealed frame. */
int predict_pos;
/* Signal history scaled by 1/32768, newest frame last. */
float pcm[PLC_BUF_SIZE];
/* 0 after a received frame, 1 after a concealed frame. */
int blend;
float features[NB_TOTAL_FEATURES];
/* Last CONT_VECTORS feature vectors, oldest first (see queue_features()). */
float cont_features[CONT_VECTORS*NB_FEATURES];
/* Number of consecutive frames concealed without FEC. */
int loss_count;
/* Live prediction network state and its two most recent backups
   (plc_bak[0] is the older one). */
PLCNetState plc_net;
PLCNetState plc_bak[2];
};
/* Internal signal/feature helpers shared between the DNN source files. */
void preemphasis(float *y, float *mem, const float *x, float coef, int N);
void compute_frame_features(LPCNetEncState *st, const float *in, int arch);
/* Internal LPCNet synthesis entry points operating on an LPCNetState.
   Each function is declared exactly once (run_frame_network used to be
   declared a second time at the end of this list). */
void lpcnet_reset_signal(LPCNetState *lpcnet);
void run_frame_network(LPCNetState *lpcnet, float *gru_a_condition, float *gru_b_condition, float *lpc, const float *features);
void run_frame_network_deferred(LPCNetState *lpcnet, const float *features);
void run_frame_network_flush(LPCNetState *lpcnet);
void lpcnet_synthesize_tail_impl(LPCNetState *lpcnet, opus_int16 *output, int N, int preload);
void lpcnet_synthesize_impl(LPCNetState *lpcnet, const float *features, opus_int16 *output, int N, int preload);
void lpcnet_synthesize_blend_impl(LPCNetState *lpcnet, const opus_int16 *pcm_in, opus_int16 *output, int N);
#endif

307
dnn/lpcnet_tables.c Normal file
View File

@ -0,0 +1,307 @@
/* The contents of this file was automatically generated by dump_lpcnet_tables.c*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "kiss_fft.h"
static const arch_fft_state arch_fft = {0, NULL};
static const opus_int16 fft_bitrev[320] = {
0, 64, 128, 192, 256, 16, 80, 144, 208, 272, 32, 96, 160, 224, 288,
48, 112, 176, 240, 304, 4, 68, 132, 196, 260, 20, 84, 148, 212, 276,
36, 100, 164, 228, 292, 52, 116, 180, 244, 308, 8, 72, 136, 200, 264,
24, 88, 152, 216, 280, 40, 104, 168, 232, 296, 56, 120, 184, 248, 312,
12, 76, 140, 204, 268, 28, 92, 156, 220, 284, 44, 108, 172, 236, 300,
60, 124, 188, 252, 316, 1, 65, 129, 193, 257, 17, 81, 145, 209, 273,
33, 97, 161, 225, 289, 49, 113, 177, 241, 305, 5, 69, 133, 197, 261,
21, 85, 149, 213, 277, 37, 101, 165, 229, 293, 53, 117, 181, 245, 309,
9, 73, 137, 201, 265, 25, 89, 153, 217, 281, 41, 105, 169, 233, 297,
57, 121, 185, 249, 313, 13, 77, 141, 205, 269, 29, 93, 157, 221, 285,
45, 109, 173, 237, 301, 61, 125, 189, 253, 317, 2, 66, 130, 194, 258,
18, 82, 146, 210, 274, 34, 98, 162, 226, 290, 50, 114, 178, 242, 306,
6, 70, 134, 198, 262, 22, 86, 150, 214, 278, 38, 102, 166, 230, 294,
54, 118, 182, 246, 310, 10, 74, 138, 202, 266, 26, 90, 154, 218, 282,
42, 106, 170, 234, 298, 58, 122, 186, 250, 314, 14, 78, 142, 206, 270,
30, 94, 158, 222, 286, 46, 110, 174, 238, 302, 62, 126, 190, 254, 318,
3, 67, 131, 195, 259, 19, 83, 147, 211, 275, 35, 99, 163, 227, 291,
51, 115, 179, 243, 307, 7, 71, 135, 199, 263, 23, 87, 151, 215, 279,
39, 103, 167, 231, 295, 55, 119, 183, 247, 311, 11, 75, 139, 203, 267,
27, 91, 155, 219, 283, 43, 107, 171, 235, 299, 59, 123, 187, 251, 315,
15, 79, 143, 207, 271, 31, 95, 159, 223, 287, 47, 111, 175, 239, 303,
63, 127, 191, 255, 319, };
static const kiss_twiddle_cpx fft_twiddles[320] = {
{1.00000000f, -0.00000000f}, {0.999807239f, -0.0196336918f},
{0.999229014f, -0.0392598175f}, {0.998265624f, -0.0588708036f},
{0.996917307f, -0.0784590989f}, {0.995184720f, -0.0980171412f},
{0.993068457f, -0.117537394f}, {0.990569353f, -0.137012348f},
{0.987688363f, -0.156434461f}, {0.984426558f, -0.175796285f},
{0.980785251f, -0.195090324f}, {0.976765871f, -0.214309156f},
{0.972369909f, -0.233445361f}, {0.967599094f, -0.252491564f},
{0.962455213f, -0.271440446f}, {0.956940353f, -0.290284663f},
{0.951056540f, -0.309017003f}, {0.944806039f, -0.327630192f},
{0.938191354f, -0.346117049f}, {0.931214929f, -0.364470512f},
{0.923879504f, -0.382683426f}, {0.916187942f, -0.400748819f},
{0.908143163f, -0.418659747f}, {0.899748266f, -0.436409235f},
{0.891006529f, -0.453990489f}, {0.881921291f, -0.471396744f},
{0.872496009f, -0.488621235f}, {0.862734377f, -0.505657375f},
{0.852640152f, -0.522498548f}, {0.842217207f, -0.539138317f},
{0.831469595f, -0.555570245f}, {0.820401430f, -0.571787953f},
{0.809017003f, -0.587785244f}, {0.797320664f, -0.603555918f},
{0.785316944f, -0.619093955f}, {0.773010433f, -0.634393275f},
{0.760405958f, -0.649448037f}, {0.747508347f, -0.664252460f},
{0.734322488f, -0.678800762f}, {0.720853567f, -0.693087339f},
{0.707106769f, -0.707106769f}, {0.693087339f, -0.720853567f},
{0.678800762f, -0.734322488f}, {0.664252460f, -0.747508347f},
{0.649448037f, -0.760405958f}, {0.634393275f, -0.773010433f},
{0.619093955f, -0.785316944f}, {0.603555918f, -0.797320664f},
{0.587785244f, -0.809017003f}, {0.571787953f, -0.820401430f},
{0.555570245f, -0.831469595f}, {0.539138317f, -0.842217207f},
{0.522498548f, -0.852640152f}, {0.505657375f, -0.862734377f},
{0.488621235f, -0.872496009f}, {0.471396744f, -0.881921291f},
{0.453990489f, -0.891006529f}, {0.436409235f, -0.899748266f},
{0.418659747f, -0.908143163f}, {0.400748819f, -0.916187942f},
{0.382683426f, -0.923879504f}, {0.364470512f, -0.931214929f},
{0.346117049f, -0.938191354f}, {0.327630192f, -0.944806039f},
{0.309017003f, -0.951056540f}, {0.290284663f, -0.956940353f},
{0.271440446f, -0.962455213f}, {0.252491564f, -0.967599094f},
{0.233445361f, -0.972369909f}, {0.214309156f, -0.976765871f},
{0.195090324f, -0.980785251f}, {0.175796285f, -0.984426558f},
{0.156434461f, -0.987688363f}, {0.137012348f, -0.990569353f},
{0.117537394f, -0.993068457f}, {0.0980171412f, -0.995184720f},
{0.0784590989f, -0.996917307f}, {0.0588708036f, -0.998265624f},
{0.0392598175f, -0.999229014f}, {0.0196336918f, -0.999807239f},
{6.12323426e-17f, -1.00000000f}, {-0.0196336918f, -0.999807239f},
{-0.0392598175f, -0.999229014f}, {-0.0588708036f, -0.998265624f},
{-0.0784590989f, -0.996917307f}, {-0.0980171412f, -0.995184720f},
{-0.117537394f, -0.993068457f}, {-0.137012348f, -0.990569353f},
{-0.156434461f, -0.987688363f}, {-0.175796285f, -0.984426558f},
{-0.195090324f, -0.980785251f}, {-0.214309156f, -0.976765871f},
{-0.233445361f, -0.972369909f}, {-0.252491564f, -0.967599094f},
{-0.271440446f, -0.962455213f}, {-0.290284663f, -0.956940353f},
{-0.309017003f, -0.951056540f}, {-0.327630192f, -0.944806039f},
{-0.346117049f, -0.938191354f}, {-0.364470512f, -0.931214929f},
{-0.382683426f, -0.923879504f}, {-0.400748819f, -0.916187942f},
{-0.418659747f, -0.908143163f}, {-0.436409235f, -0.899748266f},
{-0.453990489f, -0.891006529f}, {-0.471396744f, -0.881921291f},
{-0.488621235f, -0.872496009f}, {-0.505657375f, -0.862734377f},
{-0.522498548f, -0.852640152f}, {-0.539138317f, -0.842217207f},
{-0.555570245f, -0.831469595f}, {-0.571787953f, -0.820401430f},
{-0.587785244f, -0.809017003f}, {-0.603555918f, -0.797320664f},
{-0.619093955f, -0.785316944f}, {-0.634393275f, -0.773010433f},
{-0.649448037f, -0.760405958f}, {-0.664252460f, -0.747508347f},
{-0.678800762f, -0.734322488f}, {-0.693087339f, -0.720853567f},
{-0.707106769f, -0.707106769f}, {-0.720853567f, -0.693087339f},
{-0.734322488f, -0.678800762f}, {-0.747508347f, -0.664252460f},
{-0.760405958f, -0.649448037f}, {-0.773010433f, -0.634393275f},
{-0.785316944f, -0.619093955f}, {-0.797320664f, -0.603555918f},
{-0.809017003f, -0.587785244f}, {-0.820401430f, -0.571787953f},
{-0.831469595f, -0.555570245f}, {-0.842217207f, -0.539138317f},
{-0.852640152f, -0.522498548f}, {-0.862734377f, -0.505657375f},
{-0.872496009f, -0.488621235f}, {-0.881921291f, -0.471396744f},
{-0.891006529f, -0.453990489f}, {-0.899748266f, -0.436409235f},
{-0.908143163f, -0.418659747f}, {-0.916187942f, -0.400748819f},
{-0.923879504f, -0.382683426f}, {-0.931214929f, -0.364470512f},
{-0.938191354f, -0.346117049f}, {-0.944806039f, -0.327630192f},
{-0.951056540f, -0.309017003f}, {-0.956940353f, -0.290284663f},
{-0.962455213f, -0.271440446f}, {-0.967599094f, -0.252491564f},
{-0.972369909f, -0.233445361f}, {-0.976765871f, -0.214309156f},
{-0.980785251f, -0.195090324f}, {-0.984426558f, -0.175796285f},
{-0.987688363f, -0.156434461f}, {-0.990569353f, -0.137012348f},
{-0.993068457f, -0.117537394f}, {-0.995184720f, -0.0980171412f},
{-0.996917307f, -0.0784590989f}, {-0.998265624f, -0.0588708036f},
{-0.999229014f, -0.0392598175f}, {-0.999807239f, -0.0196336918f},
{-1.00000000f, -1.22464685e-16f}, {-0.999807239f, 0.0196336918f},
{-0.999229014f, 0.0392598175f}, {-0.998265624f, 0.0588708036f},
{-0.996917307f, 0.0784590989f}, {-0.995184720f, 0.0980171412f},
{-0.993068457f, 0.117537394f}, {-0.990569353f, 0.137012348f},
{-0.987688363f, 0.156434461f}, {-0.984426558f, 0.175796285f},
{-0.980785251f, 0.195090324f}, {-0.976765871f, 0.214309156f},
{-0.972369909f, 0.233445361f}, {-0.967599094f, 0.252491564f},
{-0.962455213f, 0.271440446f}, {-0.956940353f, 0.290284663f},
{-0.951056540f, 0.309017003f}, {-0.944806039f, 0.327630192f},
{-0.938191354f, 0.346117049f}, {-0.931214929f, 0.364470512f},
{-0.923879504f, 0.382683426f}, {-0.916187942f, 0.400748819f},
{-0.908143163f, 0.418659747f}, {-0.899748266f, 0.436409235f},
{-0.891006529f, 0.453990489f}, {-0.881921291f, 0.471396744f},
{-0.872496009f, 0.488621235f}, {-0.862734377f, 0.505657375f},
{-0.852640152f, 0.522498548f}, {-0.842217207f, 0.539138317f},
{-0.831469595f, 0.555570245f}, {-0.820401430f, 0.571787953f},
{-0.809017003f, 0.587785244f}, {-0.797320664f, 0.603555918f},
{-0.785316944f, 0.619093955f}, {-0.773010433f, 0.634393275f},
{-0.760405958f, 0.649448037f}, {-0.747508347f, 0.664252460f},
{-0.734322488f, 0.678800762f}, {-0.720853567f, 0.693087339f},
{-0.707106769f, 0.707106769f}, {-0.693087339f, 0.720853567f},
{-0.678800762f, 0.734322488f}, {-0.664252460f, 0.747508347f},
{-0.649448037f, 0.760405958f}, {-0.634393275f, 0.773010433f},
{-0.619093955f, 0.785316944f}, {-0.603555918f, 0.797320664f},
{-0.587785244f, 0.809017003f}, {-0.571787953f, 0.820401430f},
{-0.555570245f, 0.831469595f}, {-0.539138317f, 0.842217207f},
{-0.522498548f, 0.852640152f}, {-0.505657375f, 0.862734377f},
{-0.488621235f, 0.872496009f}, {-0.471396744f, 0.881921291f},
{-0.453990489f, 0.891006529f}, {-0.436409235f, 0.899748266f},
{-0.418659747f, 0.908143163f}, {-0.400748819f, 0.916187942f},
{-0.382683426f, 0.923879504f}, {-0.364470512f, 0.931214929f},
{-0.346117049f, 0.938191354f}, {-0.327630192f, 0.944806039f},
{-0.309017003f, 0.951056540f}, {-0.290284663f, 0.956940353f},
{-0.271440446f, 0.962455213f}, {-0.252491564f, 0.967599094f},
{-0.233445361f, 0.972369909f}, {-0.214309156f, 0.976765871f},
{-0.195090324f, 0.980785251f}, {-0.175796285f, 0.984426558f},
{-0.156434461f, 0.987688363f}, {-0.137012348f, 0.990569353f},
{-0.117537394f, 0.993068457f}, {-0.0980171412f, 0.995184720f},
{-0.0784590989f, 0.996917307f}, {-0.0588708036f, 0.998265624f},
{-0.0392598175f, 0.999229014f}, {-0.0196336918f, 0.999807239f},
{-1.83697015e-16f, 1.00000000f}, {0.0196336918f, 0.999807239f},
{0.0392598175f, 0.999229014f}, {0.0588708036f, 0.998265624f},
{0.0784590989f, 0.996917307f}, {0.0980171412f, 0.995184720f},
{0.117537394f, 0.993068457f}, {0.137012348f, 0.990569353f},
{0.156434461f, 0.987688363f}, {0.175796285f, 0.984426558f},
{0.195090324f, 0.980785251f}, {0.214309156f, 0.976765871f},
{0.233445361f, 0.972369909f}, {0.252491564f, 0.967599094f},
{0.271440446f, 0.962455213f}, {0.290284663f, 0.956940353f},
{0.309017003f, 0.951056540f}, {0.327630192f, 0.944806039f},
{0.346117049f, 0.938191354f}, {0.364470512f, 0.931214929f},
{0.382683426f, 0.923879504f}, {0.400748819f, 0.916187942f},
{0.418659747f, 0.908143163f}, {0.436409235f, 0.899748266f},
{0.453990489f, 0.891006529f}, {0.471396744f, 0.881921291f},
{0.488621235f, 0.872496009f}, {0.505657375f, 0.862734377f},
{0.522498548f, 0.852640152f}, {0.539138317f, 0.842217207f},
{0.555570245f, 0.831469595f}, {0.571787953f, 0.820401430f},
{0.587785244f, 0.809017003f}, {0.603555918f, 0.797320664f},
{0.619093955f, 0.785316944f}, {0.634393275f, 0.773010433f},
{0.649448037f, 0.760405958f}, {0.664252460f, 0.747508347f},
{0.678800762f, 0.734322488f}, {0.693087339f, 0.720853567f},
{0.707106769f, 0.707106769f}, {0.720853567f, 0.693087339f},
{0.734322488f, 0.678800762f}, {0.747508347f, 0.664252460f},
{0.760405958f, 0.649448037f}, {0.773010433f, 0.634393275f},
{0.785316944f, 0.619093955f}, {0.797320664f, 0.603555918f},
{0.809017003f, 0.587785244f}, {0.820401430f, 0.571787953f},
{0.831469595f, 0.555570245f}, {0.842217207f, 0.539138317f},
{0.852640152f, 0.522498548f}, {0.862734377f, 0.505657375f},
{0.872496009f, 0.488621235f}, {0.881921291f, 0.471396744f},
{0.891006529f, 0.453990489f}, {0.899748266f, 0.436409235f},
{0.908143163f, 0.418659747f}, {0.916187942f, 0.400748819f},
{0.923879504f, 0.382683426f}, {0.931214929f, 0.364470512f},
{0.938191354f, 0.346117049f}, {0.944806039f, 0.327630192f},
{0.951056540f, 0.309017003f}, {0.956940353f, 0.290284663f},
{0.962455213f, 0.271440446f}, {0.967599094f, 0.252491564f},
{0.972369909f, 0.233445361f}, {0.976765871f, 0.214309156f},
{0.980785251f, 0.195090324f}, {0.984426558f, 0.175796285f},
{0.987688363f, 0.156434461f}, {0.990569353f, 0.137012348f},
{0.993068457f, 0.117537394f}, {0.995184720f, 0.0980171412f},
{0.996917307f, 0.0784590989f}, {0.998265624f, 0.0588708036f},
{0.999229014f, 0.0392598175f}, {0.999807239f, 0.0196336918f},
};
/* FFT configuration of size 320 that ties together the fft_bitrev and
   fft_twiddles tables defined earlier in this file. */
const kiss_fft_state kfft = {
320, /* nfft */
0.0031250000f, /* scale (equals 1/320) */
-1, /* shift */
{5, 64, 4, 16, 4, 4, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, }, /* factors (presumably radix/remaining-length pairs: 5*4*4*4 = 320) */
fft_bitrev, /* bitrev*/
fft_twiddles, /* twiddles*/
(arch_fft_state *)&arch_fft, /* arch_fft*/
};
const float half_window[] = {
3.78491532e-05f, 0.000340620492f, 0.000946046319f, 0.00185389258f, 0.00306380726f,
0.00457531959f, 0.00638783723f, 0.00850064680f, 0.0109129101f, 0.0136236614f,
0.0166318044f, 0.0199361145f, 0.0235352255f, 0.0274276342f, 0.0316116922f,
0.0360856056f, 0.0408474281f, 0.0458950549f, 0.0512262285f, 0.0568385124f,
0.0627293140f, 0.0688958541f, 0.0753351897f, 0.0820441842f, 0.0890194997f,
0.0962576419f, 0.103754878f, 0.111507311f, 0.119510807f, 0.127761051f,
0.136253506f, 0.144983411f, 0.153945804f, 0.163135484f, 0.172547072f,
0.182174906f, 0.192013159f, 0.202055752f, 0.212296382f, 0.222728521f,
0.233345464f, 0.244140238f, 0.255105674f, 0.266234398f, 0.277518868f,
0.288951218f, 0.300523549f, 0.312227666f, 0.324055225f, 0.335997701f,
0.348046392f, 0.360192508f, 0.372427016f, 0.384740859f, 0.397124738f,
0.409569323f, 0.422065198f, 0.434602767f, 0.447172493f, 0.459764689f,
0.472369671f, 0.484977663f, 0.497579008f, 0.510163903f, 0.522722721f,
0.535245717f, 0.547723293f, 0.560145974f, 0.572504222f, 0.584788740f,
0.596990347f, 0.609099925f, 0.621108532f, 0.633007407f, 0.644788086f,
0.656442165f, 0.667961538f, 0.679338276f, 0.690564752f, 0.701633692f,
0.712537885f, 0.723270535f, 0.733825266f, 0.744195819f, 0.754376352f,
0.764361382f, 0.774145722f, 0.783724606f, 0.793093503f, 0.802248418f,
0.811185598f, 0.819901764f, 0.828393936f, 0.836659551f, 0.844696403f,
0.852502763f, 0.860077202f, 0.867418647f, 0.874526560f, 0.881400526f,
0.888040781f, 0.894447744f, 0.900622249f, 0.906565487f, 0.912279010f,
0.917764664f, 0.923024654f, 0.928061485f, 0.932878017f, 0.937477291f,
0.941862822f, 0.946038187f, 0.950007319f, 0.953774393f, 0.957343817f,
0.960720181f, 0.963908315f, 0.966913164f, 0.969739914f, 0.972393870f,
0.974880517f, 0.977205336f, 0.979374051f, 0.981392324f, 0.983266115f,
0.985001266f, 0.986603677f, 0.988079309f, 0.989434063f, 0.990674019f,
0.991804957f, 0.992832899f, 0.993763626f, 0.994602919f, 0.995356441f,
0.996029854f, 0.996628702f, 0.997158289f, 0.997623861f, 0.998030603f,
0.998383403f, 0.998687088f, 0.998946249f, 0.999165416f, 0.999348700f,
0.999500215f, 0.999623775f, 0.999723017f, 0.999801278f, 0.999861658f,
0.999907196f, 0.999940455f, 0.999963880f, 0.999979615f, 0.999989510f,
0.999995291f, 0.999998271f, 0.999999523f, 0.999999940f, 1.00000000f,
};
const float dct_table[] = {
0.707106769f, 0.996194720f, 0.984807730f, 0.965925813f, 0.939692616f,
0.906307817f, 0.866025388f, 0.819152057f, 0.766044438f, 0.707106769f,
0.642787635f, 0.573576450f, 0.500000000f, 0.422618270f, 0.342020154f,
0.258819044f, 0.173648179f, 0.0871557444f, 0.707106769f, 0.965925813f,
0.866025388f, 0.707106769f, 0.500000000f, 0.258819044f, 6.12323426e-17f,
-0.258819044f, -0.500000000f, -0.707106769f, -0.866025388f, -0.965925813f,
-1.00000000f, -0.965925813f, -0.866025388f, -0.707106769f, -0.500000000f,
-0.258819044f, 0.707106769f, 0.906307817f, 0.642787635f, 0.258819044f,
-0.173648179f, -0.573576450f, -0.866025388f, -0.996194720f, -0.939692616f,
-0.707106769f, -0.342020154f, 0.0871557444f, 0.500000000f, 0.819152057f,
0.984807730f, 0.965925813f, 0.766044438f, 0.422618270f, 0.707106769f,
0.819152057f, 0.342020154f, -0.258819044f, -0.766044438f, -0.996194720f,
-0.866025388f, -0.422618270f, 0.173648179f, 0.707106769f, 0.984807730f,
0.906307817f, 0.500000000f, -0.0871557444f, -0.642787635f, -0.965925813f,
-0.939692616f, -0.573576450f, 0.707106769f, 0.707106769f, 6.12323426e-17f,
-0.707106769f, -1.00000000f, -0.707106769f, -1.83697015e-16f, 0.707106769f,
1.00000000f, 0.707106769f, 3.06161700e-16f, -0.707106769f, -1.00000000f,
-0.707106769f, -4.28626385e-16f, 0.707106769f, 1.00000000f, 0.707106769f,
0.707106769f, 0.573576450f, -0.342020154f, -0.965925813f, -0.766044438f,
0.0871557444f, 0.866025388f, 0.906307817f, 0.173648179f, -0.707106769f,
-0.984807730f, -0.422618270f, 0.500000000f, 0.996194720f, 0.642787635f,
-0.258819044f, -0.939692616f, -0.819152057f, 0.707106769f, 0.422618270f,
-0.642787635f, -0.965925813f, -0.173648179f, 0.819152057f, 0.866025388f,
-0.0871557444f, -0.939692616f, -0.707106769f, 0.342020154f, 0.996194720f,
0.500000000f, -0.573576450f, -0.984807730f, -0.258819044f, 0.766044438f,
0.906307817f, 0.707106769f, 0.258819044f, -0.866025388f, -0.707106769f,
0.500000000f, 0.965925813f, 3.06161700e-16f, -0.965925813f, -0.500000000f,
0.707106769f, 0.866025388f, -0.258819044f, -1.00000000f, -0.258819044f,
0.866025388f, 0.707106769f, -0.500000000f, -0.965925813f, 0.707106769f,
0.0871557444f, -0.984807730f, -0.258819044f, 0.939692616f, 0.422618270f,
-0.866025388f, -0.573576450f, 0.766044438f, 0.707106769f, -0.642787635f,
-0.819152057f, 0.500000000f, 0.906307817f, -0.342020154f, -0.965925813f,
0.173648179f, 0.996194720f, 0.707106769f, -0.0871557444f, -0.984807730f,
0.258819044f, 0.939692616f, -0.422618270f, -0.866025388f, 0.573576450f,
0.766044438f, -0.707106769f, -0.642787635f, 0.819152057f, 0.500000000f,
-0.906307817f, -0.342020154f, 0.965925813f, 0.173648179f, -0.996194720f,
0.707106769f, -0.258819044f, -0.866025388f, 0.707106769f, 0.500000000f,
-0.965925813f, -4.28626385e-16f, 0.965925813f, -0.500000000f, -0.707106769f,
0.866025388f, 0.258819044f, -1.00000000f, 0.258819044f, 0.866025388f,
-0.707106769f, -0.500000000f, 0.965925813f, 0.707106769f, -0.422618270f,
-0.642787635f, 0.965925813f, -0.173648179f, -0.819152057f, 0.866025388f,
0.0871557444f, -0.939692616f, 0.707106769f, 0.342020154f, -0.996194720f,
0.500000000f, 0.573576450f, -0.984807730f, 0.258819044f, 0.766044438f,
-0.906307817f, 0.707106769f, -0.573576450f, -0.342020154f, 0.965925813f,
-0.766044438f, -0.0871557444f, 0.866025388f, -0.906307817f, 0.173648179f,
0.707106769f, -0.984807730f, 0.422618270f, 0.500000000f, -0.996194720f,
0.642787635f, 0.258819044f, -0.939692616f, 0.819152057f, 0.707106769f,
-0.707106769f, -1.83697015e-16f, 0.707106769f, -1.00000000f, 0.707106769f,
5.51091070e-16f, -0.707106769f, 1.00000000f, -0.707106769f, -2.69484189e-15f,
0.707106769f, -1.00000000f, 0.707106769f, -4.90477710e-16f, -0.707106769f,
1.00000000f, -0.707106769f, 0.707106769f, -0.819152057f, 0.342020154f,
0.258819044f, -0.766044438f, 0.996194720f, -0.866025388f, 0.422618270f,
0.173648179f, -0.707106769f, 0.984807730f, -0.906307817f, 0.500000000f,
0.0871557444f, -0.642787635f, 0.965925813f, -0.939692616f, 0.573576450f,
0.707106769f, -0.906307817f, 0.642787635f, -0.258819044f, -0.173648179f,
0.573576450f, -0.866025388f, 0.996194720f, -0.939692616f, 0.707106769f,
-0.342020154f, -0.0871557444f, 0.500000000f, -0.819152057f, 0.984807730f,
-0.965925813f, 0.766044438f, -0.422618270f, 0.707106769f, -0.965925813f,
0.866025388f, -0.707106769f, 0.500000000f, -0.258819044f, 1.10280111e-15f,
0.258819044f, -0.500000000f, 0.707106769f, -0.866025388f, 0.965925813f,
-1.00000000f, 0.965925813f, -0.866025388f, 0.707106769f, -0.500000000f,
0.258819044f, 0.707106769f, -0.996194720f, 0.984807730f, -0.965925813f,
0.939692616f, -0.906307817f, 0.866025388f, -0.819152057f, 0.766044438f,
-0.707106769f, 0.642787635f, -0.573576450f, 0.500000000f, -0.422618270f,
0.342020154f, -0.258819044f, 0.173648179f, -0.0871557444f, };

64
dnn/meson.build Normal file
View File

@ -0,0 +1,64 @@
# Base source list: the deep PLC sources, extended with the DRED and OSCE
# sources when the corresponding options are enabled.
dnn_sources = sources['DEEP_PLC_SOURCES']
dred_sources = sources['DRED_SOURCES']
if opt_enable_dred
dnn_sources += dred_sources
endif
osce_sources = sources['OSCE_SOURCES']
if opt_enable_osce
dnn_sources += osce_sources
endif
# Per-instruction-set source lists. The variable names must stay of the form
# dnn_sources_<intr_name> because the foreach loop below looks them up by name.
dnn_sources_sse2 = sources['DNN_SOURCES_SSE2']
dnn_sources_sse4_1 = sources['DNN_SOURCES_SSE4_1']
dnn_sources_avx2 = sources['DNN_SOURCES_AVX2']
dnn_sources_neon_intr = sources['DNN_SOURCES_NEON']
dnn_sources_dotprod_intr = sources['DNN_SOURCES_DOTPROD']
dnn_includes = [opus_includes]
dnn_static_libs = []
# Add the runtime CPU detection (RTCD) sources when RTCD is enabled for the
# host CPU family.
if host_cpu_family in ['x86', 'x86_64'] and opus_conf.has('OPUS_HAVE_RTCD')
dnn_sources += sources['DNN_SOURCES_X86_RTCD']
endif
if host_cpu_family in ['arm', 'aarch64'] and have_arm_intrinsics_or_asm
if opus_conf.has('OPUS_HAVE_RTCD')
dnn_sources += sources['DNN_SOURCES_ARM_RTCD']
endif
endif
# Build one helper static library per available instruction set so that each
# set of sources is compiled with its own flags (opus_<intr_name>_args, or no
# extra flags when that variable does not exist).
foreach intr_name : ['sse2', 'sse4_1', 'avx2', 'neon_intr', 'dotprod_intr']
have_intr = get_variable('have_' + intr_name)
if not have_intr
continue
endif
intr_sources = get_variable('dnn_sources_' + intr_name)
intr_args = get_variable('opus_@0@_args'.format(intr_name), [])
dnn_static_libs += static_library('dnn_' + intr_name, intr_sources,
c_args: intr_args,
include_directories: dnn_includes,
install: false)
endforeach
dnn_c_args = []
if host_machine.system() == 'windows'
dnn_c_args += ['-DDLL_EXPORT']
endif
# The DNN library is only built when deep PLC is enabled; otherwise dnn_lib is
# an empty list. The per-instruction-set helper libraries are folded in with
# link_whole.
if opt_enable_deep_plc
dnn_lib = static_library('opus-dnn',
dnn_sources,
c_args: dnn_c_args,
include_directories: dnn_includes,
link_whole: [dnn_static_libs],
dependencies: libm,
install: false)
else
dnn_lib = []
endif

416
dnn/nndsp.c Normal file
View File

@ -0,0 +1,416 @@
/* Copyright (c) 2023 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "nndsp.h"
#include "arch.h"
#include "nnet.h"
#include "os_support.h"
#include "pitch.h"
#include <math.h>
#ifndef M_PI
#define M_PI 3.141592653589793f
#endif
/* Flat index of one kernel tap in a buffer laid out as
   [out_channel][in_channel][tap]. The macro reads `in_channels` and
   `kernel_size` from the scope in which it is expanded, so it can only be
   used where variables with exactly those names are visible. */
#define KERNEL_INDEX(i_out_channels, i_in_channels, i_kernel) ((((i_out_channels) * in_channels) + (i_in_channels)) * kernel_size + (i_kernel))
/* Reset an adaptive convolution state by clearing the whole AdaConvState
   object with OPUS_CLEAR. */
void init_adaconv_state(AdaConvState *hAdaConv)
{
OPUS_CLEAR(hAdaConv, 1);
}
/* Reset an adaptive comb filter state by clearing the whole AdaCombState
   object with OPUS_CLEAR. */
void init_adacomb_state(AdaCombState *hAdaComb)
{
OPUS_CLEAR(hAdaComb, 1);
}
/* Reset an adaptive shaping state by clearing the whole AdaShapeState object
   with OPUS_CLEAR. */
void init_adashape_state(AdaShapeState *hAdaShape)
{
OPUS_CLEAR(hAdaShape, 1);
}
/* Fill window[0..overlap_size-1] with the raised-cosine values
   0.5 + 0.5*cos(pi*(n + 0.5)/overlap_size), which decrease from just below 1
   to just above 0. adaconv_process_frame() applies window[n] to the output of
   the previous frame's kernel and 1 - window[n] to that of the new kernel. */
void compute_overlap_window(float *window, int overlap_size)
{
    int n = 0;

    while (n < overlap_size)
    {
        window[n] = 0.5f + 0.5f * cos(M_PI * (n + 0.5f) / overlap_size);
        n++;
    }
}
#ifdef DEBUG_NNDSP
/* Debug helper (built only with DEBUG_NNDSP): prints every element of vec on
   its own line as "name[index]: value". */
void print_float_vector(const char* name, const float *vec, int length)
{
    /* Declared at block start like everywhere else in this file, so the debug
       build does not depend on C99 loop-scoped declarations. */
    int i;

    for (i = 0; i < length; i++)
    {
        printf("%s[%d]: %f\n", name, i, vec[i]);
    }
}
#endif
/* Rescales the kernel of every output channel in place: the taps of one
   output channel (all input channels, all tap positions) are divided by their
   Euclidean norm plus 1e-6 and multiplied by gain[output channel].
   The buffer layout is [out_channel][in_channel][tap]. */
static void scale_kernel(
    float *kernel,
    int in_channels,
    int out_channels,
    int kernel_size,
    float *gain
)
{
    int o, c, k;

    for (o = 0; o < out_channels; o++)
    {
        float norm = 0;

        /* sum of squares over everything that belongs to output channel o */
        for (c = 0; c < in_channels; c++)
        {
            const int base = (o * in_channels + c) * kernel_size;
            for (k = 0; k < kernel_size; k++)
            {
                norm += kernel[base + k] * kernel[base + k];
            }
        }
#ifdef DEBUG_NNDSP
        printf("kernel norm: %f, %f\n", norm, sqrt(norm));
#endif
        /* from here on norm holds the reciprocal of the regularized norm */
        norm = 1.f / (1e-6f + sqrt(norm));
        for (c = 0; c < in_channels; c++)
        {
            const int base = (o * in_channels + c) * kernel_size;
            for (k = 0; k < kernel_size; k++)
            {
                kernel[base + k] *= norm * gain[o];
            }
        }
    }
}
static void transform_gains(
float *gains,
int num_gains,
float filter_gain_a,
float filter_gain_b
)
{
int i;
for (i = 0; i < num_gains; i++)
{
gains[i] = exp(filter_gain_a * gains[i] + filter_gain_b);
}
}
/* Processes one frame with an adaptive (feature-dependent) convolution.
   A new kernel and new per-output-channel gains are computed from `features`
   (kernel_layer with linear activation, gain_layer with tanh activation); the
   gains are mapped through exp(filter_gain_a * g + filter_gain_b) and the
   kernel is normalized and scaled by them (scale_kernel). Over the first
   overlap_size samples the output cross-fades from the previous frame's
   kernel (weight window[n]) to the new one (weight 1 - window[n]); the rest of
   the frame uses the new kernel only.
   x_in holds in_channels blocks of frame_size samples and x_out receives
   out_channels blocks of frame_size samples. hAdaConv keeps the last
   kernel_size input samples of every channel and the last kernel.
   Asserted preconditions: shape_gain == 1, left_padding == kernel_size - 1
   and kernel_size < frame_size. */
void adaconv_process_frame(
AdaConvState* hAdaConv,
float *x_out,
const float *x_in,
const float *features,
const LinearLayer *kernel_layer,
const LinearLayer *gain_layer,
int feature_dim,
int frame_size,
int overlap_size,
int in_channels,
int out_channels,
int kernel_size,
int left_padding,
float filter_gain_a,
float filter_gain_b,
float shape_gain,
float *window,
int arch
)
{
float output_buffer[ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS];
float kernel_buffer[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS];
float input_buffer[ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE)];
float kernel0[ADACONV_MAX_KERNEL_SIZE];
float kernel1[ADACONV_MAX_KERNEL_SIZE];
float channel_buffer0[ADACONV_MAX_OVERLAP_SIZE];
float channel_buffer1[ADACONV_MAX_FRAME_SIZE];
float gain_buffer[ADACONV_MAX_OUTPUT_CHANNELS];
float *p_input;
int i_in_channels, i_out_channels, i_sample;
(void) feature_dim; /* ToDo: figure out whether we might need this information */
celt_assert(shape_gain == 1);
celt_assert(left_padding == kernel_size - 1); /* currently only supports causal version. Non-causal version not difficult to implement but will require third loop */
celt_assert(kernel_size < frame_size);
/* Outputs are accumulated over input channels, so start from zero. */
OPUS_CLEAR(output_buffer, ADACONV_MAX_FRAME_SIZE * ADACONV_MAX_OUTPUT_CHANNELS);
OPUS_CLEAR(kernel_buffer, ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS);
OPUS_CLEAR(input_buffer, ADACONV_MAX_INPUT_CHANNELS * (ADACONV_MAX_FRAME_SIZE + ADACONV_MAX_KERNEL_SIZE));
#ifdef DEBUG_NNDSP
print_float_vector("x_in", x_in, in_channels * frame_size);
#endif
/* prepare input: each channel occupies kernel_size + frame_size entries,
   the saved history followed by the new frame */
for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
{
OPUS_COPY(input_buffer + i_in_channels * (kernel_size + frame_size), hAdaConv->history + i_in_channels * kernel_size, kernel_size);
OPUS_COPY(input_buffer + kernel_size + i_in_channels * (kernel_size + frame_size), x_in + frame_size * i_in_channels, frame_size);
}
/* p_input points at the first new sample of channel 0 */
p_input = input_buffer + kernel_size;
/* calculate new kernel and new gain */
compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
compute_generic_dense(gain_layer, gain_buffer, features, ACTIVATION_TANH, arch);
#ifdef DEBUG_NNDSP
print_float_vector("features", features, feature_dim);
print_float_vector("adaconv_kernel_raw", kernel_buffer, in_channels * out_channels * kernel_size);
print_float_vector("adaconv_gain_raw", gain_buffer, out_channels);
#endif
transform_gains(gain_buffer, out_channels, filter_gain_a, filter_gain_b);
scale_kernel(kernel_buffer, in_channels, out_channels, kernel_size, gain_buffer);
#ifdef DEBUG_NNDSP
print_float_vector("adaconv_kernel", kernel_buffer, in_channels * out_channels * kernel_size);
print_float_vector("adaconv_gain", gain_buffer, out_channels);
#endif
/* calculate overlapping part using kernel from last frame */
for (i_out_channels = 0; i_out_channels < out_channels; i_out_channels++)
{
for (i_in_channels = 0; i_in_channels < in_channels; i_in_channels++)
{
/* Both kernels are zero-padded to ADACONV_MAX_KERNEL_SIZE because the
   correlation below is always run with that length. */
OPUS_CLEAR(kernel0, ADACONV_MAX_KERNEL_SIZE);
OPUS_CLEAR(kernel1, ADACONV_MAX_KERNEL_SIZE);
OPUS_COPY(kernel0, hAdaConv->last_kernel + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
OPUS_COPY(kernel1, kernel_buffer + KERNEL_INDEX(i_out_channels, i_in_channels, 0), kernel_size);
/* The previous kernel is only needed for the overlap_size cross-fade
   samples, the new kernel for the whole frame. The input starts
   left_padding samples before the frame, i.e. inside the saved history. */
celt_pitch_xcorr(kernel0, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer0, ADACONV_MAX_KERNEL_SIZE, overlap_size, arch);
celt_pitch_xcorr(kernel1, p_input + i_in_channels * (frame_size + kernel_size) - left_padding, channel_buffer1, ADACONV_MAX_KERNEL_SIZE, frame_size, arch);
/* cross-fade from the previous kernel's output to the new kernel's output */
for (i_sample = 0; i_sample < overlap_size; i_sample++)
{
output_buffer[i_sample + i_out_channels * frame_size] += window[i_sample] * channel_buffer0[i_sample];
output_buffer[i_sample + i_out_channels * frame_size] += (1.f - window[i_sample]) * channel_buffer1[i_sample];
}
/* remainder of the frame: new kernel only */
for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
{
output_buffer[i_sample + i_out_channels * frame_size] += channel_buffer1[i_sample];
}
}
}
OPUS_COPY(x_out, output_buffer, out_channels * frame_size);
#ifdef DEBUG_NNDSP
print_float_vector("x_out", x_out, out_channels * frame_size);
#endif
/* buffer update: keep the last kernel_size input samples of every channel
   and the kernel just used, for the next frame */
for (i_in_channels=0; i_in_channels < in_channels; i_in_channels ++)
{
OPUS_COPY(hAdaConv->history + i_in_channels * kernel_size, p_input + i_in_channels * (frame_size + kernel_size) + frame_size - kernel_size, kernel_size);
}
OPUS_COPY(hAdaConv->last_kernel, kernel_buffer, kernel_size * in_channels * out_channels);
}
/* Adaptive comb filter: filters one frame of x_in into x_out.
 *
 * A kernel, a kernel gain and a global gain are predicted from `features`.
 * The kernel is applied to the input delayed by `pitch_lag` (plus
 * `left_padding`), the result is added to the undelayed input and scaled by
 * the global gain. Over the first `overlap_size` samples the output is
 * cross-faded from the previous frame's kernel / lag / global gain to the new
 * ones, with window[i] weighting the previous frame's contribution.
 *
 * hAdaComb           state carried between frames (history, last kernel,
 *                    last pitch lag, last global gain); updated on return
 * x_out              receives frame_size output samples
 * x_in               frame_size input samples
 * features           input of the three dense layers
 * kernel_layer       dense layer producing the raw kernel (linear activation)
 * gain_layer         dense layer producing the raw kernel gain (ReLU)
 * global_gain_layer  dense layer producing the raw global gain (tanh)
 * pitch_lag          delay in samples applied to the filtered branch
 * feature_dim        only used by the debug dump
 * frame_size         samples per frame, at most ADACOMB_MAX_FRAME_SIZE
 * overlap_size       length of the cross-fade, at most frame_size
 * kernel_size        number of kernel taps, at most ADACOMB_MAX_KERNEL_SIZE
 * left_padding       additional offset subtracted from the read position
 * filter_gain_a/b    global gain is exp(filter_gain_a * tanh_out + filter_gain_b)
 * log_gain_limit     kernel gain is exp(log_gain_limit - relu_out)
 * window             overlap_size cross-fade weights
 * arch               run-time CPU selector forwarded to the kernels
 */
void adacomb_process_frame(
    AdaCombState* hAdaComb,
    float *x_out,
    const float *x_in,
    const float *features,
    const LinearLayer *kernel_layer,
    const LinearLayer *gain_layer,
    const LinearLayer *global_gain_layer,
    int pitch_lag,
    int feature_dim,
    int frame_size,
    int overlap_size,
    int kernel_size,
    int left_padding,
    float filter_gain_a,
    float filter_gain_b,
    float log_gain_limit,
    float *window,
    int arch
)
{
    float output_buffer[ADACOMB_MAX_FRAME_SIZE];
    float output_buffer_last[ADACOMB_MAX_FRAME_SIZE];
    float kernel_buffer[ADACOMB_MAX_KERNEL_SIZE];
    float input_buffer[ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE];
    float gain, global_gain;
    float *p_input;
    int i_sample;
    /* Sized with the same constant that is used to clear them and as the
       correlation length below, so the three can never drift apart. */
    float kernel[ADACOMB_MAX_KERNEL_SIZE];
    float last_kernel[ADACOMB_MAX_KERNEL_SIZE];

    (void) feature_dim; /* ToDo: figure out whether we might need this information */

    /* All scratch buffers are fixed-size stack arrays: reject sizes and lags
       that would index outside of them. */
    celt_assert(kernel_size >= 0 && kernel_size <= ADACOMB_MAX_KERNEL_SIZE);
    celt_assert(frame_size <= ADACOMB_MAX_FRAME_SIZE);
    celt_assert(overlap_size <= frame_size);
    celt_assert(left_padding + pitch_lag <= kernel_size + ADACOMB_MAX_LAG);
    celt_assert(left_padding + hAdaComb->last_pitch_lag <= kernel_size + ADACOMB_MAX_LAG);

    OPUS_CLEAR(output_buffer, ADACOMB_MAX_FRAME_SIZE);
    OPUS_CLEAR(kernel_buffer, ADACOMB_MAX_KERNEL_SIZE);
    OPUS_CLEAR(input_buffer, ADACOMB_MAX_FRAME_SIZE + ADACOMB_MAX_LAG + ADACOMB_MAX_KERNEL_SIZE);

    /* input_buffer = [ history (kernel_size + ADACOMB_MAX_LAG) | current frame ] */
    OPUS_COPY(input_buffer, hAdaComb->history, kernel_size + ADACOMB_MAX_LAG);
    OPUS_COPY(input_buffer + kernel_size + ADACOMB_MAX_LAG, x_in, frame_size);
    p_input = input_buffer + kernel_size + ADACOMB_MAX_LAG;

    /* calculate new kernel and new gain */
    compute_generic_dense(kernel_layer, kernel_buffer, features, ACTIVATION_LINEAR, arch);
    compute_generic_dense(gain_layer, &gain, features, ACTIVATION_RELU, arch);
    compute_generic_dense(global_gain_layer, &global_gain, features, ACTIVATION_TANH, arch);

#ifdef DEBUG_NNDSP
    print_float_vector("features", features, feature_dim);
    print_float_vector("adacomb_kernel_raw", kernel_buffer, kernel_size);
    print_float_vector("adacomb_gain_raw", &gain, 1);
    print_float_vector("adacomb_global_gain_raw", &global_gain, 1);
#endif

    gain = exp(log_gain_limit - gain);
    global_gain = exp(filter_gain_a * global_gain + filter_gain_b);
    scale_kernel(kernel_buffer, 1, 1, kernel_size, &gain);

#ifdef DEBUG_NNDSP
    print_float_vector("adacomb_kernel", kernel_buffer, kernel_size);
    print_float_vector("adacomb_gain", &gain, 1);
#endif

    /* The correlation always runs over ADACOMB_MAX_KERNEL_SIZE taps, so both
       kernels are zero-padded up to that length. */
    OPUS_CLEAR(kernel, ADACOMB_MAX_KERNEL_SIZE);
    OPUS_CLEAR(last_kernel, ADACOMB_MAX_KERNEL_SIZE);
    OPUS_COPY(kernel, kernel_buffer, kernel_size);
    OPUS_COPY(last_kernel, hAdaComb->last_kernel, kernel_size);

    /* previous kernel/lag is only needed over the overlap, the new one over the whole frame */
    celt_pitch_xcorr(last_kernel, &p_input[- left_padding - hAdaComb->last_pitch_lag], output_buffer_last, ADACOMB_MAX_KERNEL_SIZE, overlap_size, arch);
    celt_pitch_xcorr(kernel, &p_input[- left_padding - pitch_lag], output_buffer, ADACOMB_MAX_KERNEL_SIZE, frame_size, arch);

    /* overlap: cross-fade filtered signal and global gain from old to new */
    for (i_sample = 0; i_sample < overlap_size; i_sample++)
    {
        output_buffer[i_sample] = hAdaComb->last_global_gain * window[i_sample] * output_buffer_last[i_sample] + global_gain * (1.f - window[i_sample]) * output_buffer[i_sample];
        output_buffer[i_sample] += (window[i_sample] * hAdaComb->last_global_gain + (1.f - window[i_sample]) * global_gain) * p_input[i_sample];
    }

    /* remainder of the frame: new filter only */
    for (i_sample = overlap_size; i_sample < frame_size; i_sample++)
    {
        output_buffer[i_sample] = global_gain * (output_buffer[i_sample] + p_input[i_sample]);
    }

    OPUS_COPY(x_out, output_buffer, frame_size);

#ifdef DEBUG_NNDSP
    print_float_vector("x_out", x_out, frame_size);
#endif

    /* buffer update */
    OPUS_COPY(hAdaComb->last_kernel, kernel_buffer, kernel_size);
    OPUS_COPY(hAdaComb->history, p_input + frame_size - kernel_size - ADACOMB_MAX_LAG, kernel_size + ADACOMB_MAX_LAG);
    hAdaComb->last_pitch_lag = pitch_lag;
    hAdaComb->last_global_gain = global_gain;
}
/* Adaptive temporal shaping of one frame.
 *
 * A log-domain temporal envelope of x_in is computed by average-pooling
 * |x_in| over blocks of avg_pool_k samples; its mean is removed and appended
 * as an extra value. Features and envelope are each passed through a conv1d
 * layer (alpha1f / alpha1t), the sum goes through a leaky ReLU (slope 0.2)
 * and a further conv1d layer (alpha2). The exponential of that result is the
 * per-sample gain applied to x_in.
 *
 * hAdaShape    conv1d memories of the three layers
 * x_out        receives frame_size shaped samples
 * x_in         frame_size input samples
 * features     feature_dim conditioning values
 * frame_size   samples per frame; must be a multiple of avg_pool_k
 * arch         run-time CPU selector forwarded to the layers
 */
void adashape_process_frame(
    AdaShapeState *hAdaShape,
    float *x_out,
    const float *x_in,
    const float *features,
    const LinearLayer *alpha1f,
    const LinearLayer *alpha1t,
    const LinearLayer *alpha2,
    int feature_dim,
    int frame_size,
    int avg_pool_k,
    int arch
)
{
    float in_buffer[ADASHAPE_MAX_INPUT_DIM + ADASHAPE_MAX_FRAME_SIZE];
    float out_buffer[ADASHAPE_MAX_FRAME_SIZE];
    float tmp_buffer[ADASHAPE_MAX_FRAME_SIZE];
    int n, j;
    int tenv_size;
    float mean;
    float *tenv;

    celt_assert(frame_size % avg_pool_k == 0);
    celt_assert(feature_dim + frame_size / avg_pool_k + 1 < ADASHAPE_MAX_INPUT_DIM);

    /* in_buffer = [ features | envelope (tenv_size) | envelope mean ] */
    tenv_size = frame_size / avg_pool_k;
    tenv = &in_buffer[feature_dim];
    OPUS_COPY(in_buffer, features, feature_dim);
    OPUS_CLEAR(tenv, tenv_size + 1);

    /* calculate temporal envelope */
    mean = 0;
    for (n = 0; n < tenv_size; n++)
    {
        const float *block = x_in + n * avg_pool_k;
        float acc = 0;

        for (j = 0; j < avg_pool_k; j++)
        {
            acc += fabs(block[j]);
        }
        /* small offset keeps the logarithm finite for an all-zero block */
        tenv[n] = log(acc / avg_pool_k + 1.52587890625e-05f);
        mean += tenv[n];
    }
    mean /= tenv_size;

    for (n = 0; n < tenv_size; n++)
    {
        tenv[n] -= mean;
    }
    tenv[tenv_size] = mean;

#ifdef DEBUG_NNDSP
    print_float_vector("tenv", tenv, tenv_size + 1);
    print_float_vector("alpha1_in", in_buffer, feature_dim + tenv_size + 1);
#endif

    /* calculate temporal weights */
    compute_generic_conv1d(alpha1f, out_buffer, hAdaShape->conv_alpha1f_state, in_buffer, feature_dim, ACTIVATION_LINEAR, arch);
    compute_generic_conv1d(alpha1t, tmp_buffer, hAdaShape->conv_alpha1t_state, tenv, tenv_size + 1, ACTIVATION_LINEAR, arch);

#ifdef DEBUG_NNDSP
    print_float_vector("alpha1_out", out_buffer, frame_size);
#endif

    /* compute leaky ReLU by hand. ToDo: try tanh activation */
    for (n = 0; n < frame_size; n++)
    {
        float pre_act = out_buffer[n] + tmp_buffer[n];

        if (pre_act >= 0)
        {
            in_buffer[n] = pre_act;
        }
        else
        {
            in_buffer[n] = 0.2 * pre_act;
        }
    }

#ifdef DEBUG_NNDSP
    print_float_vector("post_alpha1", in_buffer, frame_size);
#endif

    compute_generic_conv1d(alpha2, out_buffer, hAdaShape->conv_alpha2_state, in_buffer, frame_size, ACTIVATION_LINEAR, arch);

    /* shape signal */
    for (n = 0; n < frame_size; n++)
    {
        x_out[n] = exp(out_buffer[n]) * x_in[n];
    }
}

143
dnn/nndsp.h Normal file
View File

@ -0,0 +1,143 @@
/* Copyright (c) 2023 Amazon
Written by Jan Buethe */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef NNDSP_H
#define NNDSP_H
#include "opus_types.h"
#include "nnet.h"
#include <string.h>
/* Compile-time size limits for AdaConv; they dimension its state arrays and
   the scratch buffers used while processing a frame. */
#define ADACONV_MAX_KERNEL_SIZE 16
#define ADACONV_MAX_INPUT_CHANNELS 2
#define ADACONV_MAX_OUTPUT_CHANNELS 2
#define ADACONV_MAX_FRAME_SIZE 80
#define ADACONV_MAX_OVERLAP_SIZE 40
/* Compile-time size limits for AdaComb. ADACOMB_MAX_LAG is the amount of
   input history kept in addition to the kernel span. */
#define ADACOMB_MAX_LAG 300
#define ADACOMB_MAX_KERNEL_SIZE 16
#define ADACOMB_MAX_FRAME_SIZE 80
#define ADACOMB_MAX_OVERLAP_SIZE 40
/* Compile-time size limits for AdaShape (layer input dimension and frame length). */
#define ADASHAPE_MAX_INPUT_DIM 512
#define ADASHAPE_MAX_FRAME_SIZE 160
/* Uncomment to enable the print_float_vector() dumps guarded by DEBUG_NNDSP
   in the *_process_frame functions. */
/*#define DEBUG_NNDSP*/
#ifdef DEBUG_NNDSP
#include <stdio.h>
#endif
/* Debug helper, declared regardless of DEBUG_NNDSP.
   NOTE(review): presumably prints `length` values of `vec` labelled with
   `name`; the definition is not part of this header -- confirm. */
void print_float_vector(const char* name, const float *vec, int length);
/* State of the adaptive convolution that is carried from one frame to the next. */
typedef struct {
/* last kernel_size input samples of every input channel, one channel after the other */
float history[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS];
/* kernel of the most recent frame; the next frame cross-fades from it to its own kernel */
float last_kernel[ADACONV_MAX_KERNEL_SIZE * ADACONV_MAX_INPUT_CHANNELS * ADACONV_MAX_OUTPUT_CHANNELS];
/* NOTE(review): purpose not evident from the frame processing code -- confirm whether still used */
float last_gain;
} AdaConvState;
/* State of the adaptive comb filter that is carried from one frame to the next. */
typedef struct {
/* most recent kernel_size + ADACOMB_MAX_LAG input samples */
float history[ADACOMB_MAX_KERNEL_SIZE + ADACOMB_MAX_LAG];
/* scaled kernel of the most recent frame, used during the overlap of the next one */
float last_kernel[ADACOMB_MAX_KERNEL_SIZE];
/* global gain of the most recent frame, cross-faded into the next frame's gain */
float last_global_gain;
/* pitch lag of the most recent frame, used for the old-kernel branch of the overlap */
int last_pitch_lag;
} AdaCombState;
/* State of the adaptive shaping module: one conv1d memory per layer, handed
   to compute_generic_conv1d() as its `mem` argument. */
typedef struct {
/* memory of the alpha1f layer (feature branch) */
float conv_alpha1f_state[ADASHAPE_MAX_INPUT_DIM];
/* memory of the alpha1t layer (temporal envelope branch) */
float conv_alpha1t_state[ADASHAPE_MAX_INPUT_DIM];
/* memory of the alpha2 layer */
float conv_alpha2_state[ADASHAPE_MAX_FRAME_SIZE];
} AdaShapeState;
/* State initialisers, one per module.
   NOTE(review): presumably they reset the state to all zeros; the definitions
   are not part of this header -- confirm. */
void init_adaconv_state(AdaConvState *hAdaConv);
void init_adacomb_state(AdaCombState *hAdaComb);
void init_adashape_state(AdaShapeState *hAdaShape);
/* Fills `window` with overlap_size cross-fade weights for the *_process_frame
   functions. NOTE(review): window shape is defined in the implementation -- confirm. */
void compute_overlap_window(float *window, int overlap_size);
/* Adaptive convolution of one frame.
 *
 * A kernel and per-output-channel gains are predicted from `features`; every
 * output channel is the sum over the input channels of the input filtered
 * with the corresponding kernel. Over the first overlap_size samples the
 * output cross-fades from the previous frame's kernel to the new one, with
 * window[i] weighting the previous kernel.
 *
 * x_in is in_channels * frame_size samples and x_out is out_channels *
 * frame_size samples, both stored channel after channel.
 *
 * The implementation asserts shape_gain == 1, left_padding == kernel_size - 1
 * (causal filtering only) and kernel_size < frame_size.
 */
void adaconv_process_frame(
AdaConvState* hAdaConv,
float *x_out,
const float *x_in,
const float *features,
const LinearLayer *kernel_layer,
const LinearLayer *gain_layer,
int feature_dim, /* not strictly necessary */
int frame_size,
int overlap_size,
int in_channels,
int out_channels,
int kernel_size,
int left_padding,
float filter_gain_a,
float filter_gain_b,
float shape_gain,
float *window,
int arch
);
/* Adaptive comb filtering of one frame (single channel).
 *
 * A kernel, a kernel gain (exp(log_gain_limit - relu output)) and a global
 * gain (exp(filter_gain_a * tanh output + filter_gain_b)) are predicted from
 * `features`. The kernel is applied to the input delayed by pitch_lag and
 * left_padding, the result is added to the input and scaled by the global
 * gain. Over the first overlap_size samples the output cross-fades from the
 * previous frame's kernel, lag and global gain to the new ones, with
 * window[i] weighting the previous frame.
 *
 * x_in and x_out hold frame_size samples each; hAdaComb is updated with the
 * new history, kernel, pitch lag and global gain.
 */
void adacomb_process_frame(
AdaCombState* hAdaComb,
float *x_out,
const float *x_in,
const float *features,
const LinearLayer *kernel_layer,
const LinearLayer *gain_layer,
const LinearLayer *global_gain_layer,
int pitch_lag,
int feature_dim,
int frame_size,
int overlap_size,
int kernel_size,
int left_padding,
float filter_gain_a,
float filter_gain_b,
float log_gain_limit,
float *window,
int arch
);
/* Adaptive temporal shaping of one frame.
 *
 * Computes a mean-removed log envelope of x_in (average of |x_in| over blocks
 * of avg_pool_k samples), runs features and envelope through the conv1d
 * layers alpha1f / alpha1t, applies a leaky ReLU to their sum, runs alpha2 and
 * multiplies x_in sample by sample with the exponential of the result.
 *
 * frame_size must be a multiple of avg_pool_k, and
 * feature_dim + frame_size / avg_pool_k + 1 must be below ADASHAPE_MAX_INPUT_DIM.
 */
void adashape_process_frame(
AdaShapeState *hAdaShape,
float *x_out,
const float *x_in,
const float *features,
const LinearLayer *alpha1f,
const LinearLayer *alpha1t,
const LinearLayer *alpha2,
int feature_dim,
int frame_size,
int avg_pool_k,
int arch
);
#endif

149
dnn/nnet.c Normal file
View File

@ -0,0 +1,149 @@
/* Copyright (c) 2018 Mozilla
2008-2011 Octasic Inc.
2012-2017 Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdlib.h>
#include <math.h>
#include "opus_types.h"
#include "arch.h"
#include "nnet.h"
#include "dred_rdovae_constants.h"
#include "plc_data.h"
#include "fargan.h"
#include "os_support.h"
#include "vec.h"
#ifdef ENABLE_OSCE
#include "osce.h"
#endif
/* Emit a build-time notice when NO_OPTIMIZATIONS is defined, using the
   message mechanism of the compiler in use. */
#ifdef NO_OPTIMIZATIONS
#if defined(_MSC_VER)
#pragma message ("Compiling without any vectorization. This code will be very slow")
#else
#warning Compiling without any vectorization. This code will be very slow
#endif
#endif
/* When SOFTMAX_HACK is defined, the activation code treats ACTIVATION_SOFTMAX
   as a plain copy instead of normalising.
   NOTE(review): confirm this definition is visible in the translation unit
   that actually compiles the activation code. */
#define SOFTMAX_HACK
/* Fully connected layer: output = activation(linear(input)).
   The activation is applied in place on `output`, which receives
   layer->nb_outputs values. */
void compute_generic_dense(const LinearLayer *layer, float *output, const float *input, int activation, int arch)
{
   const int nb_out = layer->nb_outputs;

   compute_linear(layer, output, input, arch);
   compute_activation(output, output, nb_out, activation, arch);
}
/* Largest GRU size over all models built into the library; dimensions the
   stack buffers of compute_generic_gru(). The OSCE limit only takes part
   when ENABLE_OSCE is defined. */
#ifdef ENABLE_OSCE
#define MAX_RNN_NEURONS_ALL IMAX(IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_UNITS), DRED_MAX_RNN_NEURONS), OSCE_MAX_RNN_NEURONS)
#else
#define MAX_RNN_NEURONS_ALL IMAX(IMAX(FARGAN_MAX_RNN_NEURONS, PLC_MAX_RNN_UNITS), DRED_MAX_RNN_NEURONS)
#endif
/* One GRU step, updating `state` in place.
 *
 * input_weights      maps `in` to the 3*N gate pre-activations
 * recurrent_weights  maps the N state values to 3*N recurrent contributions
 * state              N state values, read and overwritten
 * in                 layer input; must not alias `state`
 *
 * The 3*N values are laid out as [update | reset | candidate].
 */
void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch)
{
   int k;
   int nb_neurons;
   float gates[3*MAX_RNN_NEURONS_ALL];
   float recur[3*MAX_RNN_NEURONS_ALL];
   float *update;
   float *reset;
   float *cand;

   celt_assert(3*recurrent_weights->nb_inputs == recurrent_weights->nb_outputs);
   celt_assert(input_weights->nb_outputs == recurrent_weights->nb_outputs);
   celt_assert(recurrent_weights->nb_outputs <= 3*MAX_RNN_NEURONS_ALL);
   celt_assert(in != state);

   nb_neurons = recurrent_weights->nb_inputs;
   update = &gates[0];
   reset = &gates[nb_neurons];
   cand = &gates[2*nb_neurons];

   compute_linear(input_weights, gates, in, arch);
   compute_linear(recurrent_weights, recur, state, arch);

   /* update and reset gates: sigmoid(input part + recurrent part) */
   for (k=0;k<2*nb_neurons;k++) {
      gates[k] += recur[k];
   }
   compute_activation(gates, gates, 2*nb_neurons, ACTIVATION_SIGMOID, arch);

   /* candidate: tanh(input part + reset * recurrent part) */
   for (k=0;k<nb_neurons;k++) {
      cand[k] += recur[2*nb_neurons+k]*reset[k];
   }
   compute_activation(cand, cand, nb_neurons, ACTIVATION_TANH, arch);

   /* blend old state and candidate with the update gate */
   for (k=0;k<nb_neurons;k++) {
      state[k] = update[k]*state[k] + (1-update[k])*cand[k];
   }
}
/* Gated linear unit: output[i] = input[i] * sigmoid(linear(input))[i].
 *
 * layer   square layer (nb_inputs == nb_outputs) computing the gate
 * output  receives nb_outputs values; may be the same buffer as `input`
 * input   nb_inputs values
 * arch    run-time CPU selector forwarded to the kernels
 */
void compute_glu(const LinearLayer *layer, float *output, const float *input, int arch)
{
   int i;
   float act2[MAX_INPUTS];
   celt_assert(layer->nb_inputs == layer->nb_outputs);
   /* act2 is a fixed-size stack buffer that receives nb_outputs values. */
   celt_assert(layer->nb_outputs <= MAX_INPUTS);
   compute_linear(layer, act2, input, arch);
   compute_activation(act2, act2, layer->nb_outputs, ACTIVATION_SIGMOID, arch);
   if (input == output) {
     /* Give a vectorization hint to the compiler for the in-place case. */
     for (i=0;i<layer->nb_outputs;i++) output[i] = output[i]*act2[i];
   } else {
     for (i=0;i<layer->nb_outputs;i++) output[i] = input[i]*act2[i];
   }
}
/* Upper bound on layer->nb_inputs for the conv1d functions below; dimensions
   their stack buffer. */
#define MAX_CONV_INPUTS_ALL DRED_MAX_CONV_INPUTS
/* Causal 1-D convolution step implemented as a dense layer over
 * [ memory | new input ].
 *
 * layer       dense layer with nb_inputs = memory length + input_size
 * output      receives nb_outputs activated values; must not alias `input`
 * mem         nb_inputs - input_size values of history, updated on return;
 *             not touched when the layer has no history
 * input       input_size new values
 * activation  one of the ACTIVATION_* identifiers
 */
void compute_generic_conv1d(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int activation, int arch)
{
   float tmp[MAX_CONV_INPUTS_ALL];
   int hist_len;

   celt_assert(input != output);
   celt_assert(layer->nb_inputs <= MAX_CONV_INPUTS_ALL);

   hist_len = layer->nb_inputs-input_size;

   /* assemble [ history | input ] */
   if (hist_len != 0) {
      OPUS_COPY(tmp, mem, hist_len);
   }
   OPUS_COPY(&tmp[hist_len], input, input_size);

   compute_linear(layer, output, tmp, arch);
   compute_activation(output, output, layer->nb_outputs, activation, arch);

   /* slide the window: drop the oldest input_size values */
   if (hist_len != 0) {
      OPUS_COPY(mem, &tmp[input_size], hist_len);
   }
}
/* Causal 1-D convolution step with dilation, implemented as a dense layer.
 *
 * layer       dense layer with nb_inputs = ksize * input_size
 * output      receives nb_outputs activated values; must not alias `input`
 * mem         history, updated on return. With dilation == 1 it holds
 *             nb_inputs - input_size values; otherwise it holds
 *             input_size * dilation * (ksize - 1) values, of which every
 *             dilation-th frame is fed to the layer
 * input       input_size new values
 * dilation    spacing, in frames, between the taps of the kernel
 * activation  one of the ACTIVATION_* identifiers
 */
void compute_generic_conv1d_dilation(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int dilation, int activation, int arch)
{
   float tmp[MAX_CONV_INPUTS_ALL];
   int ksize = layer->nb_inputs/input_size;
   int i;
   celt_assert(input != output);
   celt_assert(layer->nb_inputs <= MAX_CONV_INPUTS_ALL);
   if (dilation==1) OPUS_COPY(tmp, mem, layer->nb_inputs-input_size);
   else for (i=0;i<ksize-1;i++) OPUS_COPY(&tmp[i*input_size], &mem[i*input_size*dilation], input_size);
   OPUS_COPY(&tmp[layer->nb_inputs-input_size], input, input_size);
   compute_linear(layer, output, tmp, arch);
   compute_activation(output, output, layer->nb_outputs, activation, arch);
   if (dilation==1) OPUS_COPY(mem, &tmp[input_size], layer->nb_inputs-input_size);
   else {
     /* Shift the history down by one frame. Source and destination both lie
        inside mem and overlap, so a memmove-style copy is required; a
        memcpy-style copy on overlapping regions is undefined. */
     OPUS_MOVE(mem, &mem[input_size], input_size*dilation*(ksize-1)-input_size);
     OPUS_COPY(&mem[input_size*dilation*(ksize-1)-input_size], input, input_size);
   }
}

163
dnn/nnet.h Normal file
View File

@ -0,0 +1,163 @@
/* Copyright (c) 2018 Mozilla
Copyright (c) 2017 Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef NNET_H_
#define NNET_H_
#include <stddef.h>
#include "opus_types.h"
/* Identifiers accepted by the `activation` argument of the compute_* functions. */
#define ACTIVATION_LINEAR 0
#define ACTIVATION_SIGMOID 1
#define ACTIVATION_TANH 2
#define ACTIVATION_RELU 3
#define ACTIVATION_SOFTMAX 4
#define ACTIVATION_SWISH 5
/* NOTE(review): presumably format version and block granularity of the binary
   weight blob read by parse_weights() -- confirm against its implementation. */
#define WEIGHT_BLOB_VERSION 0
#define WEIGHT_BLOCK_SIZE 64
/* One named array of model weights, as passed to linear_init() / conv2d_init()
   in tables of WeightArray. */
typedef struct {
/* name the array is looked up by */
const char *name;
/* NOTE(review): presumably one of the WEIGHT_TYPE_* values below -- confirm */
int type;
/* NOTE(review): size of `data`; unit (bytes vs. elements) to be confirmed */
int size;
/* the weight data itself; not owned by this struct as far as this header shows */
const void *data;
} WeightArray;
/* Element type tags for weight arrays. */
#define WEIGHT_TYPE_float 0
#define WEIGHT_TYPE_int 1
#define WEIGHT_TYPE_qweight 2
#define WEIGHT_TYPE_int8 3
/* Fixed-size record describing one weight array.
   NOTE(review): presumably the per-array header of the binary blob consumed
   by parse_weights(); field meanings below follow their names only -- confirm. */
typedef struct {
/* 4-character tag */
char head[4];
/* presumably compared against WEIGHT_BLOB_VERSION -- confirm */
int version;
/* presumably a WEIGHT_TYPE_* value -- confirm */
int type;
int size;
int block_size;
/* array name, at most 44 characters of storage */
char name[44];
} WeightHead;
/* Generic sparse affine transformation.
   Pointers that are NULL disable the corresponding feature: float_weights is
   tried first, then the int8 weights; with neither set the product is zero
   and only bias/diag contribute. */
typedef struct {
/* added to every output; NULL for no bias */
const float *bias;
/* replaces `bias` for int8 weights when the build defines USE_SU_BIAS */
const float *subias;
/* int8-quantised weight matrix, used together with `scale` */
const opus_int8 *weights;
/* float weight matrix */
const float *float_weights;
/* non-NULL selects the sparse matrix-vector kernels */
const int *weights_idx;
/* optional diagonal term; only valid when nb_outputs == 3*nb_inputs (GRU recurrent weights) */
const float *diag;
/* scaling passed to the int8 kernels */
const float *scale;
int nb_inputs;
int nb_outputs;
} LinearLayer;
/* 2-D convolution layer with float weights, processed one time frame at a
   time by compute_conv2d(). */
typedef struct {
/* one value per output channel; NULL for no bias */
const float *bias;
/* kernel laid out as [ out_channels x in_channels x ktime x kheight ] */
const float *float_weights;
int in_channels;
int out_channels;
/* kernel extent along the time (frame) axis */
int ktime;
/* kernel extent along the height axis */
int kheight;
} Conv2dLayer;
/* Layer-level entry points. `arch` is the run-time CPU selector handed on to
   compute_linear() / compute_activation(). */
/* output = activation(layer(input)) */
void compute_generic_dense(const LinearLayer *layer, float *output, const float *input, int activation, int arch);
/* one GRU step; `state` is updated in place and must not alias `in` */
void compute_generic_gru(const LinearLayer *input_weights, const LinearLayer *recurrent_weights, float *state, const float *in, int arch);
/* causal conv1d step over [ mem | input ]; `mem` is updated */
void compute_generic_conv1d(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int activation, int arch);
/* as compute_generic_conv1d(), with kernel taps spaced `dilation` frames apart */
void compute_generic_conv1d_dilation(const LinearLayer *layer, float *output, float *mem, const float *input, int input_size, int dilation, int activation, int arch);
/* output = input * sigmoid(layer(input)); layer must be square */
void compute_glu(const LinearLayer *layer, float *output, const float *input, int arch);
/* NOTE(review): definition not seen alongside the functions above -- confirm it exists and what it computes */
void compute_gated_activation(const LinearLayer *layer, float *output, const float *input, int activation, int arch);
/* NOTE(review): presumably builds a WeightArray table from a binary blob of `len` bytes; return value and ownership of *list to be confirmed */
int parse_weights(WeightArray **list, const unsigned char *data, int len);
/* Weight tables of the individual models; defined outside this header. */
extern const WeightArray lpcnet_arrays[];
extern const WeightArray plcmodel_arrays[];
extern const WeightArray rdovaeenc_arrays[];
extern const WeightArray rdovaedec_arrays[];
extern const WeightArray fwgan_arrays[];
extern const WeightArray fargan_arrays[];
extern const WeightArray pitchdnn_arrays[];
extern const WeightArray lossgen_arrays[];
/* Fills `layer` from the table `arrays`. Each string argument names the array
   to use for the LinearLayer field of the same name.
   NOTE(review): presumably a NULL name leaves the field unset and a non-zero
   return signals a missing or mis-sized array -- confirm in the implementation. */
int linear_init(LinearLayer *layer, const WeightArray *arrays,
const char *bias,
const char *subias,
const char *weights,
const char *float_weights,
const char *weights_idx,
const char *diag,
const char *scale,
int nb_inputs,
int nb_outputs);
/* Fills `layer` from the table `arrays`; same conventions as linear_init()
   (NOTE(review): confirm). */
int conv2d_init(Conv2dLayer *layer, const WeightArray *arrays,
const char *bias,
const char *float_weights,
int in_channels,
int out_channels,
int ktime,
int kheight);
/* Plain C implementations; these are what compute_linear(),
   compute_activation() and compute_conv2d() expand to unless an architecture
   header overrides them. */
void compute_linear_c(const LinearLayer *linear, float *out, const float *in);
void compute_activation_c(float *output, const float *input, int N, int activation);
void compute_conv2d_c(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation);
/* Architecture headers may define the OVERRIDE_COMPUTE_* macros together with
   their own compute_linear / compute_activation / compute_conv2d. */
#if defined(OPUS_ARM_MAY_HAVE_DOTPROD) || defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
#include "arm/dnn_arm.h"
#endif
#if defined(OPUS_X86_MAY_HAVE_SSE2)
#include "x86/dnn_x86.h"
#endif
/* Defaults: ignore `arch` and call the plain C implementation. */
#ifndef OVERRIDE_COMPUTE_LINEAR
#define compute_linear(linear, out, in, arch) ((void)(arch),compute_linear_c(linear, out, in))
#endif
#ifndef OVERRIDE_COMPUTE_ACTIVATION
#define compute_activation(output, input, N, activation, arch) ((void)(arch),compute_activation_c(output, input, N, activation))
#endif
#ifndef OVERRIDE_COMPUTE_CONV2D
#define compute_conv2d(conv, out, mem, in, height, hstride, activation, arch) ((void)(arch),compute_conv2d_c(conv, out, mem, in, height, hstride, activation))
#endif
/* Build-time notice on x86-64 when neither the SSE4.1 nor the AVX2 code paths
   are enabled. */
#if defined(__x86_64__) && !defined(OPUS_X86_MAY_HAVE_SSE4_1) && !defined(OPUS_X86_MAY_HAVE_AVX2)
#if defined(_MSC_VER)
#pragma message ("Only SSE and SSE2 are available. On newer machines, enable SSSE3/AVX/AVX2 to get better performance")
#else
#warning "Only SSE and SSE2 are available. On newer machines, enable SSSE3/AVX/AVX2 using -march= to get better performance"
#endif
#endif
#endif /* NNET_H_ */

247
dnn/nnet_arch.h Normal file
View File

@ -0,0 +1,247 @@
/* Copyright (c) 2018-2019 Mozilla
2023 Amazon */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef NNET_ARCH_H
#define NNET_ARCH_H
#include "nnet.h"
#include "arch.h"
#include "os_support.h"
#include "vec.h"
/* RTCD_SUF(name) pastes RTCD_ARCH onto `name`; the extra CAT_SUFFIX level
   makes sure RTCD_ARCH is macro-expanded before the paste. The including file
   is expected to define RTCD_ARCH. */
#define CAT_SUFFIX2(a,b) a ## b
#define CAT_SUFFIX(a,b) CAT_SUFFIX2(a, b)
#define RTCD_SUF(name) CAT_SUFFIX(name, RTCD_ARCH)
/* Force vectorization on for DNN code because some of the loops rely on
compiler vectorization rather than explicitly using intrinsics. */
#if OPUS_GNUC_PREREQ(5,1)
/* remembered so the options can be popped again at the end of this header */
#define GCC_POP_OPTIONS
#pragma GCC push_options
#pragma GCC optimize("tree-vectorize")
#endif
/* Largest N supported by vec_swish(); dimensions its stack buffer. */
#define MAX_ACTIVATIONS (4096)
/* Swish activation: y[k] = x[k] * sigmoid(x[k]) for k in [0, N).
   The sigmoid is evaluated into a temporary first, so y may be the same
   buffer as x. N must not exceed MAX_ACTIVATIONS. */
static OPUS_INLINE void vec_swish(float *y, const float *x, int N)
{
   float sig[MAX_ACTIVATIONS];
   int k;

   celt_assert(N <= MAX_ACTIVATIONS);
   vec_sigmoid(sig, x, N);
   for (k=0;k<N;k++) {
      y[k] = x[k]*sig[k];
   }
}
/* Rectified linear unit: 0 for negative x, x otherwise. */
static OPUS_INLINE float relu(float x)
{
   if (x < 0) {
      return 0;
   }
   return x;
}
/*#define HIGH_ACCURACY */
/* Applies `activation` (one of the ACTIVATION_* identifiers) to N values.
 *
 * output      receives N values; may be the same buffer as `input`
 * input       N input values
 * activation  any other value than the known identifiers trips an assertion
 *             and is treated as ACTIVATION_LINEAR
 *
 * With HIGH_ACCURACY defined, sigmoid and tanh use the libm functions instead
 * of the vectorized approximations. With SOFTMAX_HACK defined,
 * ACTIVATION_SOFTMAX leaves the values un-normalised (plain copy).
 */
void RTCD_SUF(compute_activation_)(float *output, const float *input, int N, int activation)
{
   int i;
   if (activation == ACTIVATION_SIGMOID) {
#ifdef HIGH_ACCURACY
      /* uses the function-scope index: loop-scope declarations are not C89 */
      for (i=0;i<N;i++)
      {
         output[i] = 1.f / (1 + exp(-input[i]));
      }
#else
      vec_sigmoid(output, input, N);
#endif
   } else if (activation == ACTIVATION_TANH) {
#ifdef HIGH_ACCURACY
      for (i=0;i<N;i++)
      {
         output[i] = tanh(input[i]);
      }
#else
      vec_tanh(output, input, N);
#endif
   } else if (activation == ACTIVATION_SWISH) {
      vec_swish(output, input, N);
   } else if (activation == ACTIVATION_RELU) {
      for (i=0;i<N;i++)
         output[i] = relu(input[i]);
   } else if (activation == ACTIVATION_SOFTMAX) {
#ifdef SOFTMAX_HACK
      /* Callers routinely run activations in place; a memcpy-style copy of a
         buffer onto itself is undefined, and nothing needs copying then. */
      if (input != output) {
         OPUS_COPY(output, input, N);
      }
#else
      float sum = 0;
      softmax(output, input, N);
      for (i=0;i<N;i++) {
         sum += output[i];
      }
      /* tiny offset guards against a division by zero */
      sum = 1.f/(sum+1e-30);
      for (i=0;i<N;i++)
         output[i] = sum*output[i];
#endif
   } else {
      celt_assert(activation == ACTIVATION_LINEAR);
      if (input != output) {
         for (i=0;i<N;i++)
            output[i] = input[i];
      }
   }
}
void RTCD_SUF(compute_linear_) (const LinearLayer *linear, float *out, const float *in)
{
int i, M, N;
const float *bias;
celt_assert(in != out);
bias = linear->bias;
M = linear->nb_inputs;
N = linear->nb_outputs;
if (linear->float_weights != NULL) {
if (linear->weights_idx != NULL) sparse_sgemv8x4(out, linear->float_weights, linear->weights_idx, N, in);
else sgemv(out, linear->float_weights, N, M, N, in);
} else if (linear->weights != NULL) {
if (linear->weights_idx != NULL) sparse_cgemv8x4(out, linear->weights, linear->weights_idx, linear->scale, N, M, in);
else cgemv8x4(out, linear->weights, linear->scale, N, M, in);
/* Only use SU biases on for integer matrices on SU archs. */
#ifdef USE_SU_BIAS
bias = linear->subias;
#endif
}
else OPUS_CLEAR(out, N);
if (bias != NULL) {
for (i=0;i<N;i++) out[i] += bias[i];
}
if (linear->diag) {
/* Diag is only used for GRU recurrent weights. */
celt_assert(3*M == N);
for (i=0;i<M;i++) {
out[i] += linear->diag[i]*in[i];
out[i+M] += linear->diag[i+M]*in[i];
out[i+2*M] += linear->diag[i+2*M]*in[i];
}
}
}
/* Non-padded 2-D convolution for one output frame.
   Input is  [ ktime x in_channels x (height+kheight-1) ],
   kernel is [ out_channels x in_channels x ktime x kheight ],
   output is [ out_channels x height ], with consecutive output channels
   hstride values apart.
   The output extent along the ktime axis is 1, i.e. one frame is produced
   per call. */
static void conv2d_float(float *out, const float *weights, int in_channels, int out_channels, int ktime, int kheight, const float *in, int height, int hstride)
{
   int oc;
   const int in_stride = height+kheight-1;
   const int ksize = ktime*kheight;

   for (oc=0;oc<out_channels;oc++) {
      int ic;
      float *dst = &out[oc*hstride];

      OPUS_CLEAR(dst, height);
      for (ic=0;ic<in_channels;ic++) {
         int t;
         for (t=0;t<ktime;t++) {
            int h;
            for (h=0;h<kheight;h++) {
               int j;
               /* one kernel tap, applied along the whole output row */
               const float w = weights[oc*in_channels*ksize + ic*ksize + t*kheight + h];
               const float *src = &in[t*in_channels*in_stride + ic*in_stride + h];
               for (j=0;j<height;j++) {
                  dst[j] += w*src[j];
               }
            }
         }
      }
   }
}
/* There's no intrinsics in this function (or the one above) because the gcc (and hopefully other compiler) auto-vectorizer is smart enough to
produce the right code by itself based on the compile flags. */
/* Specialisation of conv2d_float() for ktime == kheight == 3, with the two
   kernel loops unrolled into a single nine-term expression.
   Input is  [ 3 x in_channels x (height+2) ], kernel is
   [ out_channels x in_channels x 3 x 3 ], output is [ out_channels x height ]
   with consecutive output channels hstride values apart. */
static void conv2d_3x3_float(float *out, const float *weights, int in_channels, int out_channels, const float *in, int height, int hstride)
{
int i;
int in_stride;
int kheight, ktime;
kheight = ktime = 3;
/* row length of one input channel: output height plus the kernel overhang */
in_stride = height+kheight-1;
for (i=0;i<out_channels;i++) {
int m;
/* each output row starts at zero and accumulates over the input channels */
OPUS_CLEAR(&out[i*hstride], height);
for (m=0;m<in_channels;m++) {
int j;
for (j=0;j<height;j++) {
/* Unrolled version of previous function -- compiler will figure out the indexing simplifications. */
out[i*hstride + j] += weights[i*in_channels*ktime*kheight + m*ktime*kheight + 0*kheight + 0]*in[0*in_channels*in_stride + m*in_stride + j + 0]
+ weights[i*in_channels*ktime*kheight + m*ktime*kheight + 0*kheight + 1]*in[0*in_channels*in_stride + m*in_stride + j + 1]
+ weights[i*in_channels*ktime*kheight + m*ktime*kheight + 0*kheight + 2]*in[0*in_channels*in_stride + m*in_stride + j + 2]
+ weights[i*in_channels*ktime*kheight + m*ktime*kheight + 1*kheight + 0]*in[1*in_channels*in_stride + m*in_stride + j + 0]
+ weights[i*in_channels*ktime*kheight + m*ktime*kheight + 1*kheight + 1]*in[1*in_channels*in_stride + m*in_stride + j + 1]
+ weights[i*in_channels*ktime*kheight + m*ktime*kheight + 1*kheight + 2]*in[1*in_channels*in_stride + m*in_stride + j + 2]
+ weights[i*in_channels*ktime*kheight + m*ktime*kheight + 2*kheight + 0]*in[2*in_channels*in_stride + m*in_stride + j + 0]
+ weights[i*in_channels*ktime*kheight + m*ktime*kheight + 2*kheight + 1]*in[2*in_channels*in_stride + m*in_stride + j + 1]
+ weights[i*in_channels*ktime*kheight + m*ktime*kheight + 2*kheight + 2]*in[2*in_channels*in_stride + m*in_stride + j + 2];
}
}
}
}
#define MAX_CONV2D_INPUTS 8192
void RTCD_SUF(compute_conv2d_)(const Conv2dLayer *conv, float *out, float *mem, const float *in, int height, int hstride, int activation)
{
int i;
const float *bias;
float in_buf[MAX_CONV2D_INPUTS];
int time_stride;
celt_assert(in != out);
time_stride = conv->in_channels*(height+conv->kheight-1);
celt_assert(conv->ktime*time_stride <= MAX_CONV2D_INPUTS);
OPUS_COPY(in_buf, mem, (conv->ktime-1)*time_stride);
OPUS_COPY(&in_buf[(conv->ktime-1)*time_stride], in, time_stride);
OPUS_COPY(mem, &in_buf[time_stride], (conv->ktime-1)*time_stride);
bias = conv->bias;
if (conv->kheight == 3 && conv->ktime == 3)
conv2d_3x3_float(out, conv->float_weights, conv->in_channels, conv->out_channels, in_buf, height, hstride);
else
conv2d_float(out, conv->float_weights, conv->in_channels, conv->out_channels, conv->ktime, conv->kheight, in_buf, height, hstride);
if (bias != NULL) {
for (i=0;i<conv->out_channels;i++) {
int j;
for (j=0;j<height;j++) out[i*hstride+j] += bias[i];
}
}
for (i=0;i<conv->out_channels;i++) {
RTCD_SUF(compute_activation_)(&out[i*hstride], &out[i*hstride], height, activation);
}
}
/* Restore the optimisation options saved by the push_options near the top of
   this header. */
#ifdef GCC_POP_OPTIONS
#pragma GCC pop_options
#endif
/* end of the NNET_ARCH_H include guard */
#endif

Some files were not shown because too many files have changed in this diff Show More